<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/">
  <channel>
    <title>Forem: Ma Uttaram</title>
    <description>The latest articles on Forem by Ma Uttaram (@ma_uttaram_f822b3b02ec546).</description>
    <link>https://forem.com/ma_uttaram_f822b3b02ec546</link>
    <image>
      <url>https://media2.dev.to/dynamic/image/width=90,height=90,fit=cover,gravity=auto,format=auto/https:%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Fuser%2Fprofile_image%2F3817033%2Fa22df131-c16d-488d-8322-e273f2357c0e.png</url>
      <title>Forem: Ma Uttaram</title>
      <link>https://forem.com/ma_uttaram_f822b3b02ec546</link>
    </image>
    <atom:link rel="self" type="application/rss+xml" href="https://forem.com/feed/ma_uttaram_f822b3b02ec546"/>
    <language>en</language>
    <item>
      <title>Part 3</title>
      <dc:creator>Ma Uttaram</dc:creator>
      <pubDate>Fri, 17 Apr 2026 21:11:04 +0000</pubDate>
      <link>https://forem.com/ma_uttaram_f822b3b02ec546/part-3-3119</link>
      <guid>https://forem.com/ma_uttaram_f822b3b02ec546/part-3-3119</guid>
      <description>&lt;p&gt;Facebook News Feed&lt;/p&gt;

&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fa4cc8cn88qlusa23es7z.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fa4cc8cn88qlusa23es7z.png" alt=" " width="800" height="379"&gt;&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;Tinder&lt;/p&gt;

&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fo5x880yx1657ow5d3o02.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fo5x880yx1657ow5d3o02.png" alt=" " width="800" height="432"&gt;&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;LeetCode&lt;/p&gt;

&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fy3snt7prr8ucw7sxdxt5.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fy3snt7prr8ucw7sxdxt5.png" alt=" " width="800" height="372"&gt;&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;Yelp&lt;/p&gt;

&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fty6ka25k0k0o5bgppgso.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fty6ka25k0k0o5bgppgso.png" alt=" " width="800" height="333"&gt;&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;Rate Limiter&lt;/p&gt;

&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fkjcrxf5danyqzobakia7.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fkjcrxf5danyqzobakia7.png" alt=" " width="800" height="487"&gt;&lt;/a&gt;&lt;/p&gt;

</description>
    </item>
    <item>
      <title>Part 2</title>
      <dc:creator>Ma Uttaram</dc:creator>
      <pubDate>Fri, 17 Apr 2026 19:04:45 +0000</pubDate>
      <link>https://forem.com/ma_uttaram_f822b3b02ec546/part-2-2j59</link>
      <guid>https://forem.com/ma_uttaram_f822b3b02ec546/part-2-2j59</guid>
      <description>&lt;p&gt;Uber &lt;/p&gt;

&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fmrtgw3jc5lmff4914og8.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fmrtgw3jc5lmff4914og8.png" alt=" " width="800" height="375"&gt;&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;Robinhood&lt;br&gt;
-- Key point: a high number of requests from various IPs is not allowed, so limit by source IP.&lt;/p&gt;

&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fcmd72hj2is581l7z1m55.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fcmd72hj2is581l7z1m55.png" alt=" " width="800" height="425"&gt;&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;Google Docs&lt;/p&gt;

&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fsj2ztc28n7btgervlw4d.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fsj2ztc28n7btgervlw4d.png" alt=" " width="800" height="393"&gt;&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;Distributed Cache&lt;/p&gt;

&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fq5kok2mj9fi3wk2rbuq6.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fq5kok2mj9fi3wk2rbuq6.png" alt=" " width="800" height="420"&gt;&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;YouTube&lt;/p&gt;

&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fj4if2qyfuqw750h2gouq.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fj4if2qyfuqw750h2gouq.png" alt=" " width="800" height="499"&gt;&lt;/a&gt;&lt;/p&gt;

</description>
    </item>
    <item>
      <title>Part 1</title>
      <dc:creator>Ma Uttaram</dc:creator>
      <pubDate>Thu, 16 Apr 2026 19:49:43 +0000</pubDate>
      <link>https://forem.com/ma_uttaram_f822b3b02ec546/parts-1-1cho</link>
      <guid>https://forem.com/ma_uttaram_f822b3b02ec546/parts-1-1cho</guid>
      <description>&lt;p&gt;News Feed Service&lt;/p&gt;

&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fyf0w0ex2bbhtjp0pfg4v.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fyf0w0ex2bbhtjp0pfg4v.png" alt=" " width="800" height="518"&gt;&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;Dropbox&lt;/p&gt;

&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Ftwse0z1h91f229dv3tu9.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Ftwse0z1h91f229dv3tu9.png" alt=" " width="800" height="428"&gt;&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;Tiny URL&lt;/p&gt;

&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fce37ls4pawt4abzap8v0.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fce37ls4pawt4abzap8v0.png" alt=" " width="800" height="343"&gt;&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;Local Delivery Service&lt;/p&gt;

&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fpa0hmisn6npqmc11tgdt.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fpa0hmisn6npqmc11tgdt.png" alt=" " width="800" height="491"&gt;&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;Ticketmaster&lt;/p&gt;

&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fyhnelzwlqn49pwr0i136.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fyhnelzwlqn49pwr0i136.png" alt=" " width="800" height="372"&gt;&lt;/a&gt;&lt;/p&gt;

</description>
    </item>
    <item>
      <title>DAG vs LangGraph Nodes</title>
      <dc:creator>Ma Uttaram</dc:creator>
      <pubDate>Sun, 29 Mar 2026 00:39:21 +0000</pubDate>
      <link>https://forem.com/ma_uttaram_f822b3b02ec546/dag-vs-langraph-nodes-3en2</link>
      <guid>https://forem.com/ma_uttaram_f822b3b02ec546/dag-vs-langraph-nodes-3en2</guid>
      <description>&lt;p&gt;When we have a DAG that represents our tasks and their dependencies, do we still need LangGraph nodes?&lt;/p&gt;

&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fpavz85becb7fws0kmr3y.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fpavz85becb7fws0kmr3y.png" alt=" " width="800" height="795"&gt;&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;LangGraph nodes are most valuable when your flow has cycles — loops, retries, HITL resume, conditional branching back to earlier steps. A pure DAG has none of that, so LangGraph's core value proposition doesn't apply.&lt;/p&gt;

&lt;p&gt;So are they mutually exclusive? No! They can complement each other. See the beautiful project to understand why.&lt;/p&gt;

&lt;p&gt;LangGraph nodes for the orchestration layer: intent_parser → entity_extractor → plan_builder → hitl_plan → cot_builder → hitl_confirm. This is the stateful, cyclic part.&lt;/p&gt;

&lt;p&gt;Your own DAG executor for task execution: takes the confirmed TaskGraph, runs a topological sort, fans out independent tasks with asyncio, collects results. Twenty to thirty lines of plain Python. No framework.&lt;/p&gt;

&lt;p&gt;&lt;code&gt;Send&lt;/code&gt; API for parallel dispatch: LangGraph-native fan-out; no manual thread/asyncio management; results merged automatically.&lt;/p&gt;

&lt;p&gt;The DAG (networkx) and LangGraph Send API solve different problems and are complementary — not alternatives. See the 'Why both networkx DAG and LangGraph Send API?' section added to the Overview. Short answer: networkx tells us WHAT to run (which steps are ready, via topological ordering and predecessor checks); LangGraph Send API handles HOW to run them concurrently (fan-out to parallel node invocations, automatic state merge on fan-in). You could replace Send with asyncio.gather inside a single node, but you'd lose per-step checkpointing (partial progress survives failures), automatic state merging, and consistency with the rest of the LangGraph pipeline.&lt;/p&gt;

&lt;p&gt;Your mental model was almost right — let me clarify the full split:&lt;/p&gt;

&lt;table&gt;
&lt;thead&gt;
&lt;tr&gt;&lt;th&gt;Concern&lt;/th&gt;&lt;th&gt;Tool&lt;/th&gt;&lt;th&gt;Why&lt;/th&gt;&lt;/tr&gt;
&lt;/thead&gt;
&lt;tbody&gt;
&lt;tr&gt;&lt;td&gt;Dependency graph structure&lt;/td&gt;&lt;td&gt;networkx DAG&lt;/td&gt;&lt;td&gt;Planner builds it, CoT validator walks it, executor queries it for readiness&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;Parallel task execution&lt;/td&gt;&lt;td&gt;LangGraph Send API&lt;/td&gt;&lt;td&gt;Fan-out ready steps as concurrent node invocations; LangGraph auto-merges results&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;HITL + session resume&lt;/td&gt;&lt;td&gt;LangGraph interrupt() + PostgresSaver&lt;/td&gt;&lt;td&gt;Only LangGraph provides checkpointed pause/resume semantics&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;
&lt;/table&gt;

&lt;p&gt;The DAG is used across all pipeline stages — not just the executor — as a data structure for graph operations (cycle detection, topological sort, predecessor queries). LangGraph is the execution engine throughout. The executor uses both: DAG to decide which steps are ready, Send API to run them in parallel. Added a dedicated section in the executor design doc explaining this.&lt;/p&gt;

&lt;p&gt;The DAG (networkx) is a data structure for representing and querying step dependencies — it lives in dag.py and is used by three different pipeline stages (planner for cycle detection, CoT validator for topological walk, executor for readiness checks). LangGraph graph nodes (validate_cot, execute_step, etc.) are the execution units in the agent runtime. They use the DAG as a utility library. There's no conflict — the DAG tells the LangGraph nodes what order/parallelism is required; LangGraph handles actually running them, checkpointing state, and managing HITL interrupts.&lt;/p&gt;

&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Ftz6dg03tw8y7nwzkjyly.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Ftz6dg03tw8y7nwzkjyly.png" alt=" " width="800" height="1045"&gt;&lt;/a&gt;&lt;/p&gt;

</description>
      <category>agents</category>
      <category>ai</category>
      <category>architecture</category>
      <category>llm</category>
    </item>
    <item>
      <title>LangGraph vs LangChain</title>
      <dc:creator>Ma Uttaram</dc:creator>
      <pubDate>Sat, 28 Mar 2026 22:32:39 +0000</pubDate>
      <link>https://forem.com/ma_uttaram_f822b3b02ec546/langraph-vs-langchain-23g6</link>
      <guid>https://forem.com/ma_uttaram_f822b3b02ec546/langraph-vs-langchain-23g6</guid>
      <description>&lt;p&gt;As I was working on HITL, I was checking whether we could start with existing agentic frameworks like LangGraph and LangChain. The following was very interesting.&lt;/p&gt;

&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2F106ynunkc0z3l1qe04p2.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2F106ynunkc0z3l1qe04p2.png" alt=" " width="800" height="850"&gt;&lt;/a&gt;&lt;/p&gt;

</description>
    </item>
    <item>
      <title>Evaluation Techniques</title>
      <dc:creator>Ma Uttaram</dc:creator>
      <pubDate>Sat, 28 Mar 2026 20:07:01 +0000</pubDate>
      <link>https://forem.com/ma_uttaram_f822b3b02ec546/evaluation-techniques-50mm</link>
      <guid>https://forem.com/ma_uttaram_f822b3b02ec546/evaluation-techniques-50mm</guid>
      <description>&lt;p&gt;There are six main evaluation techniques, falling into two broad families: those that compare against a known answer, and those that use judgment. Here's a visual overview, then the explanation of each. Here's each technique explained:&lt;/p&gt;




&lt;p&gt;&lt;strong&gt;Exact match&lt;/strong&gt; — the simplest form. You know the correct answer, and you check whether the output equals it exactly. Works well for structured tasks: intent classification ("is this a booking request?"), entity extraction where the expected output is a fixed JSON, or tool selection ("should the agent call the calendar API or the email API here?"). Brittle for open-ended text because two correct answers can be worded differently.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Schema / constraint validation&lt;/strong&gt; — instead of checking exact values, you check the shape of the output. Does entity extraction return a valid &lt;code&gt;Task&lt;/code&gt; schema with all required fields? Did the plan builder produce a properly ordered list? This is what Pydantic and Zod do, and it's directly relevant to BuddingBuilder's FR #7. It catches malformed outputs even when the content is hard to verify.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Code execution / unit test&lt;/strong&gt; — the gold standard for any agent that produces code or structured plans. You run the output and check whether tests pass. For BuddingBuilder this applies to any task whose result is deterministically verifiable — a calculation, a formatted document, a database query result.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Reference-based LLM judge&lt;/strong&gt; — you have a golden answer, and you ask a judge model to compare the agent's output against it and score the match. Returns a score and a reason. More flexible than exact match because it can handle paraphrasing, but requires you to maintain a library of golden examples, which takes effort to build and keep current.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Rubric-based LLM judge&lt;/strong&gt; — no golden answer needed. You give the judge a scoring rubric ("rate this response 1–5 on correctness, task completion, and safety") and it evaluates the output on its own. This is the most practical technique for staging, because you can write rubrics faster than you can curate golden answers. The key is writing rubrics that are specific enough that the judge can't wriggle around them.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Pairwise preference&lt;/strong&gt; — the judge sees two outputs side by side and picks the better one. You're not asking "is this good?" but "which is better — the old prompt or the new one?" This is the right technique for promotion gates: before moving from dev to staging, run pairwise eval between the new version and the current prod version. If the new version wins consistently, promote. This is also how RLHF preference data is collected.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Human eval&lt;/strong&gt; — a human reads and rates the output. Highest signal, but too slow and expensive to run on everything. Its real job is to calibrate your automated judges — you periodically sample flagged traces, have a human rate them, and check whether your judge model's scores agree. If they don't, your rubric needs refining.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Online monitoring&lt;/strong&gt; — the only technique running continuously in prod. The guard model scores inputs before the agent acts; the output validator scores responses after. Neither produces a detailed critique — they produce a fast pass/fail signal with enough metadata to route flagged interactions to the human review queue. This is what closes BuddingBuilder's prod → dev feedback loop.&lt;/p&gt;


&lt;div class="crayons-card c-embed text-styles text-styles--secondary"&gt;
    &lt;div class="c-embed__content"&gt;
      &lt;div class="c-embed__body flex items-center justify-between"&gt;
        &lt;a href="https://claude.ai/public/artifacts/6e95af46-0c33-4cce-a6dd-eeb9679a9a28" rel="noopener noreferrer" class="c-link fw-bold flex items-center"&gt;
          &lt;span class="mr-2"&gt;claude.ai&lt;/span&gt;
          

        &lt;/a&gt;
      &lt;/div&gt;
    &lt;/div&gt;
&lt;/div&gt;


</description>
      <category>ai</category>
      <category>llm</category>
      <category>machinelearning</category>
      <category>testing</category>
    </item>
    <item>
      <title>Reasoning Techniques</title>
      <dc:creator>Ma Uttaram</dc:creator>
      <pubDate>Sat, 28 Mar 2026 18:49:11 +0000</pubDate>
      <link>https://forem.com/ma_uttaram_f822b3b02ec546/reasoning-techniques-3j82</link>
      <guid>https://forem.com/ma_uttaram_f822b3b02ec546/reasoning-techniques-3j82</guid>
      <description>&lt;p&gt;Reasoning is the process of working through a problem in steps rather than jumping straight to an answer. For an LLM, it means producing intermediate thoughts — a chain of logic — before committing to a response.&lt;br&gt;
Think of it this way: when you ask someone a hard question, a thoughtful person doesn't blurt the first thing that comes to mind. They pause, break the problem down, consider what they know, check their logic, and then answer. Reasoning gives LLMs that same pause.&lt;/p&gt;

&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fbupdzonqze1b1z1otwg4.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fbupdzonqze1b1z1otwg4.png" alt=" " width="800" height="896"&gt;&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fboeos4aglv653whg2sfp.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fboeos4aglv653whg2sfp.png" alt=" " width="800" height="645"&gt;&lt;/a&gt;&lt;/p&gt;

</description>
    </item>
  </channel>
</rss>
