<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/">
  <channel>
    <title>DEV Community: TheProdSDE</title>
    <description>The latest articles on DEV Community by TheProdSDE (@theprodsde).</description>
    <link>https://dev.to/theprodsde</link>
    <image>
      <url>https://media2.dev.to/dynamic/image/width=90,height=90,fit=cover,gravity=auto,format=auto/https:%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Fuser%2Fprofile_image%2F3752909%2F36ae54b2-61fd-4d29-b097-ed6fbe2cd7ce.png</url>
      <title>DEV Community: TheProdSDE</title>
      <link>https://dev.to/theprodsde</link>
    </image>
    <atom:link rel="self" type="application/rss+xml" href="https://dev.to/feed/theprodsde"/>
    <language>en</language>
    <item>
      <title>Worth Every Minute of Reading If You Work on RAG</title>
      <dc:creator>TheProdSDE</dc:creator>
      <pubDate>Mon, 06 Apr 2026 09:59:14 +0000</pubDate>
      <link>https://dev.to/theprodsde/worth-each-minute-read-if-you-work-on-rag-g1d</link>
      <guid>https://dev.to/theprodsde/worth-each-minute-read-if-you-work-on-rag-g1d</guid>
      <description>&lt;div class="ltag__link--embedded"&gt;
  &lt;div class="crayons-story "&gt;
  &lt;a href="https://dev.to/theprodsde/why-most-rag-systems-fail-in-production-and-how-to-design-one-that-actually-works-j55" class="crayons-story__hidden-navigation-link"&gt;Why Most RAG Systems Fail in Production (And How to Design One That Actually Works)&lt;/a&gt;


  &lt;div class="crayons-story__body crayons-story__body-full_post"&gt;
    &lt;div class="crayons-story__top"&gt;
      &lt;div class="crayons-story__meta"&gt;
        &lt;div class="crayons-story__author-pic"&gt;

          &lt;a href="/theprodsde" class="crayons-avatar  crayons-avatar--l  "&gt;
            &lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Fuser%2Fprofile_image%2F3752909%2F36ae54b2-61fd-4d29-b097-ed6fbe2cd7ce.png" alt="theprodsde profile" class="crayons-avatar__image" width="500" height="500"&gt;
          &lt;/a&gt;
        &lt;/div&gt;
        &lt;div&gt;
          &lt;div&gt;
            &lt;a href="/theprodsde" class="crayons-story__secondary fw-medium m:hidden"&gt;
              TheProdSDE
            &lt;/a&gt;
            &lt;div class="profile-preview-card relative mb-4 s:mb-0 fw-medium hidden m:inline-block"&gt;
              
                TheProdSDE
                
              
              &lt;div id="story-author-preview-content-3367068" class="profile-preview-card__content crayons-dropdown branded-7 p-4 pt-0"&gt;
                &lt;div class="gap-4 grid"&gt;
                  &lt;div class="-mt-4"&gt;
                    &lt;a href="/theprodsde" class="flex"&gt;
                      &lt;span class="crayons-avatar crayons-avatar--xl mr-2 shrink-0"&gt;
                        &lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Fuser%2Fprofile_image%2F3752909%2F36ae54b2-61fd-4d29-b097-ed6fbe2cd7ce.png" class="crayons-avatar__image" alt="" width="500" height="500"&gt;
                      &lt;/span&gt;
                      &lt;span class="crayons-link crayons-subtitle-2 mt-5"&gt;TheProdSDE&lt;/span&gt;
                    &lt;/a&gt;
                  &lt;/div&gt;
                  &lt;div class="print-hidden"&gt;
                    
                      Follow
                    
                  &lt;/div&gt;
                  &lt;div class="author-preview-metadata-container"&gt;&lt;/div&gt;
                &lt;/div&gt;
              &lt;/div&gt;
            &lt;/div&gt;

          &lt;/div&gt;
          &lt;a href="https://dev.to/theprodsde/why-most-rag-systems-fail-in-production-and-how-to-design-one-that-actually-works-j55" class="crayons-story__tertiary fs-xs"&gt;&lt;time&gt;Mar 24&lt;/time&gt;&lt;span class="time-ago-indicator-initial-placeholder"&gt;&lt;/span&gt;&lt;/a&gt;
        &lt;/div&gt;
      &lt;/div&gt;

    &lt;/div&gt;

    &lt;div class="crayons-story__indention"&gt;
      &lt;h2 class="crayons-story__title crayons-story__title-full_post"&gt;
        &lt;a href="https://dev.to/theprodsde/why-most-rag-systems-fail-in-production-and-how-to-design-one-that-actually-works-j55" id="article-link-3367068"&gt;
          Why Most RAG Systems Fail in Production (And How to Design One That Actually Works)
        &lt;/a&gt;
      &lt;/h2&gt;
        &lt;div class="crayons-story__tags"&gt;
            &lt;a class="crayons-tag  crayons-tag--monochrome " href="/t/ai"&gt;&lt;span class="crayons-tag__prefix"&gt;#&lt;/span&gt;ai&lt;/a&gt;
            &lt;a class="crayons-tag  crayons-tag--monochrome " href="/t/rag"&gt;&lt;span class="crayons-tag__prefix"&gt;#&lt;/span&gt;rag&lt;/a&gt;
            &lt;a class="crayons-tag  crayons-tag--monochrome " href="/t/agents"&gt;&lt;span class="crayons-tag__prefix"&gt;#&lt;/span&gt;agents&lt;/a&gt;
            &lt;a class="crayons-tag  crayons-tag--monochrome " href="/t/software"&gt;&lt;span class="crayons-tag__prefix"&gt;#&lt;/span&gt;software&lt;/a&gt;
        &lt;/div&gt;
      &lt;div class="crayons-story__bottom"&gt;
        &lt;div class="crayons-story__details"&gt;
            &lt;a href="https://dev.to/theprodsde/why-most-rag-systems-fail-in-production-and-how-to-design-one-that-actually-works-j55#comments" class="crayons-btn crayons-btn--s crayons-btn--ghost crayons-btn--icon-left flex items-center"&gt;
              Comments


              1&lt;span class="hidden s:inline"&gt; comment&lt;/span&gt;
            &lt;/a&gt;
        &lt;/div&gt;
        &lt;div class="crayons-story__save"&gt;
          &lt;small class="crayons-story__tertiary fs-xs mr-2"&gt;
            4 min read
          &lt;/small&gt;
            
              &lt;span class="bm-initial"&gt;
                

              &lt;/span&gt;
              &lt;span class="bm-success"&gt;
                

              &lt;/span&gt;
            
        &lt;/div&gt;
      &lt;/div&gt;
    &lt;/div&gt;
  &lt;/div&gt;
&lt;/div&gt;

&lt;/div&gt;


</description>
    </item>
    <item>
      <title>Agentic AI Fails in Production for Simple Reasons — What MLDS 2026 Taught Me</title>
      <dc:creator>TheProdSDE</dc:creator>
      <pubDate>Tue, 31 Mar 2026 14:56:35 +0000</pubDate>
      <link>https://dev.to/theprodsde/agentic-ai-fails-in-production-for-simple-reasons-what-mlds-2026-taught-me-2ec</link>
      <guid>https://dev.to/theprodsde/agentic-ai-fails-in-production-for-simple-reasons-what-mlds-2026-taught-me-2ec</guid>
      <description>&lt;p&gt;&lt;strong&gt;TL;DR:&lt;/strong&gt;&lt;br&gt;
Most agentic AI failures in production are not caused by weak models, but by &lt;strong&gt;stale data, poor validation, lost context, and lack of governance&lt;/strong&gt;. MLDS 2026 reinforced that enterprise‑grade agentic AI is a &lt;strong&gt;system design problem&lt;/strong&gt;, requiring validation‑first agents, structural intelligence, strong observability, memory discipline, and cost‑aware orchestration—not just bigger LLMs.&lt;/p&gt;

&lt;p&gt;I recently attended &lt;strong&gt;MLDS 2026 (Machine Learning Developer Summit)&lt;/strong&gt; by Analytics India Magazine (AIM) in Bangalore. While many sessions featured advanced models and agentic frameworks, the most valuable insight was unexpected:&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Most AI systems don’t fail in production because of bad models — they fail because of bad systems.&lt;/strong&gt;&lt;/p&gt;

&lt;p&gt;Across the summit, speakers repeatedly showed that issues like stale data, missing validation, poor observability, and uncontrolled execution are what derail agentic AI at scale—not lack of intelligence.&lt;/p&gt;

&lt;p&gt;A recurring theme across sessions was clear: &lt;strong&gt;the hardest problem in AI today is no longer building impressive demos, but running AI systems reliably at enterprise scale&lt;/strong&gt;. Many real-world failures stem from system design gaps rather than model limitations.&lt;/p&gt;




&lt;h2&gt;
  
  
  A Key Shift: From Models to Systems
&lt;/h2&gt;

&lt;p&gt;One of the most important takeaways from the summit was that &lt;strong&gt;enterprise AI is fundamentally a system design problem&lt;/strong&gt;, not a model selection problem.&lt;/p&gt;

&lt;p&gt;Multiple speakers highlighted common failure modes seen in production:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Stale or outdated data&lt;/li&gt;
&lt;li&gt;Poor data granularity&lt;/li&gt;
&lt;li&gt;Context loss across multi-step workflows&lt;/li&gt;
&lt;li&gt;False confidence and lack of validation&lt;/li&gt;
&lt;li&gt;Black-box decisions with no observability&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;This explains why many AI solutions look powerful in prototypes but break down in real operational environments.&lt;/p&gt;




&lt;h2&gt;
  
  
  Policy Learning vs. Structural Intelligence
&lt;/h2&gt;

&lt;p&gt;A particularly insightful discussion contrasted two approaches:&lt;/p&gt;

&lt;h3&gt;
  
  
  Runtime Policy Learning
&lt;/h3&gt;

&lt;p&gt;Examples include &lt;strong&gt;Reinforcement Learning (RL)&lt;/strong&gt;, &lt;strong&gt;MADDPG&lt;/strong&gt;, and &lt;strong&gt;Graph Neural Networks (GNNs)&lt;/strong&gt;:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Dynamic decision-making&lt;/li&gt;
&lt;li&gt;GPU-intensive&lt;/li&gt;
&lt;li&gt;Higher cost and latency&lt;/li&gt;
&lt;li&gt;Harder to govern and observe&lt;/li&gt;
&lt;/ul&gt;

&lt;h3&gt;
  
  
  Structural Intelligence at Design Time
&lt;/h3&gt;

&lt;p&gt;In this approach, intelligence is &lt;strong&gt;encoded into the system structure itself&lt;/strong&gt;, often using graph-based designs:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Relationships are resolved at construction time&lt;/li&gt;
&lt;li&gt;Minimal runtime inference&lt;/li&gt;
&lt;li&gt;Deterministic behavior&lt;/li&gt;
&lt;li&gt;Lower cost and faster response&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;&lt;strong&gt;Key insight:&lt;/strong&gt; Not every intelligent system needs continuous runtime learning. When relationships are stable, embedding intelligence structurally can be more efficient and reliable.&lt;/p&gt;




&lt;h2&gt;
  
  
  Validation-First Agent Design
&lt;/h2&gt;

&lt;p&gt;Another strong theme was the shift toward &lt;strong&gt;validation-first agents&lt;/strong&gt;, not answer-first agents.&lt;/p&gt;

&lt;p&gt;Successful agentic systems:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Ground every important output to source data&lt;/li&gt;
&lt;li&gt;Track freshness and provenance&lt;/li&gt;
&lt;li&gt;Validate semantics before taking actions&lt;/li&gt;
&lt;li&gt;Plan explicitly before executing&lt;/li&gt;
&lt;li&gt;Expose confidence where appropriate&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;Several talks emphasized that observability should evolve from &lt;em&gt;“what happened?”&lt;/em&gt; to &lt;em&gt;“was the result actually correct?”&lt;/em&gt;.&lt;/p&gt;




&lt;h2&gt;
  
  
  Agentic Memory: Accuracy, Cost, and Trust
&lt;/h2&gt;

&lt;p&gt;Sessions on agentic memory highlighted how &lt;strong&gt;short-term memory, long-term memory, and pruning strategies&lt;/strong&gt; directly influence:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Accuracy&lt;/li&gt;
&lt;li&gt;Latency&lt;/li&gt;
&lt;li&gt;Cost&lt;/li&gt;
&lt;li&gt;User trust&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;The key takeaway was that memory should be treated as a &lt;strong&gt;first-class architectural concern&lt;/strong&gt;, with explicit design choices and benchmarks—rather than an ad-hoc cache bolted on later.&lt;/p&gt;




&lt;h2&gt;
  
  
  Data Platforms and Practical Architecture Choices
&lt;/h2&gt;

&lt;p&gt;The summit also covered modern data platforms that unify &lt;strong&gt;OLTP and OLAP workloads&lt;/strong&gt;, with strong support for &lt;strong&gt;time-series data&lt;/strong&gt;. These architectures reduce complexity and make near–real-time analytics more accessible.&lt;/p&gt;

&lt;p&gt;A broader lesson emerged: &lt;strong&gt;cost, latency, reliability, and accuracy must be designed together&lt;/strong&gt;. Choosing larger models without optimizing workflows, routing, and memory leads to unnecessary compute cost and slower systems.&lt;/p&gt;




&lt;h2&gt;
  
  
  Putting Agents into Production: Real-World Risks
&lt;/h2&gt;

&lt;p&gt;One session focused entirely on lessons learned from deploying agents in production. Four recurring risks were highlighted:&lt;/p&gt;

&lt;ol&gt;
&lt;li&gt;
&lt;strong&gt;Silent failures&lt;/strong&gt; – systems appear healthy but produce wrong outputs
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Black-box decisions&lt;/strong&gt; – lack of explainability and traceability
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Permission explosion&lt;/strong&gt; – agents accumulating excessive access
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Runaway execution&lt;/strong&gt; – uncontrolled tool calls and rising costs
&lt;/li&gt;
&lt;/ol&gt;

&lt;p&gt;These issues reinforce the importance of governance, guardrails, observability, and scoped execution from day one.&lt;/p&gt;




&lt;h2&gt;
  
  
  AI-Assisted Development Needs Guardrails
&lt;/h2&gt;

&lt;p&gt;Another notable takeaway was the need to pair &lt;strong&gt;AI-assisted code generation&lt;/strong&gt; with strong &lt;strong&gt;static analysis and security validation&lt;/strong&gt;. Integrations with tools like SonarQube demonstrate how AI-written and human-written code can be:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Validated automatically&lt;/li&gt;
&lt;li&gt;Secured against vulnerabilities&lt;/li&gt;
&lt;li&gt;Fixed via generated pull requests&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;This closes the gap between productivity gains and production reliability.&lt;/p&gt;




&lt;h2&gt;
  
  
  Final Reflections
&lt;/h2&gt;

&lt;p&gt;MLDS 2026 reinforced a critical idea:&lt;/p&gt;

&lt;blockquote&gt;
&lt;p&gt;&lt;strong&gt;The future of AI in enterprises depends more on architecture, validation, and governance than on model strength alone.&lt;/strong&gt;&lt;/p&gt;
&lt;/blockquote&gt;

&lt;p&gt;Agentic AI succeeds when it is:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Grounded in reliable data&lt;/li&gt;
&lt;li&gt;Observable and debuggable&lt;/li&gt;
&lt;li&gt;Cost-aware and execution-bounded&lt;/li&gt;
&lt;li&gt;Designed around real workflows&lt;/li&gt;
&lt;li&gt;Rolled out with clear trust and adoption strategies&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;The biggest mindset shift is moving from &lt;em&gt;“How powerful is the model?”&lt;/em&gt; to &lt;em&gt;“How reliable and efficient is the end-to-end intelligent workflow?”&lt;/em&gt;&lt;/p&gt;

&lt;p&gt;That, more than anything, was the most valuable learning from the summit.&lt;/p&gt;




&lt;p&gt;If you’re working on agentic AI in production, I’d love to hear:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Where have agents broken down for you?&lt;/li&gt;
&lt;li&gt;What controls or guardrails helped the most?&lt;/li&gt;
&lt;li&gt;Are you handling validation and memory explicitly—or implicitly?&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;Let’s compare notes.&lt;/p&gt;

</description>
      <category>ai</category>
      <category>architecture</category>
      <category>devops</category>
      <category>llm</category>
    </item>
    <item>
      <title>Why Most RAG Systems Fail in Production (And How to Design One That Actually Works)</title>
      <dc:creator>TheProdSDE</dc:creator>
      <pubDate>Tue, 24 Mar 2026 12:23:04 +0000</pubDate>
      <link>https://dev.to/theprodsde/why-most-rag-systems-fail-in-production-and-how-to-design-one-that-actually-works-j55</link>
      <guid>https://dev.to/theprodsde/why-most-rag-systems-fail-in-production-and-how-to-design-one-that-actually-works-j55</guid>
      <description>&lt;blockquote&gt;
&lt;p&gt;&lt;em&gt;A practical, system design–focused breakdown of why RAG systems degrade after launch—and what actually works in production.&lt;/em&gt;&lt;/p&gt;
&lt;/blockquote&gt;




&lt;p&gt;Everyone builds a RAG system.&lt;/p&gt;

&lt;p&gt;And almost all of them work — in demos.&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Clean query&lt;/li&gt;
&lt;li&gt;Relevant chunks&lt;/li&gt;
&lt;li&gt;Decent answer&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;Ship it.&lt;/p&gt;

&lt;p&gt;Then production happens.&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Users ask vague follow-ups&lt;/li&gt;
&lt;li&gt;Retrieval returns partial context&lt;/li&gt;
&lt;li&gt;The model answers confidently… and incorrectly&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;And suddenly:&lt;/p&gt;

&lt;blockquote&gt;
&lt;p&gt;&lt;strong&gt;Your “working” RAG system becomes unreliable.&lt;/strong&gt;&lt;/p&gt;
&lt;/blockquote&gt;




&lt;h2&gt;
  
  
  The Reality: RAG Fails Quietly
&lt;/h2&gt;

&lt;p&gt;RAG doesn’t crash. It degrades.&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Slightly wrong answers&lt;/li&gt;
&lt;li&gt;Missing context&lt;/li&gt;
&lt;li&gt;Hallucinated explanations with citations&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;Which is worse than a system that fails loudly.&lt;/p&gt;

&lt;p&gt;Most teams blame:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;embeddings&lt;/li&gt;
&lt;li&gt;vector database&lt;/li&gt;
&lt;li&gt;chunk size&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;But in real systems:&lt;/p&gt;

&lt;blockquote&gt;
&lt;p&gt;&lt;strong&gt;RAG failures are usually system design failures—not retrieval failures.&lt;/strong&gt;&lt;/p&gt;
&lt;/blockquote&gt;




&lt;h2&gt;
  
  
  What a Production RAG System Actually Looks Like
&lt;/h2&gt;

&lt;p&gt;Not this:&lt;/p&gt;

&lt;blockquote&gt;
&lt;p&gt;Query → Vector DB → LLM&lt;/p&gt;
&lt;/blockquote&gt;

&lt;p&gt;But this:&lt;/p&gt;

&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2F2up9k15qpcbmdnpz2kto.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2F2up9k15qpcbmdnpz2kto.png" alt="Prod Arch" width="775" height="1804"&gt;&lt;/a&gt;&lt;/p&gt;




&lt;h2&gt;
  
  
  Step 1: Parsing Matters More Than You Think
&lt;/h2&gt;

&lt;p&gt;Most pipelines start like this:&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight python"&gt;&lt;code&gt;&lt;span class="n"&gt;text&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="n"&gt;pdf&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;read&lt;/span&gt;&lt;span class="p"&gt;()&lt;/span&gt;
&lt;span class="n"&gt;chunks&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nf"&gt;split&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;text&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;
&lt;span class="n"&gt;embeddings&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nf"&gt;embed&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;chunks&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;This is where things already break.&lt;/p&gt;

&lt;h3&gt;
  
  
  Problem
&lt;/h3&gt;

&lt;ul&gt;
&lt;li&gt;PDFs lose structure&lt;/li&gt;
&lt;li&gt;Tables turn into noise&lt;/li&gt;
&lt;li&gt;Headers/footers pollute chunks&lt;/li&gt;
&lt;li&gt;Sections lose meaning&lt;/li&gt;
&lt;/ul&gt;

&lt;h3&gt;
  
  
  Production Approach
&lt;/h3&gt;



&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight plaintext"&gt;&lt;code&gt;Document → Layout-aware parsing → Structured sections → Clean chunks
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;Key principles:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;preserve headings and hierarchy&lt;/li&gt;
&lt;li&gt;remove boilerplate&lt;/li&gt;
&lt;li&gt;chunk by meaning, not length&lt;/li&gt;
&lt;/ul&gt;

&lt;blockquote&gt;
&lt;p&gt;&lt;strong&gt;If parsing is wrong, retrieval will always be wrong.&lt;/strong&gt;&lt;/p&gt;
&lt;/blockquote&gt;




&lt;h2&gt;
  
  
  Step 2: Dense vs Sparse Retrieval (You Need Both)
&lt;/h2&gt;

&lt;h3&gt;
  
  
  Dense Retrieval (Embeddings)
&lt;/h3&gt;

&lt;ul&gt;
&lt;li&gt;semantic similarity&lt;/li&gt;
&lt;li&gt;handles vague queries&lt;/li&gt;
&lt;li&gt;fails on exact matches&lt;/li&gt;
&lt;/ul&gt;




&lt;h3&gt;
  
  
  Sparse Retrieval (BM25 / Keyword)
&lt;/h3&gt;

&lt;ul&gt;
&lt;li&gt;exact term matching&lt;/li&gt;
&lt;li&gt;works for IDs, clauses&lt;/li&gt;
&lt;li&gt;ignores meaning&lt;/li&gt;
&lt;/ul&gt;




&lt;h3&gt;
  
  
  Production Pattern: Hybrid Retrieval
&lt;/h3&gt;

&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2F2p3dos13k150u211qog5.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2F2p3dos13k150u211qog5.png" alt="Hybrid Retrieval" width="800" height="197"&gt;&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;This gives:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;semantic understanding&lt;/li&gt;
&lt;li&gt;exact precision&lt;/li&gt;
&lt;/ul&gt;

&lt;blockquote&gt;
&lt;p&gt;&lt;strong&gt;Using only vector search is a common production mistake.&lt;/strong&gt;&lt;/p&gt;
&lt;/blockquote&gt;




&lt;h2&gt;
  
  
  Step 3: Reranking (The Accuracy Multiplier)
&lt;/h2&gt;

&lt;p&gt;Top-K retrieval is noisy.&lt;/p&gt;

&lt;p&gt;Add a &lt;strong&gt;reranker&lt;/strong&gt; (cross-encoder):&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;evaluates (query, chunk) pairs&lt;/li&gt;
&lt;li&gt;reorders by true relevance&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;This significantly improves answer quality without changing your database.&lt;/p&gt;




&lt;h2&gt;
  
  
  Step 4: Context Building (Where Systems Win or Lose)
&lt;/h2&gt;

&lt;p&gt;Even with good retrieval, most failures happen here.&lt;/p&gt;

&lt;h3&gt;
  
  
  Common Mistakes
&lt;/h3&gt;

&lt;ul&gt;
&lt;li&gt;stuffing too many chunks&lt;/li&gt;
&lt;li&gt;mixing unrelated documents&lt;/li&gt;
&lt;li&gt;ignoring token limits&lt;/li&gt;
&lt;/ul&gt;




&lt;h3&gt;
  
  
  Production Approach
&lt;/h3&gt;

&lt;ul&gt;
&lt;li&gt;select top-ranked chunks only&lt;/li&gt;
&lt;li&gt;preserve document structure&lt;/li&gt;
&lt;li&gt;enforce token budget&lt;/li&gt;
&lt;li&gt;maintain ordering&lt;/li&gt;
&lt;/ul&gt;

&lt;blockquote&gt;
&lt;p&gt;&lt;strong&gt;Better context &amp;gt; more context&lt;/strong&gt;&lt;/p&gt;
&lt;/blockquote&gt;




&lt;h2&gt;
  
  
  Vector DB vs Graph DB — When to Use What
&lt;/h2&gt;




&lt;h3&gt;
  
  
  Use Vector Database When
&lt;/h3&gt;

&lt;ul&gt;
&lt;li&gt;unstructured data&lt;/li&gt;
&lt;li&gt;semantic search&lt;/li&gt;
&lt;li&gt;document retrieval&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fctfdm6ce0okep4c7z159.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fctfdm6ce0okep4c7z159.png" alt="Vector DB usecase" width="794" height="764"&gt;&lt;/a&gt;&lt;/p&gt;




&lt;h3&gt;
  
  
  Use Graph Database When
&lt;/h3&gt;

&lt;ul&gt;
&lt;li&gt;relationships matter&lt;/li&gt;
&lt;li&gt;multi-hop reasoning&lt;/li&gt;
&lt;li&gt;structured entities&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fdu2uqier2j76b1rqzyyq.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fdu2uqier2j76b1rqzyyq.png" alt="Graph DB usecase" width="722" height="972"&gt;&lt;/a&gt;&lt;/p&gt;




&lt;h3&gt;
  
  
  Hybrid (Real Systems)
&lt;/h3&gt;

&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fulorxshvfzyn486hmf8u.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fulorxshvfzyn486hmf8u.png" alt="Hybrid DB usecase" width="794" height="972"&gt;&lt;/a&gt;&lt;/p&gt;

&lt;blockquote&gt;
&lt;p&gt;Use graph when relationships matter.&lt;br&gt;
Use vector when meaning matters.&lt;br&gt;
Use both when systems get complex.&lt;/p&gt;
&lt;/blockquote&gt;




&lt;h2&gt;
  
  
  RAG Is Not Single-Turn — Managing Context Over Time
&lt;/h2&gt;

&lt;p&gt;Most systems fail here.&lt;/p&gt;

&lt;p&gt;RAG is not just:&lt;/p&gt;

&lt;blockquote&gt;
&lt;p&gt;retrieve → answer&lt;/p&gt;
&lt;/blockquote&gt;

&lt;p&gt;It’s:&lt;/p&gt;

&lt;blockquote&gt;
&lt;p&gt;retrieve → answer → follow-up → correction → refinement&lt;/p&gt;
&lt;/blockquote&gt;




&lt;h2&gt;
  
  
  The Problem: Context Drift
&lt;/h2&gt;

&lt;p&gt;If you blindly append chat history:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;token usage explodes&lt;/li&gt;
&lt;li&gt;wrong answers get reinforced&lt;/li&gt;
&lt;li&gt;relevance drops&lt;/li&gt;
&lt;/ul&gt;




&lt;h2&gt;
  
  
  Production Strategy: Context Is a Filter
&lt;/h2&gt;

&lt;p&gt;Not a dump.&lt;/p&gt;

&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2F2zmb0bj9k0do74h101no.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2F2zmb0bj9k0do74h101no.png" alt="Context as a filter" width="514" height="1180"&gt;&lt;/a&gt;&lt;/p&gt;




&lt;h3&gt;
  
  
  Context Layers
&lt;/h3&gt;

&lt;ol&gt;
&lt;li&gt;Store full history&lt;/li&gt;
&lt;li&gt;Select only relevant turns&lt;/li&gt;
&lt;li&gt;Exclude invalid or corrected responses&lt;/li&gt;
&lt;li&gt;Combine with retrieved context&lt;/li&gt;
&lt;/ol&gt;




&lt;h2&gt;
  
  
  When to Summarize vs Include Raw History
&lt;/h2&gt;

&lt;h3&gt;
  
  
  Include Raw
&lt;/h3&gt;

&lt;ul&gt;
&lt;li&gt;short conversations&lt;/li&gt;
&lt;li&gt;active refinement&lt;/li&gt;
&lt;li&gt;recent corrections&lt;/li&gt;
&lt;/ul&gt;




&lt;h3&gt;
  
  
  Summarize
&lt;/h3&gt;

&lt;ul&gt;
&lt;li&gt;long conversations (&amp;gt;5–7 turns)&lt;/li&gt;
&lt;li&gt;approaching token limits&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fdm6w9gprolpifc5ct0t8.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fdm6w9gprolpifc5ct0t8.png" alt="Summarize vs include raw history" width="800" height="558"&gt;&lt;/a&gt;&lt;/p&gt;




&lt;h3&gt;
  
  
  Critical Rule
&lt;/h3&gt;

&lt;blockquote&gt;
&lt;p&gt;&lt;strong&gt;Summarize facts—not hallucinations.&lt;/strong&gt;&lt;/p&gt;
&lt;/blockquote&gt;

&lt;p&gt;If a previous answer was wrong:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;exclude it&lt;/li&gt;
&lt;li&gt;prioritize user correction&lt;/li&gt;
&lt;/ul&gt;




&lt;h2&gt;
  
  
  Handling User Corrections (Critical for Trust)
&lt;/h2&gt;

&lt;p&gt;Users will fix your system.&lt;/p&gt;

&lt;p&gt;If you ignore that, the system feels broken.&lt;/p&gt;




&lt;h3&gt;
  
  
  Strategy
&lt;/h3&gt;

&lt;ul&gt;
&lt;li&gt;mark incorrect responses&lt;/li&gt;
&lt;li&gt;exclude them from future context&lt;/li&gt;
&lt;li&gt;boost corrected information&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;Example:&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight json"&gt;&lt;code&gt;&lt;span class="p"&gt;{&lt;/span&gt;&lt;span class="w"&gt;
  &lt;/span&gt;&lt;span class="nl"&gt;"turn_id"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;&lt;span class="w"&gt; &lt;/span&gt;&lt;span class="mi"&gt;8&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;&lt;span class="w"&gt;
  &lt;/span&gt;&lt;span class="nl"&gt;"valid"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;&lt;span class="w"&gt; &lt;/span&gt;&lt;span class="kc"&gt;false&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;&lt;span class="w"&gt;
  &lt;/span&gt;&lt;span class="nl"&gt;"corrected"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;&lt;span class="w"&gt; &lt;/span&gt;&lt;span class="kc"&gt;true&lt;/span&gt;&lt;span class="w"&gt;
&lt;/span&gt;&lt;span class="p"&gt;}&lt;/span&gt;&lt;span class="w"&gt;
&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;






&lt;h2&gt;
  
  
  Agentic RAG (When Retrieval Needs Reasoning)
&lt;/h2&gt;

&lt;p&gt;Basic RAG is static.&lt;/p&gt;

&lt;p&gt;Agentic RAG adds:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;planning&lt;/li&gt;
&lt;li&gt;iteration&lt;/li&gt;
&lt;li&gt;tool usage&lt;/li&gt;
&lt;/ul&gt;




&lt;h3&gt;
  
  
  Architecture
&lt;/h3&gt;

&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fcerbgga5knsatpplsf9z.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fcerbgga5knsatpplsf9z.png" alt="Agentic RAG architecture" width="649" height="1099"&gt;&lt;/a&gt;&lt;/p&gt;




&lt;h3&gt;
  
  
  Use It When
&lt;/h3&gt;

&lt;ul&gt;
&lt;li&gt;multi-step queries&lt;/li&gt;
&lt;li&gt;missing context&lt;/li&gt;
&lt;li&gt;dynamic retrieval&lt;/li&gt;
&lt;/ul&gt;




&lt;h3&gt;
  
  
  Avoid It When
&lt;/h3&gt;

&lt;ul&gt;
&lt;li&gt;simple Q&amp;amp;A&lt;/li&gt;
&lt;li&gt;strict latency requirements&lt;/li&gt;
&lt;/ul&gt;

&lt;blockquote&gt;
&lt;p&gt;Otherwise you're adding complexity without ROI.&lt;/p&gt;
&lt;/blockquote&gt;




&lt;h2&gt;
  
  
  Confidence Scores and Citations (Trust Layer)
&lt;/h2&gt;

&lt;p&gt;Without trust signals, users don’t trust answers.&lt;/p&gt;




&lt;h3&gt;
  
  
  Citations
&lt;/h3&gt;

&lt;p&gt;Always return:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;source document&lt;/li&gt;
&lt;li&gt;section or chunk reference&lt;/li&gt;
&lt;/ul&gt;




&lt;h3&gt;
  
  
  Confidence Score (Simple Heuristic)
&lt;/h3&gt;

&lt;p&gt;Combine:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;retrieval score&lt;/li&gt;
&lt;li&gt;reranker score&lt;/li&gt;
&lt;li&gt;validation signal&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;Example:&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight plaintext"&gt;&lt;code&gt;confidence =
  0.4 * retrieval +
  0.4 * reranker +
  0.2 * validation
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;






&lt;h3&gt;
  
  
  Optional Validation Step
&lt;/h3&gt;

&lt;p&gt;Ask the model:&lt;/p&gt;

&lt;blockquote&gt;
&lt;p&gt;“Is this answer fully supported by the context?”&lt;/p&gt;
&lt;/blockquote&gt;

&lt;p&gt;Lower confidence if not.&lt;/p&gt;




&lt;h2&gt;
  
  
  Guardrail: Don’t Trust the Model Alone
&lt;/h2&gt;

&lt;p&gt;Even with RAG:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;hallucinations still happen&lt;/li&gt;
&lt;li&gt;citations can be fabricated&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;Enforce:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;answers must reference retrieved chunks&lt;/li&gt;
&lt;li&gt;no context → no answer&lt;/li&gt;
&lt;/ul&gt;




&lt;h2&gt;
  
  
  Final Architecture (Multi-Turn RAG System)
&lt;/h2&gt;

&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fft7ekmt83a7u0btw68yq.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fft7ekmt83a7u0btw68yq.png" alt="Final architecture of the multi-turn RAG system" width="800" height="533"&gt;&lt;/a&gt;&lt;/p&gt;




&lt;h2&gt;
  
  
  Production Checklist
&lt;/h2&gt;

&lt;p&gt;If your system doesn’t have these, it will fail:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;structured parsing&lt;/li&gt;
&lt;li&gt;hybrid retrieval&lt;/li&gt;
&lt;li&gt;reranking&lt;/li&gt;
&lt;li&gt;controlled context building&lt;/li&gt;
&lt;li&gt;memory filtering&lt;/li&gt;
&lt;li&gt;correction handling&lt;/li&gt;
&lt;li&gt;confidence + citations&lt;/li&gt;
&lt;li&gt;observability&lt;/li&gt;
&lt;/ul&gt;




&lt;h2&gt;
  
  
  The Real Rule
&lt;/h2&gt;

&lt;blockquote&gt;
&lt;p&gt;&lt;strong&gt;RAG is not a retrieval problem. It’s a system design problem.&lt;/strong&gt;&lt;/p&gt;
&lt;/blockquote&gt;




&lt;h2&gt;
  
  
  What Actually Works
&lt;/h2&gt;

&lt;p&gt;The best RAG systems are:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;simple&lt;/li&gt;
&lt;li&gt;structured&lt;/li&gt;
&lt;li&gt;observable&lt;/li&gt;
&lt;li&gt;measurable&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;Not over-engineered.&lt;/p&gt;




&lt;h2&gt;
  
  
  Final Thought
&lt;/h2&gt;

&lt;p&gt;If your system only works when:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;the query is perfect&lt;/li&gt;
&lt;li&gt;the data is clean&lt;/li&gt;
&lt;li&gt;the demo is controlled&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;Then it doesn’t work.&lt;/p&gt;




&lt;h2&gt;
  
  
  What’s Next
&lt;/h2&gt;

&lt;p&gt;Once RAG works, the next bottleneck is:&lt;/p&gt;

&lt;blockquote&gt;
&lt;p&gt;&lt;strong&gt;Cost.&lt;/strong&gt;&lt;/p&gt;
&lt;/blockquote&gt;

&lt;p&gt;Next up: why LLM systems become expensive in production—and how to control that cost without killing performance.&lt;/p&gt;

</description>
      <category>ai</category>
      <category>rag</category>
      <category>agents</category>
      <category>software</category>
    </item>
    <item>
      <title>Most AI Agent Frameworks Are Overkill — Here's How to Choose the Right One in 30 Seconds</title>
      <dc:creator>TheProdSDE</dc:creator>
      <pubDate>Wed, 18 Mar 2026 12:58:03 +0000</pubDate>
      <link>https://dev.to/theprodsde/most-ai-agent-frameworks-are-overkill-heres-how-to-choose-the-right-one-in-30-seconds-37k3</link>
      <guid>https://dev.to/theprodsde/most-ai-agent-frameworks-are-overkill-heres-how-to-choose-the-right-one-in-30-seconds-37k3</guid>
      <description>&lt;blockquote&gt;
&lt;p&gt;&lt;em&gt;A senior engineer's field-tested breakdown of LangGraph, AutoGen, CrewAI, Microsoft Agent Framework, and Haystack — from reviewing real production systems across teams.&lt;/em&gt;&lt;/p&gt;
&lt;/blockquote&gt;




&lt;p&gt;Everyone is building AI agents right now.&lt;/p&gt;

&lt;p&gt;LangGraph. AutoGen. CrewAI. Semantic Kernel. Microsoft Agent Framework.&lt;/p&gt;

&lt;p&gt;But most production AI systems don't actually need an agent framework.&lt;/p&gt;

&lt;p&gt;Across multiple teams and production codebases I've reviewed, the same two failure modes appear constantly — over-engineering and under-engineering. In one case, replacing a complex agent framework with ~200 lines of plain tool-calling code made the system 3× faster, easier to debug, and easier to maintain. In another, the absence of a framework caused a codebase to collapse under its own complexity.&lt;/p&gt;

&lt;p&gt;Both failures had the same root cause:&lt;/p&gt;

&lt;blockquote&gt;
&lt;p&gt;&lt;strong&gt;The problem isn't choosing the wrong framework. It's choosing a framework before understanding the workflow.&lt;/strong&gt;&lt;/p&gt;
&lt;/blockquote&gt;

&lt;p&gt;This guide is the decision framework I now use before touching any agent tooling.&lt;/p&gt;




&lt;h2&gt;
  
  
  TL;DR — Pick Your Approach in 30 Seconds
&lt;/h2&gt;

&lt;div class="table-wrapper-paragraph"&gt;&lt;table&gt;
&lt;thead&gt;
&lt;tr&gt;
&lt;th&gt;Workflow Shape&lt;/th&gt;
&lt;th&gt;Recommended Approach&lt;/th&gt;
&lt;/tr&gt;
&lt;/thead&gt;
&lt;tbody&gt;
&lt;tr&gt;
&lt;td&gt;Single request → tools → response&lt;/td&gt;
&lt;td&gt;Plain tool calling (no framework)&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Self-correcting loops, retries, re-evaluation&lt;/td&gt;
&lt;td&gt;LangGraph 1.1.2&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Parallel specialist agents&lt;/td&gt;
&lt;td&gt;AutoGen 0.7.5&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Enterprise persistent agents (Azure)&lt;/td&gt;
&lt;td&gt;Microsoft Agent Framework RC&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Sequential role-based task delegation&lt;/td&gt;
&lt;td&gt;CrewAI 1.10.1&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Document analysis and extraction pipelines&lt;/td&gt;
&lt;td&gt;Haystack 2.x&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;
&lt;/table&gt;&lt;/div&gt;




&lt;h2&gt;
  
  
  Workflow Shapes at a Glance
&lt;/h2&gt;

&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2F43ss9dwvby161agwccf7.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2F43ss9dwvby161agwccf7.png" alt="How to choose a framework based on workflow shape" width="800" height="823"&gt;&lt;/a&gt;&lt;/p&gt;




&lt;h2&gt;
  
  
  Two Real Failure Modes I've Seen Across Teams
&lt;/h2&gt;

&lt;p&gt;&lt;strong&gt;The over-engineering case.&lt;/strong&gt;&lt;/p&gt;

&lt;p&gt;During a cross-team architecture review, I found a financial data analysis pipeline — pull market metrics, cross-reference filings, produce a risk score — built with three microservices, a LangGraph orchestrator with twelve nodes, Redis for inter-agent memory, and a separate evaluation loop. Six weeks of engineering. It worked.&lt;/p&gt;

&lt;p&gt;What it actually needed: 180 lines of FastAPI with direct OpenAI tool calling. Same output. 3× faster inference. Any engineer on the team could debug it in minutes.&lt;/p&gt;

&lt;p&gt;After the review, the team simplified it together. The framework had been adopted before anyone mapped the workflow shape — a straight line — and LangGraph's graph model added cost with no return.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;The under-engineering case.&lt;/strong&gt;&lt;/p&gt;

&lt;p&gt;In a separate review, I found the opposite problem. An infrastructure incident response system — Azure alerts, metric retrieval, remediation decisions, rollback logic, retry conditions, escalation thresholds — built with hand-rolled state machines, custom retry logic, and a bespoke tool orchestration layer. Three weeks in, the codebase was a maze. Every new remediation path required rewriting core routing logic.&lt;/p&gt;

&lt;p&gt;A proper agent framework would have provided state management, conditional branching, retry handling, and checkpointing for free. Instead the team was reinventing those primitives by hand — the most expensive kind of technical debt.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;The pattern is the same in both directions: the architecture was chosen before the workflow was understood.&lt;/strong&gt;&lt;/p&gt;




&lt;h2&gt;
  
  
  The Core Question: Does Your Workflow Resist Simplicity?
&lt;/h2&gt;

&lt;p&gt;Before touching any framework, draw the workflow on paper. Then answer these:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Does step N's output determine whether to &lt;strong&gt;redo&lt;/strong&gt; step N-1? → &lt;em&gt;You have a loop&lt;/em&gt;
&lt;/li&gt;
&lt;li&gt;Do multiple specialized agents need to run &lt;strong&gt;simultaneously&lt;/strong&gt;? → &lt;em&gt;You have parallelism&lt;/em&gt;
&lt;/li&gt;
&lt;li&gt;Does the workflow run for &lt;strong&gt;minutes or hours&lt;/strong&gt;, surviving restarts? → &lt;em&gt;You need persistent state&lt;/em&gt;
&lt;/li&gt;
&lt;li&gt;Does the agent need to &lt;strong&gt;decide its next action&lt;/strong&gt; from intermediate results? → &lt;em&gt;Dynamic planning&lt;/em&gt;
&lt;/li&gt;
&lt;li&gt;Do independent agents need to &lt;strong&gt;hand off context&lt;/strong&gt; to each other? → &lt;em&gt;Multi-agent delegation&lt;/em&gt;
&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;&lt;strong&gt;None of these? Use plain tool calling. Here's what that looks like at production quality.&lt;/strong&gt;&lt;/p&gt;




&lt;h2&gt;
  
  
  The Baseline: Plain Tool Calling (No Framework Needed)
&lt;/h2&gt;

&lt;p&gt;&lt;strong&gt;Use case:&lt;/strong&gt; Real-time ESG (Environmental, Social, Governance) risk scoring. Given a ticker, pull sustainability metrics, cross-reference regulatory filings, produce a risk-adjusted score, and persist an audit trail — one clean, observable service.&lt;/p&gt;

&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fzm121pgrz784peoebgn5.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fzm121pgrz784peoebgn5.png" alt="Real-time ESG (Environmental, Social, Governance) risk scoring" width="800" height="387"&gt;&lt;/a&gt;&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight python"&gt;&lt;code&gt;&lt;span class="c1"&gt;# requirements: fastapi, openai, psycopg2-binary, httpx, pydantic
&lt;/span&gt;&lt;span class="kn"&gt;from&lt;/span&gt; &lt;span class="n"&gt;fastapi&lt;/span&gt; &lt;span class="kn"&gt;import&lt;/span&gt; &lt;span class="n"&gt;FastAPI&lt;/span&gt;
&lt;span class="kn"&gt;from&lt;/span&gt; &lt;span class="n"&gt;openai&lt;/span&gt; &lt;span class="kn"&gt;import&lt;/span&gt; &lt;span class="n"&gt;OpenAI&lt;/span&gt;
&lt;span class="kn"&gt;from&lt;/span&gt; &lt;span class="n"&gt;pydantic&lt;/span&gt; &lt;span class="kn"&gt;import&lt;/span&gt; &lt;span class="n"&gt;BaseModel&lt;/span&gt;
&lt;span class="kn"&gt;import&lt;/span&gt; &lt;span class="n"&gt;json&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;httpx&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;psycopg2&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;os&lt;/span&gt;

&lt;span class="n"&gt;app&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nc"&gt;FastAPI&lt;/span&gt;&lt;span class="p"&gt;()&lt;/span&gt;
&lt;span class="n"&gt;client&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nc"&gt;OpenAI&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;api_key&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="n"&gt;os&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="n"&gt;environ&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;OPENAI_API_KEY&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;])&lt;/span&gt;

&lt;span class="k"&gt;class&lt;/span&gt; &lt;span class="nc"&gt;ESGRequest&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;BaseModel&lt;/span&gt;&lt;span class="p"&gt;):&lt;/span&gt;
    &lt;span class="n"&gt;ticker&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nb"&gt;str&lt;/span&gt;
    &lt;span class="n"&gt;portfolio_id&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nb"&gt;str&lt;/span&gt;

&lt;span class="k"&gt;def&lt;/span&gt; &lt;span class="nf"&gt;get_esg_metrics&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;ticker&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nb"&gt;str&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="o"&gt;-&amp;gt;&lt;/span&gt; &lt;span class="nb"&gt;dict&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;
    &lt;span class="n"&gt;resp&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="n"&gt;httpx&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;get&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
        &lt;span class="sa"&gt;f&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;https://api.sustainalytics.com/v1/esg/&lt;/span&gt;&lt;span class="si"&gt;{&lt;/span&gt;&lt;span class="n"&gt;ticker&lt;/span&gt;&lt;span class="si"&gt;}&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
        &lt;span class="n"&gt;headers&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="p"&gt;{&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;Authorization&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="sa"&gt;f&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;Bearer &lt;/span&gt;&lt;span class="si"&gt;{&lt;/span&gt;&lt;span class="n"&gt;os&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="n"&gt;environ&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="sh"&gt;'&lt;/span&gt;&lt;span class="s"&gt;ESG_API_KEY&lt;/span&gt;&lt;span class="sh"&gt;'&lt;/span&gt;&lt;span class="p"&gt;]&lt;/span&gt;&lt;span class="si"&gt;}&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;},&lt;/span&gt;
        &lt;span class="n"&gt;timeout&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="mi"&gt;10&lt;/span&gt;
    &lt;span class="p"&gt;)&lt;/span&gt;
    &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="n"&gt;resp&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;json&lt;/span&gt;&lt;span class="p"&gt;()&lt;/span&gt;

&lt;span class="k"&gt;def&lt;/span&gt; &lt;span class="nf"&gt;get_regulatory_flags&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;ticker&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nb"&gt;str&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="o"&gt;-&amp;gt;&lt;/span&gt; &lt;span class="nb"&gt;dict&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;
    &lt;span class="k"&gt;with&lt;/span&gt; &lt;span class="n"&gt;psycopg2&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;connect&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;os&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="n"&gt;environ&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;DATABASE_URL&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;])&lt;/span&gt; &lt;span class="k"&gt;as&lt;/span&gt; &lt;span class="n"&gt;conn&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;
        &lt;span class="k"&gt;with&lt;/span&gt; &lt;span class="n"&gt;conn&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;cursor&lt;/span&gt;&lt;span class="p"&gt;()&lt;/span&gt; &lt;span class="k"&gt;as&lt;/span&gt; &lt;span class="n"&gt;cur&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;
            &lt;span class="n"&gt;cur&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;execute&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="sh"&gt;"""&lt;/span&gt;&lt;span class="s"&gt;
                SELECT flag_type, severity, filing_date, description
                FROM regulatory_flags WHERE ticker = %s
                AND filing_date &amp;gt; NOW() - INTERVAL &lt;/span&gt;&lt;span class="sh"&gt;'&lt;/span&gt;&lt;span class="s"&gt;2 years&lt;/span&gt;&lt;span class="sh"&gt;'&lt;/span&gt;&lt;span class="s"&gt;
                ORDER BY severity DESC LIMIT 10
            &lt;/span&gt;&lt;span class="sh"&gt;"""&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;ticker&lt;/span&gt;&lt;span class="p"&gt;,))&lt;/span&gt;
            &lt;span class="n"&gt;rows&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="n"&gt;cur&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;fetchall&lt;/span&gt;&lt;span class="p"&gt;()&lt;/span&gt;
    &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;flags&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;[{&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;type&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="n"&gt;r&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="mi"&gt;0&lt;/span&gt;&lt;span class="p"&gt;],&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;severity&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="n"&gt;r&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="mi"&gt;1&lt;/span&gt;&lt;span class="p"&gt;],&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;date&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nf"&gt;str&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;r&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="mi"&gt;2&lt;/span&gt;&lt;span class="p"&gt;]),&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;detail&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="n"&gt;r&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="mi"&gt;3&lt;/span&gt;&lt;span class="p"&gt;]}&lt;/span&gt; &lt;span class="k"&gt;for&lt;/span&gt; &lt;span class="n"&gt;r&lt;/span&gt; &lt;span class="ow"&gt;in&lt;/span&gt; &lt;span class="n"&gt;rows&lt;/span&gt;&lt;span class="p"&gt;]}&lt;/span&gt;

&lt;span class="n"&gt;TOOLS&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="p"&gt;[&lt;/span&gt;
    &lt;span class="p"&gt;{&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;type&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;function&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;function&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
        &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;name&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;get_esg_metrics&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
        &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;description&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;Fetch ESG sustainability scores for a stock ticker&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
        &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;parameters&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;type&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;object&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;properties&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;ticker&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;type&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;string&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;}},&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;required&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;ticker&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;]}&lt;/span&gt;
    &lt;span class="p"&gt;}},&lt;/span&gt;
    &lt;span class="p"&gt;{&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;type&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;function&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;function&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
        &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;name&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;get_regulatory_flags&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
        &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;description&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;Retrieve regulatory violations and compliance flags&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
        &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;parameters&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;type&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;object&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;properties&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;ticker&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;type&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;string&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;}},&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;required&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;ticker&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;]}&lt;/span&gt;
    &lt;span class="p"&gt;}}&lt;/span&gt;
&lt;span class="p"&gt;]&lt;/span&gt;

&lt;span class="n"&gt;TOOL_MAP&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
    &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;get_esg_metrics&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="n"&gt;get_esg_metrics&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;get_regulatory_flags&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="n"&gt;get_regulatory_flags&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;

&lt;span class="nd"&gt;@app.post&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;/assess-esg&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;
&lt;span class="k"&gt;async&lt;/span&gt; &lt;span class="k"&gt;def&lt;/span&gt; &lt;span class="nf"&gt;assess_esg&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;req&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="n"&gt;ESGRequest&lt;/span&gt;&lt;span class="p"&gt;):&lt;/span&gt;
    &lt;span class="n"&gt;messages&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="p"&gt;[{&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;role&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;user&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;content&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;(&lt;/span&gt;
        &lt;span class="sa"&gt;f&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;Run a full ESG risk assessment for &lt;/span&gt;&lt;span class="si"&gt;{&lt;/span&gt;&lt;span class="n"&gt;req&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="n"&gt;ticker&lt;/span&gt;&lt;span class="si"&gt;}&lt;/span&gt;&lt;span class="s"&gt; in portfolio &lt;/span&gt;&lt;span class="si"&gt;{&lt;/span&gt;&lt;span class="n"&gt;req&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="n"&gt;portfolio_id&lt;/span&gt;&lt;span class="si"&gt;}&lt;/span&gt;&lt;span class="s"&gt;. &lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;
        &lt;span class="sa"&gt;f&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;Produce a risk-adjusted score with recommendation: HOLD, REDUCE, or DIVEST.&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;
    &lt;span class="p"&gt;)}]&lt;/span&gt;
    &lt;span class="k"&gt;while&lt;/span&gt; &lt;span class="bp"&gt;True&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;
        &lt;span class="n"&gt;response&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="n"&gt;client&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="n"&gt;chat&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="n"&gt;completions&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;create&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
            &lt;span class="n"&gt;model&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;gpt-4o&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;messages&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="n"&gt;messages&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;tools&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="n"&gt;TOOLS&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;tool_choice&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;auto&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;
        &lt;span class="p"&gt;)&lt;/span&gt;
        &lt;span class="n"&gt;msg&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="n"&gt;response&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="n"&gt;choices&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="mi"&gt;0&lt;/span&gt;&lt;span class="p"&gt;].&lt;/span&gt;&lt;span class="n"&gt;message&lt;/span&gt;
        &lt;span class="k"&gt;if&lt;/span&gt; &lt;span class="ow"&gt;not&lt;/span&gt; &lt;span class="n"&gt;msg&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="n"&gt;tool_calls&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;
            &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;ticker&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="n"&gt;req&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="n"&gt;ticker&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;assessment&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="n"&gt;msg&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="n"&gt;content&lt;/span&gt;&lt;span class="p"&gt;}&lt;/span&gt;
        &lt;span class="n"&gt;messages&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;append&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;msg&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;
        &lt;span class="k"&gt;for&lt;/span&gt; &lt;span class="n"&gt;tc&lt;/span&gt; &lt;span class="ow"&gt;in&lt;/span&gt; &lt;span class="n"&gt;msg&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="n"&gt;tool_calls&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;
            &lt;span class="n"&gt;result&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="n"&gt;TOOL_MAP&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="n"&gt;tc&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="n"&gt;function&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="n"&gt;name&lt;/span&gt;&lt;span class="p"&gt;](&lt;/span&gt;&lt;span class="o"&gt;**&lt;/span&gt;&lt;span class="n"&gt;json&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;loads&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;tc&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="n"&gt;function&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="n"&gt;arguments&lt;/span&gt;&lt;span class="p"&gt;))&lt;/span&gt;
            &lt;span class="n"&gt;messages&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;append&lt;/span&gt;&lt;span class="p"&gt;({&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;role&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;tool&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;tool_call_id&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="n"&gt;tc&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nb"&gt;id&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;content&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="n"&gt;json&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;dumps&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;result&lt;/span&gt;&lt;span class="p"&gt;)})&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;Multi-step tool calling in a single file, fully debuggable by any engineer (persisting the audit trail is omitted from this sample for brevity). If this solves the problem — stop here. No framework needed.&lt;/p&gt;




&lt;h2&gt;
  
  
  1. LangGraph — Stateful Cyclic Workflows
&lt;/h2&gt;

&lt;p&gt;&lt;strong&gt;Version:&lt;/strong&gt; 1.1.2 | &lt;code&gt;pip install langgraph langgraph-checkpoint-redis&lt;/code&gt;&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Use when:&lt;/strong&gt; Your workflow has genuine loops — the result of one step conditionally re-runs a previous step. Redis checkpointing lets state survive service restarts mid-workflow.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Avoid when:&lt;/strong&gt; The workflow is strictly sequential with no conditional branching. The graph model adds measurable overhead for zero architectural return.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Use case:&lt;/strong&gt; Automated cloud cost optimization — scan Azure VMs for underutilization, simulate right-sizing savings, apply low-risk changes, re-scan. The loop continues until the projected monthly savings for a pass no longer exceed the threshold ($500 in the code below) or the iteration cap of five passes is reached.&lt;/p&gt;

&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fyc30200qg4ej5ma44nsk.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fyc30200qg4ej5ma44nsk.png" alt="Automated cloud cost optimization" width="800" height="1411"&gt;&lt;/a&gt;&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight python"&gt;&lt;code&gt;&lt;span class="c1"&gt;# requirements: langgraph, langgraph-checkpoint-redis, openai
&lt;/span&gt;&lt;span class="kn"&gt;from&lt;/span&gt; &lt;span class="n"&gt;langgraph.graph&lt;/span&gt; &lt;span class="kn"&gt;import&lt;/span&gt; &lt;span class="n"&gt;StateGraph&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;END&lt;/span&gt;
&lt;span class="kn"&gt;from&lt;/span&gt; &lt;span class="n"&gt;langgraph.checkpoint.redis&lt;/span&gt; &lt;span class="kn"&gt;import&lt;/span&gt; &lt;span class="n"&gt;RedisSaver&lt;/span&gt;
&lt;span class="kn"&gt;from&lt;/span&gt; &lt;span class="n"&gt;typing&lt;/span&gt; &lt;span class="kn"&gt;import&lt;/span&gt; &lt;span class="n"&gt;TypedDict&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;Annotated&lt;/span&gt;
&lt;span class="kn"&gt;import&lt;/span&gt; &lt;span class="n"&gt;operator&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;json&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;os&lt;/span&gt;
&lt;span class="kn"&gt;from&lt;/span&gt; &lt;span class="n"&gt;openai&lt;/span&gt; &lt;span class="kn"&gt;import&lt;/span&gt; &lt;span class="n"&gt;OpenAI&lt;/span&gt;

&lt;span class="n"&gt;oai&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nc"&gt;OpenAI&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;api_key&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="n"&gt;os&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="n"&gt;environ&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;OPENAI_API_KEY&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;])&lt;/span&gt;

&lt;span class="k"&gt;class&lt;/span&gt; &lt;span class="nc"&gt;CostState&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;TypedDict&lt;/span&gt;&lt;span class="p"&gt;):&lt;/span&gt;
    &lt;span class="n"&gt;subscription_id&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nb"&gt;str&lt;/span&gt;
    &lt;span class="n"&gt;resources&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nb"&gt;list&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="nb"&gt;dict&lt;/span&gt;&lt;span class="p"&gt;]&lt;/span&gt;
    &lt;span class="n"&gt;candidates&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="n"&gt;Annotated&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="nb"&gt;list&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;operator&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="n"&gt;add&lt;/span&gt;&lt;span class="p"&gt;]&lt;/span&gt;
    &lt;span class="n"&gt;applied&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="n"&gt;Annotated&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="nb"&gt;list&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;operator&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="n"&gt;add&lt;/span&gt;&lt;span class="p"&gt;]&lt;/span&gt;
    &lt;span class="n"&gt;total_savings&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nb"&gt;float&lt;/span&gt;
    &lt;span class="n"&gt;iteration&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nb"&gt;int&lt;/span&gt;

&lt;span class="k"&gt;def&lt;/span&gt; &lt;span class="nf"&gt;scan_resources&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;state&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="n"&gt;CostState&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="o"&gt;-&amp;gt;&lt;/span&gt; &lt;span class="nb"&gt;dict&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;
    &lt;span class="n"&gt;resources&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="n"&gt;azure_monitor_client&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;list_resources&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;state&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;subscription_id&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;])&lt;/span&gt;
    &lt;span class="n"&gt;underutilized&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="n"&gt;r&lt;/span&gt; &lt;span class="k"&gt;for&lt;/span&gt; &lt;span class="n"&gt;r&lt;/span&gt; &lt;span class="ow"&gt;in&lt;/span&gt; &lt;span class="n"&gt;resources&lt;/span&gt; &lt;span class="k"&gt;if&lt;/span&gt; &lt;span class="n"&gt;r&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;avg_cpu_7d&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;]&lt;/span&gt; &lt;span class="o"&gt;&amp;lt;&lt;/span&gt; &lt;span class="mi"&gt;15&lt;/span&gt; &lt;span class="ow"&gt;and&lt;/span&gt; &lt;span class="n"&gt;r&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;avg_memory_7d&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;]&lt;/span&gt; &lt;span class="o"&gt;&amp;lt;&lt;/span&gt; &lt;span class="mi"&gt;30&lt;/span&gt;&lt;span class="p"&gt;]&lt;/span&gt;
    &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;resources&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="n"&gt;underutilized&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;iteration&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="n"&gt;state&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;iteration&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;]&lt;/span&gt; &lt;span class="o"&gt;+&lt;/span&gt; &lt;span class="mi"&gt;1&lt;/span&gt;&lt;span class="p"&gt;}&lt;/span&gt;

&lt;span class="k"&gt;def&lt;/span&gt; &lt;span class="nf"&gt;analyze_savings&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;state&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="n"&gt;CostState&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="o"&gt;-&amp;gt;&lt;/span&gt; &lt;span class="nb"&gt;dict&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;
    &lt;span class="n"&gt;resp&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="n"&gt;oai&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="n"&gt;chat&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="n"&gt;completions&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;create&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
        &lt;span class="n"&gt;model&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;gpt-4o&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
        &lt;span class="n"&gt;messages&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="p"&gt;[{&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;role&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;user&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;content&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="sa"&gt;f&lt;/span&gt;&lt;span class="sh"&gt;"""&lt;/span&gt;&lt;span class="s"&gt;
            Analyze these underutilized Azure resources and recommend right-sizing.
            For each: resource_id, current_sku, recommended_sku, monthly_savings_usd, risk_level (LOW/MEDIUM/HIGH).
            Only include LOW or MEDIUM risk recommendations.
            Resources: &lt;/span&gt;&lt;span class="si"&gt;{&lt;/span&gt;&lt;span class="n"&gt;json&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;dumps&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;state&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;resources&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;])&lt;/span&gt;&lt;span class="si"&gt;}&lt;/span&gt;&lt;span class="s"&gt;
            Respond: {{&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;recommendations&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;: [...]}}
        &lt;/span&gt;&lt;span class="sh"&gt;"""&lt;/span&gt;&lt;span class="p"&gt;}],&lt;/span&gt;
        &lt;span class="n"&gt;response_format&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="p"&gt;{&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;type&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;json_object&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;}&lt;/span&gt;
    &lt;span class="p"&gt;)&lt;/span&gt;
    &lt;span class="n"&gt;candidates&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="n"&gt;json&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;loads&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;resp&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="n"&gt;choices&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="mi"&gt;0&lt;/span&gt;&lt;span class="p"&gt;].&lt;/span&gt;&lt;span class="n"&gt;message&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="n"&gt;content&lt;/span&gt;&lt;span class="p"&gt;)[&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;recommendations&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;]&lt;/span&gt;
    &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;candidates&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="n"&gt;candidates&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;total_savings&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nf"&gt;sum&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;c&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;monthly_savings_usd&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;]&lt;/span&gt; &lt;span class="k"&gt;for&lt;/span&gt; &lt;span class="n"&gt;c&lt;/span&gt; &lt;span class="ow"&gt;in&lt;/span&gt; &lt;span class="n"&gt;candidates&lt;/span&gt;&lt;span class="p"&gt;)}&lt;/span&gt;

&lt;span class="k"&gt;def&lt;/span&gt; &lt;span class="nf"&gt;apply_changes&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;state&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="n"&gt;CostState&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="o"&gt;-&amp;gt;&lt;/span&gt; &lt;span class="nb"&gt;dict&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;
    &lt;span class="n"&gt;applied&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="p"&gt;[]&lt;/span&gt;
    &lt;span class="k"&gt;for&lt;/span&gt; &lt;span class="n"&gt;c&lt;/span&gt; &lt;span class="ow"&gt;in&lt;/span&gt; &lt;span class="n"&gt;state&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;candidates&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;]:&lt;/span&gt;
        &lt;span class="k"&gt;if&lt;/span&gt; &lt;span class="n"&gt;c&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;risk_level&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;]&lt;/span&gt; &lt;span class="o"&gt;==&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;LOW&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;
            &lt;span class="n"&gt;azure_compute&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;resize_vm&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;c&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;resource_id&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;],&lt;/span&gt; &lt;span class="n"&gt;c&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;recommended_sku&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;])&lt;/span&gt;
            &lt;span class="n"&gt;applied&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;append&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;c&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;
    &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;applied&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="n"&gt;applied&lt;/span&gt;&lt;span class="p"&gt;}&lt;/span&gt;

&lt;span class="k"&gt;def&lt;/span&gt; &lt;span class="nf"&gt;check_threshold&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;state&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="n"&gt;CostState&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="o"&gt;-&amp;gt;&lt;/span&gt; &lt;span class="nb"&gt;str&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;
    &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;loop&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt; &lt;span class="k"&gt;if&lt;/span&gt; &lt;span class="n"&gt;state&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;total_savings&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;]&lt;/span&gt; &lt;span class="o"&gt;&amp;gt;&lt;/span&gt; &lt;span class="mi"&gt;500&lt;/span&gt; &lt;span class="ow"&gt;and&lt;/span&gt; &lt;span class="n"&gt;state&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;iteration&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;]&lt;/span&gt; &lt;span class="o"&gt;&amp;lt;&lt;/span&gt; &lt;span class="mi"&gt;5&lt;/span&gt; &lt;span class="k"&gt;else&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;done&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;

&lt;span class="n"&gt;graph&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nc"&gt;StateGraph&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;CostState&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;
&lt;span class="n"&gt;graph&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;add_node&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;scan&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;scan_resources&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;
&lt;span class="n"&gt;graph&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;add_node&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;analyze&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;analyze_savings&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;
&lt;span class="n"&gt;graph&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;add_node&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;apply&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;apply_changes&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;
&lt;span class="n"&gt;graph&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;set_entry_point&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;scan&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;
&lt;span class="n"&gt;graph&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;add_edge&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;scan&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;analyze&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;
&lt;span class="n"&gt;graph&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;add_edge&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;analyze&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;apply&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;
&lt;span class="n"&gt;graph&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;add_conditional_edges&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;apply&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;check_threshold&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;loop&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;scan&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;done&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="n"&gt;END&lt;/span&gt;&lt;span class="p"&gt;})&lt;/span&gt;

&lt;span class="n"&gt;optimizer&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="n"&gt;graph&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;compile&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;checkpointer&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="n"&gt;RedisSaver&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;from_conn_string&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;os&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="n"&gt;environ&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;REDIS_URL&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;]))&lt;/span&gt;
&lt;span class="n"&gt;result&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="n"&gt;optimizer&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;invoke&lt;/span&gt;&lt;span class="p"&gt;({&lt;/span&gt;
    &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;subscription_id&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="n"&gt;os&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="n"&gt;environ&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;AZURE_SUBSCRIPTION_ID&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;],&lt;/span&gt;
    &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;candidates&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;[],&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;applied&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;[],&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;total_savings&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="mf"&gt;0.0&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;iteration&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="mi"&gt;0&lt;/span&gt;
&lt;span class="p"&gt;})&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;&lt;strong&gt;Why this justifies LangGraph:&lt;/strong&gt; The self-correcting re-scan is genuinely hard to express cleanly in plain tool calling without writing a custom state machine — which is exactly the under-engineering trap described earlier.&lt;/p&gt;




&lt;h2&gt;
  
  
  2. AutoGen 0.7.5 — Parallel Multi-Agent Collaboration
&lt;/h2&gt;

&lt;p&gt;&lt;strong&gt;Version:&lt;/strong&gt; 0.7.5 | &lt;code&gt;pip install "autogen-agentchat&amp;gt;=0.7.5" "autogen-ext[openai,redis]"&lt;/code&gt;&lt;/p&gt;

&lt;p&gt;Key additions in 0.7.5: linear memory via &lt;code&gt;RedisMemory&lt;/code&gt;, fixed &lt;code&gt;GraphFlow&lt;/code&gt; cycle detection, Anthropic thinking mode support, &lt;code&gt;reasoning_effort&lt;/code&gt; parameter for GPT-5 models, improved Azure AI client streaming.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Use when:&lt;/strong&gt; Multiple specialized agents run independent analysis simultaneously. The runtime is async and event-driven, so agents can be deployed in separate containers with zero blocking between them.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Avoid when:&lt;/strong&gt; The task is sequential and single-agent. Multi-agent coordination overhead only pays off when genuine parallelism exists.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Use case:&lt;/strong&gt; Automated M&amp;amp;A due diligence — Legal, Financial, and Tech Audit agents work in parallel, then a Synthesis agent consolidates findings into an investment decision. Sequential execution would add hours of unnecessary latency to a time-sensitive deal process.&lt;/p&gt;

&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fps8lxqs6tzetlgw95uif.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fps8lxqs6tzetlgw95uif.png" alt="Automated M&amp;amp;A due diligence" width="800" height="403"&gt;&lt;/a&gt;&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight python"&gt;&lt;code&gt;&lt;span class="c1"&gt;# requirements: autogen-agentchat&amp;gt;=0.7.5, autogen-ext[openai,redis]
&lt;/span&gt;&lt;span class="kn"&gt;import&lt;/span&gt; &lt;span class="n"&gt;asyncio&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;os&lt;/span&gt;
&lt;span class="kn"&gt;from&lt;/span&gt; &lt;span class="n"&gt;autogen_agentchat.agents&lt;/span&gt; &lt;span class="kn"&gt;import&lt;/span&gt; &lt;span class="n"&gt;AssistantAgent&lt;/span&gt;
&lt;span class="kn"&gt;from&lt;/span&gt; &lt;span class="n"&gt;autogen_agentchat.teams&lt;/span&gt; &lt;span class="kn"&gt;import&lt;/span&gt; &lt;span class="n"&gt;SelectorGroupChat&lt;/span&gt;
&lt;span class="kn"&gt;from&lt;/span&gt; &lt;span class="n"&gt;autogen_agentchat.conditions&lt;/span&gt; &lt;span class="kn"&gt;import&lt;/span&gt; &lt;span class="n"&gt;TextMentionTermination&lt;/span&gt;
&lt;span class="kn"&gt;from&lt;/span&gt; &lt;span class="n"&gt;autogen_ext.models.openai&lt;/span&gt; &lt;span class="kn"&gt;import&lt;/span&gt; &lt;span class="n"&gt;OpenAIChatCompletionClient&lt;/span&gt;
&lt;span class="kn"&gt;from&lt;/span&gt; &lt;span class="n"&gt;autogen_ext.memory.redis&lt;/span&gt; &lt;span class="kn"&gt;import&lt;/span&gt; &lt;span class="n"&gt;RedisMemory&lt;/span&gt;  

&lt;span class="n"&gt;model_client&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nc"&gt;OpenAIChatCompletionClient&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;model&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;gpt-4o&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;api_key&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="n"&gt;os&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="n"&gt;environ&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;OPENAI_API_KEY&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;])&lt;/span&gt;

&lt;span class="n"&gt;legal_agent&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nc"&gt;AssistantAgent&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
    &lt;span class="n"&gt;name&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;LegalAgent&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="n"&gt;model_client&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="n"&gt;model_client&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="n"&gt;memory&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="nc"&gt;RedisMemory&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;redis_url&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="n"&gt;os&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="n"&gt;environ&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;REDIS_URL&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;],&lt;/span&gt; &lt;span class="n"&gt;session_id&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;ma-legal&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;)],&lt;/span&gt;
    &lt;span class="n"&gt;system_message&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="sh"&gt;"""&lt;/span&gt;&lt;span class="s"&gt;M&amp;amp;A legal counsel. Identify: IP gaps, change-of-control clauses,
    litigation exposure, employment liabilities.
    Output JSON: {risk_items, severity: BLOCKER|HIGH|MEDIUM|LOW, deal_impact}&lt;/span&gt;&lt;span class="sh"&gt;"""&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="n"&gt;tools&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="n"&gt;fetch_contract_repository&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;search_litigation_database&lt;/span&gt;&lt;span class="p"&gt;]&lt;/span&gt;
&lt;span class="p"&gt;)&lt;/span&gt;

&lt;span class="n"&gt;financial_agent&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nc"&gt;AssistantAgent&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
    &lt;span class="n"&gt;name&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;FinancialAgent&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="n"&gt;model_client&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="n"&gt;model_client&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="n"&gt;memory&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="nc"&gt;RedisMemory&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;redis_url&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="n"&gt;os&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="n"&gt;environ&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;REDIS_URL&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;],&lt;/span&gt; &lt;span class="n"&gt;session_id&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;ma-financial&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;)],&lt;/span&gt;
    &lt;span class="n"&gt;system_message&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="sh"&gt;"""&lt;/span&gt;&lt;span class="s"&gt;M&amp;amp;A financial analyst. Identify: revenue quality, off-balance-sheet
    liabilities, working capital needs post-acquisition, EBITDA normalization.
    Output JSON: {financial_flags, normalized_ebitda, recommended_valuation_range}&lt;/span&gt;&lt;span class="sh"&gt;"""&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="n"&gt;tools&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="n"&gt;fetch_financial_statements&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;compute_dcf_model&lt;/span&gt;&lt;span class="p"&gt;]&lt;/span&gt;
&lt;span class="p"&gt;)&lt;/span&gt;

&lt;span class="n"&gt;tech_audit_agent&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nc"&gt;AssistantAgent&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
    &lt;span class="n"&gt;name&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;TechAuditAgent&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="n"&gt;model_client&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="n"&gt;model_client&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="n"&gt;system_message&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="sh"&gt;"""&lt;/span&gt;&lt;span class="s"&gt;Technical due diligence expert. Assess: tech debt score (1–10),
    security vulnerabilities, scalability ceiling, bus-factor risk.
    Output JSON: {tech_risks, estimated_remediation_cost_usd, integration_complexity}&lt;/span&gt;&lt;span class="sh"&gt;"""&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="n"&gt;tools&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="n"&gt;clone_and_analyze_repo&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;run_dependency_scan&lt;/span&gt;&lt;span class="p"&gt;]&lt;/span&gt;
&lt;span class="p"&gt;)&lt;/span&gt;

&lt;span class="n"&gt;synthesis_agent&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nc"&gt;AssistantAgent&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
    &lt;span class="n"&gt;name&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;SynthesisAgent&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="n"&gt;model_client&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="n"&gt;model_client&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="n"&gt;system_message&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="sh"&gt;"""&lt;/span&gt;&lt;span class="s"&gt;M&amp;amp;A deal lead. Wait for all domain agents to complete.
    Synthesize: PROCEED / PROCEED_WITH_CONDITIONS / ABORT.
    Include: top 5 risks, price adjustment recommendation, 90-day priorities.
    End your message with: ANALYSIS_COMPLETE&lt;/span&gt;&lt;span class="sh"&gt;"""&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="n"&gt;tools&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="n"&gt;generate_pdf_memo&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;notify_deal_team_slack&lt;/span&gt;&lt;span class="p"&gt;]&lt;/span&gt;
&lt;span class="p"&gt;)&lt;/span&gt;

&lt;span class="k"&gt;async&lt;/span&gt; &lt;span class="k"&gt;def&lt;/span&gt; &lt;span class="nf"&gt;run_ma_due_diligence&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;target&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nb"&gt;str&lt;/span&gt;&lt;span class="p"&gt;):&lt;/span&gt;
    &lt;span class="n"&gt;team&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nc"&gt;SelectorGroupChat&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
        &lt;span class="n"&gt;participants&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="n"&gt;legal_agent&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;financial_agent&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;tech_audit_agent&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;synthesis_agent&lt;/span&gt;&lt;span class="p"&gt;],&lt;/span&gt;
        &lt;span class="n"&gt;model_client&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="n"&gt;model_client&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
        &lt;span class="n"&gt;termination_condition&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="nc"&gt;TextMentionTermination&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;ANALYSIS_COMPLETE&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;),&lt;/span&gt;
        &lt;span class="n"&gt;selector_prompt&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="sh"&gt;"""&lt;/span&gt;&lt;span class="s"&gt;Run LegalAgent, FinancialAgent, TechAuditAgent first (order flexible,
        can run in parallel). Only select SynthesisAgent after all three have reported.&lt;/span&gt;&lt;span class="sh"&gt;"""&lt;/span&gt;
    &lt;span class="p"&gt;)&lt;/span&gt;
    &lt;span class="k"&gt;async&lt;/span&gt; &lt;span class="k"&gt;for&lt;/span&gt; &lt;span class="n"&gt;msg&lt;/span&gt; &lt;span class="ow"&gt;in&lt;/span&gt; &lt;span class="n"&gt;team&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;run_stream&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;task&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="sa"&gt;f&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;Full M&amp;amp;A due diligence for: &lt;/span&gt;&lt;span class="si"&gt;{&lt;/span&gt;&lt;span class="n"&gt;target&lt;/span&gt;&lt;span class="si"&gt;}&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;):&lt;/span&gt;
        &lt;span class="nf"&gt;print&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="sa"&gt;f&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;[&lt;/span&gt;&lt;span class="si"&gt;{&lt;/span&gt;&lt;span class="n"&gt;msg&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="n"&gt;source&lt;/span&gt;&lt;span class="si"&gt;}&lt;/span&gt;&lt;span class="s"&gt;] &lt;/span&gt;&lt;span class="si"&gt;{&lt;/span&gt;&lt;span class="nf"&gt;str&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;msg&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="n"&gt;content&lt;/span&gt;&lt;span class="p"&gt;)[&lt;/span&gt;&lt;span class="si"&gt;:&lt;/span&gt;&lt;span class="mi"&gt;120&lt;/span&gt;&lt;span class="p"&gt;]&lt;/span&gt;&lt;span class="si"&gt;}&lt;/span&gt;&lt;span class="s"&gt;...&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;

&lt;span class="n"&gt;asyncio&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;run&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nf"&gt;run_ma_due_diligence&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;TargetCorp Inc.&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;))&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;






&lt;h2&gt;
  
  
  3. Microsoft Agent Framework RC — Enterprise Production Agents
&lt;/h2&gt;

&lt;p&gt;&lt;strong&gt;Version:&lt;/strong&gt; Release Candidate, Feb 19, 2026 | &lt;code&gt;pip install agent-framework --pre&lt;/code&gt;&lt;/p&gt;

&lt;p&gt;The most significant shift in the Microsoft AI ecosystem right now. This framework &lt;strong&gt;unifies Semantic Kernel and AutoGen into a single SDK&lt;/strong&gt; — both are entering maintenance mode as of this writing, and all new Microsoft investment flows into Agent Framework first.&lt;/p&gt;

&lt;p&gt;RC signals a frozen, stable API surface with GA targeting Q1 2026. Core capabilities: persistent threads (Cosmos DB), Service Bus integration, MCP + A2A protocol support, multi-agent orchestration with handoff and group chat patterns, streaming checkpointing for long-running agents, full .NET and Python support.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Use when:&lt;/strong&gt; Azure-first enterprise teams, production 24/7 background agents, regulated systems requiring full audit trails, or any team currently building on Semantic Kernel or AutoGen.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Avoid when:&lt;/strong&gt; Greenfield Python-only stacks with no Azure dependency, or where GA stability is a hard requirement before adoption.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Use case:&lt;/strong&gt; Autonomous DevOps deployment agent — monitors CI/CD completion events, validates health via Application Insights, promotes through dev → staging → prod automatically, pages on-call only when a gate fails. Runs continuously as a persistent, resumable agent.&lt;/p&gt;

&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Ffu6hu6bcqcyl1ww6pgal.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Ffu6hu6bcqcyl1ww6pgal.png" alt="Autonomous DevOps deployment agent" width="800" height="844"&gt;&lt;/a&gt;&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight python"&gt;&lt;code&gt;&lt;span class="c1"&gt;# requirements: agent-framework --pre, azure-identity, azure-monitor-query, kubernetes
&lt;/span&gt;&lt;span class="kn"&gt;import&lt;/span&gt; &lt;span class="n"&gt;os&lt;/span&gt;
&lt;span class="kn"&gt;from&lt;/span&gt; &lt;span class="n"&gt;agent_framework&lt;/span&gt; &lt;span class="kn"&gt;import&lt;/span&gt; &lt;span class="n"&gt;AgentClient&lt;/span&gt;
&lt;span class="kn"&gt;from&lt;/span&gt; &lt;span class="n"&gt;azure.identity&lt;/span&gt; &lt;span class="kn"&gt;import&lt;/span&gt; &lt;span class="n"&gt;DefaultAzureCredential&lt;/span&gt;

&lt;span class="n"&gt;credential&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nc"&gt;DefaultAzureCredential&lt;/span&gt;&lt;span class="p"&gt;()&lt;/span&gt;
&lt;span class="n"&gt;agent_client&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nc"&gt;AgentClient&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
    &lt;span class="n"&gt;endpoint&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="n"&gt;os&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="n"&gt;environ&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;AZURE_AI_FOUNDRY_ENDPOINT&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;],&lt;/span&gt;
    &lt;span class="n"&gt;credential&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="n"&gt;credential&lt;/span&gt;
&lt;span class="p"&gt;)&lt;/span&gt;

&lt;span class="k"&gt;def&lt;/span&gt; &lt;span class="nf"&gt;check_app_insights&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;resource_id&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nb"&gt;str&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;environment&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nb"&gt;str&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;window_minutes&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nb"&gt;int&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="mi"&gt;5&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="o"&gt;-&amp;gt;&lt;/span&gt; &lt;span class="nb"&gt;dict&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;
    &lt;span class="kn"&gt;from&lt;/span&gt; &lt;span class="n"&gt;azure.monitor.query&lt;/span&gt; &lt;span class="kn"&gt;import&lt;/span&gt; &lt;span class="n"&gt;MetricsQueryClient&lt;/span&gt;
    &lt;span class="kn"&gt;from&lt;/span&gt; &lt;span class="n"&gt;datetime&lt;/span&gt; &lt;span class="kn"&gt;import&lt;/span&gt; &lt;span class="n"&gt;timedelta&lt;/span&gt;
    &lt;span class="n"&gt;monitor&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nc"&gt;MetricsQueryClient&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;credential&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;
    &lt;span class="n"&gt;result&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="n"&gt;monitor&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;query_resource&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
        &lt;span class="n"&gt;resource_id&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
        &lt;span class="n"&gt;metrics&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;requests/failed&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;requests/duration&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;],&lt;/span&gt;
        &lt;span class="n"&gt;timespan&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="nf"&gt;timedelta&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;minutes&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="n"&gt;window_minutes&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;
    &lt;span class="p"&gt;)&lt;/span&gt;
    &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
        &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;environment&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="n"&gt;environment&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
        &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;error_rate_percent&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nf"&gt;_calculate_error_rate&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;result&lt;/span&gt;&lt;span class="p"&gt;),&lt;/span&gt;
        &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;p99_latency_ms&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nf"&gt;_calculate_p99&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;result&lt;/span&gt;&lt;span class="p"&gt;),&lt;/span&gt;
        &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;gate_passed&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nf"&gt;_calculate_error_rate&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;result&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="o"&gt;&amp;lt;&lt;/span&gt; &lt;span class="mf"&gt;1.0&lt;/span&gt; &lt;span class="ow"&gt;and&lt;/span&gt; &lt;span class="nf"&gt;_calculate_p99&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;result&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="o"&gt;&amp;lt;&lt;/span&gt; &lt;span class="mi"&gt;500&lt;/span&gt;
    &lt;span class="p"&gt;}&lt;/span&gt;

&lt;span class="k"&gt;def&lt;/span&gt; &lt;span class="nf"&gt;promote_deployment&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;service&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nb"&gt;str&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;image_tag&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nb"&gt;str&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;environment&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nb"&gt;str&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="o"&gt;-&amp;gt;&lt;/span&gt; &lt;span class="nb"&gt;dict&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;
    &lt;span class="kn"&gt;from&lt;/span&gt; &lt;span class="n"&gt;kubernetes&lt;/span&gt; &lt;span class="kn"&gt;import&lt;/span&gt; &lt;span class="n"&gt;client&lt;/span&gt; &lt;span class="k"&gt;as&lt;/span&gt; &lt;span class="n"&gt;k8s&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;config&lt;/span&gt;
    &lt;span class="n"&gt;config&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;load_incluster_config&lt;/span&gt;&lt;span class="p"&gt;()&lt;/span&gt;
    &lt;span class="n"&gt;k8s&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nc"&gt;AppsV1Api&lt;/span&gt;&lt;span class="p"&gt;().&lt;/span&gt;&lt;span class="nf"&gt;patch_namespaced_deployment&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
        &lt;span class="n"&gt;name&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="n"&gt;service&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;namespace&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="n"&gt;environment&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
        &lt;span class="n"&gt;body&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="p"&gt;{&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;spec&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;template&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;spec&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;containers&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;[{&lt;/span&gt;
            &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;name&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="n"&gt;service&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
            &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;image&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="sa"&gt;f&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="si"&gt;{&lt;/span&gt;&lt;span class="n"&gt;os&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="n"&gt;environ&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="sh"&gt;'&lt;/span&gt;&lt;span class="s"&gt;ACR_REGISTRY&lt;/span&gt;&lt;span class="sh"&gt;'&lt;/span&gt;&lt;span class="p"&gt;]&lt;/span&gt;&lt;span class="si"&gt;}&lt;/span&gt;&lt;span class="s"&gt;/&lt;/span&gt;&lt;span class="si"&gt;{&lt;/span&gt;&lt;span class="n"&gt;service&lt;/span&gt;&lt;span class="si"&gt;}&lt;/span&gt;&lt;span class="s"&gt;:&lt;/span&gt;&lt;span class="si"&gt;{&lt;/span&gt;&lt;span class="n"&gt;image_tag&lt;/span&gt;&lt;span class="si"&gt;}&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;
        &lt;span class="p"&gt;}]}}}}&lt;/span&gt;
    &lt;span class="p"&gt;)&lt;/span&gt;
    &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;status&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;promoted&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;service&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="n"&gt;service&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;tag&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="n"&gt;image_tag&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;environment&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="n"&gt;environment&lt;/span&gt;&lt;span class="p"&gt;}&lt;/span&gt;

&lt;span class="c1"&gt;# Persistent thread — Cosmos DB preserves state across restarts
&lt;/span&gt;&lt;span class="n"&gt;agent&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="n"&gt;agent_client&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;create_agent&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
    &lt;span class="n"&gt;model&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;gpt-4o&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="n"&gt;name&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;DeploymentOrchestrator&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="n"&gt;instructions&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="sh"&gt;"""&lt;/span&gt;&lt;span class="s"&gt;Autonomous DevOps deployment agent.
    1. Validate health: error rate &amp;lt; 1%, p99 &amp;lt; 500ms, zero pod restarts
    2. Run smoke tests on the deployed environment
    3. All gates pass → promote to next environment
    4. Any gate fails → halt, capture full diagnostics, page on-call
    5. After prod deploy: monitor 15 minutes, auto-rollback if error rate &amp;gt; 2%
    Log every action: timestamp, metric values, decision, outcome.&lt;/span&gt;&lt;span class="sh"&gt;"""&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="n"&gt;tools&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="n"&gt;check_app_insights&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;promote_deployment&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;run_smoke_tests&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
           &lt;span class="n"&gt;rollback_deployment&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;page_oncall&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;log_deployment_event&lt;/span&gt;&lt;span class="p"&gt;]&lt;/span&gt;
&lt;span class="p"&gt;)&lt;/span&gt;
&lt;span class="n"&gt;thread&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="n"&gt;agent_client&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;create_thread&lt;/span&gt;&lt;span class="p"&gt;()&lt;/span&gt;

&lt;span class="k"&gt;async&lt;/span&gt; &lt;span class="k"&gt;def&lt;/span&gt; &lt;span class="nf"&gt;handle_pipeline_completion&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;event&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nb"&gt;dict&lt;/span&gt;&lt;span class="p"&gt;):&lt;/span&gt;
    &lt;span class="sh"&gt;"""&lt;/span&gt;&lt;span class="s"&gt;Azure Event Grid webhook — fires on every pipeline completion&lt;/span&gt;&lt;span class="sh"&gt;"""&lt;/span&gt;
    &lt;span class="n"&gt;agent_client&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;create_message&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
        &lt;span class="n"&gt;thread_id&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="n"&gt;thread&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nb"&gt;id&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;role&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;user&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
        &lt;span class="n"&gt;content&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
            &lt;span class="sa"&gt;f&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;Deployment complete — Service: &lt;/span&gt;&lt;span class="si"&gt;{&lt;/span&gt;&lt;span class="n"&gt;event&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="sh"&gt;'&lt;/span&gt;&lt;span class="s"&gt;service_name&lt;/span&gt;&lt;span class="sh"&gt;'&lt;/span&gt;&lt;span class="p"&gt;]&lt;/span&gt;&lt;span class="si"&gt;}&lt;/span&gt;&lt;span class="s"&gt;, &lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;
            &lt;span class="sa"&gt;f&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;Tag: &lt;/span&gt;&lt;span class="si"&gt;{&lt;/span&gt;&lt;span class="n"&gt;event&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="sh"&gt;'&lt;/span&gt;&lt;span class="s"&gt;image_tag&lt;/span&gt;&lt;span class="sh"&gt;'&lt;/span&gt;&lt;span class="p"&gt;]&lt;/span&gt;&lt;span class="si"&gt;}&lt;/span&gt;&lt;span class="s"&gt;, Environment: &lt;/span&gt;&lt;span class="si"&gt;{&lt;/span&gt;&lt;span class="n"&gt;event&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="sh"&gt;'&lt;/span&gt;&lt;span class="s"&gt;environment&lt;/span&gt;&lt;span class="sh"&gt;'&lt;/span&gt;&lt;span class="p"&gt;]&lt;/span&gt;&lt;span class="si"&gt;}&lt;/span&gt;&lt;span class="s"&gt;. &lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;
            &lt;span class="sa"&gt;f&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;Begin validation and promotion workflow.&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;
        &lt;span class="p"&gt;)&lt;/span&gt;
    &lt;span class="p"&gt;)&lt;/span&gt;
    &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="n"&gt;agent_client&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;create_and_process_run&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;thread_id&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="n"&gt;thread&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nb"&gt;id&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;agent_id&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="n"&gt;agent&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nb"&gt;id&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;






&lt;h2&gt;
  
  
  4. CrewAI 1.10.1 — Role-Based Sequential Pipelines
&lt;/h2&gt;

&lt;p&gt;&lt;strong&gt;Version:&lt;/strong&gt; 1.10.1 (March 3, 2026) | &lt;code&gt;pip install crewai==1.10.1&lt;/code&gt;&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;⚠️ Note:&lt;/strong&gt; v1.10.0 was yanked from PyPI due to an AMP runtime issue. Always pin to &lt;code&gt;1.10.1&lt;/code&gt; directly. This release includes lazy-loading changes in the Memory module and resolves a concurrent multi-process &lt;code&gt;LockException&lt;/code&gt; in production flows.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Use when:&lt;/strong&gt; You have clearly defined specialist roles, sequential task handoff, and want the fastest path from idea to working prototype. CrewAI is the most opinionated framework and the fastest to build with.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Avoid when:&lt;/strong&gt; Fine-grained conditional edge control is needed, or throughput-sensitive production paths where abstraction overhead is measurable.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Use case:&lt;/strong&gt; Automated system architecture pipeline — an Architect designs from requirements, a Reviewer stress-tests for production failures, a Documentation Lead produces the Architecture Decision Record for the engineering wiki.&lt;/p&gt;

&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fq63pavxrx7inawd3vbfv.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fq63pavxrx7inawd3vbfv.png" alt="Automated system architecture pipeline" width="800" height="255"&gt;&lt;/a&gt;&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight python"&gt;&lt;code&gt;&lt;span class="c1"&gt;# requirements: crewai==1.10.1
&lt;/span&gt;&lt;span class="kn"&gt;from&lt;/span&gt; &lt;span class="n"&gt;crewai&lt;/span&gt; &lt;span class="kn"&gt;import&lt;/span&gt; &lt;span class="n"&gt;Agent&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;Task&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;Crew&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;Process&lt;/span&gt;

&lt;span class="n"&gt;architect&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nc"&gt;Agent&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
    &lt;span class="n"&gt;role&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;Principal Solutions Architect&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="n"&gt;goal&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;Design a scalable, cloud-native system architecture from requirements&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="n"&gt;backstory&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;10+ years on Azure and AWS. Expert in event-driven architecture, CQRS, and zero-trust.&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="n"&gt;verbose&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="bp"&gt;True&lt;/span&gt;
&lt;span class="p"&gt;)&lt;/span&gt;
&lt;span class="n"&gt;reviewer&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nc"&gt;Agent&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
    &lt;span class="n"&gt;role&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;Staff Engineer — Technical Reviewer&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="n"&gt;goal&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;Identify scalability bottlenecks, security vulnerabilities, and operational risks&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="n"&gt;backstory&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;Former SRE. Has seen every production failure mode. Only approves designs that hold at 10x load.&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="n"&gt;verbose&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="bp"&gt;True&lt;/span&gt;
&lt;span class="p"&gt;)&lt;/span&gt;
&lt;span class="n"&gt;doc_lead&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nc"&gt;Agent&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
    &lt;span class="n"&gt;role&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;Technical Documentation Lead&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="n"&gt;goal&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;Produce a complete Architecture Decision Record capturing every design trade-off&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="n"&gt;backstory&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;Undocumented architecture is a liability. Writes for the engineer joining 2 years later.&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="n"&gt;verbose&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="bp"&gt;True&lt;/span&gt;
&lt;span class="p"&gt;)&lt;/span&gt;

&lt;span class="n"&gt;design_task&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nc"&gt;Task&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
    &lt;span class="n"&gt;description&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="sh"&gt;"""&lt;/span&gt;&lt;span class="s"&gt;Design a complete architecture for: {requirements}
    Output JSON: components (name + responsibility), data_flows (source → destination),
    tech_choices (with justification), scaling_strategy per component, security_controls.&lt;/span&gt;&lt;span class="sh"&gt;"""&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="n"&gt;expected_output&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;JSON architecture specification&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="n"&gt;agent&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="n"&gt;architect&lt;/span&gt;
&lt;span class="p"&gt;)&lt;/span&gt;
&lt;span class="n"&gt;review_task&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nc"&gt;Task&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
    &lt;span class="n"&gt;description&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="sh"&gt;"""&lt;/span&gt;&lt;span class="s"&gt;Stress-test the architecture:
    1. 10x traffic spike — what breaks first?
    2. Single region failure — what is the blast radius?
    3. Compromised service account — what can an attacker reach?
    Return: {approved: YES/NO, blocking_issues: [], recommended_changes: []}&lt;/span&gt;&lt;span class="sh"&gt;"""&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="n"&gt;expected_output&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;Review JSON with approval and findings&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="n"&gt;agent&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="n"&gt;reviewer&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="n"&gt;context&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="n"&gt;design_task&lt;/span&gt;&lt;span class="p"&gt;]&lt;/span&gt;
&lt;span class="p"&gt;)&lt;/span&gt;
&lt;span class="n"&gt;adr_task&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nc"&gt;Task&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
    &lt;span class="n"&gt;description&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;Write a complete ADR: Context, Decision, Consequences, Alternatives Considered, Risk Register. Format: Markdown.&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="n"&gt;expected_output&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;Complete ADR in Markdown&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="n"&gt;agent&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="n"&gt;doc_lead&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="n"&gt;context&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="n"&gt;design_task&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;review_task&lt;/span&gt;&lt;span class="p"&gt;]&lt;/span&gt;
&lt;span class="p"&gt;)&lt;/span&gt;

&lt;span class="n"&gt;crew&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nc"&gt;Crew&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
    &lt;span class="n"&gt;agents&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="n"&gt;architect&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;reviewer&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;doc_lead&lt;/span&gt;&lt;span class="p"&gt;],&lt;/span&gt;
    &lt;span class="n"&gt;tasks&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="n"&gt;design_task&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;review_task&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;adr_task&lt;/span&gt;&lt;span class="p"&gt;],&lt;/span&gt;
    &lt;span class="n"&gt;process&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="n"&gt;Process&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="n"&gt;sequential&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="n"&gt;verbose&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="bp"&gt;True&lt;/span&gt;
&lt;span class="p"&gt;)&lt;/span&gt;
&lt;span class="n"&gt;result&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="n"&gt;crew&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;kickoff&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;inputs&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="p"&gt;{&lt;/span&gt;
    &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;requirements&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;Real-time fraud detection — 50K TPS, sub-100ms decisions, 99.99% uptime, multi-region active-active&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;
&lt;span class="p"&gt;})&lt;/span&gt;
&lt;span class="nf"&gt;print&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;result&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="n"&gt;raw&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;






&lt;h2&gt;
  
  
  5. Haystack 2.x — Document Intelligence Pipelines
&lt;/h2&gt;

&lt;p&gt;&lt;strong&gt;Version:&lt;/strong&gt; 2.x | &lt;code&gt;pip install haystack-ai&lt;/code&gt;&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Use when:&lt;/strong&gt; Document processing, extraction, or compliance analysis at scale is the core product — not general agentic behavior. Haystack is purpose-built for this and measurably outperforms general frameworks on document-centric workloads.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Avoid when:&lt;/strong&gt; The problem is general agentic orchestration, multi-agent coordination, or any real-time interactive system.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Use case:&lt;/strong&gt; Automated SOC 2 evidence collection — ingests all internal policy documents, maps clauses against Trust Service Criteria, produces a compliance gap report showing which controls are missing or non-conformant.&lt;/p&gt;

&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2F65yg8it7bjtpknlvy5uo.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2F65yg8it7bjtpknlvy5uo.png" alt="Automated SOC 2 evidence collection" width="800" height="1952"&gt;&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;The pipeline maps extracted content against six SOC 2 Trust Services Criteria series (CC6, CC7, CC8, CC9, A1, C1), flags NOT_COVERED gaps with descriptions, and returns a structured compliance report with overall coverage percentage — no custom parsing layer required.&lt;/p&gt;




&lt;h2&gt;
  
  
  The Practical Decision Rule
&lt;/h2&gt;



&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight plaintext"&gt;&lt;code&gt;If your AI system needs:

  Retries or self-correction    → LangGraph
  Multiple parallel specialists → AutoGen
  Enterprise Azure persistence  → Microsoft Agent Framework
  Sequential role-based tasks   → CrewAI
  Document extraction at scale  → Haystack
  None of the above             → No framework. Plain tool calling.
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;






&lt;h2&gt;
  
  
  Framework Reference
&lt;/h2&gt;

&lt;div class="table-wrapper-paragraph"&gt;&lt;table&gt;
&lt;thead&gt;
&lt;tr&gt;
&lt;th&gt;Framework&lt;/th&gt;
&lt;th&gt;Version&lt;/th&gt;
&lt;th&gt;Primary Strength&lt;/th&gt;
&lt;th&gt;Use When&lt;/th&gt;
&lt;th&gt;Avoid When&lt;/th&gt;
&lt;/tr&gt;
&lt;/thead&gt;
&lt;tbody&gt;
&lt;tr&gt;
&lt;td&gt;Plain tool calling&lt;/td&gt;
&lt;td&gt;—&lt;/td&gt;
&lt;td&gt;Speed, simplicity, debuggability&lt;/td&gt;
&lt;td&gt;Straight-line workflow&lt;/td&gt;
&lt;td&gt;Never, as long as the workflow has no loops&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;LangGraph&lt;/td&gt;
&lt;td&gt;1.1.2&lt;/td&gt;
&lt;td&gt;Cyclic graphs, checkpointing&lt;/td&gt;
&lt;td&gt;Self-correcting loops, retries&lt;/td&gt;
&lt;td&gt;No conditional branching&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;AutoGen&lt;/td&gt;
&lt;td&gt;&lt;strong&gt;0.7.5&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;Parallel async agents, RedisMemory&lt;/td&gt;
&lt;td&gt;Multiple specialists in parallel&lt;/td&gt;
&lt;td&gt;Sequential single-agent tasks&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Microsoft Agent Framework&lt;/td&gt;
&lt;td&gt;&lt;strong&gt;RC Feb 2026&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;Enterprise persistence, SK + AutoGen unified&lt;/td&gt;
&lt;td&gt;Azure production 24/7 agents&lt;/td&gt;
&lt;td&gt;Python-only, no Azure stack&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;CrewAI&lt;/td&gt;
&lt;td&gt;&lt;strong&gt;1.10.1&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;Role-based prototyping, fast iteration&lt;/td&gt;
&lt;td&gt;Sequential delegation, fast prototyping&lt;/td&gt;
&lt;td&gt;Fine-grained production control&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Haystack&lt;/td&gt;
&lt;td&gt;2.x&lt;/td&gt;
&lt;td&gt;Document extraction pipelines&lt;/td&gt;
&lt;td&gt;Document processing as core product&lt;/td&gt;
&lt;td&gt;General agentic tasks&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;
&lt;/table&gt;&lt;/div&gt;




&lt;h2&gt;
  
  
  Production Lessons From Real Systems
&lt;/h2&gt;

&lt;p&gt;After reviewing AI systems across multiple teams, a few patterns appear consistently:&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;1. Most workflows are simpler than they look.&lt;/strong&gt;&lt;br&gt;
The instinct when building with LLMs is to reach for orchestration layers. That instinct is usually wrong. Start with the simplest thing that could work, measure it, then add complexity only where the problem resists simplicity.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;2. Agent frameworks pay off only when the workflow has the right shape.&lt;/strong&gt;&lt;br&gt;
The right shape means loops, parallel specialists, or long-running persistent state. If none of these exist, the framework is cost with no architectural return.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;3. Debuggability matters more than clever architecture.&lt;/strong&gt;&lt;br&gt;
A system the team can debug at 2AM is worth more than an elegant multi-agent pipeline nobody fully understands. Production incidents don't wait for framework comprehension.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;4. Hand-rolling framework primitives is the most expensive mistake.&lt;/strong&gt;&lt;br&gt;
Custom state machines, retry logic, and checkpointing written to avoid a framework dependency consistently cost more engineering time than learning the framework properly. Both failure modes described earlier confirm this.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;5. Decide the architecture before writing the first line.&lt;/strong&gt;&lt;br&gt;
Draw the workflow. Identify the shape. Pick the tool. That decision should take 30 minutes, not six weeks of refactoring.&lt;/p&gt;




&lt;h2&gt;
  
  
  The Real Rule
&lt;/h2&gt;

&lt;blockquote&gt;
&lt;p&gt;&lt;strong&gt;If you can draw your workflow as a straight line — plain tool calling is your production architecture.&lt;/strong&gt;&lt;br&gt;
&lt;strong&gt;If that line needs to loop, branch, or coordinate parallel specialists — match the tool to the shape.&lt;/strong&gt;&lt;/p&gt;
&lt;/blockquote&gt;

&lt;p&gt;The best production AI systems are architecturally boring. One clean service, typed tool schemas, structured output, observable logs. No framework overhead unless the problem demands it.&lt;/p&gt;

&lt;p&gt;Add complexity only when the problem resists simplicity. That's the whole framework.&lt;/p&gt;




&lt;h2&gt;
  
  
  What's Next in This Series
&lt;/h2&gt;

&lt;p&gt;This post focused on orchestration — how to structure and run AI workflows in production.&lt;/p&gt;

&lt;p&gt;But once orchestration is solved, most teams run into a different problem:&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Their RAG system doesn’t actually work.&lt;/strong&gt;&lt;/p&gt;

&lt;p&gt;Not in demos — those look fine.&lt;br&gt;
In production — it breaks.&lt;/p&gt;

&lt;p&gt;Wrong answers. Missing context. Hallucinations with high confidence.&lt;/p&gt;

&lt;p&gt;And in most cases, the root cause is &lt;em&gt;not&lt;/em&gt; the vector database or the embedding model.&lt;/p&gt;

&lt;p&gt;It’s the architecture around it.&lt;/p&gt;

&lt;p&gt;The next article breaks down:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Why most RAG systems fail after launch&lt;/li&gt;
&lt;li&gt;The common design mistakes teams repeat&lt;/li&gt;
&lt;li&gt;And the production architecture that actually fixes it&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;If you’re building anything on top of retrieval, this is where things either scale — or quietly fail.&lt;/p&gt;




&lt;p&gt;&lt;em&gt;Are you running an agent framework in production — or did you strip one out and go back to basics?&lt;/em&gt;&lt;/p&gt;

&lt;p&gt;&lt;em&gt;What made that decision clear? Drop your stack and the reason in the comments. The real production stories are always more useful than the official docs.&lt;/em&gt;&lt;/p&gt;




&lt;p&gt;&lt;strong&gt;Tags:&lt;/strong&gt; &lt;code&gt;#ai&lt;/code&gt; &lt;code&gt;#llm&lt;/code&gt; &lt;code&gt;#machinelearning&lt;/code&gt; &lt;code&gt;#softwareengineering&lt;/code&gt; &lt;code&gt;#devops&lt;/code&gt;&lt;/p&gt;




&lt;p&gt;&lt;em&gt;Version data sourced from official release channels: &lt;a href="https://github.com/microsoft/autogen/releases"&gt;AutoGen 0.7.5&lt;/a&gt;, &lt;a href="https://learn.microsoft.com/agent-framework"&gt;Microsoft Agent Framework RC&lt;/a&gt;, &lt;a href="https://docs.crewai.com/changelog"&gt;CrewAI 1.10.1&lt;/a&gt; (&lt;a href="https://pypi.org/project/crewai"&gt;PyPI&lt;/a&gt;), &lt;a href="https://langchain-ai.github.io/langgraph"&gt;LangGraph&lt;/a&gt;, &lt;a href="https://docs.haystack.deepset.ai"&gt;Haystack&lt;/a&gt;.&lt;/em&gt;&lt;/p&gt;

</description>
      <category>ai</category>
      <category>software</category>
      <category>agents</category>
      <category>systemdesign</category>
    </item>
    <item>
      <title>[Boost]</title>
      <dc:creator>TheProdSDE</dc:creator>
      <pubDate>Wed, 18 Mar 2026 09:37:06 +0000</pubDate>
      <link>https://dev.to/theprodsde/-1582</link>
      <guid>https://dev.to/theprodsde/-1582</guid>
      <description>&lt;div class="ltag__link"&gt;
  &lt;a href="/theprodsde" class="ltag__link__link"&gt;
    &lt;div class="ltag__link__pic"&gt;
      &lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Fuser%2Fprofile_image%2F3752909%2F36ae54b2-61fd-4d29-b097-ed6fbe2cd7ce.png" alt="theprodsde"&gt;
    &lt;/div&gt;
  &lt;/a&gt;
  &lt;a href="https://dev.to/theprodsde/stop-guessing-your-llm-replacement-5f7m" class="ltag__link__link"&gt;
    &lt;div class="ltag__link__content"&gt;
      &lt;h2&gt;Stop Guessing Your LLM Replacement&lt;/h2&gt;
      &lt;h3&gt;TheProdSDE ・ Mar 14&lt;/h3&gt;
      &lt;div class="ltag__link__taglist"&gt;
        &lt;span class="ltag__link__tag"&gt;#ai&lt;/span&gt;
        &lt;span class="ltag__link__tag"&gt;#llm&lt;/span&gt;
        &lt;span class="ltag__link__tag"&gt;#cloud&lt;/span&gt;
        &lt;span class="ltag__link__tag"&gt;#systemdesign&lt;/span&gt;
      &lt;/div&gt;
    &lt;/div&gt;
  &lt;/a&gt;
&lt;/div&gt;


</description>
      <category>ai</category>
      <category>llm</category>
      <category>cloud</category>
      <category>systemdesign</category>
    </item>
    <item>
      <title>Stop Guessing Your LLM Replacement</title>
      <dc:creator>TheProdSDE</dc:creator>
      <pubDate>Sat, 14 Mar 2026 11:32:29 +0000</pubDate>
      <link>https://dev.to/theprodsde/stop-guessing-your-llm-replacement-5f7m</link>
      <guid>https://dev.to/theprodsde/stop-guessing-your-llm-replacement-5f7m</guid>
      <description>&lt;h2&gt;
  
  
  A Practical Guide to Migrating GPT Apps Across Azure, AWS, and GCP
&lt;/h2&gt;

&lt;blockquote&gt;
&lt;p&gt;&lt;strong&gt;TL;DR&lt;/strong&gt; — Most LLM migrations are not caused by model performance. They happen because of &lt;strong&gt;data residency laws&lt;/strong&gt;, &lt;strong&gt;enterprise deployment requirements&lt;/strong&gt;, or &lt;strong&gt;cloud standardisation decisions&lt;/strong&gt;. This guide helps you narrow the search space to the right replacement candidates — not replace real testing.&lt;/p&gt;
&lt;/blockquote&gt;




&lt;h2&gt;
  
  
  Why Companies Actually Migrate LLMs
&lt;/h2&gt;

&lt;p&gt;Engineers rarely wake up and decide to migrate their AI stack. Most migrations are triggered by &lt;strong&gt;business constraints&lt;/strong&gt;, not technical ones.&lt;/p&gt;

&lt;p&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Feemc41l3n4xa67lsj8mv.png" alt="Why Companies Actually Migrate LLMs" width="800" height="237"&gt;&lt;/p&gt;

&lt;h2&gt;
  
  
  Common Migration Scenarios
&lt;/h2&gt;

&lt;h3&gt;
  
  
  1️⃣ Expanding into Regions With Data Residency Laws
&lt;/h3&gt;

&lt;p&gt;Your product currently runs on Azure OpenAI (&lt;code&gt;gpt-4o-mini&lt;/code&gt;), but a new region requires:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;EU sovereign cloud&lt;/li&gt;
&lt;li&gt;Local data processing&lt;/li&gt;
&lt;li&gt;Provider-specific compliance certifications&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;You may need to move to &lt;strong&gt;AWS Bedrock&lt;/strong&gt;, &lt;strong&gt;Google Vertex AI&lt;/strong&gt;, or &lt;strong&gt;Azure AI Foundry with open models&lt;/strong&gt; — even though your application logic stays identical.&lt;/p&gt;




&lt;h3&gt;
  
  
  2️⃣ Enterprise Customers Want AI Inside Their Environment
&lt;/h3&gt;

&lt;p&gt;This is extremely common in &lt;strong&gt;B2B SaaS&lt;/strong&gt;. Enterprise customers often require:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Private cloud deployment&lt;/li&gt;
&lt;li&gt;VPC-only access&lt;/li&gt;
&lt;li&gt;On-prem inference&lt;/li&gt;
&lt;li&gt;Sovereign cloud environments&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;Your API-based model suddenly needs to become &lt;strong&gt;Llama&lt;/strong&gt;, &lt;strong&gt;Qwen&lt;/strong&gt;, &lt;strong&gt;DeepSeek&lt;/strong&gt;, or &lt;strong&gt;Mistral&lt;/strong&gt; — running inside &lt;em&gt;their&lt;/em&gt; infrastructure.&lt;/p&gt;




&lt;h3&gt;
  
  
  3️⃣ Corporate Cloud Standardisation
&lt;/h3&gt;

&lt;p&gt;A classic scenario:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;AI team → Azure&lt;/li&gt;
&lt;li&gt;Platform team → AWS&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;Leadership decides: &lt;em&gt;"All workloads must run on AWS."&lt;/em&gt;&lt;/p&gt;

&lt;p&gt;Now your team must translate &lt;code&gt;gpt-4o-mini&lt;/code&gt; into an AWS Bedrock equivalent — and the model catalog doesn't make that obvious.&lt;/p&gt;




&lt;h2&gt;
  
  
  The Problem: Model Names Don't Translate
&lt;/h2&gt;

&lt;p&gt;Each vendor uses completely different naming schemes. There is no official cross-provider model map.&lt;/p&gt;

&lt;div class="table-wrapper-paragraph"&gt;&lt;table&gt;
&lt;thead&gt;
&lt;tr&gt;
&lt;th&gt;Vendor&lt;/th&gt;
&lt;th&gt;Entry Model&lt;/th&gt;
&lt;th&gt;Mid Model&lt;/th&gt;
&lt;th&gt;Reasoning Model&lt;/th&gt;
&lt;/tr&gt;
&lt;/thead&gt;
&lt;tbody&gt;
&lt;tr&gt;
&lt;td&gt;&lt;strong&gt;OpenAI&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;&lt;code&gt;gpt-4o-mini&lt;/code&gt;&lt;/td&gt;
&lt;td&gt;&lt;code&gt;gpt-4o&lt;/code&gt;&lt;/td&gt;
&lt;td&gt;o-series&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;&lt;strong&gt;Anthropic&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;Haiku&lt;/td&gt;
&lt;td&gt;Sonnet&lt;/td&gt;
&lt;td&gt;Opus&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;&lt;strong&gt;Google&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;Flash&lt;/td&gt;
&lt;td&gt;Pro&lt;/td&gt;
&lt;td&gt;Pro reasoning&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;&lt;strong&gt;Meta&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;Scout&lt;/td&gt;
&lt;td&gt;Maverick&lt;/td&gt;
&lt;td&gt;Large variants&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;&lt;strong&gt;Qwen&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;Small (7B–14B)&lt;/td&gt;
&lt;td&gt;72B–110B&lt;/td&gt;
&lt;td&gt;235B Thinking&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;&lt;strong&gt;DeepSeek&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;V3 (non-thinking)&lt;/td&gt;
&lt;td&gt;V3 standard&lt;/td&gt;
&lt;td&gt;R1 (reasoning)&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;
&lt;/table&gt;&lt;/div&gt;

&lt;h3&gt;
  
  
  Common mistakes teams make
&lt;/h3&gt;

&lt;ul&gt;
&lt;li&gt;❌ Picking the &lt;strong&gt;cheapest&lt;/strong&gt; model in the new catalog&lt;/li&gt;
&lt;li&gt;❌ Picking the &lt;strong&gt;newest&lt;/strong&gt; model by release date&lt;/li&gt;
&lt;li&gt;❌ Picking the &lt;strong&gt;highest benchmark&lt;/strong&gt; model regardless of tier&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;All three approaches can silently break production behaviour.&lt;/p&gt;




&lt;h2&gt;
  
  
  The Tier Model That Actually Works
&lt;/h2&gt;

&lt;p&gt;Instead of comparing names, compare &lt;strong&gt;capability tiers&lt;/strong&gt;. Every major provider follows the same four-tier structure.&lt;/p&gt;

&lt;div class="table-wrapper-paragraph"&gt;&lt;table&gt;
&lt;thead&gt;
&lt;tr&gt;
&lt;th&gt;Tier&lt;/th&gt;
&lt;th&gt;Typical Use Cases&lt;/th&gt;
&lt;th&gt;Latency&lt;/th&gt;
&lt;th&gt;Relative Cost&lt;/th&gt;
&lt;/tr&gt;
&lt;/thead&gt;
&lt;tbody&gt;
&lt;tr&gt;
&lt;td&gt;&lt;strong&gt;Mini / Flash / Small&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;Chatbots, RAG, classification&lt;/td&gt;
&lt;td&gt;Fastest&lt;/td&gt;
&lt;td&gt;Lowest&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;&lt;strong&gt;Standard / Mid&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;Assistants, summarisation, coding&lt;/td&gt;
&lt;td&gt;Medium&lt;/td&gt;
&lt;td&gt;Moderate&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;&lt;strong&gt;Reasoning / Pro&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;Agents, planning, complex Q&amp;amp;A&lt;/td&gt;
&lt;td&gt;Slower&lt;/td&gt;
&lt;td&gt;Higher&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;&lt;strong&gt;Frontier / Flagship&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;Research workloads, safety-critical&lt;/td&gt;
&lt;td&gt;Slowest&lt;/td&gt;
&lt;td&gt;Highest&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;
&lt;/table&gt;&lt;/div&gt;

&lt;p&gt;Once you know your current model's tier, finding candidates becomes systematic — not guesswork.&lt;/p&gt;




&lt;h2&gt;
  
  
  Tier 1 — Replacing &lt;code&gt;gpt-4o-mini&lt;/code&gt;
&lt;/h2&gt;

&lt;p&gt;&lt;strong&gt;Typical workloads:&lt;/strong&gt; chat assistants, RAG pipelines, tool calling, lightweight coding&lt;/p&gt;

&lt;h3&gt;
  
  
  Candidates by cloud
&lt;/h3&gt;

&lt;div class="table-wrapper-paragraph"&gt;&lt;table&gt;
&lt;thead&gt;
&lt;tr&gt;
&lt;th&gt;Cloud&lt;/th&gt;
&lt;th&gt;Replacement Candidates&lt;/th&gt;
&lt;/tr&gt;
&lt;/thead&gt;
&lt;tbody&gt;
&lt;tr&gt;
&lt;td&gt;&lt;strong&gt;Azure AI Foundry&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;Llama 4 Scout, Qwen3-8B/14B, DeepSeek V3, Claude Haiku&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;&lt;strong&gt;AWS Bedrock&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;Claude Haiku, Mistral Small&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;&lt;strong&gt;GCP Vertex AI&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;Gemini Flash-Lite, Gemini Flash&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;
&lt;/table&gt;&lt;/div&gt;

&lt;h3&gt;
  
  
  Behaviour differences at this tier
&lt;/h3&gt;

&lt;div class="table-wrapper-paragraph"&gt;&lt;table&gt;
&lt;thead&gt;
&lt;tr&gt;
&lt;th&gt;Model&lt;/th&gt;
&lt;th&gt;Strengths&lt;/th&gt;
&lt;th&gt;Watch out for&lt;/th&gt;
&lt;/tr&gt;
&lt;/thead&gt;
&lt;tbody&gt;
&lt;tr&gt;
&lt;td&gt;&lt;strong&gt;Claude Haiku&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;Reliable, low hallucination rate&lt;/td&gt;
&lt;td&gt;~7× more expensive than &lt;code&gt;gpt-4o-mini&lt;/code&gt;
&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;&lt;strong&gt;Gemini Flash&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;Extremely fast, 1M token context&lt;/td&gt;
&lt;td&gt;GCP-only; not available on Azure&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;&lt;strong&gt;Llama 4 Scout&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;Open-weight, 10M token context, Azure-hosted&lt;/td&gt;
&lt;td&gt;Not a pure reasoning-tuned model&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;&lt;strong&gt;DeepSeek V3&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;Unusually strong reasoning (MMLU-Pro ~75.9, GPQA ~59.1) for this price tier&lt;/td&gt;
&lt;td&gt;Direct API or Azure Foundry; no native AWS/GCP&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;&lt;strong&gt;Qwen3-8B/14B&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;Strong multilingual + math, Apache 2.0&lt;/td&gt;
&lt;td&gt;Smaller context than Gemini/Llama&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;
&lt;/table&gt;&lt;/div&gt;




&lt;h2&gt;
  
  
  Tier 2 — Replacing &lt;code&gt;gpt-4o&lt;/code&gt;
&lt;/h2&gt;

&lt;p&gt;&lt;strong&gt;Typical workloads:&lt;/strong&gt; document summarisation, coding assistance, enterprise chat assistants&lt;/p&gt;

&lt;h3&gt;
  
  
  Candidates by cloud
&lt;/h3&gt;

&lt;div class="table-wrapper-paragraph"&gt;&lt;table&gt;
&lt;thead&gt;
&lt;tr&gt;
&lt;th&gt;Cloud&lt;/th&gt;
&lt;th&gt;Replacement Candidates&lt;/th&gt;
&lt;/tr&gt;
&lt;/thead&gt;
&lt;tbody&gt;
&lt;tr&gt;
&lt;td&gt;&lt;strong&gt;Azure AI Foundry&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;Claude Sonnet, Llama 4 Maverick, DeepSeek V3&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;&lt;strong&gt;AWS Bedrock&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;Claude Sonnet, Mistral Medium&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;&lt;strong&gt;GCP Vertex AI&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;Gemini Flash, Gemini 2.5 Pro&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;
&lt;/table&gt;&lt;/div&gt;

&lt;h3&gt;
  
  
  Benchmark reference (reasoning quality)
&lt;/h3&gt;

&lt;div class="table-wrapper-paragraph"&gt;&lt;table&gt;
&lt;thead&gt;
&lt;tr&gt;
&lt;th&gt;Model&lt;/th&gt;
&lt;th&gt;MMLU&lt;/th&gt;
&lt;th&gt;MMLU-Pro&lt;/th&gt;
&lt;th&gt;GPQA-Diamond&lt;/th&gt;
&lt;th&gt;Notes&lt;/th&gt;
&lt;/tr&gt;
&lt;/thead&gt;
&lt;tbody&gt;
&lt;tr&gt;
&lt;td&gt;&lt;strong&gt;Claude Sonnet 4.x&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;~88+&lt;/td&gt;
&lt;td&gt;Strong&lt;/td&gt;
&lt;td&gt;Competitive&lt;/td&gt;
&lt;td&gt;Best SWE-bench coding score at this tier&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;&lt;strong&gt;Llama 4 Maverick&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;~85+&lt;/td&gt;
&lt;td&gt;~80.5&lt;/td&gt;
&lt;td&gt;~69.8&lt;/td&gt;
&lt;td&gt;Beats GPT-4o on Meta's coding benchmarks&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;&lt;strong&gt;DeepSeek V3&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;88.5&lt;/td&gt;
&lt;td&gt;75.9–81.2&lt;/td&gt;
&lt;td&gt;59.1–68.4&lt;/td&gt;
&lt;td&gt;Frontier-class at mid-tier pricing&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;&lt;strong&gt;Gemini Flash (GCP)&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;Strong&lt;/td&gt;
&lt;td&gt;Competitive&lt;/td&gt;
&lt;td&gt;~78% SWE-bench&lt;/td&gt;
&lt;td&gt;GCP-only; fastest in this tier&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;
&lt;/table&gt;&lt;/div&gt;

&lt;blockquote&gt;
&lt;p&gt;DeepSeek V3 on Azure often outperforms &lt;code&gt;gpt-4o&lt;/code&gt; on raw reasoning benchmarks at significantly lower cost. Treat it as a tier upgrade, not just a replacement.&lt;/p&gt;
&lt;/blockquote&gt;




&lt;h2&gt;
  
  
  Tier 3 — Replacing Reasoning Models (&lt;code&gt;gpt-4.1&lt;/code&gt; / &lt;code&gt;gpt-5&lt;/code&gt; / o-series)
&lt;/h2&gt;

&lt;p&gt;&lt;strong&gt;Typical workloads:&lt;/strong&gt; agent systems, research workflows, complex multi-step reasoning&lt;/p&gt;

&lt;h3&gt;
  
  
  Candidates by cloud
&lt;/h3&gt;

&lt;div class="table-wrapper-paragraph"&gt;&lt;table&gt;
&lt;thead&gt;
&lt;tr&gt;
&lt;th&gt;Cloud&lt;/th&gt;
&lt;th&gt;Replacement Candidates&lt;/th&gt;
&lt;/tr&gt;
&lt;/thead&gt;
&lt;tbody&gt;
&lt;tr&gt;
&lt;td&gt;&lt;strong&gt;Azure AI Foundry&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;Claude Opus, DeepSeek R1, Qwen3-235B Thinking (via Foundry)&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;&lt;strong&gt;AWS Bedrock&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;Claude Opus&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;&lt;strong&gt;GCP Vertex AI&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;Gemini 2.5 Pro, Gemini 3.1 Pro&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;
&lt;/table&gt;&lt;/div&gt;

&lt;h3&gt;
  
  
  Reasoning benchmark reference (HLE + advanced)
&lt;/h3&gt;

&lt;div class="table-wrapper-paragraph"&gt;&lt;table&gt;
&lt;thead&gt;
&lt;tr&gt;
&lt;th&gt;Model&lt;/th&gt;
&lt;th&gt;HLE Score&lt;/th&gt;
&lt;th&gt;MMLU-Pro&lt;/th&gt;
&lt;th&gt;GPQA-Diamond&lt;/th&gt;
&lt;th&gt;AIME-25&lt;/th&gt;
&lt;/tr&gt;
&lt;/thead&gt;
&lt;tbody&gt;
&lt;tr&gt;
&lt;td&gt;&lt;strong&gt;Claude Opus 4.x&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;Top-tier (Anthropic reports #1 on HLE leaderboard)&lt;/td&gt;
&lt;td&gt;~90+&lt;/td&gt;
&lt;td&gt;Strong&lt;/td&gt;
&lt;td&gt;Strong&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;&lt;strong&gt;Qwen3-235B-A22B Thinking&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;
&lt;strong&gt;~18%&lt;/strong&gt; (one of few published open-weight HLE scores)&lt;/td&gt;
&lt;td&gt;~84.4%&lt;/td&gt;
&lt;td&gt;~81%&lt;/td&gt;
&lt;td&gt;~92%&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;&lt;strong&gt;DeepSeek R1&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;Not widely published&lt;/td&gt;
&lt;td&gt;~81.2&lt;/td&gt;
&lt;td&gt;~68.4&lt;/td&gt;
&lt;td&gt;Strong&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;&lt;strong&gt;Gemini 2.5 / 3.1 Pro&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;Competitive&lt;/td&gt;
&lt;td&gt;Strong&lt;/td&gt;
&lt;td&gt;Strong&lt;/td&gt;
&lt;td&gt;Strong&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;
&lt;/table&gt;&lt;/div&gt;

&lt;blockquote&gt;
&lt;p&gt;&lt;strong&gt;Qwen3-235B-A22B Thinking&lt;/strong&gt; is currently one of the few open-weight models with a &lt;strong&gt;published Humanity's Last Exam score (~18%)&lt;/strong&gt; — putting it in the same conversation as frontier closed models for reasoning-heavy tasks.&lt;/p&gt;
&lt;/blockquote&gt;




&lt;h2&gt;
  
  
  Architecture Pattern: Make LLMs Replaceable
&lt;/h2&gt;

&lt;p&gt;The biggest mistake teams make is &lt;strong&gt;hard-coding a model into their architecture&lt;/strong&gt;.&lt;/p&gt;

&lt;h3&gt;
  
  
  ❌ The fragile pattern
&lt;/h3&gt;



&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight plaintext"&gt;&lt;code&gt;Application → GPT-4o-mini (hardcoded)
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;Any migration requires touching application logic, service config, and prompt templates.&lt;/p&gt;

&lt;h3&gt;
  
  
  ✅ The replaceable pattern
&lt;/h3&gt;

&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fu04ixmxhxb99gc7dfe5s.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fu04ixmxhxb99gc7dfe5s.png" alt="The replaceable pattern" width="800" height="566"&gt;&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Benefits:&lt;/strong&gt;&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Vendor independence — swap providers via config&lt;/li&gt;
&lt;li&gt;Easier model upgrades without app rewrites&lt;/li&gt;
&lt;li&gt;Enables cost-optimised routing&lt;/li&gt;
&lt;/ul&gt;




&lt;h2&gt;
  
  
  Cost Optimisation: Intelligent Request Routing
&lt;/h2&gt;

&lt;p&gt;Many production AI systems at scale route requests by &lt;strong&gt;task complexity&lt;/strong&gt; rather than using one model for everything.&lt;/p&gt;

&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2F7qr9b496z534g9hna69x.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2F7qr9b496z534g9hna69x.png" alt="Cost Optimisation: Intelligent Request Routing" width="800" height="205"&gt;&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;This pattern can reduce LLM costs by &lt;strong&gt;60–90%&lt;/strong&gt; in workloads with a mix of simple and complex requests.&lt;/p&gt;




&lt;h2&gt;
  
  
  Prompt Regression Testing — Non-Negotiable
&lt;/h2&gt;

&lt;p&gt;Before committing to any model swap, run &lt;strong&gt;prompt regression tests&lt;/strong&gt; on your real production prompts.&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight python"&gt;&lt;code&gt;&lt;span class="c1"&gt;# Simple regression harness
&lt;/span&gt;&lt;span class="n"&gt;test_prompts&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nf"&gt;load_production_samples&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;n&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="mi"&gt;200&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;

&lt;span class="n"&gt;results&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="p"&gt;[]&lt;/span&gt;
&lt;span class="k"&gt;for&lt;/span&gt; &lt;span class="n"&gt;prompt&lt;/span&gt; &lt;span class="ow"&gt;in&lt;/span&gt; &lt;span class="n"&gt;test_prompts&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;
    &lt;span class="n"&gt;output_a&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="n"&gt;old_model&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;invoke&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;prompt&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;
    &lt;span class="n"&gt;output_b&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="n"&gt;new_model&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;invoke&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;prompt&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;

    &lt;span class="n"&gt;score_a&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nf"&gt;evaluate&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;output_a&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;   &lt;span class="c1"&gt;# correctness, format, hallucination
&lt;/span&gt;    &lt;span class="n"&gt;score_b&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nf"&gt;evaluate&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;output_b&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;

    &lt;span class="n"&gt;results&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;append&lt;/span&gt;&lt;span class="p"&gt;({&lt;/span&gt;
        &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;prompt&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="n"&gt;prompt&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
        &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;score_a&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="n"&gt;score_a&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
        &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;score_b&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="n"&gt;score_b&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
        &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;regression&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="n"&gt;score_b&lt;/span&gt; &lt;span class="o"&gt;&amp;lt;&lt;/span&gt; &lt;span class="n"&gt;score_a&lt;/span&gt; &lt;span class="o"&gt;*&lt;/span&gt; &lt;span class="mf"&gt;0.95&lt;/span&gt;
    &lt;span class="p"&gt;})&lt;/span&gt;

&lt;span class="n"&gt;regressions&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="n"&gt;r&lt;/span&gt; &lt;span class="k"&gt;for&lt;/span&gt; &lt;span class="n"&gt;r&lt;/span&gt; &lt;span class="ow"&gt;in&lt;/span&gt; &lt;span class="n"&gt;results&lt;/span&gt; &lt;span class="k"&gt;if&lt;/span&gt; &lt;span class="n"&gt;r&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;regression&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;]]&lt;/span&gt;
&lt;span class="nf"&gt;print&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="sa"&gt;f&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;Regression rate: &lt;/span&gt;&lt;span class="si"&gt;{&lt;/span&gt;&lt;span class="nf"&gt;len&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;regressions&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;&lt;span class="o"&gt;/&lt;/span&gt;&lt;span class="nf"&gt;len&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;results&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;&lt;span class="o"&gt;*&lt;/span&gt;&lt;span class="mi"&gt;100&lt;/span&gt;&lt;span class="si"&gt;:&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="mi"&gt;1&lt;/span&gt;&lt;span class="n"&gt;f&lt;/span&gt;&lt;span class="si"&gt;}&lt;/span&gt;&lt;span class="s"&gt;%&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;Check for:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;
&lt;strong&gt;Correctness&lt;/strong&gt; — does the answer change?&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Format compliance&lt;/strong&gt; — does it still follow your output structure?&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Hallucination rate&lt;/strong&gt; — does it fabricate facts?&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Latency&lt;/strong&gt; — does it still meet your SLA?&lt;/li&gt;
&lt;/ul&gt;




&lt;h2&gt;
  
  
  ⚠️ Important Disclaimer
&lt;/h2&gt;

&lt;p&gt;The mappings in this guide are a &lt;strong&gt;starting point&lt;/strong&gt;, not guaranteed drop-in replacements.&lt;/p&gt;

&lt;p&gt;Models in the same tier can have meaningfully different behaviour on:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Your specific domain vocabulary&lt;/li&gt;
&lt;li&gt;Your prompt style&lt;/li&gt;
&lt;li&gt;Edge cases in your data&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;Every migration must include:&lt;/p&gt;

&lt;ol&gt;
&lt;li&gt;Prompt regression testing on real data&lt;/li&gt;
&lt;li&gt;Human evaluation of sampled outputs&lt;/li&gt;
&lt;li&gt;Shadow traffic validation (run both models in parallel, compare outputs)&lt;/li&gt;
&lt;li&gt;Gradual rollout (5% → 25% → 100%)&lt;/li&gt;
&lt;/ol&gt;




&lt;h2&gt;
  
  
  Recommended Migration Workflow
&lt;/h2&gt;

&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fnjhlhmxlmltol3rm2f5d.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fnjhlhmxlmltol3rm2f5d.png" alt="Recommended Migration Workflow" width="336" height="1080"&gt;&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;Do not skip shadow traffic. It catches subtle regressions that prompt tests miss.&lt;/p&gt;




&lt;h2&gt;
  
  
  📋 LLM Migration Cheat Sheet
&lt;/h2&gt;

&lt;h3&gt;
  
  
  Mini Tier (&lt;code&gt;gpt-4o-mini&lt;/code&gt; equivalent)
&lt;/h3&gt;

&lt;ul&gt;
&lt;li&gt;
&lt;strong&gt;Azure&lt;/strong&gt; → Llama 4 Scout, Qwen3-8B/14B, DeepSeek V3, Claude Haiku&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;AWS&lt;/strong&gt; → Claude Haiku, Mistral Small&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;GCP&lt;/strong&gt; → Gemini Flash-Lite, Gemini Flash&lt;/li&gt;
&lt;/ul&gt;

&lt;h3&gt;
  
  
  Standard Tier (&lt;code&gt;gpt-4o&lt;/code&gt; equivalent)
&lt;/h3&gt;

&lt;ul&gt;
&lt;li&gt;
&lt;strong&gt;Azure&lt;/strong&gt; → Claude Sonnet, Llama 4 Maverick, DeepSeek V3&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;AWS&lt;/strong&gt; → Claude Sonnet, Mistral Medium&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;GCP&lt;/strong&gt; → Gemini Flash, Gemini 2.5 Pro&lt;/li&gt;
&lt;/ul&gt;

&lt;h3&gt;
  
  
  Reasoning Tier (o-series / &lt;code&gt;gpt-5&lt;/code&gt; equivalent)
&lt;/h3&gt;

&lt;ul&gt;
&lt;li&gt;
&lt;strong&gt;Azure&lt;/strong&gt; → Claude Opus, DeepSeek R1, Qwen3-235B Thinking&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;AWS&lt;/strong&gt; → Claude Opus&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;GCP&lt;/strong&gt; → Gemini 2.5 Pro, Gemini 3.1 Pro&lt;/li&gt;
&lt;/ul&gt;




&lt;h2&gt;
  
  
  Final Takeaway
&lt;/h2&gt;

&lt;p&gt;The LLM ecosystem is evolving too fast to depend on a single provider.&lt;/p&gt;

&lt;p&gt;Design your systems so that:&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight plaintext"&gt;&lt;code&gt;GPT → Claude → Gemini → DeepSeek
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;…is a &lt;strong&gt;configuration change&lt;/strong&gt;, not a &lt;strong&gt;system rewrite&lt;/strong&gt;.&lt;/p&gt;

&lt;p&gt;When that happens, migrations become boring infrastructure work.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;And boring infrastructure is exactly what you want in production.&lt;/strong&gt;&lt;/p&gt;




&lt;p&gt;&lt;em&gt;Found this useful? Follow &lt;a href="https://dev.to/theprodsde"&gt;TheProdSDE&lt;/a&gt; for more practical engineering guides on AI systems, cloud architecture, and developer tooling.&lt;/em&gt;&lt;/p&gt;




&lt;p&gt;&lt;strong&gt;Tags:&lt;/strong&gt; &lt;code&gt;#ai&lt;/code&gt; &lt;code&gt;#llm&lt;/code&gt; &lt;code&gt;#cloud&lt;/code&gt; &lt;code&gt;#azure&lt;/code&gt; &lt;code&gt;#systemdesign&lt;/code&gt;&lt;/p&gt;

</description>
      <category>ai</category>
      <category>llm</category>
      <category>cloud</category>
      <category>systemdesign</category>
    </item>
    <item>
      <title>Worth your 10min of time if deciding which framework suits your use case better</title>
      <dc:creator>TheProdSDE</dc:creator>
      <pubDate>Fri, 13 Mar 2026 14:48:04 +0000</pubDate>
      <link>https://dev.to/theprodsde/worth-your-10min-of-time-if-deciding-which-framework-suits-your-use-case-better-3gjp</link>
      <guid>https://dev.to/theprodsde/worth-your-10min-of-time-if-deciding-which-framework-suits-your-use-case-better-3gjp</guid>
      <description>&lt;div class="ltag__link"&gt;
  &lt;a href="/theprodsde" class="ltag__link__link"&gt;
    &lt;div class="ltag__link__pic"&gt;
      &lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Fuser%2Fprofile_image%2F3752909%2F36ae54b2-61fd-4d29-b097-ed6fbe2cd7ce.png" alt="theprodsde"&gt;
    &lt;/div&gt;
  &lt;/a&gt;
  &lt;a href="https://dev.to/theprodsde/langgraph-vs-semantic-kernel-python-ai-agents-in-2026-1p4g" class="ltag__link__link"&gt;
    &lt;div class="ltag__link__content"&gt;
      &lt;h2&gt;LangGraph vs Semantic Kernel: Python AI Agents in 2026&lt;/h2&gt;
      &lt;h3&gt;TheProdSDE ・ Mar 11&lt;/h3&gt;
      &lt;div class="ltag__link__taglist"&gt;
        &lt;span class="ltag__link__tag"&gt;#python&lt;/span&gt;
        &lt;span class="ltag__link__tag"&gt;#ai&lt;/span&gt;
        &lt;span class="ltag__link__tag"&gt;#agents&lt;/span&gt;
        &lt;span class="ltag__link__tag"&gt;#langchain&lt;/span&gt;
      &lt;/div&gt;
    &lt;/div&gt;
  &lt;/a&gt;
&lt;/div&gt;


</description>
      <category>python</category>
      <category>ai</category>
      <category>agents</category>
      <category>langchain</category>
    </item>
    <item>
      <title>[Boost]</title>
      <dc:creator>TheProdSDE</dc:creator>
      <pubDate>Fri, 13 Mar 2026 14:47:02 +0000</pubDate>
      <link>https://dev.to/theprodsde/-3fa5</link>
      <guid>https://dev.to/theprodsde/-3fa5</guid>
      <description>&lt;div class="ltag__link"&gt;
  &lt;a href="/theprodsde" class="ltag__link__link"&gt;
    &lt;div class="ltag__link__pic"&gt;
      &lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Fuser%2Fprofile_image%2F3752909%2F36ae54b2-61fd-4d29-b097-ed6fbe2cd7ce.png" alt="theprodsde"&gt;
    &lt;/div&gt;
  &lt;/a&gt;
  &lt;a href="https://dev.to/theprodsde/7-ai-prompts-senior-engineers-use-to-build-production-code-faster-4482" class="ltag__link__link"&gt;
    &lt;div class="ltag__link__content"&gt;
      &lt;h2&gt;7 AI Prompts Senior Engineers Use to Build Production Code Faster&lt;/h2&gt;
      &lt;h3&gt;TheProdSDE ・ Mar 13&lt;/h3&gt;
      &lt;div class="ltag__link__taglist"&gt;
        &lt;span class="ltag__link__tag"&gt;#ai&lt;/span&gt;
        &lt;span class="ltag__link__tag"&gt;#programming&lt;/span&gt;
        &lt;span class="ltag__link__tag"&gt;#productivity&lt;/span&gt;
        &lt;span class="ltag__link__tag"&gt;#software&lt;/span&gt;
      &lt;/div&gt;
    &lt;/div&gt;
  &lt;/a&gt;
&lt;/div&gt;


</description>
      <category>ai</category>
      <category>programming</category>
      <category>productivity</category>
      <category>software</category>
    </item>
    <item>
      <title>7 AI Prompts Senior Engineers Use to Build Production Code Faster</title>
      <dc:creator>TheProdSDE</dc:creator>
      <pubDate>Fri, 13 Mar 2026 13:15:10 +0000</pubDate>
      <link>https://dev.to/theprodsde/7-ai-prompts-senior-engineers-use-to-build-production-code-faster-4482</link>
      <guid>https://dev.to/theprodsde/7-ai-prompts-senior-engineers-use-to-build-production-code-faster-4482</guid>
      <description>&lt;blockquote&gt;
&lt;p&gt;AI coding tools are powerful — but most developers still use them like autocomplete.&lt;br&gt;
The difference between average AI usage and &lt;strong&gt;10x productivity&lt;/strong&gt; often comes down to one thing:&lt;br&gt;
&lt;strong&gt;how you prompt the system.&lt;/strong&gt;&lt;/p&gt;
&lt;/blockquote&gt;

&lt;p&gt;In a previous article, I explained why teams should stop treating AI like a magic architect and instead treat it like &lt;strong&gt;a fast junior engineer inside a well-designed system&lt;/strong&gt;.&lt;/p&gt;

&lt;p&gt;But that raises an important question:&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;How do experienced engineers actually prompt AI tools in real production workflows?&lt;/strong&gt;&lt;/p&gt;

&lt;p&gt;Below are &lt;strong&gt;7 practical prompts senior engineers use&lt;/strong&gt; to guide AI coding assistants toward reliable, production-quality output.&lt;/p&gt;




&lt;h2&gt;
  
  
  What You'll Learn
&lt;/h2&gt;

&lt;ul&gt;
&lt;li&gt;How senior engineers structure prompts for AI coding tools&lt;/li&gt;
&lt;li&gt;Why constraints improve AI-generated code&lt;/li&gt;
&lt;li&gt;Practical prompts you can reuse in real projects&lt;/li&gt;
&lt;li&gt;How to avoid common AI development mistakes&lt;/li&gt;
&lt;/ul&gt;




&lt;h2&gt;
  
  
  Prompt 1 — Spec → Implementation
&lt;/h2&gt;

&lt;p&gt;One of the most effective patterns is &lt;strong&gt;defining the specification first&lt;/strong&gt;.&lt;/p&gt;

&lt;p&gt;Instead of asking the AI to build a feature from scratch, you provide &lt;strong&gt;clear constraints and structure&lt;/strong&gt;.&lt;/p&gt;

&lt;p&gt;Example prompt:&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight typescript"&gt;&lt;code&gt;&lt;span class="nx"&gt;You&lt;/span&gt; &lt;span class="nx"&gt;are&lt;/span&gt; &lt;span class="nx"&gt;a&lt;/span&gt; &lt;span class="nx"&gt;senior&lt;/span&gt; &lt;span class="nx"&gt;backend&lt;/span&gt; &lt;span class="nx"&gt;engineer&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;

&lt;span class="nx"&gt;Implement&lt;/span&gt; &lt;span class="nx"&gt;the&lt;/span&gt; &lt;span class="nx"&gt;following&lt;/span&gt; &lt;span class="kr"&gt;interface&lt;/span&gt; &lt;span class="k"&gt;in&lt;/span&gt; &lt;span class="nx"&gt;TypeScript&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;

&lt;span class="nx"&gt;Constraints&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;
&lt;span class="o"&gt;-&lt;/span&gt; &lt;span class="nx"&gt;Follow&lt;/span&gt; &lt;span class="nx"&gt;the&lt;/span&gt; &lt;span class="nx"&gt;existing&lt;/span&gt; &lt;span class="nx"&gt;repository&lt;/span&gt; &lt;span class="nx"&gt;pattern&lt;/span&gt;
&lt;span class="o"&gt;-&lt;/span&gt; &lt;span class="nx"&gt;Do&lt;/span&gt; &lt;span class="nx"&gt;not&lt;/span&gt; &lt;span class="nx"&gt;introduce&lt;/span&gt; &lt;span class="k"&gt;new&lt;/span&gt; &lt;span class="nx"&gt;dependencies&lt;/span&gt;
&lt;span class="o"&gt;-&lt;/span&gt; &lt;span class="nx"&gt;Use&lt;/span&gt; &lt;span class="nx"&gt;the&lt;/span&gt; &lt;span class="nx"&gt;existing&lt;/span&gt; &lt;span class="nx"&gt;error&lt;/span&gt; &lt;span class="nx"&gt;handling&lt;/span&gt; &lt;span class="nx"&gt;system&lt;/span&gt;
&lt;span class="o"&gt;-&lt;/span&gt; &lt;span class="nx"&gt;Write&lt;/span&gt; &lt;span class="nx"&gt;clean&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;maintainable&lt;/span&gt; &lt;span class="nx"&gt;code&lt;/span&gt;

&lt;span class="nx"&gt;Interface&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;
&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="nx"&gt;PASTE&lt;/span&gt; &lt;span class="nx"&gt;INTERFACE&lt;/span&gt; &lt;span class="nx"&gt;HERE&lt;/span&gt;&lt;span class="p"&gt;]&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;Why this works:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;The engineer controls architecture&lt;/li&gt;
&lt;li&gt;AI performs mechanical implementation&lt;/li&gt;
&lt;li&gt;Code stays consistent with the existing system&lt;/li&gt;
&lt;/ul&gt;




&lt;h2&gt;
  
  
  Prompt 2 — Safe Refactoring
&lt;/h2&gt;

&lt;p&gt;Refactoring is one of the safest and most valuable uses of AI coding tools.&lt;/p&gt;

&lt;p&gt;Example prompt:&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight typescript"&gt;&lt;code&gt;&lt;span class="nx"&gt;Refactor&lt;/span&gt; &lt;span class="k"&gt;this&lt;/span&gt; &lt;span class="nx"&gt;service&lt;/span&gt; &lt;span class="nx"&gt;to&lt;/span&gt; &lt;span class="nx"&gt;extract&lt;/span&gt; &lt;span class="nx"&gt;the&lt;/span&gt; &lt;span class="nx"&gt;email&lt;/span&gt; &lt;span class="nx"&gt;logic&lt;/span&gt; &lt;span class="nx"&gt;into&lt;/span&gt; &lt;span class="nx"&gt;a&lt;/span&gt; &lt;span class="nx"&gt;NotificationService&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;

&lt;span class="nx"&gt;Constraints&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;
&lt;span class="o"&gt;-&lt;/span&gt; &lt;span class="nx"&gt;Do&lt;/span&gt; &lt;span class="nx"&gt;not&lt;/span&gt; &lt;span class="nx"&gt;change&lt;/span&gt; &lt;span class="k"&gt;public&lt;/span&gt; &lt;span class="nx"&gt;APIs&lt;/span&gt;
&lt;span class="o"&gt;-&lt;/span&gt; &lt;span class="nx"&gt;Preserve&lt;/span&gt; &lt;span class="nx"&gt;existing&lt;/span&gt; &lt;span class="nx"&gt;behavior&lt;/span&gt;
&lt;span class="o"&gt;-&lt;/span&gt; &lt;span class="nx"&gt;Maintain&lt;/span&gt; &lt;span class="nx"&gt;current&lt;/span&gt; &lt;span class="nx"&gt;tests&lt;/span&gt;
&lt;span class="o"&gt;-&lt;/span&gt; &lt;span class="nx"&gt;Avoid&lt;/span&gt; &lt;span class="nx"&gt;introducing&lt;/span&gt; &lt;span class="nx"&gt;circular&lt;/span&gt; &lt;span class="nx"&gt;dependencies&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;Benefits:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Faster multi-file refactoring&lt;/li&gt;
&lt;li&gt;Lower risk of manual mistakes&lt;/li&gt;
&lt;li&gt;Improved code organization&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;AI becomes a &lt;strong&gt;high-speed refactoring assistant&lt;/strong&gt;.&lt;/p&gt;




&lt;h2&gt;
  
  
  Prompt 3 — Test Generation
&lt;/h2&gt;

&lt;p&gt;Writing test scaffolding is another task where AI shines.&lt;/p&gt;

&lt;p&gt;Example prompt:&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight typescript"&gt;&lt;code&gt;&lt;span class="nx"&gt;Generate&lt;/span&gt; &lt;span class="nx"&gt;Jest&lt;/span&gt; &lt;span class="nx"&gt;unit&lt;/span&gt; &lt;span class="nx"&gt;tests&lt;/span&gt; &lt;span class="k"&gt;for&lt;/span&gt; &lt;span class="k"&gt;this&lt;/span&gt; &lt;span class="nx"&gt;service&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;

&lt;span class="nx"&gt;Constraints&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;
&lt;span class="o"&gt;-&lt;/span&gt; &lt;span class="nx"&gt;Cover&lt;/span&gt; &lt;span class="nx"&gt;success&lt;/span&gt; &lt;span class="nx"&gt;and&lt;/span&gt; &lt;span class="nx"&gt;failure&lt;/span&gt; &lt;span class="nx"&gt;cases&lt;/span&gt;
&lt;span class="o"&gt;-&lt;/span&gt; &lt;span class="nx"&gt;Mock&lt;/span&gt; &lt;span class="nx"&gt;external&lt;/span&gt; &lt;span class="nx"&gt;dependencies&lt;/span&gt;
&lt;span class="o"&gt;-&lt;/span&gt; &lt;span class="nx"&gt;Follow&lt;/span&gt; &lt;span class="nx"&gt;our&lt;/span&gt; &lt;span class="nx"&gt;existing&lt;/span&gt; &lt;span class="nx"&gt;test&lt;/span&gt; &lt;span class="nx"&gt;structure&lt;/span&gt;
&lt;span class="o"&gt;-&lt;/span&gt; &lt;span class="nx"&gt;Focus&lt;/span&gt; &lt;span class="nx"&gt;on&lt;/span&gt; &lt;span class="nx"&gt;behavior&lt;/span&gt; &lt;span class="nx"&gt;rather&lt;/span&gt; &lt;span class="nx"&gt;than&lt;/span&gt; &lt;span class="nx"&gt;implementation&lt;/span&gt; &lt;span class="nx"&gt;details&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;AI can generate:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;basic test structures&lt;/li&gt;
&lt;li&gt;mocks&lt;/li&gt;
&lt;li&gt;scenario coverage&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;You still review tests, but the &lt;strong&gt;initial draft appears instantly&lt;/strong&gt;.&lt;/p&gt;




&lt;h2&gt;
  
  
  Prompt 4 — Code Review Assistant
&lt;/h2&gt;

&lt;p&gt;AI can act as a &lt;strong&gt;secondary reviewer&lt;/strong&gt; for pull requests.&lt;/p&gt;

&lt;p&gt;Example prompt:&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight typescript"&gt;&lt;code&gt;&lt;span class="nx"&gt;Review&lt;/span&gt; &lt;span class="nx"&gt;the&lt;/span&gt; &lt;span class="nx"&gt;following&lt;/span&gt; &lt;span class="nx"&gt;code&lt;/span&gt; &lt;span class="k"&gt;as&lt;/span&gt; &lt;span class="nx"&gt;a&lt;/span&gt; &lt;span class="nx"&gt;senior&lt;/span&gt; &lt;span class="nx"&gt;engineer&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;

&lt;span class="nx"&gt;Focus&lt;/span&gt; &lt;span class="nx"&gt;on&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;
&lt;span class="o"&gt;-&lt;/span&gt; &lt;span class="nx"&gt;architecture&lt;/span&gt; &lt;span class="nx"&gt;issues&lt;/span&gt;
&lt;span class="o"&gt;-&lt;/span&gt; &lt;span class="nx"&gt;potential&lt;/span&gt; &lt;span class="nx"&gt;bugs&lt;/span&gt;
&lt;span class="o"&gt;-&lt;/span&gt; &lt;span class="nx"&gt;performance&lt;/span&gt; &lt;span class="nx"&gt;concerns&lt;/span&gt;
&lt;span class="o"&gt;-&lt;/span&gt; &lt;span class="nx"&gt;unnecessary&lt;/span&gt; &lt;span class="nx"&gt;complexity&lt;/span&gt;
&lt;span class="o"&gt;-&lt;/span&gt; &lt;span class="nx"&gt;security&lt;/span&gt; &lt;span class="nx"&gt;risks&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;This helps engineers quickly surface:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;hidden edge cases&lt;/li&gt;
&lt;li&gt;inefficient patterns&lt;/li&gt;
&lt;li&gt;potential race conditions&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;It does not replace human review, but it &lt;strong&gt;improves signal detection&lt;/strong&gt;.&lt;/p&gt;




&lt;h2&gt;
  
  
  Prompt 5 — Documentation Generation
&lt;/h2&gt;

&lt;p&gt;Documentation is often neglected in fast-moving teams.&lt;/p&gt;

&lt;p&gt;AI can generate a &lt;strong&gt;first draft instantly&lt;/strong&gt;.&lt;/p&gt;

&lt;p&gt;Example prompt:&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight plaintext"&gt;&lt;code&gt;Generate documentation for this API endpoint.

Include:
- request format
- response format
- error conditions
- example usage
- explanation of business logic
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;Engineers then refine the output instead of writing everything from scratch.&lt;/p&gt;




&lt;h2&gt;
  
  
  Prompt 6 — Guided Multi-File Refactors
&lt;/h2&gt;

&lt;p&gt;Large refactors across many files are tedious and error-prone.&lt;/p&gt;

&lt;p&gt;AI can help coordinate them.&lt;/p&gt;

&lt;p&gt;Example prompt:&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight typescript"&gt;&lt;code&gt;&lt;span class="nx"&gt;Find&lt;/span&gt; &lt;span class="nx"&gt;all&lt;/span&gt; &lt;span class="nx"&gt;usages&lt;/span&gt; &lt;span class="k"&gt;of&lt;/span&gt; &lt;span class="nx"&gt;sendEmail&lt;/span&gt; &lt;span class="nx"&gt;across&lt;/span&gt; &lt;span class="nx"&gt;the&lt;/span&gt; &lt;span class="nx"&gt;repository&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;

&lt;span class="nx"&gt;Refactor&lt;/span&gt; &lt;span class="nx"&gt;them&lt;/span&gt; &lt;span class="nx"&gt;to&lt;/span&gt; &lt;span class="nx"&gt;use&lt;/span&gt; &lt;span class="nx"&gt;NotificationService&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;

&lt;span class="nx"&gt;Constraints&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;
&lt;span class="o"&gt;-&lt;/span&gt; &lt;span class="nx"&gt;Preserve&lt;/span&gt; &lt;span class="nx"&gt;existing&lt;/span&gt; &lt;span class="nx"&gt;behavior&lt;/span&gt;
&lt;span class="o"&gt;-&lt;/span&gt; &lt;span class="nx"&gt;Do&lt;/span&gt; &lt;span class="nx"&gt;not&lt;/span&gt; &lt;span class="nx"&gt;modify&lt;/span&gt; &lt;span class="nx"&gt;unrelated&lt;/span&gt; &lt;span class="nx"&gt;logic&lt;/span&gt;
&lt;span class="o"&gt;-&lt;/span&gt; &lt;span class="nx"&gt;Update&lt;/span&gt; &lt;span class="nx"&gt;imports&lt;/span&gt; &lt;span class="k"&gt;if&lt;/span&gt; &lt;span class="nx"&gt;necessary&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;AI tools integrated with IDEs can perform these changes much faster than manual edits.&lt;/p&gt;




&lt;h2&gt;
  
  
  Prompt 7 — Architecture Sanity Check
&lt;/h2&gt;

&lt;p&gt;One of the most useful prompts is asking AI to &lt;strong&gt;analyze architectural risks&lt;/strong&gt;.&lt;/p&gt;

&lt;p&gt;Example prompt:&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight typescript"&gt;&lt;code&gt;&lt;span class="nx"&gt;Analyze&lt;/span&gt; &lt;span class="k"&gt;this&lt;/span&gt; &lt;span class="kr"&gt;module&lt;/span&gt; &lt;span class="k"&gt;for&lt;/span&gt; &lt;span class="nx"&gt;potential&lt;/span&gt; &lt;span class="nx"&gt;architecture&lt;/span&gt; &lt;span class="nx"&gt;issues&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;

&lt;span class="nx"&gt;Look&lt;/span&gt; &lt;span class="k"&gt;for&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;
&lt;span class="o"&gt;-&lt;/span&gt; &lt;span class="nx"&gt;tight&lt;/span&gt; &lt;span class="nx"&gt;coupling&lt;/span&gt;
&lt;span class="o"&gt;-&lt;/span&gt; &lt;span class="nx"&gt;duplicated&lt;/span&gt; &lt;span class="nx"&gt;logic&lt;/span&gt;
&lt;span class="o"&gt;-&lt;/span&gt; &lt;span class="nx"&gt;boundary&lt;/span&gt; &lt;span class="nx"&gt;violations&lt;/span&gt;
&lt;span class="o"&gt;-&lt;/span&gt; &lt;span class="nx"&gt;hidden&lt;/span&gt; &lt;span class="nx"&gt;dependencies&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;This works surprisingly well when reviewing complex modules.&lt;/p&gt;

&lt;p&gt;AI often catches patterns that are easy to miss during routine coding.&lt;/p&gt;




&lt;h2&gt;
  
  
  Why Constraints Make AI Better
&lt;/h2&gt;

&lt;p&gt;A common mistake developers make is using &lt;strong&gt;vague prompts&lt;/strong&gt;.&lt;/p&gt;

&lt;p&gt;Example of a weak prompt:&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight plaintext"&gt;&lt;code&gt;Build authentication for my app
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;The AI must guess too much.&lt;/p&gt;

&lt;p&gt;Better prompts:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;define constraints&lt;/li&gt;
&lt;li&gt;reference existing patterns&lt;/li&gt;
&lt;li&gt;specify architecture boundaries&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;The clearer the prompt, the more reliable the output.&lt;/p&gt;




&lt;h2&gt;
  
  
  The Key Insight
&lt;/h2&gt;

&lt;p&gt;AI coding tools are not magic architects.&lt;/p&gt;

&lt;p&gt;They are &lt;strong&gt;execution engines&lt;/strong&gt;.&lt;/p&gt;

&lt;p&gt;Senior engineers do not ask AI to design systems.&lt;/p&gt;

&lt;p&gt;They ask AI to &lt;strong&gt;implement clearly defined pieces of a system&lt;/strong&gt;.&lt;/p&gt;




&lt;h2&gt;
  
  
  Final Thoughts
&lt;/h2&gt;

&lt;p&gt;AI coding assistants are becoming a permanent part of the developer toolbox.&lt;/p&gt;

&lt;p&gt;But the biggest productivity gains do not come from the tool itself.&lt;/p&gt;

&lt;p&gt;They come from &lt;strong&gt;learning how to guide the tool effectively.&lt;/strong&gt;&lt;/p&gt;

&lt;p&gt;The engineers who benefit most from AI are not the ones who rely on it blindly.&lt;/p&gt;

&lt;p&gt;They are the ones who &lt;strong&gt;combine strong engineering discipline with precise prompts.&lt;/strong&gt;&lt;/p&gt;

&lt;p&gt;And in that workflow, AI becomes less like autocomplete…&lt;/p&gt;

&lt;p&gt;…and more like &lt;strong&gt;a very fast junior engineer.&lt;/strong&gt;&lt;/p&gt;




&lt;p&gt;Written by &lt;strong&gt;TheProdSDE&lt;/strong&gt; — sharing insights on AI engineering, system design, and developer productivity.&lt;/p&gt;

</description>
      <category>ai</category>
      <category>programming</category>
      <category>productivity</category>
      <category>software</category>
    </item>
    <item>
      <title>Stop Treating AI Like Autocomplete — Design an AI-First Developer Workflow Instead</title>
      <dc:creator>TheProdSDE</dc:creator>
      <pubDate>Thu, 12 Mar 2026 13:51:28 +0000</pubDate>
      <link>https://dev.to/theprodsde/stop-treating-ai-like-autocomplete-design-an-ai-first-developer-workflow-instead-44m7</link>
      <guid>https://dev.to/theprodsde/stop-treating-ai-like-autocomplete-design-an-ai-first-developer-workflow-instead-44m7</guid>
      <description>&lt;blockquote&gt;
&lt;p&gt;&lt;strong&gt;AI coding tools are everywhere — but most teams still use them like autocomplete.&lt;/strong&gt;&lt;br&gt;
The result? Faster code generation… and faster technical debt.&lt;/p&gt;
&lt;/blockquote&gt;

&lt;p&gt;In 2026, almost every developer works with AI tools inside their IDE.&lt;/p&gt;

&lt;p&gt;You type a function name.&lt;br&gt;
Your AI assistant confidently generates 40 lines of code.&lt;/p&gt;

&lt;p&gt;You accept it because the deadline is tight.&lt;/p&gt;

&lt;p&gt;Two weeks later your team is debugging the same AI-generated code.&lt;/p&gt;

&lt;p&gt;The issue isn't AI.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;The issue is the workflow around it.&lt;/strong&gt;&lt;/p&gt;

&lt;p&gt;The teams getting massive productivity gains are not the ones simply using AI tools.&lt;/p&gt;

&lt;p&gt;They are the ones &lt;strong&gt;engineering a workflow around AI.&lt;/strong&gt;&lt;/p&gt;



&lt;p&gt;💬 &lt;strong&gt;Quick question for you:&lt;/strong&gt;&lt;/p&gt;

&lt;p&gt;How are you currently using AI coding tools?&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;autocomplete replacement
&lt;/li&gt;
&lt;li&gt;debugging helper
&lt;/li&gt;
&lt;li&gt;architecture assistant
&lt;/li&gt;
&lt;li&gt;full feature generation
&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;Drop your workflow in the comments — I'm curious how different teams are approaching this.&lt;/p&gt;
&lt;h2&gt;
  
  
  What You'll Learn
&lt;/h2&gt;

&lt;ul&gt;
&lt;li&gt;Why AI coding tools create hidden technical debt&lt;/li&gt;
&lt;li&gt;The correct mental model for AI-assisted development&lt;/li&gt;
&lt;li&gt;A practical AI-first development workflow&lt;/li&gt;
&lt;li&gt;Real engineering patterns that work&lt;/li&gt;
&lt;li&gt;Rules for keeping humans in control&lt;/li&gt;
&lt;/ul&gt;
&lt;h2&gt;
  
  
  TL;DR
&lt;/h2&gt;

&lt;p&gt;Most teams use AI coding tools like &lt;strong&gt;autocomplete&lt;/strong&gt;.&lt;/p&gt;

&lt;p&gt;That works for small tasks — but breaks down in real systems.&lt;/p&gt;

&lt;p&gt;The better workflow:&lt;/p&gt;

&lt;ol&gt;
&lt;li&gt;Engineers design the architecture&lt;/li&gt;
&lt;li&gt;AI generates implementation&lt;/li&gt;
&lt;li&gt;Humans review the output&lt;/li&gt;
&lt;li&gt;Tests validate behavior&lt;/li&gt;
&lt;/ol&gt;

&lt;p&gt;AI shouldn't design your system.&lt;/p&gt;
&lt;p&gt;It should &lt;strong&gt;accelerate implementation inside a system you already designed.&lt;/strong&gt;&lt;/p&gt;
&lt;h2&gt;
  
  
  The Real Problem
&lt;/h2&gt;

&lt;p&gt;AI coding assistants are now part of everyday development.&lt;/p&gt;

&lt;p&gt;Developers use them to:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;generate boilerplate&lt;/li&gt;
&lt;li&gt;explain unfamiliar code&lt;/li&gt;
&lt;li&gt;write tests&lt;/li&gt;
&lt;li&gt;refactor functions&lt;/li&gt;
&lt;li&gt;scaffold APIs&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;But problems appear when teams move from:&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;"AI helps write code"&lt;/strong&gt;&lt;/p&gt;

&lt;p&gt;to&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;"AI probably knows what the system should do."&lt;/strong&gt;&lt;/p&gt;

&lt;p&gt;That shift introduces hidden risk.&lt;/p&gt;


&lt;h3&gt;
  
  
  What AI Is Very Good At
&lt;/h3&gt;

&lt;ul&gt;
&lt;li&gt;Generating repetitive code&lt;/li&gt;
&lt;li&gt;Translating intent into a first draft&lt;/li&gt;
&lt;li&gt;Creating test scaffolding&lt;/li&gt;
&lt;li&gt;Performing large refactors&lt;/li&gt;
&lt;li&gt;Accelerating low-risk implementation&lt;/li&gt;
&lt;/ul&gt;


&lt;h3&gt;
  
  
  What AI Is Still Bad At
&lt;/h3&gt;

&lt;ul&gt;
&lt;li&gt;Understanding business rules&lt;/li&gt;
&lt;li&gt;Making architectural trade-offs&lt;/li&gt;
&lt;li&gt;Preserving system boundaries&lt;/li&gt;
&lt;li&gt;Long-term maintainability&lt;/li&gt;
&lt;li&gt;Handling ambiguity safely&lt;/li&gt;
&lt;/ul&gt;



&lt;blockquote&gt;
&lt;p&gt;&lt;strong&gt;AI doesn't remove engineering discipline.&lt;br&gt;
It amplifies whatever discipline already exists in the team.&lt;/strong&gt;&lt;/p&gt;
&lt;/blockquote&gt;

&lt;p&gt;If the workflow is weak, AI accelerates bad architecture.&lt;/p&gt;

&lt;p&gt;If the workflow is strong, AI accelerates productivity.&lt;/p&gt;


&lt;h2&gt;
  
  
  The Correct Mental Model
&lt;/h2&gt;

&lt;p&gt;The best way to think about AI coding tools is simple:&lt;/p&gt;

&lt;blockquote&gt;
&lt;p&gt;&lt;strong&gt;Treat AI like a fast junior engineer inside your system.&lt;/strong&gt;&lt;/p&gt;
&lt;/blockquote&gt;

&lt;p&gt;A good junior engineer can:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;implement well-defined tasks&lt;/li&gt;
&lt;li&gt;write boilerplate&lt;/li&gt;
&lt;li&gt;perform refactors&lt;/li&gt;
&lt;li&gt;generate first drafts&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;But they still require:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;clear requirements&lt;/li&gt;
&lt;li&gt;architecture constraints&lt;/li&gt;
&lt;li&gt;code review&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;AI works exactly the same way.&lt;/p&gt;


&lt;p&gt;This workflow works especially well with AI-enabled IDEs like Cursor or coding assistants like GitHub Copilot.&lt;/p&gt;

&lt;p&gt;A healthy AI development workflow looks like this:&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight plaintext"&gt;&lt;code&gt;Problem Definition
      ↓
Interface Design
      ↓
AI Implementation
      ↓
Human Code Review
      ↓
Testing &amp;amp; Validation
      ↓
Merge
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;Humans control architecture.&lt;/p&gt;

&lt;p&gt;AI accelerates execution.&lt;/p&gt;




&lt;h2&gt;
  
  
  The Most Dangerous AI Workflow
&lt;/h2&gt;

&lt;p&gt;Many teams accidentally follow this workflow:&lt;/p&gt;

&lt;ol&gt;
&lt;li&gt;Open IDE&lt;/li&gt;
&lt;li&gt;Ask AI to build a feature&lt;/li&gt;
&lt;li&gt;Accept generated code&lt;/li&gt;
&lt;li&gt;Ship it&lt;/li&gt;
&lt;/ol&gt;

&lt;p&gt;It feels fast.&lt;/p&gt;

&lt;p&gt;But it introduces:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;hidden coupling&lt;/li&gt;
&lt;li&gt;duplicated logic&lt;/li&gt;
&lt;li&gt;architecture violations&lt;/li&gt;
&lt;li&gt;security risks&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;Speed without discipline is &lt;strong&gt;technical debt at machine speed.&lt;/strong&gt;&lt;/p&gt;




&lt;h2&gt;
  
  
  A Practical AI-First Workflow
&lt;/h2&gt;

&lt;p&gt;A better engineering model looks like this:&lt;/p&gt;

&lt;ol&gt;
&lt;li&gt;Define the problem clearly&lt;/li&gt;
&lt;li&gt;Design interfaces first&lt;/li&gt;
&lt;li&gt;Ask AI to implement inside constraints&lt;/li&gt;
&lt;li&gt;Review like a pull request&lt;/li&gt;
&lt;li&gt;Standardize repeatable prompts&lt;/li&gt;
&lt;/ol&gt;




&lt;h3&gt;
  
  
  Human-Owned Work
&lt;/h3&gt;

&lt;p&gt;Engineers must still own:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;system architecture&lt;/li&gt;
&lt;li&gt;service boundaries&lt;/li&gt;
&lt;li&gt;security decisions&lt;/li&gt;
&lt;li&gt;data contracts&lt;/li&gt;
&lt;li&gt;performance trade-offs&lt;/li&gt;
&lt;li&gt;business rules&lt;/li&gt;
&lt;/ul&gt;




&lt;h3&gt;
  
  
  AI-Assisted Work
&lt;/h3&gt;

&lt;p&gt;AI works best for:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;boilerplate code&lt;/li&gt;
&lt;li&gt;test scaffolding&lt;/li&gt;
&lt;li&gt;repetitive refactors&lt;/li&gt;
&lt;li&gt;documentation drafts&lt;/li&gt;
&lt;li&gt;DTO and mapper generation&lt;/li&gt;
&lt;li&gt;CRUD scaffolding&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;AI executes inside the system you designed.&lt;/p&gt;




&lt;h2&gt;
  
  
  Practical Pattern #1 — Spec → Design → AI Implementation
&lt;/h2&gt;

&lt;p&gt;Weak prompt:&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight plaintext"&gt;&lt;code&gt;Build authentication for my app
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;Too vague.&lt;/p&gt;

&lt;p&gt;Better workflow:&lt;/p&gt;

&lt;h3&gt;
  
  
  Step 1 — Lightweight spec
&lt;/h3&gt;



&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight plaintext"&gt;&lt;code&gt;Feature: Email/password login

POST /api/auth/login
Request: { email, password }

Response:
{
  accessToken,
  refreshToken,
  user: { id, email, roles }
}
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;






&lt;h3&gt;
  
  
  Step 2 — Define interface
&lt;/h3&gt;



&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight typescript"&gt;&lt;code&gt;&lt;span class="k"&gt;export&lt;/span&gt; &lt;span class="kr"&gt;interface&lt;/span&gt; &lt;span class="nx"&gt;AuthService&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="nf"&gt;login&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;email&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;string&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;password&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;string&lt;/span&gt;&lt;span class="p"&gt;):&lt;/span&gt; &lt;span class="nb"&gt;Promise&lt;/span&gt;&lt;span class="o"&gt;&amp;lt;&lt;/span&gt;&lt;span class="nx"&gt;LoginResult&lt;/span&gt;&lt;span class="o"&gt;&amp;gt;&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
  &lt;span class="nf"&gt;refresh&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;token&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;string&lt;/span&gt;&lt;span class="p"&gt;):&lt;/span&gt; &lt;span class="nb"&gt;Promise&lt;/span&gt;&lt;span class="o"&gt;&amp;lt;&lt;/span&gt;&lt;span class="nx"&gt;LoginResult&lt;/span&gt;&lt;span class="o"&gt;&amp;gt;&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;

&lt;span class="k"&gt;export&lt;/span&gt; &lt;span class="kr"&gt;interface&lt;/span&gt; &lt;span class="nx"&gt;LoginResult&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="nl"&gt;accessToken&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;string&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
  &lt;span class="nl"&gt;refreshToken&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;string&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
  &lt;span class="nl"&gt;user&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
    &lt;span class="na"&gt;id&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;string&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
    &lt;span class="nl"&gt;email&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;string&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
    &lt;span class="nl"&gt;roles&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;string&lt;/span&gt;&lt;span class="p"&gt;[];&lt;/span&gt;
  &lt;span class="p"&gt;};&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;






&lt;h3&gt;
  
  
  Step 3 — AI implementation
&lt;/h3&gt;

&lt;p&gt;Prompt:&lt;/p&gt;

&lt;blockquote&gt;
&lt;p&gt;Implement &lt;code&gt;AuthService&lt;/code&gt; using &lt;code&gt;UserRepository&lt;/code&gt;, bcrypt password checks, and the existing JWT helper.&lt;/p&gt;
&lt;/blockquote&gt;

&lt;p&gt;Now:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;You control the contract&lt;/li&gt;
&lt;li&gt;AI handles mechanical work&lt;/li&gt;
&lt;li&gt;Architecture remains stable&lt;/li&gt;
&lt;/ul&gt;




&lt;h2&gt;
  
  
  Practical Pattern #2 — Guided Refactors
&lt;/h2&gt;

&lt;p&gt;Example: centralizing email logic behind a service.&lt;/p&gt;

&lt;p&gt;Define abstraction first:&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight typescript"&gt;&lt;code&gt;&lt;span class="k"&gt;export&lt;/span&gt; &lt;span class="kr"&gt;interface&lt;/span&gt; &lt;span class="nx"&gt;NotificationService&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="nf"&gt;sendWelcomeEmail&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;userId&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;string&lt;/span&gt;&lt;span class="p"&gt;):&lt;/span&gt; &lt;span class="nb"&gt;Promise&lt;/span&gt;&lt;span class="o"&gt;&amp;lt;&lt;/span&gt;&lt;span class="k"&gt;void&lt;/span&gt;&lt;span class="o"&gt;&amp;gt;&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
  &lt;span class="nf"&gt;sendPasswordResetEmail&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;userId&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;string&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;resetToken&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;string&lt;/span&gt;&lt;span class="p"&gt;):&lt;/span&gt; &lt;span class="nb"&gt;Promise&lt;/span&gt;&lt;span class="o"&gt;&amp;lt;&lt;/span&gt;&lt;span class="k"&gt;void&lt;/span&gt;&lt;span class="o"&gt;&amp;gt;&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;Then instruct AI:&lt;/p&gt;

&lt;blockquote&gt;
&lt;p&gt;Find direct &lt;code&gt;sendEmail&lt;/code&gt; calls and route them through &lt;code&gt;NotificationService&lt;/code&gt;.&lt;/p&gt;
&lt;/blockquote&gt;

&lt;p&gt;Now the AI performs the mechanical refactor while &lt;strong&gt;architecture stays consistent&lt;/strong&gt;.&lt;/p&gt;




&lt;h2&gt;
  
  
  Practical Pattern #3 — AI First Draft, Human Final Draft
&lt;/h2&gt;

&lt;p&gt;AI can generate boilerplate quickly.&lt;/p&gt;

&lt;p&gt;Example:&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight typescript"&gt;&lt;code&gt;&lt;span class="k"&gt;export&lt;/span&gt; &lt;span class="kd"&gt;function&lt;/span&gt; &lt;span class="nf"&gt;toUserResponse&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;entity&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;UserEntity&lt;/span&gt;&lt;span class="p"&gt;):&lt;/span&gt; &lt;span class="nx"&gt;UserResponse&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
    &lt;span class="na"&gt;id&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;entity&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;id&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="na"&gt;email&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;entity&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;email&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="na"&gt;roles&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;entity&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;roles&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="na"&gt;createdAt&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;entity&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;createdAt&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;toISOString&lt;/span&gt;&lt;span class="p"&gt;(),&lt;/span&gt;
    &lt;span class="na"&gt;updatedAt&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;entity&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;updatedAt&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;toISOString&lt;/span&gt;&lt;span class="p"&gt;(),&lt;/span&gt;
  &lt;span class="p"&gt;};&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;Human refinement:&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight typescript"&gt;&lt;code&gt;&lt;span class="k"&gt;import&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt; &lt;span class="nx"&gt;formatIsoDate&lt;/span&gt; &lt;span class="p"&gt;}&lt;/span&gt; &lt;span class="k"&gt;from&lt;/span&gt; &lt;span class="dl"&gt;"&lt;/span&gt;&lt;span class="s2"&gt;../utils/date&lt;/span&gt;&lt;span class="dl"&gt;"&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;

&lt;span class="k"&gt;export&lt;/span&gt; &lt;span class="kd"&gt;function&lt;/span&gt; &lt;span class="nf"&gt;toUserResponse&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;entity&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;UserEntity&lt;/span&gt;&lt;span class="p"&gt;):&lt;/span&gt; &lt;span class="nx"&gt;UserResponse&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
    &lt;span class="na"&gt;id&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;entity&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;id&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="na"&gt;email&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;entity&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;email&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="na"&gt;roles&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;[...&lt;/span&gt;&lt;span class="nx"&gt;entity&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;roles&lt;/span&gt;&lt;span class="p"&gt;],&lt;/span&gt;
    &lt;span class="na"&gt;createdAt&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nf"&gt;formatIsoDate&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;entity&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;createdAt&lt;/span&gt;&lt;span class="p"&gt;),&lt;/span&gt;
    &lt;span class="na"&gt;updatedAt&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nf"&gt;formatIsoDate&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;entity&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;updatedAt&lt;/span&gt;&lt;span class="p"&gt;),&lt;/span&gt;
  &lt;span class="p"&gt;};&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;AI saves typing.&lt;/p&gt;

&lt;p&gt;Engineers enforce consistency.&lt;/p&gt;




&lt;h2&gt;
  
  
  Practical Pattern #4 — AI as Test Author
&lt;/h2&gt;

&lt;p&gt;Define test strategy first.&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight typescript"&gt;&lt;code&gt;&lt;span class="c1"&gt;// Scenarios:&lt;/span&gt;
&lt;span class="c1"&gt;// valid credentials → success&lt;/span&gt;
&lt;span class="c1"&gt;// invalid email → AUTH_INVALID_CREDENTIALS&lt;/span&gt;
&lt;span class="c1"&gt;// invalid password → AUTH_INVALID_CREDENTIALS&lt;/span&gt;
&lt;span class="c1"&gt;// locked account → AUTH_ACCOUNT_LOCKED&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;Then prompt AI:&lt;/p&gt;

&lt;blockquote&gt;
&lt;p&gt;Generate Jest tests covering these scenarios.&lt;/p&gt;
&lt;/blockquote&gt;

&lt;p&gt;You then review for:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;missing edge cases&lt;/li&gt;
&lt;li&gt;flaky tests&lt;/li&gt;
&lt;li&gt;incorrect assumptions&lt;/li&gt;
&lt;/ul&gt;




&lt;h2&gt;
  
  
  The AI Development Rules I Follow
&lt;/h2&gt;

&lt;p&gt;Simple rules that dramatically improve outcomes:&lt;/p&gt;

&lt;ol&gt;
&lt;li&gt;Never generate architecture with AI&lt;/li&gt;
&lt;li&gt;Always define interfaces first&lt;/li&gt;
&lt;li&gt;Constrain AI to specific files&lt;/li&gt;
&lt;li&gt;Review AI code like junior code&lt;/li&gt;
&lt;li&gt;Never merge AI code without tests&lt;/li&gt;
&lt;li&gt;Standardize prompts for repeatable workflows&lt;/li&gt;
&lt;/ol&gt;

&lt;p&gt;These guardrails keep velocity high without sacrificing quality.&lt;/p&gt;




&lt;h2&gt;
  
  
  Real Tools Supporting This Workflow
&lt;/h2&gt;

&lt;p&gt;Modern tools supporting this approach include:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;GitHub Copilot&lt;/li&gt;
&lt;li&gt;Claude Code&lt;/li&gt;
&lt;li&gt;Cursor&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;These tools are powerful — but &lt;strong&gt;the workflow around them matters more than the tool itself.&lt;/strong&gt;&lt;/p&gt;




&lt;h2&gt;
  
  
  One Insight That Changes Everything
&lt;/h2&gt;

&lt;p&gt;AI does not remove engineering responsibility.&lt;/p&gt;

&lt;p&gt;It &lt;strong&gt;amplifies the engineering discipline already present in the team&lt;/strong&gt;.&lt;/p&gt;

&lt;p&gt;Bad workflows produce bad code faster.&lt;/p&gt;

&lt;p&gt;Good workflows produce good code faster.&lt;/p&gt;

&lt;p&gt;The difference is not the AI.&lt;/p&gt;

&lt;p&gt;The difference is the &lt;strong&gt;system around it.&lt;/strong&gt;&lt;/p&gt;




&lt;p&gt;Before adopting AI agents across your team, make sure the workflow is clear.&lt;/p&gt;

&lt;p&gt;Otherwise you are simply scaling chaos faster.&lt;/p&gt;

&lt;p&gt;AI coding agents are now a standard part of software development.&lt;/p&gt;

&lt;p&gt;The real competitive advantage is not &lt;strong&gt;using AI&lt;/strong&gt;.&lt;/p&gt;

&lt;p&gt;It is &lt;strong&gt;engineering workflows around AI.&lt;/strong&gt;&lt;/p&gt;

&lt;p&gt;If you treat AI like a magic senior engineer, you will be disappointed.&lt;/p&gt;

&lt;p&gt;If you treat it like a fast junior inside a strong engineering system, you can ship faster &lt;strong&gt;without sacrificing code quality.&lt;/strong&gt;&lt;/p&gt;

&lt;p&gt;The next generation of software development is not AI-powered.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;It is AI-orchestrated.&lt;/strong&gt;&lt;/p&gt;




&lt;p&gt;If you're experimenting with AI development workflows, I'd love to hear what has worked (or failed) for your team.&lt;/p&gt;

&lt;p&gt;Drop your experiences in the comments 👇&lt;/p&gt;

&lt;p&gt;Follow &lt;strong&gt;TheProdSDE&lt;/strong&gt; for more content on:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;AI engineering&lt;/li&gt;
&lt;li&gt;system design&lt;/li&gt;
&lt;li&gt;developer productivity&lt;/li&gt;
&lt;/ul&gt;

</description>
      <category>ai</category>
      <category>programming</category>
      <category>productivity</category>
      <category>softwareengineering</category>
    </item>
    <item>
      <title>LangGraph vs Semantic Kernel: Python AI Agents in 2026</title>
      <dc:creator>TheProdSDE</dc:creator>
      <pubDate>Wed, 11 Mar 2026 12:41:03 +0000</pubDate>
      <link>https://dev.to/theprodsde/langgraph-vs-semantic-kernel-python-ai-agents-in-2026-1p4g</link>
      <guid>https://dev.to/theprodsde/langgraph-vs-semantic-kernel-python-ai-agents-in-2026-1p4g</guid>
      <description>&lt;h2&gt;
  
  
  Why This Comparison Matters Right Now
&lt;/h2&gt;

&lt;p&gt;Here's the honest reality of building Python AI agents in 2026: you have two genuinely good framework choices, and picking the wrong one for the wrong problem will cost you &lt;strong&gt;architecture refactors&lt;/strong&gt;, not just a few hours of code changes.&lt;/p&gt;

&lt;p&gt;LangGraph and Semantic Kernel have both crossed major milestones since most popular comparisons were written:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;
&lt;strong&gt;LangGraph hit v1.0&lt;/strong&gt; in October 2025 — with a formal stability commitment and no breaking changes until 2.0. &lt;a href="https://blog.langchain.com/langchain-langgraph-1dot0/" rel="noopener noreferrer"&gt;(official announcement)&lt;/a&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;LangChain 1.0's &lt;code&gt;create_agent&lt;/code&gt;&lt;/strong&gt; now runs on the LangGraph runtime underneath, making LangGraph the execution engine of the LangChain ecosystem. &lt;a href="https://blog.langchain.com/langchain-langgraph-1dot0/" rel="noopener noreferrer"&gt;(LangChain/LangGraph 1.0 blog)&lt;/a&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Semantic Kernel shipped first-class MCP support for Python&lt;/strong&gt; in v1.28.1 — SK can now act as both an MCP client &lt;em&gt;and&lt;/em&gt; server natively in the SDK. &lt;a href="https://devblogs.microsoft.com/semantic-kernel/semantic-kernel-adds-model-context-protocol-mcp-support-for-python/" rel="noopener noreferrer"&gt;(official SK dev blog)&lt;/a&gt;
&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;If you're still reading comparisons that call LangGraph "unstable" or Semantic Kernel "too tied to .NET", you're reading old content.&lt;/p&gt;

&lt;p&gt;This post is grounded in the &lt;strong&gt;official LangGraph docs&lt;/strong&gt;, the &lt;strong&gt;official Semantic Kernel docs&lt;/strong&gt;, and both framework changelogs. Let's go.&lt;/p&gt;




&lt;h2&gt;
  
  
  TL;DR: The One-Line Decision Rule
&lt;/h2&gt;

&lt;div class="table-wrapper-paragraph"&gt;&lt;table&gt;
&lt;thead&gt;
&lt;tr&gt;
&lt;th&gt;Problem Type&lt;/th&gt;
&lt;th&gt;Framework&lt;/th&gt;
&lt;/tr&gt;
&lt;/thead&gt;
&lt;tbody&gt;
&lt;tr&gt;
&lt;td&gt;Stateful multi-step agent workflows&lt;/td&gt;
&lt;td&gt;&lt;strong&gt;LangGraph&lt;/strong&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Plugin-based AI platforms &amp;amp; MCP interoperability&lt;/td&gt;
&lt;td&gt;&lt;strong&gt;Semantic Kernel&lt;/strong&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;
&lt;/table&gt;&lt;/div&gt;

&lt;p&gt;That distinction explains every trade-off in this article.&lt;/p&gt;




&lt;h2&gt;
  
  
  Architecture: Two Very Different Mental Models
&lt;/h2&gt;

&lt;h3&gt;
  
  
  LangGraph — The Graph Runtime
&lt;/h3&gt;

&lt;p&gt;LangGraph models your agent system as a &lt;strong&gt;stateful graph&lt;/strong&gt; where you explicitly define state, nodes, and edges. Nodes are Python callables or subgraphs. Edges are transitions. State is a typed object that flows through the graph and gets updated at each step.&lt;/p&gt;

&lt;p&gt;That's not an internal implementation detail — &lt;strong&gt;it's the primary abstraction you work with every day&lt;/strong&gt;.&lt;/p&gt;

&lt;p&gt;The &lt;a href="https://docs.langchain.com/oss/python/releases/langgraph-v1" rel="noopener noreferrer"&gt;official LangGraph v1 docs&lt;/a&gt; describe the framework around three core ideas: &lt;strong&gt;durable execution&lt;/strong&gt;, &lt;strong&gt;controllability&lt;/strong&gt;, and &lt;strong&gt;human-in-the-loop&lt;/strong&gt;. Resuming a workflow from the last checkpoint after a crash, inserting a human review step, or branching into parallel sub-agents are first-class operations — not workarounds.&lt;/p&gt;

&lt;p&gt;Since LangGraph v1, LangChain's &lt;code&gt;create_agent&lt;/code&gt; lives on top of this runtime. The stack now has a clean separation:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Start with &lt;code&gt;create_agent&lt;/code&gt; for standard tool-calling loops.&lt;/li&gt;
&lt;li&gt;Drop down to raw LangGraph when you need explicit workflow topology.&lt;/li&gt;
&lt;/ul&gt;

&lt;h3&gt;
  
  
  Semantic Kernel — The Kernel-Plugin Middleware
&lt;/h3&gt;

&lt;p&gt;Semantic Kernel starts from the &lt;strong&gt;Kernel&lt;/strong&gt; abstraction, which holds AI services, plugins, and functions. Plugins are groups of functions exposed to the model and to agents, and can come from native Python code, prompt templates, or imported external schemas.&lt;/p&gt;

&lt;p&gt;The &lt;a href="https://learn.microsoft.com/en-us/semantic-kernel/frameworks/agent/agent-functions" rel="noopener noreferrer"&gt;official SK agent-functions docs&lt;/a&gt; state:&lt;/p&gt;

&lt;blockquote&gt;
&lt;p&gt;&lt;em&gt;"Any Plugin available to an Agent is managed within its respective Kernel instance — this enables each Agent to access distinct functionalities based on its specific role."&lt;/em&gt;&lt;/p&gt;
&lt;/blockquote&gt;

&lt;p&gt;Orchestration emerges from agents choosing functions and planners sequencing capability calls — &lt;strong&gt;rather than from a graph topology you define up front&lt;/strong&gt;.&lt;/p&gt;

&lt;p&gt;This makes Semantic Kernel feel more like &lt;strong&gt;AI middleware&lt;/strong&gt;. You shape what your agent can do, then let function calling and the agent framework decide how to do it.&lt;/p&gt;

&lt;h3&gt;
  
  
  Architectural Difference — Quick Reference
&lt;/h3&gt;

&lt;div class="table-wrapper-paragraph"&gt;&lt;table&gt;
&lt;thead&gt;
&lt;tr&gt;
&lt;th&gt;Dimension&lt;/th&gt;
&lt;th&gt;LangGraph&lt;/th&gt;
&lt;th&gt;Semantic Kernel&lt;/th&gt;
&lt;/tr&gt;
&lt;/thead&gt;
&lt;tbody&gt;
&lt;tr&gt;
&lt;td&gt;Primary abstraction&lt;/td&gt;
&lt;td&gt;Typed state graph (nodes + edges)&lt;/td&gt;
&lt;td&gt;Kernel + plugins + agents&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Workflow control&lt;/td&gt;
&lt;td&gt;You define topology explicitly&lt;/td&gt;
&lt;td&gt;Emerges from agent function calling&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;State management&lt;/td&gt;
&lt;td&gt;First-class typed state + checkpointing&lt;/td&gt;
&lt;td&gt;Externalized per service or plugin&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Best mental model&lt;/td&gt;
&lt;td&gt;Durable state machine for agents&lt;/td&gt;
&lt;td&gt;AI middleware with composable capabilities&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;
&lt;/table&gt;&lt;/div&gt;




&lt;h2&gt;
  
  
  Code: The Same Agent in Both Frameworks
&lt;/h2&gt;

&lt;p&gt;To make the architectural differences concrete, let's build the same agent in both: a &lt;strong&gt;multi-turn weather assistant&lt;/strong&gt; with memory and a system prompt.&lt;/p&gt;

&lt;h3&gt;
  
  
  LangGraph — Weather Agent with Checkpointing
&lt;/h3&gt;

&lt;blockquote&gt;
&lt;p&gt;Pattern from the &lt;a href="https://langchain-ai.github.io/langgraph/agents/agents/" rel="noopener noreferrer"&gt;official LangGraph agents quickstart&lt;/a&gt;&lt;br&gt;
&lt;/p&gt;
&lt;/blockquote&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight python"&gt;&lt;code&gt;&lt;span class="c1"&gt;# pip install -U langgraph "langchain[openai]"
&lt;/span&gt;
&lt;span class="kn"&gt;from&lt;/span&gt; &lt;span class="n"&gt;langgraph.prebuilt&lt;/span&gt; &lt;span class="kn"&gt;import&lt;/span&gt; &lt;span class="n"&gt;create_react_agent&lt;/span&gt;
&lt;span class="kn"&gt;from&lt;/span&gt; &lt;span class="n"&gt;langgraph.checkpoint.memory&lt;/span&gt; &lt;span class="kn"&gt;import&lt;/span&gt; &lt;span class="n"&gt;InMemorySaver&lt;/span&gt;
&lt;span class="kn"&gt;from&lt;/span&gt; &lt;span class="n"&gt;langchain.chat_models&lt;/span&gt; &lt;span class="kn"&gt;import&lt;/span&gt; &lt;span class="n"&gt;init_chat_model&lt;/span&gt;

&lt;span class="c1"&gt;# --- Tool: plain Python function ---
&lt;/span&gt;&lt;span class="k"&gt;def&lt;/span&gt; &lt;span class="nf"&gt;get_weather&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;city&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nb"&gt;str&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="o"&gt;-&amp;gt;&lt;/span&gt; &lt;span class="nb"&gt;str&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;
    &lt;span class="sh"&gt;"""&lt;/span&gt;&lt;span class="s"&gt;Get the current weather for a given city.&lt;/span&gt;&lt;span class="sh"&gt;"""&lt;/span&gt;
    &lt;span class="c1"&gt;# Replace with a real API call in production
&lt;/span&gt;    &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="sa"&gt;f&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;It&lt;/span&gt;&lt;span class="sh"&gt;'&lt;/span&gt;&lt;span class="s"&gt;s sunny and 28°C in &lt;/span&gt;&lt;span class="si"&gt;{&lt;/span&gt;&lt;span class="n"&gt;city&lt;/span&gt;&lt;span class="si"&gt;}&lt;/span&gt;&lt;span class="s"&gt;.&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;

&lt;span class="c1"&gt;# --- LLM ---
&lt;/span&gt;&lt;span class="n"&gt;model&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nf"&gt;init_chat_model&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;openai:gpt-4o-mini&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;temperature&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="mi"&gt;0&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;

&lt;span class="c1"&gt;# --- Checkpointer enables durable multi-turn memory ---
# Swap InMemorySaver for SqliteSaver or PostgresSaver in production
&lt;/span&gt;&lt;span class="n"&gt;checkpointer&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nc"&gt;InMemorySaver&lt;/span&gt;&lt;span class="p"&gt;()&lt;/span&gt;

&lt;span class="c1"&gt;# --- Compile graph agent ---
&lt;/span&gt;&lt;span class="n"&gt;agent&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nf"&gt;create_react_agent&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
    &lt;span class="n"&gt;model&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="n"&gt;model&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="n"&gt;tools&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="n"&gt;get_weather&lt;/span&gt;&lt;span class="p"&gt;],&lt;/span&gt;
    &lt;span class="n"&gt;prompt&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;You are a helpful weather assistant.&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="n"&gt;checkpointer&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="n"&gt;checkpointer&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
&lt;span class="p"&gt;)&lt;/span&gt;

&lt;span class="c1"&gt;# --- thread_id binds this conversation to a persistent checkpoint ---
&lt;/span&gt;&lt;span class="n"&gt;config&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;configurable&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;thread_id&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;user-session-1&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;}}&lt;/span&gt;

&lt;span class="c1"&gt;# Turn 1
&lt;/span&gt;&lt;span class="n"&gt;response&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="n"&gt;agent&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;invoke&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
    &lt;span class="p"&gt;{&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;messages&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;[{&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;role&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;user&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;content&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;What is the weather in Mumbai?&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;}]},&lt;/span&gt;
    &lt;span class="n"&gt;config&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="n"&gt;config&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
&lt;span class="p"&gt;)&lt;/span&gt;
&lt;span class="nf"&gt;print&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;response&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;messages&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;][&lt;/span&gt;&lt;span class="o"&gt;-&lt;/span&gt;&lt;span class="mi"&gt;1&lt;/span&gt;&lt;span class="p"&gt;].&lt;/span&gt;&lt;span class="n"&gt;content&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;

&lt;span class="c1"&gt;# Turn 2 — agent remembers context automatically via checkpointer
&lt;/span&gt;&lt;span class="n"&gt;followup&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="n"&gt;agent&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;invoke&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
    &lt;span class="p"&gt;{&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;messages&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;[{&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;role&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;user&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;content&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;How about Delhi?&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;}]},&lt;/span&gt;
    &lt;span class="n"&gt;config&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="n"&gt;config&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
&lt;span class="p"&gt;)&lt;/span&gt;
&lt;span class="nf"&gt;print&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;followup&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;messages&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;][&lt;/span&gt;&lt;span class="o"&gt;-&lt;/span&gt;&lt;span class="mi"&gt;1&lt;/span&gt;&lt;span class="p"&gt;].&lt;/span&gt;&lt;span class="n"&gt;content&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;&lt;strong&gt;What is happening architecturally:&lt;/strong&gt;&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;
&lt;code&gt;create_react_agent&lt;/code&gt; compiles a &lt;code&gt;StateGraph&lt;/code&gt; with a model-tool loop under the hood.&lt;/li&gt;
&lt;li&gt;The &lt;code&gt;checkpointer&lt;/code&gt; persists state at every step; the same &lt;code&gt;thread_id&lt;/code&gt; resumes from the last saved state automatically.&lt;/li&gt;
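&lt;li&gt;If the process crashes mid-run, restarting and invoking with the same &lt;code&gt;thread_id&lt;/code&gt; picks up from the last checkpoint, provided the checkpointer is backed by durable storage (&lt;code&gt;SqliteSaver&lt;/code&gt; or &lt;code&gt;PostgresSaver&lt;/code&gt;; the &lt;code&gt;InMemorySaver&lt;/code&gt; used in the example lives only in process memory) — &lt;strong&gt;durability is a runtime concern, not your concern&lt;/strong&gt;.&lt;/li&gt;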
&lt;/ul&gt;




&lt;h3&gt;
  
  
  Semantic Kernel — Weather Agent with Plugin
&lt;/h3&gt;

&lt;blockquote&gt;
&lt;p&gt;Pattern from the &lt;a href="https://learn.microsoft.com/en-us/semantic-kernel/frameworks/agent/agent-functions" rel="noopener noreferrer"&gt;official SK agent-functions docs&lt;/a&gt;&lt;br&gt;
&lt;/p&gt;
&lt;/blockquote&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight python"&gt;&lt;code&gt;&lt;span class="c1"&gt;# pip install semantic-kernel
&lt;/span&gt;
&lt;span class="kn"&gt;import&lt;/span&gt; &lt;span class="n"&gt;asyncio&lt;/span&gt;
&lt;span class="kn"&gt;from&lt;/span&gt; &lt;span class="n"&gt;semantic_kernel&lt;/span&gt; &lt;span class="kn"&gt;import&lt;/span&gt; &lt;span class="n"&gt;Kernel&lt;/span&gt;
&lt;span class="kn"&gt;from&lt;/span&gt; &lt;span class="n"&gt;semantic_kernel.agents&lt;/span&gt; &lt;span class="kn"&gt;import&lt;/span&gt; &lt;span class="n"&gt;ChatCompletionAgent&lt;/span&gt;
&lt;span class="kn"&gt;from&lt;/span&gt; &lt;span class="n"&gt;semantic_kernel.connectors.ai.open_ai&lt;/span&gt; &lt;span class="kn"&gt;import&lt;/span&gt; &lt;span class="p"&gt;(&lt;/span&gt;
    &lt;span class="n"&gt;OpenAIChatCompletion&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="n"&gt;OpenAIChatPromptExecutionSettings&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
&lt;span class="p"&gt;)&lt;/span&gt;
&lt;span class="kn"&gt;from&lt;/span&gt; &lt;span class="n"&gt;semantic_kernel.connectors.ai&lt;/span&gt; &lt;span class="kn"&gt;import&lt;/span&gt; &lt;span class="n"&gt;FunctionChoiceBehavior&lt;/span&gt;
&lt;span class="kn"&gt;from&lt;/span&gt; &lt;span class="n"&gt;semantic_kernel.functions&lt;/span&gt; &lt;span class="kn"&gt;import&lt;/span&gt; &lt;span class="n"&gt;kernel_function&lt;/span&gt;
&lt;span class="kn"&gt;from&lt;/span&gt; &lt;span class="n"&gt;semantic_kernel.contents&lt;/span&gt; &lt;span class="kn"&gt;import&lt;/span&gt; &lt;span class="n"&gt;ChatHistory&lt;/span&gt;

&lt;span class="c1"&gt;# --- Plugin: class with @kernel_function decorators ---
&lt;/span&gt;&lt;span class="k"&gt;class&lt;/span&gt; &lt;span class="nc"&gt;WeatherPlugin&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;
    &lt;span class="nd"&gt;@kernel_function&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;name&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;get_weather&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;description&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;Get the weather for a city.&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;
    &lt;span class="k"&gt;def&lt;/span&gt; &lt;span class="nf"&gt;get_weather&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;self&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;city&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nb"&gt;str&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="o"&gt;-&amp;gt;&lt;/span&gt; &lt;span class="nb"&gt;str&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;
        &lt;span class="c1"&gt;# Replace with a real API call in production
&lt;/span&gt;        &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="sa"&gt;f&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;It&lt;/span&gt;&lt;span class="sh"&gt;'&lt;/span&gt;&lt;span class="s"&gt;s sunny and 28°C in &lt;/span&gt;&lt;span class="si"&gt;{&lt;/span&gt;&lt;span class="n"&gt;city&lt;/span&gt;&lt;span class="si"&gt;}&lt;/span&gt;&lt;span class="s"&gt;.&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;

&lt;span class="c1"&gt;# --- Kernel: holds services and plugins ---
&lt;/span&gt;&lt;span class="n"&gt;kernel&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nc"&gt;Kernel&lt;/span&gt;&lt;span class="p"&gt;()&lt;/span&gt;
&lt;span class="n"&gt;kernel&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;add_service&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nc"&gt;OpenAIChatCompletion&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;ai_model_id&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;gpt-4o-mini&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;))&lt;/span&gt;

&lt;span class="c1"&gt;# --- Execution settings: enable auto function calling ---
&lt;/span&gt;&lt;span class="n"&gt;settings&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nc"&gt;OpenAIChatPromptExecutionSettings&lt;/span&gt;&lt;span class="p"&gt;()&lt;/span&gt;
&lt;span class="n"&gt;settings&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="n"&gt;function_choice_behavior&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="n"&gt;FunctionChoiceBehavior&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nc"&gt;Auto&lt;/span&gt;&lt;span class="p"&gt;()&lt;/span&gt;

&lt;span class="c1"&gt;# --- Register plugin ---
&lt;/span&gt;&lt;span class="n"&gt;kernel&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;add_plugin&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nc"&gt;WeatherPlugin&lt;/span&gt;&lt;span class="p"&gt;(),&lt;/span&gt; &lt;span class="n"&gt;plugin_name&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;WeatherPlugin&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;

&lt;span class="c1"&gt;# --- Agent: kernel + instructions ---
&lt;/span&gt;&lt;span class="n"&gt;agent&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nc"&gt;ChatCompletionAgent&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
    &lt;span class="n"&gt;kernel&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="n"&gt;kernel&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="n"&gt;name&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;WeatherAssistant&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="n"&gt;instructions&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;You are a helpful weather assistant.&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
&lt;span class="p"&gt;)&lt;/span&gt;

&lt;span class="k"&gt;async&lt;/span&gt; &lt;span class="k"&gt;def&lt;/span&gt; &lt;span class="nf"&gt;run_agent&lt;/span&gt;&lt;span class="p"&gt;():&lt;/span&gt;
    &lt;span class="c1"&gt;# ChatHistory is your responsibility to maintain across turns
&lt;/span&gt;    &lt;span class="n"&gt;history&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nc"&gt;ChatHistory&lt;/span&gt;&lt;span class="p"&gt;()&lt;/span&gt;

    &lt;span class="c1"&gt;# Turn 1
&lt;/span&gt;    &lt;span class="n"&gt;history&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;add_user_message&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;What is the weather in Mumbai?&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;
    &lt;span class="k"&gt;async&lt;/span&gt; &lt;span class="k"&gt;for&lt;/span&gt; &lt;span class="n"&gt;message&lt;/span&gt; &lt;span class="ow"&gt;in&lt;/span&gt; &lt;span class="n"&gt;agent&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;invoke&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;history&lt;/span&gt;&lt;span class="p"&gt;):&lt;/span&gt;
        &lt;span class="nf"&gt;print&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="sa"&gt;f&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;Agent: &lt;/span&gt;&lt;span class="si"&gt;{&lt;/span&gt;&lt;span class="n"&gt;message&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="n"&gt;content&lt;/span&gt;&lt;span class="si"&gt;}&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;
        &lt;span class="n"&gt;history&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;add_message&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;message&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;

    &lt;span class="c1"&gt;# Turn 2
&lt;/span&gt;    &lt;span class="n"&gt;history&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;add_user_message&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;How about Delhi?&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;
    &lt;span class="k"&gt;async&lt;/span&gt; &lt;span class="k"&gt;for&lt;/span&gt; &lt;span class="n"&gt;message&lt;/span&gt; &lt;span class="ow"&gt;in&lt;/span&gt; &lt;span class="n"&gt;agent&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;invoke&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;history&lt;/span&gt;&lt;span class="p"&gt;):&lt;/span&gt;
        &lt;span class="nf"&gt;print&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="sa"&gt;f&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;Agent: &lt;/span&gt;&lt;span class="si"&gt;{&lt;/span&gt;&lt;span class="n"&gt;message&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="n"&gt;content&lt;/span&gt;&lt;span class="si"&gt;}&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;
        &lt;span class="n"&gt;history&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;add_message&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;message&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;

&lt;span class="n"&gt;asyncio&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;run&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nf"&gt;run_agent&lt;/span&gt;&lt;span class="p"&gt;())&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;&lt;strong&gt;What is happening architecturally:&lt;/strong&gt;&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;The &lt;code&gt;Kernel&lt;/code&gt; holds the AI service and plugins as a dependency container.&lt;/li&gt;
&lt;li&gt;
&lt;code&gt;@kernel_function&lt;/code&gt; decorators make Python methods discoverable and invocable by the model automatically.&lt;/li&gt;
&lt;li&gt;
&lt;code&gt;FunctionChoiceBehavior.Auto()&lt;/code&gt; tells the model to call functions when needed.&lt;/li&gt;
&lt;li&gt;Memory is a &lt;code&gt;ChatHistory&lt;/code&gt; object &lt;strong&gt;you manage and pass&lt;/strong&gt; into each invocation. The runtime does not persist it for you.&lt;/li&gt;
&lt;/ul&gt;




&lt;h3&gt;
  
  
  The Most Revealing Difference in 6 Lines
&lt;/h3&gt;



&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight python"&gt;&lt;code&gt;&lt;span class="c1"&gt;# LangGraph — runtime owns durability
&lt;/span&gt;&lt;span class="n"&gt;checkpointer&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nc"&gt;InMemorySaver&lt;/span&gt;&lt;span class="p"&gt;()&lt;/span&gt;
&lt;span class="n"&gt;config&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;configurable&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;thread_id&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;session-1&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;}}&lt;/span&gt;
&lt;span class="n"&gt;agent&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;invoke&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;messages&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;config&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;  &lt;span class="c1"&gt;# resumes from last checkpoint automatically
&lt;/span&gt;
&lt;span class="c1"&gt;# Semantic Kernel — you own state
&lt;/span&gt;&lt;span class="n"&gt;history&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nc"&gt;ChatHistory&lt;/span&gt;&lt;span class="p"&gt;()&lt;/span&gt;
&lt;span class="n"&gt;history&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;add_user_message&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;...&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;
&lt;span class="n"&gt;agent&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;invoke&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;history&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;  &lt;span class="c1"&gt;# you pass and maintain state explicitly
&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;In LangGraph, &lt;strong&gt;durability is a runtime concern&lt;/strong&gt;. In Semantic Kernel, &lt;strong&gt;state management is your concern&lt;/strong&gt;. Neither is wrong — they match different application models.&lt;/p&gt;




&lt;h2&gt;
  
  
  Protocol Support: MCP and A2A
&lt;/h2&gt;

&lt;p&gt;This is where Semantic Kernel has made its most significant leap recently.&lt;/p&gt;

&lt;h3&gt;
  
  
  Semantic Kernel — Native MCP in the Python SDK
&lt;/h3&gt;

&lt;p&gt;The &lt;a href="https://devblogs.microsoft.com/semantic-kernel/semantic-kernel-adds-model-context-protocol-mcp-support-for-python/" rel="noopener noreferrer"&gt;official SK MCP announcement&lt;/a&gt; states:&lt;/p&gt;

&lt;blockquote&gt;
&lt;p&gt;&lt;em&gt;"Python support for MCP has arrived... SK Python can act as both an MCP Host and an MCP Server, support multiple transport methods (stdio, SSE, WebSocket), chain multiple MCP servers together, and expose SK functions or agents as MCP servers."&lt;/em&gt;&lt;/p&gt;
&lt;/blockquote&gt;

&lt;p&gt;That is not an adapter or community plugin. It is first-class SDK support from &lt;strong&gt;v1.28.1+&lt;/strong&gt;. For teams building tools and agents that need to cross service boundaries via a standard protocol, this is a meaningful architectural upgrade.&lt;/p&gt;

&lt;h3&gt;
  
  
  LangGraph — Strong MCP at the Deployment Edge
&lt;/h3&gt;

&lt;p&gt;LangGraph's MCP story is more about deployment than in-process integration. When deployed on the LangGraph Platform, every agent is automatically exposed as an MCP-accessible endpoint at &lt;code&gt;/mcp&lt;/code&gt; with no extra code required. For self-hosted deployments, integration is available via the &lt;code&gt;langchain-mcp-adapters&lt;/code&gt; package.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Bottom line:&lt;/strong&gt;&lt;br&gt;
SK is stronger when you want &lt;strong&gt;MCP semantics inside your Python process&lt;/strong&gt;. LangGraph is stronger when you think of agents as &lt;strong&gt;deployed services&lt;/strong&gt; that other clients consume via MCP.&lt;/p&gt;




&lt;h2&gt;
  
  
  Stability and Breaking Changes: The 2026 Reality
&lt;/h2&gt;

&lt;p&gt;Here is what the official docs actually say now.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;LangGraph v1 (October 2025):&lt;/strong&gt; The &lt;a href="https://docs.langchain.com/oss/python/releases/langgraph-v1" rel="noopener noreferrer"&gt;official v1 release notes&lt;/a&gt; state that the core graph APIs and execution model are &lt;strong&gt;unchanged&lt;/strong&gt;. The main migration note is deprecation of &lt;code&gt;create_react_agent&lt;/code&gt; in &lt;code&gt;langgraph.prebuilt&lt;/code&gt; in favour of LangChain's &lt;code&gt;create_agent&lt;/code&gt;. The &lt;a href="https://blog.langchain.com/langchain-langgraph-1dot0/" rel="noopener noreferrer"&gt;LangGraph 1.0 announcement&lt;/a&gt; explicitly commits to no breaking changes until 2.0.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Semantic Kernel 1.x:&lt;/strong&gt; Most architectural disruption landed at 1.0 (namespace reorg, API renames, context variable changes). The &lt;a href="https://devblogs.microsoft.com/semantic-kernel/semantic-kernel-roadmap-h1-2025-accelerating-agents-processes-and-integration/" rel="noopener noreferrer"&gt;H1 2025 SK roadmap&lt;/a&gt; and subsequent releases show an incremental, additive pattern with targeted fixes rather than structural breaks.&lt;/p&gt;

&lt;blockquote&gt;
&lt;p&gt;The old narrative of "LangGraph breaks every release" is no longer accurate. Both frameworks are now in a stability-first phase.&lt;/p&gt;
&lt;/blockquote&gt;




&lt;h2&gt;
  
  
  Updated Technical Ratings (March 2026)
&lt;/h2&gt;

&lt;p&gt;Based on official docs and both frameworks' current stable releases:&lt;/p&gt;

&lt;div class="table-wrapper-paragraph"&gt;&lt;table&gt;
&lt;thead&gt;
&lt;tr&gt;
&lt;th&gt;Dimension&lt;/th&gt;
&lt;th&gt;LangGraph&lt;/th&gt;
&lt;th&gt;Semantic Kernel&lt;/th&gt;
&lt;/tr&gt;
&lt;/thead&gt;
&lt;tbody&gt;
&lt;tr&gt;
&lt;td&gt;Workflow control&lt;/td&gt;
&lt;td&gt;⭐ 4.8 / 5&lt;/td&gt;
&lt;td&gt;⭐ 4.0 / 5&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Durable execution&lt;/td&gt;
&lt;td&gt;⭐ 4.9 / 5&lt;/td&gt;
&lt;td&gt;⭐ 4.1 / 5&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Plugin/tool architecture&lt;/td&gt;
&lt;td&gt;⭐ 4.2 / 5&lt;/td&gt;
&lt;td&gt;⭐ 4.8 / 5&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;MCP interoperability&lt;/td&gt;
&lt;td&gt;⭐ 3.9 / 5&lt;/td&gt;
&lt;td&gt;⭐ 4.9 / 5&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Flow debuggability&lt;/td&gt;
&lt;td&gt;⭐ 4.7 / 5&lt;/td&gt;
&lt;td&gt;⭐ 3.9 / 5&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Start simple, scale complex&lt;/td&gt;
&lt;td&gt;⭐ 4.8 / 5&lt;/td&gt;
&lt;td&gt;⭐ 4.4 / 5&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Python DX overall&lt;/td&gt;
&lt;td&gt;⭐ 4.6 / 5&lt;/td&gt;
&lt;td&gt;⭐ 4.5 / 5&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;
&lt;/table&gt;&lt;/div&gt;

&lt;p&gt;The scores are intentionally close. Both are production-grade frameworks solving real problems well. The winner for your team is whichever abstraction maps better to how you think about the problem you are solving.&lt;/p&gt;




&lt;h2&gt;
  
  
  When to Choose Which
&lt;/h2&gt;

&lt;h3&gt;
  
  
  ✅ Choose LangGraph when:
&lt;/h3&gt;

&lt;ul&gt;
&lt;li&gt;Your agent logic involves &lt;strong&gt;non-trivial branching, retries, human review, or approval steps&lt;/strong&gt; that benefit from explicit graph topology.&lt;/li&gt;
&lt;li&gt;You need &lt;strong&gt;durable execution&lt;/strong&gt; — workflows that survive crashes, resume from checkpoints, and have auditable step history.&lt;/li&gt;
&lt;li&gt;You are already invested in the &lt;strong&gt;LangChain ecosystem&lt;/strong&gt; and want the clean &lt;code&gt;create_agent&lt;/code&gt; → LangGraph stack with a clear upgrade path.&lt;/li&gt;
&lt;li&gt;You want &lt;strong&gt;fine-grained observability&lt;/strong&gt; into how execution moved through a workflow at the node level.&lt;/li&gt;
&lt;/ul&gt;

&lt;h3&gt;
  
  
  ✅ Choose Semantic Kernel when:
&lt;/h3&gt;

&lt;ul&gt;
&lt;li&gt;You are building a &lt;strong&gt;platform or SDK&lt;/strong&gt; where capabilities are composed as plugins and different agents consume different tool surfaces.&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;MCP or A2A interoperability&lt;/strong&gt; is a core requirement and you want it natively in the Python SDK, not via adapters.&lt;/li&gt;
&lt;li&gt;Your team already uses a &lt;strong&gt;DI/service-oriented architecture&lt;/strong&gt; and the kernel-plugin model maps naturally to it.&lt;/li&gt;
&lt;li&gt;You want &lt;strong&gt;lightweight deployment&lt;/strong&gt; without a dedicated orchestration runtime and can manage state externally.&lt;/li&gt;
&lt;/ul&gt;




&lt;h2&gt;
  
  
  The One-Line Rule — Revisited
&lt;/h2&gt;

&lt;blockquote&gt;
&lt;p&gt;If your agent needs to &lt;strong&gt;behave like a durable state machine&lt;/strong&gt;, use LangGraph.&lt;br&gt;
If your agent needs to &lt;strong&gt;behave like a protocol-aware platform component&lt;/strong&gt;, use Semantic Kernel.&lt;/p&gt;
&lt;/blockquote&gt;

&lt;p&gt;That is the comparison most blog posts are not making. Hopefully this one was useful.&lt;/p&gt;




&lt;h2&gt;
  
  
  References
&lt;/h2&gt;

&lt;ul&gt;
&lt;li&gt;&lt;a href="https://docs.langchain.com/oss/python/releases/langgraph-v1" rel="noopener noreferrer"&gt;LangGraph v1 release notes&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="https://blog.langchain.com/langchain-langgraph-1dot0/" rel="noopener noreferrer"&gt;LangChain + LangGraph 1.0 announcement&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="https://docs.langchain.com/oss/python/langgraph/durable-execution" rel="noopener noreferrer"&gt;LangGraph durable execution docs&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="https://langchain-ai.github.io/langgraph/agents/agents/" rel="noopener noreferrer"&gt;LangGraph agents quickstart&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="https://learn.microsoft.com/en-us/semantic-kernel/frameworks/agent/agent-functions" rel="noopener noreferrer"&gt;Semantic Kernel agent-functions docs&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="https://learn.microsoft.com/en-us/semantic-kernel/concepts/plugins/" rel="noopener noreferrer"&gt;Semantic Kernel plugins docs&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="https://devblogs.microsoft.com/semantic-kernel/semantic-kernel-adds-model-context-protocol-mcp-support-for-python/" rel="noopener noreferrer"&gt;Semantic Kernel MCP Python support&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="https://devblogs.microsoft.com/semantic-kernel/semantic-kernel-roadmap-h1-2025-accelerating-agents-processes-and-integration/" rel="noopener noreferrer"&gt;Semantic Kernel H1 2025 roadmap&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;

</description>
      <category>python</category>
      <category>ai</category>
      <category>agents</category>
      <category>langchain</category>
    </item>
    <item>
      <title>We Scaled from Azure App Service to Container Apps: Here's Why</title>
      <dc:creator>TheProdSDE</dc:creator>
      <pubDate>Wed, 04 Feb 2026 17:28:43 +0000</pubDate>
      <link>https://dev.to/theprodsde/azure-deployment-ladder-from-app-service-free-app-service-basic-azure-container-apps-aca--5pe</link>
      <guid>https://dev.to/theprodsde/azure-deployment-ladder-from-app-service-free-app-service-basic-azure-container-apps-aca--5pe</guid>
      <description>&lt;p&gt;If you’ve ever tried to deploy a full stack (Node backend, MCP server, ETL jobs, Angular SPA, and PostgreSQL) on Azure, you quickly hit a &lt;strong&gt;few hard limits&lt;/strong&gt; at each tier. This is our story of rolling out exactly that stack and how we moved step‑by‑step from &lt;strong&gt;Azure App Service Free&lt;/strong&gt;, to &lt;strong&gt;Basic&lt;/strong&gt;, then to &lt;strong&gt;Azure Container Apps (ACA)&lt;/strong&gt; while keeping Postgres on Azure DB for PostgreSQL.&lt;/p&gt;




&lt;h2&gt;
  
  
  1. Starting with App Service Free (and why it died fast)
&lt;/h2&gt;

&lt;p&gt;We started where many devs do: &lt;strong&gt;Azure App Service Free tier&lt;/strong&gt; (F1). We deployed:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Node.js backend API
&lt;/li&gt;
&lt;li&gt;A WebSocket‑based MCP server
&lt;/li&gt;
&lt;li&gt;ETL job as an Azure WebJob
&lt;/li&gt;
&lt;li&gt;Angular SPA as static assets served by Node
&lt;/li&gt;
&lt;li&gt;PostgreSQL as &lt;strong&gt;Azure Database for PostgreSQL (B1ms + 32 GB storage)&lt;/strong&gt;
&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;&lt;strong&gt;The Free tier is fine for learning&lt;/strong&gt;, but two hard limits killed it for any real‑world sharing:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;No &lt;strong&gt;SLA&lt;/strong&gt; and very limited CPU seconds.
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;No support for custom‑domain SSL&lt;/strong&gt;; only &lt;code&gt;*.azurewebsites.net&lt;/code&gt; with an Azure‑managed cert.
&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;Entra ID, modern browsers, and any third‑party integration now demand &lt;strong&gt;HTTPS with a custom domain&lt;/strong&gt;.  &lt;/p&gt;

&lt;p&gt;&lt;strong&gt;App Service Free = no custom‑domain SSL&lt;/strong&gt; unless you downgrade to plain HTTP, which is effectively unusable beyond local dev. &lt;a href="https://about-azure.com/secure-your-azure-app-service-using-free-ssl-tls-certificates/" rel="noopener noreferrer"&gt;about-azure&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;So for &lt;strong&gt;any external demo, client, or even corporate‑network access&lt;/strong&gt;, the Free tier is a &lt;strong&gt;non‑starter&lt;/strong&gt;.&lt;/p&gt;




&lt;h2&gt;
  
  
  2. Moving to App Service Basic (HTTPS gate, but one‑endpoint ceiling)
&lt;/h2&gt;

&lt;p&gt;We upgraded to &lt;strong&gt;App Service Linux Basic tier (B1)&lt;/strong&gt;, which costs roughly &lt;strong&gt;₹350–₹500/month&lt;/strong&gt; in India, depending on region and billing cycle. &lt;a href="https://learn.microsoft.com/en-us/azure/app-service/overview-hosting-plans" rel="noopener noreferrer"&gt;learn.microsoft&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;This tier unlocked:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Dedicated compute (1 vCore, ~1.75 GB RAM).
&lt;/li&gt;
&lt;li&gt;Custom domains + &lt;strong&gt;App Service Managed Certificates (free, auto‑renewed TLS/SSL)&lt;/strong&gt;. &lt;a href="https://codejack.com/2024/09/update-ssl-certificates-in-azure-app-service/" rel="noopener noreferrer"&gt;codejack&lt;/a&gt;
&lt;/li&gt;
&lt;li&gt;WebJobs for background tasks (our daily ETL).
&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;On Basic, we could:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Serve the &lt;strong&gt;Angular SPA&lt;/strong&gt; from &lt;code&gt;/&lt;/code&gt;
&lt;/li&gt;
&lt;li&gt;Expose the &lt;strong&gt;Node backend API&lt;/strong&gt; on &lt;code&gt;/api/*&lt;/code&gt;
&lt;/li&gt;
&lt;li&gt;Run the &lt;strong&gt;MCP server&lt;/strong&gt; in the same Node process or via a virtual path. &lt;a href="https://stackoverflow.com/questions/62078305/multiple-nodejs-instance-on-azure-app-service" rel="noopener noreferrer"&gt;stackoverflow&lt;/a&gt;
&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;&lt;strong&gt;However, one hard constraint emerged:&lt;/strong&gt;&lt;/p&gt;

&lt;p&gt;👉 &lt;strong&gt;Azure App Service exposes only one main HTTP endpoint&lt;/strong&gt; per app. Everything fights for the same worker process, CPU, and memory.&lt;/p&gt;

&lt;p&gt;This created three problems:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;
&lt;strong&gt;No isolation&lt;/strong&gt; between backend, MCP server, and ETL.
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;ETL jobs (10–15 min)&lt;/strong&gt; hogged CPU and memory on the same instance that served the API.
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Scaling&lt;/strong&gt; is limited to a few instances (max 3 in Basic), all sharing the same codebase. &lt;a href="https://learn.microsoft.com/en-us/azure/app-service/overview-hosting-plans" rel="noopener noreferrer"&gt;learn.microsoft&lt;/a&gt;
&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;So we had:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;
&lt;strong&gt;HTTPS + custom domain&lt;/strong&gt; ✅
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Cheap, simple PaaS&lt;/strong&gt; ✅
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;True microservices isolation&lt;/strong&gt; ❌
&lt;/li&gt;
&lt;/ul&gt;




&lt;h2&gt;
  
  
  3. Why “one App Service Plan for backend + MCP + ETL” failed
&lt;/h2&gt;

&lt;p&gt;Following the “one App Service Plan for all” pattern sounded clean: share the same plan to keep costs low and everything in one logical place.&lt;/p&gt;

&lt;p&gt;But in practice:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;All services share &lt;strong&gt;one HTTP endpoint&lt;/strong&gt;, so &lt;code&gt;/&lt;/code&gt;, &lt;code&gt;/api&lt;/code&gt;, and MCP‑style endpoints live in the same process.
&lt;/li&gt;
&lt;li&gt;Long‑running &lt;strong&gt;ETL jobs&lt;/strong&gt; prevent the app from being &lt;strong&gt;scaled‑to‑zero&lt;/strong&gt;; you pay for the instance even when idle.
&lt;/li&gt;
&lt;li&gt;No &lt;strong&gt;independent scaling&lt;/strong&gt; for backend vs MCP vs ETL.
&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;This is where &lt;strong&gt;architecture becomes a hard constraint&lt;/strong&gt;, not just a style choice.&lt;/p&gt;




&lt;h2&gt;
  
  
  4. PostgreSQL: always separate, always on Azure DB
&lt;/h2&gt;

&lt;p&gt;Throughout this journey, we kept &lt;strong&gt;PostgreSQL as Azure Database for PostgreSQL Flexible Server (B1ms + 32 GB storage)&lt;/strong&gt; on a separate deployment from our compute. &lt;a href="https://azure.microsoft.com/en-us/pricing/details/postgresql/flexible-server/" rel="noopener noreferrer"&gt;azure.microsoft&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;Why? Because:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;
&lt;strong&gt;DB and app scale independently&lt;/strong&gt; (compute vs storage, backup, replication).
&lt;/li&gt;
&lt;li&gt;Flexible Server supports &lt;strong&gt;managed backups, high availability, and autoscaling&lt;/strong&gt; without tying it to App Service pricing tiers. &lt;a href="https://azure.microsoft.com/en-us/pricing/details/postgresql/flexible-server/" rel="noopener noreferrer"&gt;azure.microsoft&lt;/a&gt;
&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;Postgres cost (B1ms + 32 GB) sits roughly in &lt;strong&gt;₹1,200–₹2,000/month&lt;/strong&gt; for low‑to‑medium traffic. &lt;a href="https://azure.microsoft.com/en-us/pricing/details/postgresql/flexible-server/" rel="noopener noreferrer"&gt;azure.microsoft&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;This is a &lt;strong&gt;universal pattern&lt;/strong&gt;:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;App Service / ACA / AKS should &lt;strong&gt;not&lt;/strong&gt; host the DB directly.
&lt;/li&gt;
&lt;li&gt;Use &lt;strong&gt;Azure DB for PostgreSQL&lt;/strong&gt; (or standalone VM) and wire it via connection string or &lt;strong&gt;Managed Identity&lt;/strong&gt; when possible. &lt;a href="https://docs.azure.cn/en-us/postgresql/flexible-server/how-to-cost-optimization" rel="noopener noreferrer"&gt;docs.azure&lt;/a&gt;
&lt;/li&gt;
&lt;/ul&gt;




&lt;h2&gt;
  
  
  5. ACA: the “microservices‑on‑budget” escape hatch
&lt;/h2&gt;

&lt;p&gt;Once we hit the &lt;strong&gt;one‑endpoint bottleneck&lt;/strong&gt; and wanted true isolation between backend, MCP, and ETL, we moved the compute to &lt;strong&gt;Azure Container Apps (ACA)&lt;/strong&gt;.&lt;/p&gt;

&lt;p&gt;We:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;
&lt;strong&gt;Containerized&lt;/strong&gt; the app (Node + MCP + SPA + ETL logic) into a single Docker image.
&lt;/li&gt;
&lt;li&gt;Pushed it to &lt;strong&gt;Azure Container Registry (ACR)&lt;/strong&gt;. &lt;a href="https://azure.microsoft.com/en-us/pricing/details/container-registry/" rel="noopener noreferrer"&gt;azure.microsoft&lt;/a&gt;
&lt;/li&gt;
&lt;li&gt;Deployed the backend / MCP containers into &lt;strong&gt;Azure Container Apps&lt;/strong&gt; and wired them to the &lt;strong&gt;same Azure DB for PostgreSQL&lt;/strong&gt;. &lt;a href="https://azure.microsoft.com/en-us/pricing/details/container-apps/" rel="noopener noreferrer"&gt;azure.microsoft&lt;/a&gt;
&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;ACA became the &lt;strong&gt;dev‑test layer&lt;/strong&gt; we wanted:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;
&lt;strong&gt;Microservices‑style isolation&lt;/strong&gt; without full Kubernetes complexity.
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Auto‑scale&lt;/strong&gt; for ETL jobs (5–10 min bursts) while keeping cloud bills low by scaling down to near‑zero.
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Per‑second billing&lt;/strong&gt; with &lt;strong&gt;180k vCPU‑seconds, 360k GiB‑seconds, and 2M requests per month free&lt;/strong&gt;. &lt;a href="https://azure.microsoft.com/en-us/pricing/details/container-apps/" rel="noopener noreferrer"&gt;azure.microsoft&lt;/a&gt;
&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;We ran the backend with &lt;strong&gt;0.25 vCPU + 0.5 GiB RAM&lt;/strong&gt; and scaled up instances during ETL (5–10 min) only, then let them scale back. This kept the &lt;strong&gt;compute cost very low&lt;/strong&gt; while gaining full control over scaling policies. &lt;a href="https://azureway.cloud/azure-container-apps-pricing-part-2/" rel="noopener noreferrer"&gt;azureway&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;For &lt;strong&gt;demo / internal‑use access&lt;/strong&gt;, ACA’s default endpoint (&lt;code&gt;*.azurecontainerapps.io&lt;/code&gt;) with built‑in HTTPS was sufficient; we did &lt;strong&gt;not&lt;/strong&gt; need to bring a custom domain yet.  &lt;/p&gt;




&lt;h2&gt;
  
  
  6. Why not ACI, why not just Nginx + self‑managed certs?
&lt;/h2&gt;

&lt;p&gt;During the ACA phase, we evaluated:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;
&lt;strong&gt;Azure Container Instances (ACI)&lt;/strong&gt; for the ETL / MCP workloads.
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Nginx / reverse proxy&lt;/strong&gt; in front of ACA or App Service with &lt;strong&gt;self‑managed SSL certs&lt;/strong&gt;.
&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;We ruled both out quickly:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;
&lt;strong&gt;ACI&lt;/strong&gt; is great for &lt;strong&gt;short‑lived, one‑shot containers&lt;/strong&gt;, but it lacks the managed app‑style primitives (health checks, scaling, ingress, jobs) we needed for backend + MCP + ETL. &lt;a href="https://learn.microsoft.com/en-us/azure/container-apps/compare-options" rel="noopener noreferrer"&gt;learn.microsoft&lt;/a&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Self‑signed certs&lt;/strong&gt; with Nginx/ngrok cause &lt;strong&gt;SSL handshake failures&lt;/strong&gt; in production when the proxy talks to a backend with a non‑trusted certificate. Workarounds like &lt;code&gt;proxy_ssl_verify off&lt;/code&gt; are &lt;strong&gt;security anti‑patterns&lt;/strong&gt;. &lt;a href="https://www.explaintopic.com/nginx/nginx-proxy-pass-https-to-backend-with-self-signed-cert-caus-5813.html" rel="noopener noreferrer"&gt;explaintopic&lt;/a&gt;
&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;Instead, we leaned into &lt;strong&gt;Azure managed TLS&lt;/strong&gt;:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;App Service: &lt;strong&gt;App Service Managed Certificates (free)&lt;/strong&gt; for custom domains. &lt;a href="https://azure.github.io/AppService/2021/05/25/App-Service-Managed-Certificate-GA.html" rel="noopener noreferrer"&gt;azure.github&lt;/a&gt;
&lt;/li&gt;
&lt;li&gt;ACA: Use &lt;strong&gt;Azure Front Door&lt;/strong&gt; or &lt;strong&gt;Azure Functions + App Gateway&lt;/strong&gt; as the TLS‑terminating layer when you &lt;strong&gt;need a custom domain&lt;/strong&gt; in production. &lt;a href="https://learn.microsoft.com/en-us/azure/app-service/configure-ssl-certificate?tabs=apex%2Cportal" rel="noopener noreferrer"&gt;learn.microsoft&lt;/a&gt;
&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;This keeps &lt;strong&gt;you from managing certs&lt;/strong&gt; and instead uses &lt;strong&gt;Azure‑managed TLS&lt;/strong&gt; wherever possible.&lt;/p&gt;




&lt;h2&gt;
  
  
  7. When we move to AKS (for production)
&lt;/h2&gt;

&lt;p&gt;For &lt;strong&gt;Dev / Test&lt;/strong&gt;, ACA + ACR + Azure DB for PostgreSQL is the &lt;strong&gt;sweet spot&lt;/strong&gt;:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Microservices‑style architecture
&lt;/li&gt;
&lt;li&gt;Cost‑effective per‑second billing
&lt;/li&gt;
&lt;li&gt;No need to manage Kubernetes (yet)
&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;For &lt;strong&gt;production&lt;/strong&gt;, our plan is &lt;strong&gt;Azure Kubernetes Service (AKS)&lt;/strong&gt; because:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;
&lt;strong&gt;Multi‑cluster, multi‑region HA&lt;/strong&gt; with enterprise SLA.
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Fine‑grained&lt;/strong&gt; scaling, pod affinities, networking, observability.
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;RBAC, audit logs, and compliance&lt;/strong&gt; story for regulated workloads. &lt;a href="https://sedai.io/blog/understanding-azure-kubernetes-service-aks-pricing-costs" rel="noopener noreferrer"&gt;sedai&lt;/a&gt;
&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;AKS is &lt;strong&gt;more expensive&lt;/strong&gt; (control plane + worker nodes), but that’s the &lt;strong&gt;cost of enterprise‑grade ops&lt;/strong&gt;, not just compute. &lt;a href="https://azure.microsoft.com/en-us/pricing/details/kubernetes-service/" rel="noopener noreferrer"&gt;azure.microsoft&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;So our &lt;strong&gt;ladder&lt;/strong&gt; now looks like:&lt;/p&gt;

&lt;ol&gt;
&lt;li&gt;
&lt;strong&gt;App Service Free&lt;/strong&gt; – Learning, POC, but blocked by SSL.
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;App Service Basic&lt;/strong&gt; – MVP that needs HTTPS + custom domain.
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Azure Container Apps + ACR&lt;/strong&gt; – Microservices on budget, dev‑test ready.
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;AKS + Azure DB for PostgreSQL&lt;/strong&gt; – Production‑ready stack.
&lt;/li&gt;
&lt;/ol&gt;




&lt;h2&gt;
  
  
  8. Cost snapshot (approx, India region)
&lt;/h2&gt;

&lt;p&gt;These are rough monthly estimates for &lt;strong&gt;our stack&lt;/strong&gt; (Node + MCP + ETL + SPA + PostgreSQL) on each tier:&lt;/p&gt;

&lt;div class="table-wrapper-paragraph"&gt;&lt;table&gt;
&lt;thead&gt;
&lt;tr&gt;
&lt;th&gt;Tier / Service&lt;/th&gt;
&lt;th&gt;Cost (approx / month)&lt;/th&gt;
&lt;th&gt;Notes&lt;/th&gt;
&lt;/tr&gt;
&lt;/thead&gt;
&lt;tbody&gt;
&lt;tr&gt;
&lt;td&gt;&lt;strong&gt;PostgreSQL (B1ms + 32 GB)&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;₹1,200–₹2,000&lt;/td&gt;
&lt;td&gt;Constant across tiers  &lt;a href="https://azure.microsoft.com/en-us/pricing/details/postgresql/flexible-server/" rel="noopener noreferrer"&gt;azure.microsoft&lt;/a&gt;
&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;&lt;strong&gt;App Service Linux (B1, Basic)&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;₹350–₹500&lt;/td&gt;
&lt;td&gt;Single plan hosting backend + MCP + ETL job.  &lt;a href="https://learn.microsoft.com/en-us/azure/app-service/overview-hosting-plans" rel="noopener noreferrer"&gt;learn.microsoft&lt;/a&gt;
&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;&lt;strong&gt;Azure Container Apps (0.25 vCPU + 0.5 GiB)&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;₹100–₹500 (or less)&lt;/td&gt;
&lt;td&gt;Often lower when usage stays within the free monthly vCPU/GiB‑seconds.  &lt;a href="https://azure.microsoft.com/en-us/pricing/details/container-apps/" rel="noopener noreferrer"&gt;azure.microsoft&lt;/a&gt;
&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;&lt;strong&gt;Azure Container Registry (Basic)&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;₹500–₹600&lt;/td&gt;
&lt;td&gt;For 10 GB storage and basic usage.  &lt;a href="https://azure.microsoft.com/en-us/pricing/details/container-registry/" rel="noopener noreferrer"&gt;azure.microsoft&lt;/a&gt;
&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;&lt;strong&gt;Azure Kubernetes Service (2‑node cluster)&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;₹4,000–₹6,000+&lt;/td&gt;
&lt;td&gt;Node VMs dominate cost.  &lt;a href="https://azure.microsoft.com/en-us/pricing/details/kubernetes-service/" rel="noopener noreferrer"&gt;azure.microsoft&lt;/a&gt;
&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;
&lt;/table&gt;&lt;/div&gt;

&lt;p&gt;From &lt;strong&gt;Free → App Service Basic → ACA → AKS&lt;/strong&gt;, moving up is &lt;strong&gt;less about price&lt;/strong&gt; and &lt;strong&gt;more about capabilities&lt;/strong&gt;:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Isolation
&lt;/li&gt;
&lt;li&gt;True microservices
&lt;/li&gt;
&lt;li&gt;Production‑grade HA / SLA
&lt;/li&gt;
&lt;/ul&gt;




&lt;h2&gt;
  
  
  9. Architecture pros and cons: quick comparison
&lt;/h2&gt;

&lt;div class="table-wrapper-paragraph"&gt;&lt;table&gt;
&lt;thead&gt;
&lt;tr&gt;
&lt;th&gt;Service / Tier&lt;/th&gt;
&lt;th&gt;Pros&lt;/th&gt;
&lt;th&gt;Cons&lt;/th&gt;
&lt;/tr&gt;
&lt;/thead&gt;
&lt;tbody&gt;
&lt;tr&gt;
&lt;td&gt;&lt;strong&gt;App Service Free&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;Free, simple, no infra management.  &lt;a href="https://learn.microsoft.com/en-us/azure/app-service/overview-hosting-plans" rel="noopener noreferrer"&gt;learn.microsoft&lt;/a&gt;
&lt;/td&gt;
&lt;td&gt;No custom‑domain SSL, limited CPU, zero SLA.  &lt;a href="https://learn.microsoft.com/en-us/answers/questions/2245310/azure-app-service-free-plan-is-not-really-usable-i" rel="noopener noreferrer"&gt;learn.microsoft&lt;/a&gt;
&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;&lt;strong&gt;App Service Basic&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;HTTPS + custom domain with free managed certs; low operational effort.  &lt;a href="https://azure.github.io/AppService/2019/11/04/Announcing-Managed-Certificates.html" rel="noopener noreferrer"&gt;azure.github&lt;/a&gt;
&lt;/td&gt;
&lt;td&gt;One HTTP endpoint per app, no true isolation, scaling limited.  &lt;a href="https://stackoverflow.com/questions/62078305/multiple-nodejs-instance-on-azure-app-service" rel="noopener noreferrer"&gt;stackoverflow&lt;/a&gt;
&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;&lt;strong&gt;Azure Container Apps&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;Microservices, per‑second billing, auto‑scale to zero, ETL‑burst friendly.  &lt;a href="https://azure.microsoft.com/en-us/pricing/details/container-apps/" rel="noopener noreferrer"&gt;azure.microsoft&lt;/a&gt;
&lt;/td&gt;
&lt;td&gt;No built‑in custom‑domain TLS; requires Front Door / App Gateway in front for production‑style domains.  &lt;a href="https://azure.microsoft.com/en-us/pricing/details/container-apps/" rel="noopener noreferrer"&gt;azure.microsoft&lt;/a&gt;
&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;&lt;strong&gt;Azure Kubernetes Service (AKS)&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;Full Kubernetes, HA, RBAC, observability, enterprise‑grade ops.  &lt;a href="https://azure.microsoft.com/en-us/pricing/details/kubernetes-service/" rel="noopener noreferrer"&gt;azure.microsoft&lt;/a&gt;
&lt;/td&gt;
&lt;td&gt;Higher cost, steeper learning curve, ops overhead.  &lt;a href="https://azure.microsoft.com/en-us/pricing/details/kubernetes-service/" rel="noopener noreferrer"&gt;azure.microsoft&lt;/a&gt;
&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;&lt;strong&gt;Azure DB for PostgreSQL&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;Managed DB, backups, HA, autoscale options.  &lt;a href="https://azure.microsoft.com/en-us/pricing/details/postgresql/flexible-server/" rel="noopener noreferrer"&gt;azure.microsoft&lt;/a&gt;
&lt;/td&gt;
&lt;td&gt;Separate billing; you must manage connection‑pooling patterns.&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;
&lt;/table&gt;&lt;/div&gt;




&lt;h2&gt;
  
  
  10. When to move up the ladder
&lt;/h2&gt;

&lt;p&gt;Use this mental model:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;
&lt;strong&gt;Stay on Free&lt;/strong&gt; only if you’re &lt;strong&gt;learning&lt;/strong&gt; and not exposing anything to external users.
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Move to App Service Basic&lt;/strong&gt; as soon as you need &lt;strong&gt;HTTPS + custom domain&lt;/strong&gt; (internal dashboards, client demos).
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Move to ACA&lt;/strong&gt; when you want &lt;strong&gt;true isolation&lt;/strong&gt; between backend, MCP, ETL, or when you want &lt;strong&gt;per‑second, scale‑to‑zero billing&lt;/strong&gt;.
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Move to AKS&lt;/strong&gt; when you need &lt;strong&gt;multi‑region HA, enterprise SLA, compliance, or mature Kubernetes&lt;/strong&gt; for your stack.
&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;Also, if you ever catch yourself trying to &lt;strong&gt;run everything in one App Service&lt;/strong&gt; and hitting limits (one endpoint, ETL‑job‑blocking‑API, no scaling boundaries), that’s a &lt;strong&gt;clear signal&lt;/strong&gt; to containerize and move to &lt;strong&gt;ACA or AKS&lt;/strong&gt;.&lt;/p&gt;




&lt;h2&gt;
  
  
  11. Final takeaway
&lt;/h2&gt;

&lt;p&gt;The Azure deployment ladder is &lt;strong&gt;not&lt;/strong&gt; about chasing the cheapest tier forever. It’s about &lt;strong&gt;understanding what each tier unlocks&lt;/strong&gt;:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;
&lt;strong&gt;Free tier&lt;/strong&gt; → Proof of concept only.
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Basic tier&lt;/strong&gt; → HTTPS + custom domain, but &lt;strong&gt;monolithic&lt;/strong&gt;; that’s the ceiling.
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;ACA&lt;/strong&gt; → Microservices on a &lt;strong&gt;budget&lt;/strong&gt;, with true isolation and flexible scaling.
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;AKS&lt;/strong&gt; → Enterprise‑grade ops, HA, and Kubernetes.
&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;In our stack:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;
&lt;strong&gt;PostgreSQL stays on Azure DB for PostgreSQL (B1ms + 32 GB)&lt;/strong&gt; and interacts with &lt;strong&gt;all tiers&lt;/strong&gt;.
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;App Service&lt;/strong&gt; gets you from &lt;strong&gt;Free to Basic&lt;/strong&gt; quickly.
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;ACA&lt;/strong&gt; becomes the &lt;strong&gt;dev‑test layer&lt;/strong&gt; for &lt;strong&gt;backend + MCP + ETL&lt;/strong&gt; without over‑committing to AKS.
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;AKS&lt;/strong&gt; is the &lt;strong&gt;production plan&lt;/strong&gt; when you’re ready for full Kubernetes maturity.
&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;If you’ve been wrestling with &lt;strong&gt;“do I put ETL in WebJobs or in a separate container?”&lt;/strong&gt;, &lt;strong&gt;“how do I share the same DB with all my services?”&lt;/strong&gt;, or &lt;strong&gt;“when is AKS actually worth it?”&lt;/strong&gt;, this deployment ladder captures the &lt;strong&gt;real‑world constraints&lt;/strong&gt; you’ll hit—and the &lt;strong&gt;next step&lt;/strong&gt; to take.&lt;/p&gt;




&lt;h3&gt;
  
  
  Running PostgreSQL on AKS for non‑prod testing
&lt;/h3&gt;

&lt;p&gt;Even though we keep PostgreSQL on Azure DB for PostgreSQL in production, you can also run it &lt;strong&gt;inside AKS&lt;/strong&gt; for &lt;strong&gt;non‑production testing&lt;/strong&gt; while using smaller node sizes to keep costs low. For example, you can:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Deploy PostgreSQL as a &lt;strong&gt;StatefulSet&lt;/strong&gt; on small nodes (e.g., Standard_B2s) with modest CPU, RAM, and storage.
&lt;/li&gt;
&lt;li&gt;Use this cluster purely for &lt;strong&gt;feature, integration, and load‑testing&lt;/strong&gt; of your backend, MCP, and ETL jobs.
&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;This is useful if your goal is to:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Validate that your &lt;strong&gt;application stack behaves correctly&lt;/strong&gt; under pressure.
&lt;/li&gt;
&lt;li&gt;Show demos where the &lt;strong&gt;entire platform (app + DB) lives on Kubernetes&lt;/strong&gt;.
&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;However, this setup comes with caveats:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;You take on &lt;strong&gt;DB operations&lt;/strong&gt; (backups, failover, patching, monitoring) yourself.
&lt;/li&gt;
&lt;li&gt;This non‑prod setup does not replace &lt;strong&gt;production‑style tests on Azure DB for PostgreSQL&lt;/strong&gt;, which remains the recommended pattern for production workloads.
&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;So you can use &lt;strong&gt;Postgres‑on‑AKS&lt;/strong&gt; to &lt;strong&gt;prove your app can scale&lt;/strong&gt; in a test environment, but you should keep &lt;strong&gt;managed DB&lt;/strong&gt; for actual production, where HA, SLA, and operational safety matter. &lt;/p&gt;

&lt;h3&gt;
  
  
  References
&lt;/h3&gt;

&lt;ul&gt;
&lt;li&gt;Azure App Service plans overview &lt;a href="https://learn.microsoft.com/en-us/azure/app-service/overview-hosting-plans" rel="noopener noreferrer"&gt;learn.microsoft&lt;/a&gt;
&lt;/li&gt;
&lt;li&gt;App Service Managed Certificates GA &lt;a href="https://azure.github.io/AppService/2021/05/25/App-Service-Managed-Certificate-GA.html" rel="noopener noreferrer"&gt;azure.github&lt;/a&gt;
&lt;/li&gt;
&lt;li&gt;Azure Container Apps pricing (including free vCPU‑seconds / GiB‑seconds) &lt;a href="https://azure.microsoft.com/en-us/pricing/details/container-apps/" rel="noopener noreferrer"&gt;azure.microsoft&lt;/a&gt;
&lt;/li&gt;
&lt;li&gt;Azure Database for PostgreSQL Flexible Server pricing &amp;amp; cost‑optimization guide &lt;a href="https://docs.azure.cn/en-us/postgresql/flexible-server/how-to-cost-optimization" rel="noopener noreferrer"&gt;docs.azure&lt;/a&gt;
&lt;/li&gt;
&lt;li&gt;Azure Container Registry pricing &lt;a href="https://azure.microsoft.com/en-us/pricing/details/container-registry/" rel="noopener noreferrer"&gt;azure.microsoft&lt;/a&gt;
&lt;/li&gt;
&lt;li&gt;Azure Kubernetes Service pricing &lt;a href="https://azure.microsoft.com/en-us/pricing/details/kubernetes-service/" rel="noopener noreferrer"&gt;azure.microsoft&lt;/a&gt;
&lt;/li&gt;
&lt;/ul&gt;




</description>
      <category>azure</category>
      <category>devops</category>
      <category>architecture</category>
      <category>programming</category>
    </item>
  </channel>
</rss>
