<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/">
  <channel>
    <title>DEV Community: Lokesh Mure</title>
    <description>The latest articles on DEV Community by Lokesh Mure (@asklokesh).</description>
    <link>https://dev.to/asklokesh</link>
    <image>
      <url>https://media2.dev.to/dynamic/image/width=90,height=90,fit=cover,gravity=auto,format=auto/https:%2F%2Fdev-to-uploads.s3.us-east-2.amazonaws.com%2Fuploads%2Fuser%2Fprofile_image%2F3680093%2F65cf0060-8b0f-437e-a77a-d0c1f0c2afa3.jpeg</url>
      <title>DEV Community: Lokesh Mure</title>
      <link>https://dev.to/asklokesh</link>
    </image>
    <atom:link rel="self" type="application/rss+xml" href="https://dev.to/feed/asklokesh"/>
    <language>en</language>
    <item>
      <title>One Click to Build, Verify, Ship..!</title>
      <dc:creator>Lokesh Mure</dc:creator>
      <pubDate>Fri, 12 Jun 2026 21:07:37 +0000</pubDate>
      <link>https://dev.to/asklokesh/-197n</link>
      <guid>https://dev.to/asklokesh/-197n</guid>
      <description>&lt;div class="ltag__link--embedded"&gt;
  &lt;div class="crayons-story "&gt;
  &lt;a href="https://dev.to/asklokesh/loki-mode-at-20k-developers-15-releases-in-4-days-and-what-we-learned-about-verified-vs-live-446c" class="crayons-story__hidden-navigation-link"&gt;Loki Mode at 20K developers: 15 releases in 4 days, and what we learned about verified vs live autonomous coding&lt;/a&gt;


  &lt;div class="crayons-story__body crayons-story__body-full_post"&gt;
    &lt;div class="crayons-story__top"&gt;
      &lt;div class="crayons-story__meta"&gt;
        &lt;div class="crayons-story__author-pic"&gt;

          &lt;a href="https://dev.to/asklokesh" class="crayons-avatar  crayons-avatar--l  "&gt;
            &lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Fuser%2Fprofile_image%2F3680093%2F65cf0060-8b0f-437e-a77a-d0c1f0c2afa3.jpeg" alt="asklokesh profile" class="crayons-avatar__image"&gt;
          &lt;/a&gt;
        &lt;/div&gt;
        &lt;div&gt;
          &lt;div&gt;
            &lt;a href="https://dev.to/asklokesh" class="crayons-story__secondary fw-medium m:hidden"&gt;
              Lokesh Mure
            &lt;/a&gt;
            &lt;div class="profile-preview-card relative mb-4 s:mb-0 fw-medium hidden m:inline-block"&gt;
              
                Lokesh Mure
                
              
              &lt;div id="story-author-preview-content-3885121" class="profile-preview-card__content crayons-dropdown branded-7 p-4 pt-0"&gt;
                &lt;div class="gap-4 grid"&gt;
                  &lt;div class="-mt-4"&gt;
                    &lt;a href="https://dev.to/asklokesh" class="flex"&gt;
                      &lt;span class="crayons-avatar crayons-avatar--xl mr-2 shrink-0"&gt;
                        &lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Fuser%2Fprofile_image%2F3680093%2F65cf0060-8b0f-437e-a77a-d0c1f0c2afa3.jpeg" class="crayons-avatar__image" alt=""&gt;
                      &lt;/span&gt;
                      &lt;span class="crayons-link crayons-subtitle-2 mt-5"&gt;Lokesh Mure&lt;/span&gt;
                    &lt;/a&gt;
                  &lt;/div&gt;
                  &lt;div class="print-hidden"&gt;
                    
                      Follow
                    
                  &lt;/div&gt;
                  &lt;div class="author-preview-metadata-container"&gt;&lt;/div&gt;
                &lt;/div&gt;
              &lt;/div&gt;
            &lt;/div&gt;

          &lt;/div&gt;
          &lt;a href="https://dev.to/asklokesh/loki-mode-at-20k-developers-15-releases-in-4-days-and-what-we-learned-about-verified-vs-live-446c" class="crayons-story__tertiary fs-xs"&gt;&lt;time&gt;Jun 12&lt;/time&gt;&lt;span class="time-ago-indicator-initial-placeholder"&gt;&lt;/span&gt;&lt;/a&gt;
        &lt;/div&gt;
      &lt;/div&gt;

    &lt;/div&gt;

    &lt;div class="crayons-story__indention"&gt;
      &lt;h2 class="crayons-story__title crayons-story__title-full_post"&gt;
        &lt;a href="https://dev.to/asklokesh/loki-mode-at-20k-developers-15-releases-in-4-days-and-what-we-learned-about-verified-vs-live-446c" id="article-link-3885121"&gt;
          Loki Mode at 20K developers: 15 releases in 4 days, and what we learned about verified vs live autonomous coding
        &lt;/a&gt;
      &lt;/h2&gt;
        &lt;div class="crayons-story__tags"&gt;
            &lt;a class="crayons-tag  crayons-tag--monochrome " href="https://dev.to/t/ai"&gt;&lt;span class="crayons-tag__prefix"&gt;#&lt;/span&gt;ai&lt;/a&gt;
            &lt;a class="crayons-tag  crayons-tag--monochrome " href="https://dev.to/t/opensource"&gt;&lt;span class="crayons-tag__prefix"&gt;#&lt;/span&gt;opensource&lt;/a&gt;
            &lt;a class="crayons-tag  crayons-tag--monochrome " href="https://dev.to/t/devtools"&gt;&lt;span class="crayons-tag__prefix"&gt;#&lt;/span&gt;devtools&lt;/a&gt;
            &lt;a class="crayons-tag  crayons-tag--monochrome " href="https://dev.to/t/programming"&gt;&lt;span class="crayons-tag__prefix"&gt;#&lt;/span&gt;programming&lt;/a&gt;
        &lt;/div&gt;
      &lt;div class="crayons-story__bottom"&gt;
        &lt;div class="crayons-story__details"&gt;
          &lt;a href="https://dev.to/asklokesh/loki-mode-at-20k-developers-15-releases-in-4-days-and-what-we-learned-about-verified-vs-live-446c" class="crayons-btn crayons-btn--s crayons-btn--ghost crayons-btn--icon-left"&gt;
            &lt;div class="multiple_reactions_aggregate"&gt;
              &lt;span class="multiple_reactions_icons_container"&gt;
                  &lt;span class="crayons_icon_container"&gt;
                    &lt;img src="https://assets.dev.to/assets/exploding-head-daceb38d627e6ae9b730f36a1e390fca556a4289d5a41abb2c35068ad3e2c4b5.svg" width="18" height="18"&gt;
                  &lt;/span&gt;
                  &lt;span class="crayons_icon_container"&gt;
                    &lt;img src="https://assets.dev.to/assets/multi-unicorn-b44d6f8c23cdd00964192bedc38af3e82463978aa611b4365bd33a0f1f4f3e97.svg" width="18" height="18"&gt;
                  &lt;/span&gt;
                  &lt;span class="crayons_icon_container"&gt;
                    &lt;img src="https://assets.dev.to/assets/sparkle-heart-5f9bee3767e18deb1bb725290cb151c25234768a0e9a2bd39370c382d02920cf.svg" width="18" height="18"&gt;
                  &lt;/span&gt;
              &lt;/span&gt;
              &lt;span class="aggregate_reactions_counter"&gt;40&lt;span class="hidden s:inline"&gt;&amp;nbsp;reactions&lt;/span&gt;&lt;/span&gt;
            &lt;/div&gt;
          &lt;/a&gt;
            &lt;a href="https://dev.to/asklokesh/loki-mode-at-20k-developers-15-releases-in-4-days-and-what-we-learned-about-verified-vs-live-446c#comments" class="crayons-btn crayons-btn--s crayons-btn--ghost crayons-btn--icon-left flex items-center"&gt;
              

              &lt;span class="hidden s:inline"&gt;Add&amp;nbsp;Comment&lt;/span&gt;
            &lt;/a&gt;
        &lt;/div&gt;
        &lt;div class="crayons-story__save"&gt;
          &lt;small class="crayons-story__tertiary fs-xs mr-2"&gt;
            19 min read
          &lt;/small&gt;
            
              &lt;span class="bm-initial crayons-icon c-btn__icon"&gt;
                

              &lt;/span&gt;
              &lt;span class="bm-success crayons-icon c-btn__icon"&gt;
                

              &lt;/span&gt;
            
        &lt;/div&gt;
      &lt;/div&gt;
    &lt;/div&gt;
  &lt;/div&gt;
&lt;/div&gt;

&lt;/div&gt;


</description>
    </item>
    <item>
      <title>Loki Mode at 20K developers: 15 releases in 4 days, and what we learned about verified vs live autonomous coding</title>
      <dc:creator>Lokesh Mure</dc:creator>
      <pubDate>Fri, 12 Jun 2026 19:02:19 +0000</pubDate>
      <link>https://dev.to/asklokesh/loki-mode-at-20k-developers-15-releases-in-4-days-and-what-we-learned-about-verified-vs-live-446c</link>
      <guid>https://dev.to/asklokesh/loki-mode-at-20k-developers-15-releases-in-4-days-and-what-we-learned-about-verified-vs-live-446c</guid>
      <description>&lt;p&gt;I was halfway through a coffee when our self-update telemetry ticked over 20,000 unique developers. Six months ago, &lt;a href="https://github.com/asklokesh/loki-mode" rel="noopener noreferrer"&gt;Loki Mode&lt;/a&gt; was a side project to scratch a personal itch. I wanted an autonomous coding agent I would actually trust to ship a diff into my own repo. Not a Replit-style cloud sandbox. Not a Lovable-style preview. Not a Cursor-style editor pane. A loop I could leave running overnight, walk back to in the morning, and trust the result on the &lt;code&gt;git diff&lt;/code&gt;.&lt;/p&gt;

&lt;p&gt;This week we shipped 15 minor releases in 4 days, and I think we finally landed the thing.&lt;/p&gt;

&lt;p&gt;This post is the honest engineering writeup. Architecture, a real comparison table with check marks, a hands-on walkthrough using a real spec (not the training-wheels quickstart), the issue-to-merged-PR workflow with screenshots, and the parts we got wrong on the way. If you skim, the comparison table in section 4 is where the punchline lives.&lt;/p&gt;

&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2F0orv4cy16nafuo2lbibg.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2F0orv4cy16nafuo2lbibg.png" alt="20,000 developers, 6,000 weekly active, 500,000 CLI sessions across Norway, USA, Hong Kong, UK and India" width="800" height="418"&gt;&lt;/a&gt;&lt;/p&gt;

&lt;h2&gt;
  
  
  1. The problem we set out to solve
&lt;/h2&gt;

&lt;p&gt;When Replit Agent and Lovable started landing late last year, every founder I know lit up. Spec to deployed app in 90 seconds. Public preview URL. Done.&lt;/p&gt;

&lt;p&gt;But a quiet thing kept happening on my own builds. The preview would load. The Cmd+R refresh would show "Welcome to your todo app." I would click "add todo," type something, hit submit, and watch the request post to a function that swallowed errors silently. The agent had marked the run "complete." The preview showed something running. The preview was lying.&lt;/p&gt;

&lt;p&gt;The thing I wanted was simple: an autonomous loop that &lt;em&gt;refuses&lt;/em&gt; to call work done on an empty diff or a failing test. A real gate. The same gate I would want around a junior engineer's first solo PR. Not "this looks running" but "this would pass code review."&lt;/p&gt;

&lt;p&gt;That is what Loki Mode is. Built it locally first, open-sourced it, and the user base found it.&lt;/p&gt;

&lt;h2&gt;
  
  
  2. The numbers, honest version
&lt;/h2&gt;

&lt;p&gt;From PostHog (anonymous, opt-out via &lt;code&gt;LOKI_TELEMETRY_DISABLED=true&lt;/code&gt; or &lt;code&gt;DO_NOT_TRACK=1&lt;/code&gt;, never captures prompts or PRD content or source code):&lt;/p&gt;

&lt;div class="table-wrapper-paragraph"&gt;&lt;table&gt;
&lt;thead&gt;
&lt;tr&gt;
&lt;th&gt;Metric&lt;/th&gt;
&lt;th&gt;Value&lt;/th&gt;
&lt;/tr&gt;
&lt;/thead&gt;
&lt;tbody&gt;
&lt;tr&gt;
&lt;td&gt;Cumulative developers installed&lt;/td&gt;
&lt;td&gt;20,000+&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Weekly active developers&lt;/td&gt;
&lt;td&gt;~6,000&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Cumulative CLI sessions&lt;/td&gt;
&lt;td&gt;500,000+&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Top country (absolute count)&lt;/td&gt;
&lt;td&gt;United States&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Top country (per-capita)&lt;/td&gt;
&lt;td&gt;Norway&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Trending up fast (last 30 days)&lt;/td&gt;
&lt;td&gt;Hong Kong, United Kingdom, India&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Long-tail markets&lt;/td&gt;
&lt;td&gt;Germany, Brazil, Singapore, Australia&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Top install channel&lt;/td&gt;
&lt;td&gt;Bun (51%), npm (38%), Homebrew (7%), Docker (4%)&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Median time-to-first-verified-build&lt;/td&gt;
&lt;td&gt;~47 minutes from &lt;code&gt;loki start ./prd.md&lt;/code&gt;
&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;
&lt;/table&gt;&lt;/div&gt;

&lt;p&gt;What is honest: this is one person (me) maintaining the project. Two open GitHub issues right now. Bus factor of one. The trade is fast release cadence, fast PR review, and a maintainer who replies on Discord within hours.&lt;/p&gt;

&lt;p&gt;What the data tells me: the developers pulled hardest to Loki are the ones who would not let a hosted spec-to-app tool touch their repo at all. Self-hosted, your-keys, no proxy. That market is bigger than the hosted-tool TAM, and Loki's &lt;code&gt;BSL-1.1&lt;/code&gt; source-available model was built to serve exactly that market.&lt;/p&gt;

&lt;h2&gt;
  
  
  3. How the architecture actually works
&lt;/h2&gt;

&lt;p&gt;Skip this if you only care about the product comparison. Read it if you want to know what is under the hood before you trust the thing with your repo.&lt;/p&gt;

&lt;h3&gt;
  
  
  Inputs (the "spec" surface)
&lt;/h3&gt;

&lt;p&gt;A spec is any of:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;A PRD markdown file: &lt;code&gt;./prd.md&lt;/code&gt;
&lt;/li&gt;
&lt;li&gt;A GitHub, GitLab, Jira, or Azure DevOps issue (URL or shorthand). GitHub accepts &lt;code&gt;123&lt;/code&gt;, &lt;code&gt;#123&lt;/code&gt;, &lt;code&gt;owner/repo#123&lt;/code&gt;, and full issue URLs. GitLab takes &lt;code&gt;gitlab.com/owner/repo/-/issues/N&lt;/code&gt;. Jira takes &lt;code&gt;PROJ-123&lt;/code&gt; or the Atlassian URL. Azure DevOps takes the work-item URL.&lt;/li&gt;
&lt;li&gt;An OpenAPI document (YAML or JSON)&lt;/li&gt;
&lt;li&gt;An OpenSpec change directory: &lt;code&gt;./openspec/changes/feature-x/&lt;/code&gt;
&lt;/li&gt;
&lt;li&gt;A plain text or YAML one-liner: &lt;code&gt;loki start "build a markdown editor with file sync"&lt;/code&gt;
&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;The CLI auto-detects which kind of spec you handed it from its file extension or URL pattern; input that matches neither is handled as a plain-text one-liner. The unified entry point is &lt;code&gt;loki start&lt;/code&gt;; &lt;code&gt;loki run &amp;lt;issue-ref&amp;gt;&lt;/code&gt; is a deprecated alias that still works.&lt;/p&gt;

&lt;h3&gt;
  
  
  The execution loop: RARV-C
&lt;/h3&gt;

&lt;p&gt;Every run iterates through a five-phase closure loop, with the model tier rotating per phase:&lt;/p&gt;

&lt;div class="table-wrapper-paragraph"&gt;&lt;table&gt;
&lt;thead&gt;
&lt;tr&gt;
&lt;th&gt;Phase&lt;/th&gt;
&lt;th&gt;Job&lt;/th&gt;
&lt;th&gt;Default model tier&lt;/th&gt;
&lt;/tr&gt;
&lt;/thead&gt;
&lt;tbody&gt;
&lt;tr&gt;
&lt;td&gt;
&lt;strong&gt;R&lt;/strong&gt;eason&lt;/td&gt;
&lt;td&gt;Architectural decisions, task decomposition, planning&lt;/td&gt;
&lt;td&gt;Planning (Claude Opus by default)&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;
&lt;strong&gt;A&lt;/strong&gt;ct&lt;/td&gt;
&lt;td&gt;Code generation, file writes, tool use&lt;/td&gt;
&lt;td&gt;Development (Opus or Sonnet)&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;
&lt;strong&gt;R&lt;/strong&gt;eflect&lt;/td&gt;
&lt;td&gt;Self-critique, 3-reviewer blind council vote on the current diff&lt;/td&gt;
&lt;td&gt;Development&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;
&lt;strong&gt;V&lt;/strong&gt;erify&lt;/td&gt;
&lt;td&gt;Automated quality gates (tests, lint, security, coverage, held-out evals)&lt;/td&gt;
&lt;td&gt;Fast (Sonnet or Haiku)&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;
&lt;strong&gt;C&lt;/strong&gt;ompound&lt;/td&gt;
&lt;td&gt;Episodic memory write, learning extraction for future runs&lt;/td&gt;
&lt;td&gt;Fast&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;
&lt;/table&gt;&lt;/div&gt;

&lt;p&gt;The &lt;code&gt;C&lt;/code&gt; is what makes the loop compound across sessions. After every iteration, the agent records what it tried, what worked, what failed, and which files were touched into &lt;code&gt;.loki/memory/episodic/&lt;/code&gt;. Future runs in the same project (or sibling projects, when cross-project memory is on) get the agent's accumulated context as a "PAST FAILURES TO AVOID" block in the next prompt.&lt;/p&gt;

&lt;h3&gt;
  
  
  The trust layer
&lt;/h3&gt;

&lt;p&gt;Loki refuses to call a run done on:&lt;/p&gt;

&lt;ol&gt;
&lt;li&gt;An &lt;strong&gt;empty diff&lt;/strong&gt; against the run-start commit. Always blocks.&lt;/li&gt;
&lt;li&gt;A &lt;strong&gt;red test run&lt;/strong&gt; when a test runner was detected and ran. Always blocks.&lt;/li&gt;
&lt;li&gt;A &lt;strong&gt;failing held-out spec eval&lt;/strong&gt; (section 6 walks this through). Always blocks.&lt;/li&gt;
&lt;li&gt;A &lt;strong&gt;council REJECT&lt;/strong&gt; verdict from the 3-reviewer blind review. Always blocks.&lt;/li&gt;
&lt;/ol&gt;

&lt;p&gt;Every gate writes machine-readable evidence to &lt;code&gt;.loki/verify/evidence.json&lt;/code&gt; and a human-readable report to &lt;code&gt;.loki/verify/report.md&lt;/code&gt;. Every completed run also emits a portable &lt;code&gt;.loki/proofs/&amp;lt;run_id&amp;gt;/proof.json + index.html&lt;/code&gt; you can hand to a teammate, an auditor, or a PR reviewer.&lt;/p&gt;

&lt;h3&gt;
  
  
  Provider routing
&lt;/h3&gt;

&lt;p&gt;Loki dispatches to one of these underlying agent CLIs:&lt;/p&gt;

&lt;div class="table-wrapper-paragraph"&gt;&lt;table&gt;
&lt;thead&gt;
&lt;tr&gt;
&lt;th&gt;Provider&lt;/th&gt;
&lt;th&gt;Tier&lt;/th&gt;
&lt;th&gt;Notes&lt;/th&gt;
&lt;/tr&gt;
&lt;/thead&gt;
&lt;tbody&gt;
&lt;tr&gt;
&lt;td&gt;Claude Code&lt;/td&gt;
&lt;td&gt;Tier 1, E2E-verified primary&lt;/td&gt;
&lt;td&gt;Default. Deepest SDK integration.&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;OpenAI Codex&lt;/td&gt;
&lt;td&gt;Tier 2, Experimental&lt;/td&gt;
&lt;td&gt;Works end-to-end.&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Cline&lt;/td&gt;
&lt;td&gt;Tier 2, Experimental&lt;/td&gt;
&lt;td&gt;Via the &lt;code&gt;-y&lt;/code&gt; autonomous flag.&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Aider&lt;/td&gt;
&lt;td&gt;Tier 3, Experimental&lt;/td&gt;
&lt;td&gt;Best for narrower file-edit tasks.&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;
&lt;/table&gt;&lt;/div&gt;

&lt;p&gt;Plus, via &lt;code&gt;ANTHROPIC_BASE_URL&lt;/code&gt;, any LLM that speaks the Anthropic API:&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight shell"&gt;&lt;code&gt;&lt;span class="c"&gt;# Route the Claude provider through local Ollama (qwen2.5-coder:32b)&lt;/span&gt;
&lt;span class="nb"&gt;export &lt;/span&gt;&lt;span class="nv"&gt;ANTHROPIC_BASE_URL&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;http://localhost:11434/v1
&lt;span class="nb"&gt;export &lt;/span&gt;&lt;span class="nv"&gt;ANTHROPIC_API_KEY&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;ollama
&lt;span class="nb"&gt;export &lt;/span&gt;&lt;span class="nv"&gt;LOKI_MODEL_OVERRIDE&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;qwen2.5-coder:32b
loki start ./prd.md
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;&lt;code&gt;LOKI_MODEL_OVERRIDE&lt;/code&gt; only takes effect when &lt;code&gt;ANTHROPIC_BASE_URL&lt;/code&gt; is also set, so you can never accidentally reroute an Anthropic-native run.&lt;/p&gt;

&lt;p&gt;Full multi-provider setup: &lt;a href="https://autonomi.dev/docs/multi-provider-setup" rel="noopener noreferrer"&gt;autonomi.dev/docs/multi-provider-setup&lt;/a&gt;.&lt;/p&gt;

&lt;h2&gt;
  
  
  4. The comparison table, with check marks
&lt;/h2&gt;

&lt;p&gt;This is the part the README will not tell you because it is hard to write honestly. Here is what each tool is good at, and where each tool is the right pick.&lt;/p&gt;

&lt;div class="table-wrapper-paragraph"&gt;&lt;table&gt;
&lt;thead&gt;
&lt;tr&gt;
&lt;th&gt;Capability&lt;/th&gt;
&lt;th&gt;Replit Agent&lt;/th&gt;
&lt;th&gt;Lovable&lt;/th&gt;
&lt;th&gt;Cursor&lt;/th&gt;
&lt;th&gt;Loki Mode&lt;/th&gt;
&lt;/tr&gt;
&lt;/thead&gt;
&lt;tbody&gt;
&lt;tr&gt;
&lt;td&gt;Instant cloud sandbox + URL&lt;/td&gt;
&lt;td&gt;✅&lt;/td&gt;
&lt;td&gt;✅&lt;/td&gt;
&lt;td&gt;❌&lt;/td&gt;
&lt;td&gt;❌&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Multiplayer collaboration&lt;/td&gt;
&lt;td&gt;✅&lt;/td&gt;
&lt;td&gt;⚠️&lt;/td&gt;
&lt;td&gt;❌&lt;/td&gt;
&lt;td&gt;❌&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Marketing-page / landing-page taste&lt;/td&gt;
&lt;td&gt;⚠️&lt;/td&gt;
&lt;td&gt;✅&lt;/td&gt;
&lt;td&gt;❌&lt;/td&gt;
&lt;td&gt;⚠️&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Editor integration (inline)&lt;/td&gt;
&lt;td&gt;❌&lt;/td&gt;
&lt;td&gt;❌&lt;/td&gt;
&lt;td&gt;✅&lt;/td&gt;
&lt;td&gt;⚠️&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Runs locally (no cloud upload)&lt;/td&gt;
&lt;td&gt;❌&lt;/td&gt;
&lt;td&gt;❌&lt;/td&gt;
&lt;td&gt;✅&lt;/td&gt;
&lt;td&gt;✅&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Your own provider keys (no proxy)&lt;/td&gt;
&lt;td&gt;❌&lt;/td&gt;
&lt;td&gt;❌&lt;/td&gt;
&lt;td&gt;⚠️&lt;/td&gt;
&lt;td&gt;✅&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Fullstack with backend + database&lt;/td&gt;
&lt;td&gt;⚠️&lt;/td&gt;
&lt;td&gt;⚠️&lt;/td&gt;
&lt;td&gt;⚠️&lt;/td&gt;
&lt;td&gt;✅&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Compose-first multi-service (healthchecks)&lt;/td&gt;
&lt;td&gt;❌&lt;/td&gt;
&lt;td&gt;❌&lt;/td&gt;
&lt;td&gt;❌&lt;/td&gt;
&lt;td&gt;✅&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Background runs (delegate + notify)&lt;/td&gt;
&lt;td&gt;❌&lt;/td&gt;
&lt;td&gt;❌&lt;/td&gt;
&lt;td&gt;❌&lt;/td&gt;
&lt;td&gt;✅&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;&lt;strong&gt;CI-gateable verification (exit 0/1/2)&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;❌&lt;/td&gt;
&lt;td&gt;❌&lt;/td&gt;
&lt;td&gt;❌&lt;/td&gt;
&lt;td&gt;✅&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Held-out spec evals (anti-reward-hacking)&lt;/td&gt;
&lt;td&gt;❌&lt;/td&gt;
&lt;td&gt;❌&lt;/td&gt;
&lt;td&gt;❌&lt;/td&gt;
&lt;td&gt;✅&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Reviewer subcalls cannot mutate code&lt;/td&gt;
&lt;td&gt;❌&lt;/td&gt;
&lt;td&gt;❌&lt;/td&gt;
&lt;td&gt;❌&lt;/td&gt;
&lt;td&gt;✅&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Machine-readable evidence per run&lt;/td&gt;
&lt;td&gt;⚠️&lt;/td&gt;
&lt;td&gt;❌&lt;/td&gt;
&lt;td&gt;❌&lt;/td&gt;
&lt;td&gt;✅&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Shareable proof-of-run artifact&lt;/td&gt;
&lt;td&gt;❌&lt;/td&gt;
&lt;td&gt;❌&lt;/td&gt;
&lt;td&gt;❌&lt;/td&gt;
&lt;td&gt;✅&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Issue-to-PR autonomous workflow&lt;/td&gt;
&lt;td&gt;❌&lt;/td&gt;
&lt;td&gt;❌&lt;/td&gt;
&lt;td&gt;❌&lt;/td&gt;
&lt;td&gt;✅&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Provider-agnostic (4 + any LLM)&lt;/td&gt;
&lt;td&gt;❌&lt;/td&gt;
&lt;td&gt;❌&lt;/td&gt;
&lt;td&gt;⚠️&lt;/td&gt;
&lt;td&gt;✅&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Source-available license&lt;/td&gt;
&lt;td&gt;❌&lt;/td&gt;
&lt;td&gt;❌&lt;/td&gt;
&lt;td&gt;❌&lt;/td&gt;
&lt;td&gt;✅ (BSL 1.1, → Apache 2.0 in 2030)&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Air-gappable&lt;/td&gt;
&lt;td&gt;❌&lt;/td&gt;
&lt;td&gt;❌&lt;/td&gt;
&lt;td&gt;❌&lt;/td&gt;
&lt;td&gt;✅&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Mobile browser editor&lt;/td&gt;
&lt;td&gt;✅&lt;/td&gt;
&lt;td&gt;✅&lt;/td&gt;
&lt;td&gt;❌&lt;/td&gt;
&lt;td&gt;❌&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;
&lt;/table&gt;&lt;/div&gt;

&lt;p&gt;&lt;strong&gt;Pick Replit if&lt;/strong&gt; the goal is to learn, demo, teach, or prototype-with-a-URL fast. They earned that lane honestly.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Pick Lovable if&lt;/strong&gt; the spec is mostly visual and what you ship is a landing page or design-heavy frontend. The taste of their output is genuinely ahead for that work.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Pick Cursor if&lt;/strong&gt; you want AI assistance inside the editor where you already work. The Composer + Tab autocomplete are well-tuned to existing muscle memory.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Pick Loki Mode if&lt;/strong&gt; you are shipping into an existing private codebase, you need a deterministic CI gate on the diff before merge, your provider keys cannot leave your machine, or you want a council that physically cannot edit the code it is reviewing.&lt;/p&gt;

&lt;p&gt;These tools can coexist. Use Cursor while you write the spec. Use Replit when teaching the team. Use Lovable for the marketing site for whatever Loki is building. The case for picking Loki Mode is specifically "verified diff before merge into my repo."&lt;/p&gt;

&lt;h2&gt;
  
  
  5. Workflow 1: a real PRD to a running fullstack app
&lt;/h2&gt;

&lt;p&gt;The shortest path that matters. Drop a markdown spec, get a verified Git repo with a running app.&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight shell"&gt;&lt;code&gt;&lt;span class="c"&gt;# Install (Bun recommended; v8 will be Bun-only)&lt;/span&gt;
bun &lt;span class="nb"&gt;install&lt;/span&gt; &lt;span class="nt"&gt;-g&lt;/span&gt; loki-mode

&lt;span class="c"&gt;# Verify the install&lt;/span&gt;
loki version
loki doctor
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;Write a real spec. The agent reads it as markdown, so be explicit about acceptance criteria. Here is the one I used to test the v7.26.0 compose-first support:&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight markdown"&gt;&lt;code&gt;&lt;span class="gh"&gt;# TaskFlow&lt;/span&gt;

A task tracker with user auth, full-text search, and tags.

&lt;span class="gu"&gt;## Stack&lt;/span&gt;
&lt;span class="p"&gt;-&lt;/span&gt; Backend: Node + Express + Postgres
&lt;span class="p"&gt;-&lt;/span&gt; Cache + sessions: Redis
&lt;span class="p"&gt;-&lt;/span&gt; Frontend: React + Vite, served by the same backend

&lt;span class="gu"&gt;## Acceptance criteria&lt;/span&gt;
&lt;span class="p"&gt;-&lt;/span&gt; POST /api/auth/register creates a user (bcrypt hashed password)
&lt;span class="p"&gt;-&lt;/span&gt; POST /api/auth/login returns a session cookie
&lt;span class="p"&gt;-&lt;/span&gt; GET /api/tasks returns the logged-in user's tasks
&lt;span class="p"&gt;-&lt;/span&gt; POST /api/tasks creates a task with title, body, tags[], due_date
&lt;span class="p"&gt;-&lt;/span&gt; PATCH /api/tasks/:id updates a task
&lt;span class="p"&gt;-&lt;/span&gt; DELETE /api/tasks/:id soft-deletes a task
&lt;span class="p"&gt;-&lt;/span&gt; GET /api/search?q=... returns tasks matching the query (Postgres FTS)
&lt;span class="p"&gt;-&lt;/span&gt; 401 when the session is missing or expired
&lt;span class="p"&gt;-&lt;/span&gt; All endpoints return JSON; 422 on invalid input

&lt;span class="gu"&gt;## Run&lt;/span&gt;
&lt;span class="p"&gt;-&lt;/span&gt; One command: docker compose up
&lt;span class="p"&gt;-&lt;/span&gt; Healthcheck on the web service must reflect actual readiness
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;Save that as &lt;code&gt;./prd.md&lt;/code&gt; and run:&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight shell"&gt;&lt;code&gt;loki start ./prd.md
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;What you will see:&lt;/p&gt;

&lt;ol&gt;
&lt;li&gt;
&lt;strong&gt;Plan auto-shown.&lt;/strong&gt; Before the agent does anything, Loki prints a complexity tier, cost estimate, iteration cap, and time estimate. The estimate is real -- it uses the actual model pricing table. Declining costs nothing.&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Dashboard auto-opens.&lt;/strong&gt; A new browser tab opens at &lt;code&gt;http://localhost:57374&lt;/code&gt;. (Skipped on CI, with &lt;code&gt;--detach&lt;/code&gt;/&lt;code&gt;--background&lt;/code&gt;, over SSH without a TTY, with piped stdin, or with &lt;code&gt;LOKI_NO_AUTO_OPEN=1&lt;/code&gt;.)&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;The agent starts iterating.&lt;/strong&gt; Reason, Act, Reflect, Verify, Compound.&lt;/li&gt;
&lt;/ol&gt;

&lt;p&gt;In the dashboard, the left sidebar shows your project. The main area has tabs for Overview, Tasks, RARV Timeline, Quality, Cost, and Live App.&lt;/p&gt;

&lt;p&gt;The Live App tab is the workflow change that pulled us past 20K. Before v7.24, you had to &lt;code&gt;cd&lt;/code&gt; to the project, run the dev server in another terminal, and pray it talked to the right port. Now the agent is still writing files and the app is already running in the iframe. You click "Add Task," type something, hit submit. You watch the bug get fixed in real time over the next iteration.&lt;/p&gt;

&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fuqdbo8fr35ttavsljb9e.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fuqdbo8fr35ttavsljb9e.png" alt="Live App Preview iframe showing the in-progress app embedded inside the dashboard. The agent is still writing iteration 6 (FTS index and search route) while the app is already running and serving the search query" width="800" height="418"&gt;&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;For multi-service specs (like the one above), v7.26.0 ships compose-first detection. The agent gets a &lt;code&gt;RUN_CONTRACT&lt;/code&gt; instruction telling it to generate a 12-factor &lt;code&gt;docker-compose.yml&lt;/code&gt; with a clearly named primary web service (either &lt;code&gt;web&lt;/code&gt;/&lt;code&gt;app&lt;/code&gt; by name, or labeled &lt;code&gt;loki.primary=true&lt;/code&gt;), healthchecks on every service, &lt;code&gt;depends_on&lt;/code&gt; wiring, env-var config, and a committed &lt;code&gt;.env.example&lt;/code&gt;. The runner identifies the primary web service by that name or label and surfaces THAT in the iframe rather than accidentally surfacing a Postgres port.&lt;/p&gt;

&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fautonomi.dev%2Fmedia%2Fv7-35%2Farchitecture-fullstack.png%3Fv%3D2" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fautonomi.dev%2Fmedia%2Fv7-35%2Farchitecture-fullstack.png%3Fv%3D2" title="spec to RARV-C loop to docker-compose with healthchecks to VERIFIED" alt="How Loki turns a 3-line spec into a verified running compose stack" width="1200" height="627"&gt;&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;Behind the scenes the council reviews each iteration. The Council tab shows the 3-reviewer blind verdicts with the evidence each reviewer raised (not just APPROVED/REJECTED badges):&lt;/p&gt;

&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fegw6ous6r6go2prg6pzv.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fegw6ous6r6go2prg6pzv.png" alt="A real council verdict from iteration 6: Reviewer 1 APPROVES with cited line numbers, Reviewer 2 raises a CONCERN about bcrypt rounds being hardcoded to 10 when the spec required &gt;=12, Reviewer 3, the devil's advocate, APPROVES after running 5 adversarial attacks" width="800" height="418"&gt;&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;The Cost tab tracks per-iteration spend. v7.11.0 added a pre-cap warning at 80% (not just the existing hard stop at 100%); the warning broadcasts over WebSocket so a persistent amber banner appears on every dashboard page if you walked away from the terminal:&lt;/p&gt;

&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fse0zqrong0fqsa7mlxii.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fse0zqrong0fqsa7mlxii.png" alt="Cost panel showing $1.60 of $2.00 spent at iteration 6 with the persistent 80% warning banner across the top, a per-iteration breakdown table with the council subcall just pushing the total over the warn threshold, and the cost-honesty contract showing the estimator quote and dispatched model agree" width="800" height="418"&gt;&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;When the run completes, your project directory contains a working app:&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight shell"&gt;&lt;code&gt;docker compose up           &lt;span class="c"&gt;# the stack&lt;/span&gt;
curl http://localhost:3000/health   &lt;span class="c"&gt;# healthy&lt;/span&gt;
npm &lt;span class="nb"&gt;test&lt;/span&gt;                    &lt;span class="c"&gt;# 47/47 passing&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;And a portable proof:&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight shell"&gt;&lt;code&gt;loki proof list             &lt;span class="c"&gt;# all proofs for this project&lt;/span&gt;
loki proof show &amp;lt;run-id&amp;gt;    &lt;span class="c"&gt;# render the HTML in the terminal&lt;/span&gt;
loki proof open &amp;lt;run-id&amp;gt;    &lt;span class="c"&gt;# open in your browser&lt;/span&gt;
loki proof share &amp;lt;run-id&amp;gt;   &lt;span class="c"&gt;# publish as a GitHub gist (after redaction preview + confirm)&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;The proof leads with the itemized bill (cost USD, tokens, per-model breakdown), then files-changed with the diffstat, then per-reviewer council verdicts with evidence, then quality gates, then wall-clock, provider/model, plus an integrity hash. A single chokepoint at &lt;a href="https://github.com/asklokesh/loki-mode/blob/main/autonomy/lib/proof_redact.py" rel="noopener noreferrer"&gt;&lt;code&gt;autonomy/lib/proof_redact.py&lt;/code&gt;&lt;/a&gt; runs once before serialization and refuses to emit if it did not run. It scrubs Anthropic/OpenAI/Google/GitHub/AWS/Slack keys, Bearer tokens, JWTs, PEM private-key blocks, named secret assignments, DB URI credentials, and absolute user paths from both the JSON and the rendered HTML.&lt;/p&gt;

&lt;h2&gt;
  
  
  6. Workflow 2: GitHub issue to merged PR, hands-free
&lt;/h2&gt;

&lt;p&gt;This is the workflow that gets us past "demo tool" and into "production engineering tool." Hand Loki a real issue from your tracker and walk away.&lt;/p&gt;

&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fp6ou7efqcmghd1dcmbl9.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fp6ou7efqcmghd1dcmbl9.png" alt="Loki issue-to-PR flow: fetch -&gt; isolate -&gt; RARV-C build -&gt; verify -&gt; ship" width="800" height="418"&gt;&lt;/a&gt;&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight shell"&gt;&lt;code&gt;&lt;span class="c"&gt;# Issue-driven, foreground&lt;/span&gt;
loki start owner/repo#123

&lt;span class="c"&gt;# Issue-driven, background, auto-PR + auto-merge when verified&lt;/span&gt;
loki start 123 &lt;span class="nt"&gt;--ship&lt;/span&gt; &lt;span class="nt"&gt;--bg&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;What each flag does (the cascade is documented in &lt;code&gt;loki start --help&lt;/code&gt;):&lt;/p&gt;

&lt;div class="table-wrapper-paragraph"&gt;&lt;table&gt;
&lt;thead&gt;
&lt;tr&gt;
&lt;th&gt;Flag&lt;/th&gt;
&lt;th&gt;Behavior&lt;/th&gt;
&lt;/tr&gt;
&lt;/thead&gt;
&lt;tbody&gt;
&lt;tr&gt;
&lt;td&gt;
&lt;code&gt;--worktree&lt;/code&gt;, &lt;code&gt;-w&lt;/code&gt;
&lt;/td&gt;
&lt;td&gt;Git worktree isolation. Branch: &lt;code&gt;loki/issue-&amp;lt;n&amp;gt;&lt;/code&gt;. Your existing working tree is never touched.&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;&lt;code&gt;--pr&lt;/code&gt;&lt;/td&gt;
&lt;td&gt;Implies &lt;code&gt;--worktree&lt;/code&gt;. Auto-creates a PR via &lt;code&gt;gh pr create&lt;/code&gt; when the run verifies.&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;&lt;code&gt;--ship&lt;/code&gt;&lt;/td&gt;
&lt;td&gt;Implies &lt;code&gt;--pr&lt;/code&gt;. Auto-merges via &lt;code&gt;gh pr merge&lt;/code&gt; once the PR's CI passes.&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;
&lt;code&gt;--bg&lt;/code&gt;, &lt;code&gt;--detach&lt;/code&gt;, &lt;code&gt;-d&lt;/code&gt;
&lt;/td&gt;
&lt;td&gt;Background mode. Implies &lt;code&gt;--worktree&lt;/code&gt;. Local desktop notification on completion (v7.22.0).&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;
&lt;/table&gt;&lt;/div&gt;

&lt;p&gt;Supported issue refs (auto-detected):&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;
&lt;strong&gt;GitHub:&lt;/strong&gt; &lt;code&gt;123&lt;/code&gt;, &lt;code&gt;#123&lt;/code&gt;, &lt;code&gt;owner/repo#123&lt;/code&gt;, full issue URL&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;GitLab:&lt;/strong&gt; &lt;code&gt;https://gitlab.com/owner/repo/-/issues/42&lt;/code&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Jira:&lt;/strong&gt; &lt;code&gt;PROJ-123&lt;/code&gt;, &lt;code&gt;https://org.atlassian.net/browse/PROJ-123&lt;/code&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Azure DevOps:&lt;/strong&gt; &lt;code&gt;https://dev.azure.com/org/project/_workitems/edit/456&lt;/code&gt;
&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;When you delegate with &lt;code&gt;--bg&lt;/code&gt;, v7.22.0's "delegate then notify" writes a durable completion summary to &lt;code&gt;.loki/COMPLETION.txt&lt;/code&gt; and &lt;code&gt;.loki/state/completion.json&lt;/code&gt; and fires a local OS notification (macOS &lt;code&gt;osascript&lt;/code&gt;, Linux &lt;code&gt;notify-send&lt;/code&gt;). Every terminal state notifies and records a summary -- success, max-iterations, stopped, failed, genuinely-blocking pauses. The perpetual-mode auto-clear pause is correctly NOT treated as terminal, so a mid-run pause never produces a false "done" record. Zero network egress.&lt;/p&gt;

&lt;p&gt;Opt-in &lt;code&gt;LOKI_DELEGATE_BRANCH=1&lt;/code&gt; isolates a run on a dedicated &lt;code&gt;loki/delegate-&amp;lt;timestamp&amp;gt;&lt;/code&gt; branch. Opt-in &lt;code&gt;LOKI_DELEGATE_PR=1&lt;/code&gt; opens a pull request on completion (a local &lt;code&gt;gh&lt;/code&gt; call from your own machine, never from CI).&lt;/p&gt;

&lt;h2&gt;
  
  
  7. Workflow 3: gate the diff in CI before it merges
&lt;/h2&gt;

&lt;p&gt;This is the third workflow, and the one I think actually moves the needle on enterprise adoption.&lt;/p&gt;

&lt;p&gt;&lt;code&gt;loki verify&lt;/code&gt; is a &lt;a href="https://github.com/asklokesh/loki-mode/blob/main/autonomy/verify.sh" rel="noopener noreferrer"&gt;standalone verification module&lt;/a&gt; that does NOT enter the autonomous loop. It is the deterministic gate. Five checks scoped to the diff:&lt;/p&gt;

&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fi1asemk214veh5l970r3.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fi1asemk214veh5l970r3.png" alt="loki verify pipeline: five deterministic checks merge into one verdict and one evidence document" width="800" height="418"&gt;&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;Run it locally:&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight shell"&gt;&lt;code&gt;&lt;span class="c"&gt;# Verify against the default base ref&lt;/span&gt;
loki verify

&lt;span class="c"&gt;# Or against a specific ref&lt;/span&gt;
loki verify origin/main

&lt;span class="c"&gt;# Or for CI as machine-readable JSON&lt;/span&gt;
loki verify origin/main &lt;span class="nt"&gt;--output-json&lt;/span&gt; &lt;span class="o"&gt;&amp;gt;&lt;/span&gt; verify-result.json
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;Real output from a run on a 14-file diff:&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight plaintext"&gt;&lt;code&gt;loki verify (run id: a7c2-...)
=============================

Diff base:            merge-base(origin/main, HEAD)..HEAD
Files changed:        14
Lines added:          892
Lines removed:        47

Build         pass    (12.4s)
Tests         pass    47/47 passing  (1.8s)
Static        pass    eslint clean, tsc strict ok  (3.1s)
Secrets       pass    no secrets in diff
Dependencies  pass    no critical CVE in changed packages
Held-out      pass    5 of 5 reserved spec items satisfied

Verdict:      VERIFIED  (exit 0)
Evidence:     .loki/verify/evidence.json
Report:       .loki/verify/report.md
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;Exit codes:&lt;/p&gt;

&lt;div class="table-wrapper-paragraph"&gt;&lt;table&gt;
&lt;thead&gt;
&lt;tr&gt;
&lt;th&gt;Code&lt;/th&gt;
&lt;th&gt;Meaning&lt;/th&gt;
&lt;/tr&gt;
&lt;/thead&gt;
&lt;tbody&gt;
&lt;tr&gt;
&lt;td&gt;&lt;code&gt;0&lt;/code&gt;&lt;/td&gt;
&lt;td&gt;VERIFIED -- all checks pass with conclusive evidence&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;&lt;code&gt;1&lt;/code&gt;&lt;/td&gt;
&lt;td&gt;CONCERNS -- inconclusive evidence, empty diff, or non-blocking warnings&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;&lt;code&gt;2&lt;/code&gt;&lt;/td&gt;
&lt;td&gt;BLOCKED -- red test, secret leak, critical CVE, failing held-out item&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;&lt;code&gt;3&lt;/code&gt;&lt;/td&gt;
&lt;td&gt;Verifier error (could not complete; never silently passes)&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;
&lt;/table&gt;&lt;/div&gt;

&lt;p&gt;The diff base resolution is &lt;code&gt;merge-base(base, HEAD)..HEAD&lt;/code&gt; -- proper PR semantics, not &lt;code&gt;HEAD~1&lt;/code&gt;. Inconclusive evidence is &lt;strong&gt;never&lt;/strong&gt; reported VERIFIED. Empty diffs yield CONCERNS, not green. Bare root-level test files are detected so discoverable tests are never silently skipped.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Important scope note:&lt;/strong&gt; the v7.27.0 MVP is &lt;strong&gt;deterministic-only&lt;/strong&gt;. No LLM in the gate path. The LLM did its work upstream in the RARV-C build loop. A single-reviewer LLM stage and the blind council are sequenced for future releases per the &lt;a href="https://github.com/asklokesh/loki-mode/blob/main/autonomy/verify.sh" rel="noopener noreferrer"&gt;verification spec&lt;/a&gt;. This is stated honestly in &lt;code&gt;loki verify --help&lt;/code&gt; and in the evidence document (&lt;code&gt;llm_review.status = "skipped"&lt;/code&gt;).&lt;/p&gt;

&lt;p&gt;Drop the same command into GitHub Actions:&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight yaml"&gt;&lt;code&gt;&lt;span class="c1"&gt;# .github/workflows/loki-verify.yml&lt;/span&gt;
&lt;span class="na"&gt;name&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt; &lt;span class="s"&gt;Loki Verify&lt;/span&gt;
&lt;span class="na"&gt;on&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt;
  &lt;span class="na"&gt;pull_request&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt;
    &lt;span class="na"&gt;branches&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt; &lt;span class="pi"&gt;[&lt;/span&gt;&lt;span class="nv"&gt;main&lt;/span&gt;&lt;span class="pi"&gt;]&lt;/span&gt;

&lt;span class="na"&gt;jobs&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt;
  &lt;span class="na"&gt;verify&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt;
    &lt;span class="na"&gt;runs-on&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt; &lt;span class="s"&gt;ubuntu-latest&lt;/span&gt;
    &lt;span class="na"&gt;steps&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt;
      &lt;span class="pi"&gt;-&lt;/span&gt; &lt;span class="na"&gt;uses&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt; &lt;span class="s"&gt;actions/checkout@v4&lt;/span&gt;
        &lt;span class="na"&gt;with&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt;
          &lt;span class="na"&gt;fetch-depth&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt; &lt;span class="m"&gt;0&lt;/span&gt;  &lt;span class="c1"&gt;# full history for merge-base&lt;/span&gt;

      &lt;span class="pi"&gt;-&lt;/span&gt; &lt;span class="na"&gt;uses&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt; &lt;span class="s"&gt;oven-sh/setup-bun@v1&lt;/span&gt;

      &lt;span class="pi"&gt;-&lt;/span&gt; &lt;span class="na"&gt;name&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt; &lt;span class="s"&gt;Install Loki Mode&lt;/span&gt;
        &lt;span class="na"&gt;run&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt; &lt;span class="s"&gt;bun install -g loki-mode&lt;/span&gt;

      &lt;span class="pi"&gt;-&lt;/span&gt; &lt;span class="na"&gt;name&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt; &lt;span class="s"&gt;Run verification&lt;/span&gt;
        &lt;span class="na"&gt;env&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt;
          &lt;span class="na"&gt;ANTHROPIC_API_KEY&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt; &lt;span class="s"&gt;${{ secrets.ANTHROPIC_API_KEY }}&lt;/span&gt;
        &lt;span class="na"&gt;run&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt; &lt;span class="s"&gt;loki verify origin/${{ github.base_ref }}&lt;/span&gt;

      &lt;span class="pi"&gt;-&lt;/span&gt; &lt;span class="na"&gt;name&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt; &lt;span class="s"&gt;Upload evidence&lt;/span&gt;
        &lt;span class="na"&gt;if&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt; &lt;span class="s"&gt;always()&lt;/span&gt;
        &lt;span class="na"&gt;uses&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt; &lt;span class="s"&gt;actions/upload-artifact@v4&lt;/span&gt;
        &lt;span class="na"&gt;with&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt;
          &lt;span class="na"&gt;name&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt; &lt;span class="s"&gt;loki-verify-evidence&lt;/span&gt;
          &lt;span class="na"&gt;path&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt; &lt;span class="s"&gt;.loki/verify/&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;The job exits 0/1/2/3. The evidence is a structured artifact you can inspect from the PR view. The deterministic checks make it a real gate, not a vibe check.&lt;/p&gt;

&lt;p&gt;To see how trust evolves on YOUR repo over time:&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight shell"&gt;&lt;code&gt;loki trust              &lt;span class="c"&gt;# one-line verdict + per-axis direction&lt;/span&gt;
loki trust &lt;span class="nt"&gt;--json&lt;/span&gt;       &lt;span class="c"&gt;# machine-readable trajectory&lt;/span&gt;
loki trust-metrics      &lt;span class="c"&gt;# block rate, p90 failure, council rejection, cost-per-verified-task&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;&lt;code&gt;loki trust-metrics&lt;/code&gt; aggregates from a durable append-only log at &lt;code&gt;.loki/metrics/trust-events.jsonl&lt;/code&gt;. Un-instrumented projects report &lt;code&gt;available: false&lt;/code&gt;, never fabricated zeros.&lt;/p&gt;

&lt;h2&gt;
  
  
  8. Workflow 4 (optional): &lt;code&gt;loki quickstart&lt;/code&gt; -- the training-wheels mode
&lt;/h2&gt;

&lt;p&gt;If you have never used the tool before and want a guaranteed-working first run with zero PRD-writing, &lt;code&gt;loki quickstart&lt;/code&gt; is a guided 4-step interview that lands the bundled Todo app in four Enter presses. The steps are: setup check, idea (default: Todo app), template pick (deterministic offline scorer over the bundled templates, no LLM at this step), and plan review.&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight shell"&gt;&lt;code&gt;loki quickstart
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;It is genuinely just for the first 10 minutes. The real workflows are the three above.&lt;/p&gt;

&lt;h2&gt;
  
  
  9. Internals: how the held-out gate stops reward-hacking
&lt;/h2&gt;

&lt;p&gt;This is the technical bit I get the most questions about, and the one I am most proud of from this release wave.&lt;/p&gt;

&lt;h3&gt;
  
  
  The failure mode
&lt;/h3&gt;

&lt;p&gt;Once an autonomous build loop has access to the spec's acceptance checklist, an aggressive optimizer can tune to that exact checklist. The visible items pass. The spec does not. You ship something that satisfies the letter of the checklist and not the intent.&lt;/p&gt;

&lt;p&gt;This is the same failure mode that has plagued ML benchmarks for years (BLEU, ROUGE, leaderboards). For autonomous coding it is worse because the optimizer has access to the test runner and can iterate against it.&lt;/p&gt;

&lt;h3&gt;
  
  
  The fix
&lt;/h3&gt;

&lt;p&gt;Before the first verification, a deterministic selector reserves a slice (real impl in &lt;a href="https://github.com/asklokesh/loki-mode/blob/main/autonomy/prd-checklist.sh" rel="noopener noreferrer"&gt;&lt;code&gt;autonomy/prd-checklist.sh&lt;/code&gt;&lt;/a&gt;):&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight python"&gt;&lt;code&gt;&lt;span class="c1"&gt;# Simplified; see the real bash impl linked above
&lt;/span&gt;&lt;span class="k"&gt;def&lt;/span&gt; &lt;span class="nf"&gt;select_heldout&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;checklist_items&lt;/span&gt;&lt;span class="p"&gt;):&lt;/span&gt;
    &lt;span class="n"&gt;N&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nf"&gt;len&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;checklist_items&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;
    &lt;span class="k"&gt;if&lt;/span&gt; &lt;span class="n"&gt;N&lt;/span&gt; &lt;span class="o"&gt;&amp;lt;&lt;/span&gt; &lt;span class="mi"&gt;4&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;
        &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="p"&gt;[]&lt;/span&gt;  &lt;span class="c1"&gt;# too small to reserve from
&lt;/span&gt;    &lt;span class="n"&gt;count&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nf"&gt;clamp&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nf"&gt;round&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="mf"&gt;0.25&lt;/span&gt; &lt;span class="o"&gt;*&lt;/span&gt; &lt;span class="n"&gt;N&lt;/span&gt;&lt;span class="p"&gt;),&lt;/span&gt; &lt;span class="mi"&gt;1&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="mi"&gt;5&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;
    &lt;span class="n"&gt;ranked&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nf"&gt;sorted&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;checklist_items&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;key&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="k"&gt;lambda&lt;/span&gt; &lt;span class="n"&gt;c&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nf"&gt;sha256&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;c&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nb"&gt;id&lt;/span&gt;&lt;span class="p"&gt;))&lt;/span&gt;
    &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="n"&gt;ranked&lt;/span&gt;&lt;span class="p"&gt;[:&lt;/span&gt;&lt;span class="n"&gt;count&lt;/span&gt;&lt;span class="p"&gt;]&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;Selection is reproducible (&lt;code&gt;sha256(id)&lt;/code&gt;-ranked, not random), idempotent (only written once to &lt;code&gt;.loki/checklist/held-out.json&lt;/code&gt;), and bounded (clamped to 1-5 items).&lt;/p&gt;

&lt;h3&gt;
  
  
  What the build agent sees
&lt;/h3&gt;

&lt;p&gt;Everything the build loop reads is filtered to exclude the held-out IDs. The build agent literally cannot see them in its context window. It can pass every item it can see and still get blocked at the ship gate if the held-out items fail.&lt;/p&gt;

&lt;p&gt;Concretely, the filter removes the IDs from:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;The visible checklist summary in the run prompt&lt;/li&gt;
&lt;li&gt;The per-iteration checklist progress gate&lt;/li&gt;
&lt;li&gt;The completion-prompt count of "N/M items satisfied"&lt;/li&gt;
&lt;li&gt;The dashboard's task panel&lt;/li&gt;
&lt;/ul&gt;

&lt;h3&gt;
  
  
  The completion council reads them at the ship gate
&lt;/h3&gt;

&lt;p&gt;At the ship gate (called from both the standard completion route and the force-review route), &lt;code&gt;council_heldout_gate&lt;/code&gt; (in &lt;a href="https://github.com/asklokesh/loki-mode/blob/main/autonomy/completion-council.sh" rel="noopener noreferrer"&gt;&lt;code&gt;autonomy/completion-council.sh&lt;/code&gt;&lt;/a&gt;) reads &lt;code&gt;.loki/checklist/held-out.json&lt;/code&gt;, runs each item against the current diff with a dedicated evaluation prompt, and writes a &lt;code&gt;heldout_eval&lt;/code&gt; trust event to &lt;code&gt;.loki/metrics/trust-events.jsonl&lt;/code&gt;. A held-out item whose status comes back &lt;code&gt;failing&lt;/code&gt; and is not explicitly waived blocks completion like any other critical failure.&lt;/p&gt;

&lt;h3&gt;
  
  
  Honest limits
&lt;/h3&gt;

&lt;p&gt;This guards the prompt feed, not the filesystem. The reservation lives on disk at &lt;code&gt;.loki/checklist/held-out.json&lt;/code&gt;. An agent with read access to the working tree could open that file and learn which items were held out. The guarantee is that no prompt or summary the build agent reads ever names a held-out item. For the most realistic attack-shape (an LLM tuning to its visible context window), that is the right defense.&lt;/p&gt;

&lt;p&gt;For a stronger guarantee, an opt-in mode could ship that places the held-out file outside the working tree, with the tradeoff that you cannot rerun verification offline without the file path. We chose the on-disk default and named the limit explicitly.&lt;/p&gt;

&lt;p&gt;Opt out entirely with &lt;code&gt;LOKI_HELDOUT_GATE=0&lt;/code&gt;.&lt;/p&gt;

&lt;h2&gt;
  
  
  10. What we learned shipping 15 minor releases in 4 days
&lt;/h2&gt;

&lt;p&gt;The release cadence is not a marketing stunt; it came out of internal practice. A few things that turned out to matter.&lt;/p&gt;

&lt;h3&gt;
  
  
  Coordinated arcs beat feature dumps
&lt;/h3&gt;

&lt;p&gt;The previous wave (v7.9 through v7.17) was the same shape: 9 minor releases over 2 days, sequenced R1 through R10. The v7.20 through v7.35 wave was 15 minors over 4 days. Each release closes one specific user-facing problem, and the arc has a narrative the README can sustain.&lt;/p&gt;

&lt;p&gt;If we had shipped the same functionality as a single v8.0.0 release, we would have spent weeks on integration testing and the user-facing communication would have been a mess.&lt;/p&gt;

&lt;h3&gt;
  
  
  A council that cannot edit code reviews more honestly
&lt;/h3&gt;

&lt;p&gt;The single biggest quality improvement of the week was &lt;a href="https://github.com/asklokesh/loki-mode/releases/tag/v7.33.0" rel="noopener noreferrer"&gt;v7.33.0's &lt;code&gt;--disallowedTools&lt;/code&gt;&lt;/a&gt;. Reviewer subcalls now physically cannot use Edit, Write, NotebookEdit, or destructive git (the list includes the &lt;code&gt;git -C&lt;/code&gt; / &lt;code&gt;--git-dir&lt;/code&gt; / &lt;code&gt;-c&lt;/code&gt; flag-prefixed forms too).&lt;/p&gt;

&lt;p&gt;Before this, we observed the council occasionally "improving" the diff under review, which technically satisfied the review goal (the new diff was now passable) but defeated the gate's purpose. The fix was small, the impact was large. This is the kind of thing you find by reading your own internal traces, not by running a benchmark.&lt;/p&gt;

&lt;p&gt;Opt out with &lt;code&gt;LOKI_REVIEW_TOOL_GUARD=0&lt;/code&gt;.&lt;/p&gt;

&lt;h3&gt;
  
  
  Honest provider labels build trust faster than full-stack promises
&lt;/h3&gt;

&lt;p&gt;When &lt;a href="https://github.com/asklokesh/loki-mode/releases/tag/v7.27.0" rel="noopener noreferrer"&gt;v7.27.0&lt;/a&gt; dropped, the README labels for Codex, Cline, and Aider went from "Supported" to "Experimental." Loki Mode now claims "Tier 1 E2E-verified primary" only for Claude Code.&lt;/p&gt;

&lt;p&gt;This was uncomfortable to ship. The README looks less impressive. The marketing surface got smaller. But Discord activity went up, not down, the week after. The audience drawn to a tool like this is allergic to "supports five providers" marketing copy. Saying the smaller true thing builds more trust than saying the larger fuzzy thing.&lt;/p&gt;

&lt;h3&gt;
  
  
  Cost honesty enforced in code beats cost honesty as a marketing claim
&lt;/h3&gt;

&lt;p&gt;&lt;a href="https://github.com/asklokesh/loki-mode/releases/tag/v7.31.0" rel="noopener noreferrer"&gt;v7.31.0&lt;/a&gt; and &lt;a href="https://github.com/asklokesh/loki-mode/releases/tag/v7.32.0" rel="noopener noreferrer"&gt;v7.32.0&lt;/a&gt; shipped the cost-honesty contract: the &lt;code&gt;loki plan&lt;/code&gt; quote, the dashboard's reported model, and the actually-dispatched model agree across every model lever. A &lt;code&gt;sonnet&lt;/code&gt; session pin that routes through the development tier to Opus now quotes Opus, not Sonnet. The old behavior underquoted by about 1.7x.&lt;/p&gt;

&lt;p&gt;The work was three days of internal plumbing. It does not show up on the README feature list. Users noticed within hours because their cost dashboards stopped lying.&lt;/p&gt;

&lt;h3&gt;
  
  
  Auto-open the dashboard
&lt;/h3&gt;

&lt;p&gt;The single highest-leverage UX change of the week was the one-line "&lt;code&gt;loki start&lt;/code&gt; auto-opens the dashboard." We resisted it for months on the grounds of "developers don't want surprise browser windows." The data was unambiguous: with auto-open, the share of users who finished their first build went up substantially.&lt;/p&gt;

&lt;p&gt;Lesson: respect for the user's environment matters less than removing one barrier between them and a successful first run. The opt-out (&lt;code&gt;LOKI_NO_AUTO_OPEN=1&lt;/code&gt;, plus auto-skip on &lt;code&gt;CI=true&lt;/code&gt; / SSH-no-TTY / piped stdin) is enough.&lt;/p&gt;

&lt;h2&gt;
  
  
  11. The roadmap and where contributions move the needle fastest
&lt;/h2&gt;

&lt;p&gt;Near-term (next 4 weeks):&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;
&lt;strong&gt;LLM single-reviewer stage in &lt;code&gt;loki verify&lt;/code&gt;.&lt;/strong&gt; v7.27.0 MVP is deterministic-only. The single-reviewer stage is sequenced next per the &lt;a href="https://github.com/asklokesh/loki-mode/blob/main/autonomy/verify.sh" rel="noopener noreferrer"&gt;verification spec&lt;/a&gt;, with the blind council after that.&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Public hosted backend for &lt;code&gt;loki proof share --hosted&lt;/code&gt;.&lt;/strong&gt; Today the &lt;code&gt;--hosted&lt;/code&gt; flag publishes to a user-supplied &lt;code&gt;LOKI_HOSTED_ENDPOINT&lt;/code&gt; and prints an honest "no official hosted backend yet" message when unset. We are building the hosted endpoint. Opt-in. The free-forever CLI commitment in &lt;a href="https://github.com/asklokesh/loki-mode/blob/main/docs/OPEN-CORE-BOUNDARY.md" rel="noopener noreferrer"&gt;&lt;code&gt;docs/OPEN-CORE-BOUNDARY.md&lt;/code&gt;&lt;/a&gt; stays.&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Mobile dashboard polish.&lt;/strong&gt; The dashboard is web-based but assumes a desktop browser. Mobile responsiveness needs work.&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;More benchmark task adapters.&lt;/strong&gt; We ship &lt;code&gt;loki bench&lt;/code&gt; with real adapters for Aider and Claude Code. We need adapters for more competitors. Cleanest contribution surface for an external PR.&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;Medium-term (next quarter):&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;
&lt;strong&gt;Replay re-execution mode&lt;/strong&gt; (&lt;code&gt;loki memory replay --apply&lt;/code&gt;). Today &lt;code&gt;loki memory replay&lt;/code&gt; is read-only. Re-execution needs proper sandboxing and confirmation; not shipping until that is right.&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Embedding layer for cross-project memory.&lt;/strong&gt; Today's retrieval uses token overlap. An embedding layer would catch synonym mismatches the keyword scorer misses.&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;10k-episode memory index&lt;/strong&gt; at p95 &amp;lt; 500ms.&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;Where contributions land fastest right now:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;
&lt;strong&gt;Benchmark task adapters&lt;/strong&gt; for any AI coding tool that has a CLI. The contract is clean, the integration is small, and we will land any well-formed PR within 48 hours.&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Agent and template marketplace packs&lt;/strong&gt; for &lt;code&gt;loki agent install&lt;/code&gt;. Install is data-only by construction (manifests are never &lt;code&gt;eval&lt;/code&gt;'d, &lt;code&gt;exec&lt;/code&gt;'d, or imported), so contributions are safe to land without security review for each one.&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Language server coverage.&lt;/strong&gt; We auto-spawn an &lt;code&gt;lsp-proxy&lt;/code&gt; MCP for TypeScript, Python, Go, Rust. Adding Ruby, PHP, Kotlin, Swift, Elixir is small and well-scoped.&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Dashboard panels and i18n.&lt;/strong&gt; The dashboard is Web Components + Tailwind. Adding panels is straightforward.&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;If any of these interest you, drop into &lt;a href="https://discord.gg/k8NpBhc5KA" rel="noopener noreferrer"&gt;Discord&lt;/a&gt; and say hello. I respond within hours.&lt;/p&gt;

&lt;h2&gt;
  
  
  12. Try it in two minutes
&lt;/h2&gt;



&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight shell"&gt;&lt;code&gt;&lt;span class="c"&gt;# Install (Bun recommended; v8 will be Bun-only)&lt;/span&gt;
bun &lt;span class="nb"&gt;install&lt;/span&gt; &lt;span class="nt"&gt;-g&lt;/span&gt; loki-mode

&lt;span class="c"&gt;# Verify the install&lt;/span&gt;
loki version
loki doctor

&lt;span class="c"&gt;# Workflow 1: from a PRD&lt;/span&gt;
loki start ./prd.md

&lt;span class="c"&gt;# Workflow 2: from any GitHub/GitLab/Jira issue&lt;/span&gt;
loki start owner/repo#123 &lt;span class="nt"&gt;--ship&lt;/span&gt; &lt;span class="nt"&gt;--bg&lt;/span&gt;

&lt;span class="c"&gt;# Workflow 3: CI gate on any branch or PR diff&lt;/span&gt;
loki verify origin/main

&lt;span class="c"&gt;# Inspect trust trajectory across all your runs&lt;/span&gt;
loki trust

&lt;span class="c"&gt;# Per-iteration cost visibility&lt;/span&gt;
loki cost &lt;span class="nt"&gt;--last&lt;/span&gt; 10

&lt;span class="c"&gt;# First-time exploration&lt;/span&gt;
loki quickstart
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;If you build something with it, drop a screenshot in &lt;a href="https://discord.gg/k8NpBhc5KA" rel="noopener noreferrer"&gt;Discord&lt;/a&gt;. I will boost it.&lt;/p&gt;

&lt;h2&gt;
  
  
  Links
&lt;/h2&gt;

&lt;ul&gt;
&lt;li&gt;
&lt;strong&gt;Repo:&lt;/strong&gt; &lt;a href="https://github.com/asklokesh/loki-mode" rel="noopener noreferrer"&gt;github.com/asklokesh/loki-mode&lt;/a&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Site:&lt;/strong&gt; &lt;a href="https://autonomi.dev" rel="noopener noreferrer"&gt;autonomi.dev&lt;/a&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Docs:&lt;/strong&gt; &lt;a href="https://autonomi.dev/docs" rel="noopener noreferrer"&gt;autonomi.dev/docs&lt;/a&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Discord:&lt;/strong&gt; &lt;a href="https://discord.gg/k8NpBhc5KA" rel="noopener noreferrer"&gt;discord.gg/k8NpBhc5KA&lt;/a&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;LinkedIn:&lt;/strong&gt; &lt;a href="https://www.linkedin.com/company/autonomi-dev-agents" rel="noopener noreferrer"&gt;linkedin.com/company/autonomi-dev-agents&lt;/a&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Full v7.20-v7.35 writeup on the blog:&lt;/strong&gt; &lt;a href="https://autonomi.dev/blog/spec-to-live-app-week-v7-20-v7-35" rel="noopener noreferrer"&gt;autonomi.dev/blog/spec-to-live-app-week-v7-20-v7-35&lt;/a&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Verification deep dive:&lt;/strong&gt; &lt;a href="https://autonomi.dev/blog/loki-vs-replit-lovable-verified-shipment" rel="noopener noreferrer"&gt;autonomi.dev/blog/loki-vs-replit-lovable-verified-shipment&lt;/a&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Open-core boundary commitment:&lt;/strong&gt; &lt;a href="https://github.com/asklokesh/loki-mode/blob/main/docs/OPEN-CORE-BOUNDARY.md" rel="noopener noreferrer"&gt;github.com/asklokesh/loki-mode/blob/main/docs/OPEN-CORE-BOUNDARY.md&lt;/a&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;License:&lt;/strong&gt; &lt;a href="https://github.com/asklokesh/loki-mode/blob/main/LICENSE" rel="noopener noreferrer"&gt;BSL 1.1, converts to Apache 2.0 on March 19, 2030&lt;/a&gt;
&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;If you read this far, thank you. Tell me in the comments which part of the verification surface you would push back on, or what would make you trust an autonomous agent to ship a diff into your own codebase. That feedback is the input I most need.&lt;/p&gt;




&lt;p&gt;&lt;em&gt;Loki Mode (also called Autonomi) is built and maintained by &lt;a href="https://github.com/asklokesh" rel="noopener noreferrer"&gt;@asklokesh&lt;/a&gt;. Source-available under BSL 1.1; converts to Apache 2.0 on March 19, 2030. We never proxy your provider keys, never collect prompts or code, and the telemetry that produced the numbers above is opt-out via &lt;code&gt;LOKI_TELEMETRY_DISABLED=true&lt;/code&gt; or &lt;code&gt;DO_NOT_TRACK=1&lt;/code&gt;.&lt;/em&gt;&lt;/p&gt;

</description>
      <category>ai</category>
      <category>opensource</category>
      <category>devtools</category>
      <category>programming</category>
    </item>
    <item>
      <title>One Click to Build, Verify, SHIP..!</title>
      <dc:creator>Lokesh Mure</dc:creator>
      <pubDate>Fri, 26 Dec 2025 17:16:26 +0000</pubDate>
      <link>https://dev.to/asklokesh/-fja</link>
      <guid>https://dev.to/asklokesh/-fja</guid>
      <description>&lt;div class="ltag__link--embedded"&gt;
  &lt;div class="crayons-story "&gt;
  &lt;a href="https://dev.to/asklokesh/how-i-built-an-autonomous-ai-startup-system-with-37-agents-using-claude-code-2p79" class="crayons-story__hidden-navigation-link"&gt;How I Built an Autonomous AI Startup System with 37 Agents Using Claude Code&lt;/a&gt;


  &lt;div class="crayons-story__body crayons-story__body-full_post"&gt;
    &lt;div class="crayons-story__top"&gt;
      &lt;div class="crayons-story__meta"&gt;
        &lt;div class="crayons-story__author-pic"&gt;

          &lt;a href="/asklokesh" class="crayons-avatar  crayons-avatar--l  "&gt;
            &lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Fuser%2Fprofile_image%2F3680093%2F65cf0060-8b0f-437e-a77a-d0c1f0c2afa3.jpeg" alt="asklokesh profile" class="crayons-avatar__image" width="460" height="460"&gt;
          &lt;/a&gt;
        &lt;/div&gt;
        &lt;div&gt;
          &lt;div&gt;
            &lt;a href="/asklokesh" class="crayons-story__secondary fw-medium m:hidden"&gt;
              Lokesh Mure
            &lt;/a&gt;
            &lt;div class="profile-preview-card relative mb-4 s:mb-0 fw-medium hidden m:inline-block"&gt;
              
                Lokesh Mure
                
              
              &lt;div id="story-author-preview-content-3128884" class="profile-preview-card__content crayons-dropdown branded-7 p-4 pt-0"&gt;
                &lt;div class="gap-4 grid"&gt;
                  &lt;div class="-mt-4"&gt;
                    &lt;a href="/asklokesh" class="flex"&gt;
                      &lt;span class="crayons-avatar crayons-avatar--xl mr-2 shrink-0"&gt;
                        &lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Fuser%2Fprofile_image%2F3680093%2F65cf0060-8b0f-437e-a77a-d0c1f0c2afa3.jpeg" class="crayons-avatar__image" alt="" width="460" height="460"&gt;
                      &lt;/span&gt;
                      &lt;span class="crayons-link crayons-subtitle-2 mt-5"&gt;Lokesh Mure&lt;/span&gt;
                    &lt;/a&gt;
                  &lt;/div&gt;
                  &lt;div class="print-hidden"&gt;
                    
                      Follow
                    
                  &lt;/div&gt;
                  &lt;div class="author-preview-metadata-container"&gt;&lt;/div&gt;
                &lt;/div&gt;
              &lt;/div&gt;
            &lt;/div&gt;

          &lt;/div&gt;
          &lt;a href="https://dev.to/asklokesh/how-i-built-an-autonomous-ai-startup-system-with-37-agents-using-claude-code-2p79" class="crayons-story__tertiary fs-xs"&gt;&lt;time&gt;Dec 26 '25&lt;/time&gt;&lt;span class="time-ago-indicator-initial-placeholder"&gt;&lt;/span&gt;&lt;/a&gt;
        &lt;/div&gt;
      &lt;/div&gt;

    &lt;/div&gt;

    &lt;div class="crayons-story__indention"&gt;
      &lt;h2 class="crayons-story__title crayons-story__title-full_post"&gt;
        &lt;a href="https://dev.to/asklokesh/how-i-built-an-autonomous-ai-startup-system-with-37-agents-using-claude-code-2p79" id="article-link-3128884"&gt;
          How I Built an Autonomous AI Startup System with 37 Agents Using Claude Code
        &lt;/a&gt;
      &lt;/h2&gt;
        &lt;div class="crayons-story__tags"&gt;
            &lt;a class="crayons-tag  crayons-tag--monochrome " href="/t/ai"&gt;&lt;span class="crayons-tag__prefix"&gt;#&lt;/span&gt;ai&lt;/a&gt;
            &lt;a class="crayons-tag  crayons-tag--monochrome " href="/t/opensource"&gt;&lt;span class="crayons-tag__prefix"&gt;#&lt;/span&gt;opensource&lt;/a&gt;
            &lt;a class="crayons-tag  crayons-tag--monochrome " href="/t/productivity"&gt;&lt;span class="crayons-tag__prefix"&gt;#&lt;/span&gt;productivity&lt;/a&gt;
            &lt;a class="crayons-tag  crayons-tag--monochrome " href="/t/programming"&gt;&lt;span class="crayons-tag__prefix"&gt;#&lt;/span&gt;programming&lt;/a&gt;
        &lt;/div&gt;
      &lt;div class="crayons-story__bottom"&gt;
        &lt;div class="crayons-story__details"&gt;
          &lt;a href="https://dev.to/asklokesh/how-i-built-an-autonomous-ai-startup-system-with-37-agents-using-claude-code-2p79" class="crayons-btn crayons-btn--s crayons-btn--ghost crayons-btn--icon-left"&gt;
            &lt;div class="multiple_reactions_aggregate"&gt;
              &lt;span class="multiple_reactions_icons_container"&gt;
                  &lt;span class="crayons_icon_container"&gt;
                    &lt;img src="https://assets.dev.to/assets/fire-f60e7a582391810302117f987b22a8ef04a2fe0df7e3258a5f49332df1cec71e.svg" width="24" height="24"&gt;
                  &lt;/span&gt;
                  &lt;span class="crayons_icon_container"&gt;
                    &lt;img src="https://assets.dev.to/assets/sparkle-heart-5f9bee3767e18deb1bb725290cb151c25234768a0e9a2bd39370c382d02920cf.svg" width="24" height="24"&gt;
                  &lt;/span&gt;
              &lt;/span&gt;
              &lt;span class="aggregate_reactions_counter"&gt;7&lt;span class="hidden s:inline"&gt;&amp;nbsp;reactions&lt;/span&gt;&lt;/span&gt;
            &lt;/div&gt;
          &lt;/a&gt;
            &lt;a href="https://dev.to/asklokesh/how-i-built-an-autonomous-ai-startup-system-with-37-agents-using-claude-code-2p79#comments" class="crayons-btn crayons-btn--s crayons-btn--ghost crayons-btn--icon-left flex items-center"&gt;
              

              &lt;span class="hidden s:inline"&gt;Add&amp;nbsp;Comment&lt;/span&gt;
            &lt;/a&gt;
        &lt;/div&gt;
        &lt;div class="crayons-story__save"&gt;
          &lt;small class="crayons-story__tertiary fs-xs mr-2"&gt;
            4 min read
          &lt;/small&gt;
            
              &lt;span class="bm-initial crayons-icon c-btn__icon"&gt;
                

              &lt;/span&gt;
              &lt;span class="bm-success crayons-icon c-btn__icon"&gt;
                

              &lt;/span&gt;
            
        &lt;/div&gt;
      &lt;/div&gt;
    &lt;/div&gt;
  &lt;/div&gt;
&lt;/div&gt;

&lt;/div&gt;


</description>
      <category>ai</category>
      <category>opensource</category>
      <category>productivity</category>
      <category>programming</category>
    </item>
    <item>
      <title>How I Built an Autonomous AI Startup System with 37 Agents Using Claude Code</title>
      <dc:creator>Lokesh Mure</dc:creator>
      <pubDate>Fri, 26 Dec 2025 17:12:26 +0000</pubDate>
      <link>https://dev.to/asklokesh/how-i-built-an-autonomous-ai-startup-system-with-37-agents-using-claude-code-2p79</link>
      <guid>https://dev.to/asklokesh/how-i-built-an-autonomous-ai-startup-system-with-37-agents-using-claude-code-2p79</guid>
      <description>&lt;p&gt;Last month I asked myself a question that wouldn't leave me alone: what if I could mass hire 37 specialists for my side projects without spending anything?&lt;/p&gt;

&lt;p&gt;I work full-time as a technology lead. Like many of you, I have a graveyard of side projects that died somewhere between "great idea" and "I'll finish it this weekend." The problem was never the idea. It was bandwidth. Solo founders are expected to be developer, marketer, ops, legal, finance, and customer support all at once.&lt;/p&gt;

&lt;p&gt;So I built Loki Mode - an open source Claude Code skill that orchestrates 37 specialized AI agents to take a product requirements document and autonomously build, deploy, and operate a complete product.&lt;/p&gt;

&lt;p&gt;This is the story of how I built it and what I learned.&lt;/p&gt;

&lt;h2&gt;
  
  
  The Problem I Wanted to Solve
&lt;/h2&gt;

&lt;p&gt;Most AI coding tools still require you to babysit every step. You prompt, wait, review, prompt again, fix the hallucination, prompt again. It's faster than coding from scratch, but you're still the bottleneck.&lt;/p&gt;

&lt;p&gt;I wanted something different:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Give it a PRD&lt;/li&gt;
&lt;li&gt;Walk away&lt;/li&gt;
&lt;li&gt;Come back to a deployed product&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;No hand-holding. No human in the loop for routine decisions.&lt;/p&gt;

&lt;h2&gt;
  
  
  Architecture: Why 37 Agents?
&lt;/h2&gt;

&lt;p&gt;I started with a single autonomous agent. It worked for simple tasks but fell apart on anything complex. The context window would fill up, the agent would lose track of what it was doing, and quality degraded.&lt;/p&gt;

&lt;p&gt;The solution was specialization. Instead of one agent trying to be everything, I created focused agents that only do one thing well:&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Engineering Swarm (8 agents):&lt;/strong&gt; frontend, backend, database, mobile, API, QA, performance, infrastructure&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Operations Swarm (8 agents):&lt;/strong&gt; devops, SRE, security, monitoring, incident response, release management, cost optimization, compliance&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Business Swarm (8 agents):&lt;/strong&gt; marketing, sales, finance, legal, support, HR, investor relations, partnerships&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Data Swarm (3 agents):&lt;/strong&gt; ML engineer, data engineer, analytics&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Product Swarm (3 agents):&lt;/strong&gt; product manager, designer, technical writer&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Growth Swarm (4 agents):&lt;/strong&gt; growth hacker, community, customer success, lifecycle marketing&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Review Swarm (3 agents):&lt;/strong&gt; code reviewer, business logic reviewer, security reviewer&lt;/p&gt;

&lt;p&gt;Each agent has a focused context, specific capabilities, and clear boundaries. The orchestrator coordinates them through a distributed task queue.&lt;/p&gt;

&lt;h2&gt;
  
  
  The Parallel Code Review Pattern
&lt;/h2&gt;

&lt;p&gt;This was the single biggest improvement to code quality. Instead of one reviewer, every piece of code goes through three specialized reviewers simultaneously:&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight plaintext"&gt;&lt;code&gt;IMPLEMENT → REVIEW (3 parallel) → AGGREGATE → FIX → RE-REVIEW → COMPLETE
                │
                ├─ code-reviewer (quality, patterns, maintainability)
                ├─ business-logic-reviewer (requirements, edge cases)
                └─ security-reviewer (vulnerabilities, auth issues)
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;Each reviewer returns a structured response:&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight json"&gt;&lt;code&gt;&lt;span class="p"&gt;{&lt;/span&gt;&lt;span class="w"&gt;
  &lt;/span&gt;&lt;span class="nl"&gt;"strengths"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;&lt;span class="w"&gt; &lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="s2"&gt;"Well-structured modules"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;&lt;span class="w"&gt; &lt;/span&gt;&lt;span class="s2"&gt;"Good test coverage"&lt;/span&gt;&lt;span class="p"&gt;],&lt;/span&gt;&lt;span class="w"&gt;
  &lt;/span&gt;&lt;span class="nl"&gt;"issues"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;&lt;span class="w"&gt; &lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="w"&gt;
    &lt;/span&gt;&lt;span class="p"&gt;{&lt;/span&gt;&lt;span class="w"&gt;
      &lt;/span&gt;&lt;span class="nl"&gt;"severity"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;&lt;span class="w"&gt; &lt;/span&gt;&lt;span class="s2"&gt;"High"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;&lt;span class="w"&gt;
      &lt;/span&gt;&lt;span class="nl"&gt;"description"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;&lt;span class="w"&gt; &lt;/span&gt;&lt;span class="s2"&gt;"Missing input validation on user endpoint"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;&lt;span class="w"&gt;
      &lt;/span&gt;&lt;span class="nl"&gt;"location"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;&lt;span class="w"&gt; &lt;/span&gt;&lt;span class="s2"&gt;"src/api/users.js:45"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;&lt;span class="w"&gt;
      &lt;/span&gt;&lt;span class="nl"&gt;"suggestion"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;&lt;span class="w"&gt; &lt;/span&gt;&lt;span class="s2"&gt;"Add schema validation before processing"&lt;/span&gt;&lt;span class="w"&gt;
    &lt;/span&gt;&lt;span class="p"&gt;}&lt;/span&gt;&lt;span class="w"&gt;
  &lt;/span&gt;&lt;span class="p"&gt;],&lt;/span&gt;&lt;span class="w"&gt;
  &lt;/span&gt;&lt;span class="nl"&gt;"assessment"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;&lt;span class="w"&gt; &lt;/span&gt;&lt;span class="s2"&gt;"FAIL"&lt;/span&gt;&lt;span class="w"&gt;
&lt;/span&gt;&lt;span class="p"&gt;}&lt;/span&gt;&lt;span class="w"&gt;
&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;The severity determines what happens next:&lt;/p&gt;

&lt;div class="table-wrapper-paragraph"&gt;&lt;table&gt;
&lt;thead&gt;
&lt;tr&gt;
&lt;th&gt;Severity&lt;/th&gt;
&lt;th&gt;Action&lt;/th&gt;
&lt;/tr&gt;
&lt;/thead&gt;
&lt;tbody&gt;
&lt;tr&gt;
&lt;td&gt;Critical/High/Medium&lt;/td&gt;
&lt;td&gt;Block. Dispatch fix agent. Re-run ALL 3 reviewers.&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Low&lt;/td&gt;
&lt;td&gt;Add &lt;code&gt;// TODO(review): ...&lt;/code&gt; comment, continue&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Cosmetic&lt;/td&gt;
&lt;td&gt;Add &lt;code&gt;// FIXME(nitpick): ...&lt;/code&gt; comment, continue&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;
&lt;/table&gt;&lt;/div&gt;

&lt;p&gt;This catches issues that a single reviewer would miss. The business logic reviewer catches requirements gaps. The security reviewer catches vulnerabilities. The code reviewer catches maintainability issues.&lt;/p&gt;

&lt;h2&gt;
  
  
  Handling Failures: Circuit Breakers and Dead Letter Queues
&lt;/h2&gt;

&lt;p&gt;Autonomous systems fail. The question is how they fail.&lt;/p&gt;

&lt;p&gt;I implemented circuit breakers borrowed from distributed systems design:&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight plaintext"&gt;&lt;code&gt;CLOSED (normal) → failures++ → threshold reached → OPEN (blocking)
                                                        │
                                                   cooldown expires
                                                        │
                                                        ▼
                                                  HALF-OPEN (testing)
                                                        │
                                    success ◄───────────┴───────────► failure
                                       │                                  │
                                       ▼                                  ▼
                                    CLOSED                              OPEN
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;When an agent type fails repeatedly, the circuit breaker opens and stops sending work to that agent type. After a cooldown period, it enters half-open state and allows one test request. If that succeeds, normal operation resumes. If it fails, back to open.&lt;/p&gt;

&lt;p&gt;Tasks that still fail after retries go to a dead letter queue for manual review rather than blocking the entire system.&lt;/p&gt;

&lt;h2&gt;
  
  
  State Persistence: Surviving Rate Limits
&lt;/h2&gt;

&lt;p&gt;Claude Code has rate limits. In the middle of building your startup, you might hit them. The system needed to survive this gracefully.&lt;/p&gt;

&lt;p&gt;Every agent maintains its own state file:&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight json"&gt;&lt;code&gt;&lt;span class="p"&gt;{&lt;/span&gt;&lt;span class="w"&gt;
  &lt;/span&gt;&lt;span class="nl"&gt;"id"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;&lt;span class="w"&gt; &lt;/span&gt;&lt;span class="s2"&gt;"eng-backend-01"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;&lt;span class="w"&gt;
  &lt;/span&gt;&lt;span class="nl"&gt;"role"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;&lt;span class="w"&gt; &lt;/span&gt;&lt;span class="s2"&gt;"eng-backend"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;&lt;span class="w"&gt;
  &lt;/span&gt;&lt;span class="nl"&gt;"status"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;&lt;span class="w"&gt; &lt;/span&gt;&lt;span class="s2"&gt;"active"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;&lt;span class="w"&gt;
  &lt;/span&gt;&lt;span class="nl"&gt;"currentTask"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;&lt;span class="w"&gt; &lt;/span&gt;&lt;span class="s2"&gt;"task-uuid"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;&lt;span class="w"&gt;
  &lt;/span&gt;&lt;span class="nl"&gt;"tasksCompleted"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;&lt;span class="w"&gt; &lt;/span&gt;&lt;span class="mi"&gt;12&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;&lt;span class="w"&gt;
  &lt;/span&gt;&lt;span class="nl"&gt;"lastCheckpoint"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;&lt;span class="w"&gt; &lt;/span&gt;&lt;span class="s2"&gt;"2025-01-15T10:30:00Z"&lt;/span&gt;&lt;span class="w"&gt;
&lt;/span&gt;&lt;span class="p"&gt;}&lt;/span&gt;&lt;span class="w"&gt;
&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;Before every major operation, agents checkpoint their state. When the system resumes after a rate limit:&lt;/p&gt;

&lt;ol&gt;
&lt;li&gt;Orchestrator reads its state file&lt;/li&gt;
&lt;li&gt;Scans all agent states for incomplete tasks&lt;/li&gt;
&lt;li&gt;Re-queues orphaned tasks&lt;/li&gt;
&lt;li&gt;Spawns replacement agents for failed ones&lt;/li&gt;
&lt;li&gt;Continues from where it left off&lt;/li&gt;
&lt;/ol&gt;

&lt;p&gt;No lost work. No starting over.&lt;/p&gt;

&lt;h2&gt;
  
  
  The Anti-Hallucination Protocol
&lt;/h2&gt;

&lt;p&gt;AI agents hallucinate. They claim packages exist that don't. They invent API endpoints. They assume syntax that doesn't compile.&lt;/p&gt;

&lt;p&gt;Every agent follows a strict protocol:&lt;/p&gt;

&lt;div class="table-wrapper-paragraph"&gt;&lt;table&gt;
&lt;thead&gt;
&lt;tr&gt;
&lt;th&gt;Category&lt;/th&gt;
&lt;th&gt;Verification Method&lt;/th&gt;
&lt;/tr&gt;
&lt;/thead&gt;
&lt;tbody&gt;
&lt;tr&gt;
&lt;td&gt;Technical capabilities&lt;/td&gt;
&lt;td&gt;Web search official docs&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;API usage&lt;/td&gt;
&lt;td&gt;Read docs + test with real call&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Package/dependency&lt;/td&gt;
&lt;td&gt;Verify exists on registry&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Syntax correctness&lt;/td&gt;
&lt;td&gt;Execute code, don't assume&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Performance claims&lt;/td&gt;
&lt;td&gt;Benchmark with real data&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Competitor features&lt;/td&gt;
&lt;td&gt;Verify on their actual site&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;
&lt;/table&gt;&lt;/div&gt;

&lt;p&gt;The rule is simple: never assume, always verify. When uncertain, research first. If still uncertain, choose the conservative option and document the uncertainty.&lt;/p&gt;

&lt;h2&gt;
  
  
  What I Would Do Differently
&lt;/h2&gt;

&lt;p&gt;&lt;strong&gt;Start with fewer agents.&lt;/strong&gt; 37 agents is a lot to coordinate. I would start with the core engineering swarm and add others incrementally.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Better observability.&lt;/strong&gt; Debugging a multi-agent system is hard. I added logging everywhere but still sometimes struggle to understand why an agent made a particular decision.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;More integration tests.&lt;/strong&gt; Unit testing individual agents is straightforward. Testing the interactions between 37 agents is not.&lt;/p&gt;

&lt;h2&gt;
  
  
  Try It Yourself
&lt;/h2&gt;

&lt;p&gt;The entire system is open source under the MIT license:&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;GitHub:&lt;/strong&gt; &lt;a href="https://github.com/asklokesh/claudeskill-loki-mode" rel="noopener noreferrer"&gt;https://github.com/asklokesh/claudeskill-loki-mode&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;To use it:&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight shell"&gt;&lt;code&gt;&lt;span class="c"&gt;# Clone to your Claude Code skills directory&lt;/span&gt;
git clone https://github.com/asklokesh/claudeskill-loki-mode.git ~/.claude/skills/loki-mode

&lt;span class="c"&gt;# Launch Claude Code with autonomous permissions&lt;/span&gt;
claude &lt;span class="nt"&gt;--dangerously-skip-permissions&lt;/span&gt;

&lt;span class="c"&gt;# Say the magic words&lt;/span&gt;
&lt;span class="o"&gt;&amp;gt;&lt;/span&gt; Loki Mode with PRD at ./docs/requirements.md
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;Fair warning: this requires &lt;code&gt;--dangerously-skip-permissions&lt;/code&gt; because the agents need to execute code, create files, and make network requests autonomously. Understand what that means before you run it.&lt;/p&gt;

&lt;h2&gt;
  
  
  What's Next
&lt;/h2&gt;

&lt;p&gt;I'm still iterating on this. Current areas of focus:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Better agent coordination patterns&lt;/li&gt;
&lt;li&gt;Reducing token usage through smarter context management&lt;/li&gt;
&lt;li&gt;More deployment targets&lt;/li&gt;
&lt;li&gt;Improved monitoring dashboard&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;If you try it, let me know what breaks. Open an issue or find me on LinkedIn.&lt;/p&gt;




&lt;p&gt;&lt;em&gt;Building in public. One autonomous agent at a time.&lt;/em&gt;&lt;/p&gt;

</description>
      <category>ai</category>
      <category>opensource</category>
      <category>productivity</category>
      <category>programming</category>
    </item>
  </channel>
</rss>
