<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/">
  <channel>
    <title>DEV Community: Muhammad Zulqarnain</title>
    <description>The latest articles on DEV Community by Muhammad Zulqarnain (@mzunain).</description>
    <link>https://dev.to/mzunain</link>
    <image>
      <url>https://media2.dev.to/dynamic/image/width=90,height=90,fit=cover,gravity=auto,format=auto/https:%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Fuser%2Fprofile_image%2F2895711%2Fb8f9886a-a886-49c9-a751-c182b236abef.jpeg</url>
      <title>DEV Community: Muhammad Zulqarnain</title>
      <link>https://dev.to/mzunain</link>
    </image>
    <atom:link rel="self" type="application/rss+xml" href="https://dev.to/feed/mzunain"/>
    <language>en</language>
    <item>
      <title>How I Structure Every Full Stack Project in 2025</title>
      <dc:creator>Muhammad Zulqarnain</dc:creator>
      <pubDate>Wed, 20 May 2026 06:00:00 +0000</pubDate>
      <link>https://dev.to/mzunain/how-i-structure-every-full-stack-project-in-2025-11i5</link>
      <guid>https://dev.to/mzunain/how-i-structure-every-full-stack-project-in-2025-11i5</guid>
      <description>&lt;p&gt;I build a lot of full-stack projects. The first 50 lines of code set the pattern for the next 50,000.&lt;/p&gt;

&lt;p&gt;Here's the structure I use for every project. Opinionated. Works.&lt;/p&gt;

&lt;h2&gt;
  
  
  The Monorepo Structure
&lt;/h2&gt;



&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight plaintext"&gt;&lt;code&gt;project/
├── packages/
│   ├── web/                    # Next.js frontend
│   │   └── src/
│   │       ├── app/
│   │       ├── components/
│   │       ├── hooks/
│   │       └── lib/
│   ├── api/                    # Node.js backend
│   │   └── src/
│   │       ├── routes/
│   │       ├── controllers/
│   │       ├── services/
│   │       ├── middleware/
│   │       └── models/
│   └── shared/                 # Shared TypeScript types
│       └── src/
│           ├── types.ts
│           ├── constants.ts
│           └── validators.ts
├── docker-compose.yml
├── turbo.json
└── package.json
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;h2&gt;
  
  
  Root package.json
&lt;/h2&gt;



&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight json"&gt;&lt;code&gt;&lt;span class="p"&gt;{&lt;/span&gt;&lt;span class="w"&gt;
  &lt;/span&gt;&lt;span class="nl"&gt;"workspaces"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;&lt;span class="w"&gt; &lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="s2"&gt;"packages/*"&lt;/span&gt;&lt;span class="p"&gt;],&lt;/span&gt;&lt;span class="w"&gt;
  &lt;/span&gt;&lt;span class="nl"&gt;"scripts"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;&lt;span class="w"&gt; &lt;/span&gt;&lt;span class="p"&gt;{&lt;/span&gt;&lt;span class="w"&gt;
    &lt;/span&gt;&lt;span class="nl"&gt;"dev"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;&lt;span class="w"&gt; &lt;/span&gt;&lt;span class="s2"&gt;"turbo run dev --parallel"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;&lt;span class="w"&gt;
    &lt;/span&gt;&lt;span class="nl"&gt;"build"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;&lt;span class="w"&gt; &lt;/span&gt;&lt;span class="s2"&gt;"turbo run build"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;&lt;span class="w"&gt;
    &lt;/span&gt;&lt;span class="nl"&gt;"test"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;&lt;span class="w"&gt; &lt;/span&gt;&lt;span class="s2"&gt;"turbo run test"&lt;/span&gt;&lt;span class="w"&gt;
  &lt;/span&gt;&lt;span class="p"&gt;}&lt;/span&gt;&lt;span class="w"&gt;
&lt;/span&gt;&lt;span class="p"&gt;}&lt;/span&gt;&lt;span class="w"&gt;
&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;h2&gt;
  
  
  Environment Management
&lt;/h2&gt;



&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight typescript"&gt;&lt;code&gt;&lt;span class="c1"&gt;// packages/shared/src/env.ts&lt;/span&gt;
&lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;requiredEnvVars&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;DATABASE_URL&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;JWT_SECRET&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;REDIS_URL&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;];&lt;/span&gt;

&lt;span class="k"&gt;export&lt;/span&gt; &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;env&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="na"&gt;databaseUrl&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;process&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;env&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;DATABASE_URL&lt;/span&gt;&lt;span class="o"&gt;!&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
  &lt;span class="na"&gt;jwtSecret&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;process&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;env&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;JWT_SECRET&lt;/span&gt;&lt;span class="o"&gt;!&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
  &lt;span class="na"&gt;redisUrl&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;process&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;env&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;REDIS_URL&lt;/span&gt;&lt;span class="o"&gt;!&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
&lt;span class="p"&gt;};&lt;/span&gt;

&lt;span class="nx"&gt;requiredEnvVars&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;forEach&lt;/span&gt;&lt;span class="p"&gt;((&lt;/span&gt;&lt;span class="nx"&gt;key&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="k"&gt;if &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="o"&gt;!&lt;/span&gt;&lt;span class="nx"&gt;process&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;env&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="nx"&gt;key&lt;/span&gt;&lt;span class="p"&gt;])&lt;/span&gt; &lt;span class="k"&gt;throw&lt;/span&gt; &lt;span class="k"&gt;new&lt;/span&gt; &lt;span class="nc"&gt;Error&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="s2"&gt;`Missing required env var: &lt;/span&gt;&lt;span class="p"&gt;${&lt;/span&gt;&lt;span class="nx"&gt;key&lt;/span&gt;&lt;span class="p"&gt;}&lt;/span&gt;&lt;span class="s2"&gt;`&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
&lt;span class="p"&gt;});&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;h2&gt;
  
  
  Type Sharing
&lt;/h2&gt;



&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight typescript"&gt;&lt;code&gt;&lt;span class="c1"&gt;// packages/shared/src/types.ts&lt;/span&gt;
&lt;span class="k"&gt;export&lt;/span&gt; &lt;span class="kd"&gt;type&lt;/span&gt; &lt;span class="nx"&gt;User&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt; &lt;span class="na"&gt;id&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;string&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt; &lt;span class="nl"&gt;email&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;string&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt; &lt;span class="nl"&gt;name&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;string&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt; &lt;span class="nl"&gt;createdAt&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nb"&gt;Date&lt;/span&gt; &lt;span class="p"&gt;};&lt;/span&gt;
&lt;span class="k"&gt;export&lt;/span&gt; &lt;span class="kd"&gt;type&lt;/span&gt; &lt;span class="nx"&gt;ApiResponse&lt;/span&gt;&lt;span class="o"&gt;&amp;lt;&lt;/span&gt;&lt;span class="nx"&gt;T&lt;/span&gt;&lt;span class="o"&gt;&amp;gt;&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt; &lt;span class="na"&gt;success&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;boolean&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt; &lt;span class="nl"&gt;data&lt;/span&gt;&lt;span class="p"&gt;?:&lt;/span&gt; &lt;span class="nx"&gt;T&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt; &lt;span class="nl"&gt;error&lt;/span&gt;&lt;span class="p"&gt;?:&lt;/span&gt; &lt;span class="kr"&gt;string&lt;/span&gt; &lt;span class="p"&gt;};&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;Backend:&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight typescript"&gt;&lt;code&gt;&lt;span class="k"&gt;import&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt; &lt;span class="nx"&gt;User&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;ApiResponse&lt;/span&gt; &lt;span class="p"&gt;}&lt;/span&gt; &lt;span class="k"&gt;from&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;@shared/types&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
&lt;span class="nx"&gt;router&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;post&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;/users&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="k"&gt;async &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;req&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;res&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="na"&gt;user&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;User&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nx"&gt;db&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;users&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;create&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;req&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;body&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
  &lt;span class="nx"&gt;res&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;json&lt;/span&gt;&lt;span class="o"&gt;&amp;lt;&lt;/span&gt;&lt;span class="nx"&gt;ApiResponse&lt;/span&gt;&lt;span class="o"&gt;&amp;lt;&lt;/span&gt;&lt;span class="nx"&gt;User&lt;/span&gt;&lt;span class="o"&gt;&amp;gt;&amp;gt;&lt;/span&gt;&lt;span class="p"&gt;({&lt;/span&gt; &lt;span class="na"&gt;success&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kc"&gt;true&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="na"&gt;data&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;user&lt;/span&gt; &lt;span class="p"&gt;});&lt;/span&gt;
&lt;span class="p"&gt;});&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;Frontend uses same types — no mismatches, no runtime surprises.&lt;/p&gt;

&lt;h2&gt;
  
  
  Docker for Local Dev
&lt;/h2&gt;



&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight yaml"&gt;&lt;code&gt;&lt;span class="na"&gt;version&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt; &lt;span class="s1"&gt;'&lt;/span&gt;&lt;span class="s"&gt;3'&lt;/span&gt;
&lt;span class="na"&gt;services&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt;
  &lt;span class="na"&gt;postgres&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt;
    &lt;span class="na"&gt;image&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt; &lt;span class="s"&gt;postgres:16&lt;/span&gt;
    &lt;span class="na"&gt;environment&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt;
      &lt;span class="na"&gt;POSTGRES_USER&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt; &lt;span class="s"&gt;dev&lt;/span&gt;
      &lt;span class="na"&gt;POSTGRES_PASSWORD&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt; &lt;span class="s"&gt;dev&lt;/span&gt;
      &lt;span class="na"&gt;POSTGRES_DB&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt; &lt;span class="s"&gt;mydb&lt;/span&gt;
    &lt;span class="na"&gt;ports&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt; &lt;span class="pi"&gt;[&lt;/span&gt;&lt;span class="s2"&gt;"&lt;/span&gt;&lt;span class="s"&gt;5432:5432"&lt;/span&gt;&lt;span class="pi"&gt;]&lt;/span&gt;
  &lt;span class="na"&gt;redis&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt;
    &lt;span class="na"&gt;image&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt; &lt;span class="s"&gt;redis:7&lt;/span&gt;
    &lt;span class="na"&gt;ports&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt; &lt;span class="pi"&gt;[&lt;/span&gt;&lt;span class="s2"&gt;"&lt;/span&gt;&lt;span class="s"&gt;6379:6379"&lt;/span&gt;&lt;span class="pi"&gt;]&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;&lt;code&gt;docker-compose up -d&lt;/code&gt; and the environment is ready.&lt;/p&gt;

&lt;h2&gt;
  
  
  Testing Structure
&lt;/h2&gt;



&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight plaintext"&gt;&lt;code&gt;tests/
├── unit/
│   └── services/
├── integration/
│   └── routes/
└── fixtures/
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;h2&gt;
  
  
  CI/CD (GitHub Actions)
&lt;/h2&gt;



&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight yaml"&gt;&lt;code&gt;&lt;span class="na"&gt;jobs&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt;
  &lt;span class="na"&gt;test&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt;
    &lt;span class="na"&gt;runs-on&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt; &lt;span class="s"&gt;ubuntu-latest&lt;/span&gt;
    &lt;span class="na"&gt;services&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt;
      &lt;span class="na"&gt;postgres&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt;
        &lt;span class="na"&gt;image&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt; &lt;span class="s"&gt;postgres:16&lt;/span&gt;
    &lt;span class="na"&gt;steps&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt;
      &lt;span class="pi"&gt;-&lt;/span&gt; &lt;span class="na"&gt;uses&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt; &lt;span class="s"&gt;actions/checkout@v4&lt;/span&gt;
      &lt;span class="pi"&gt;-&lt;/span&gt; &lt;span class="na"&gt;uses&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt; &lt;span class="s"&gt;actions/setup-node@v4&lt;/span&gt;
        &lt;span class="na"&gt;with&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt; &lt;span class="pi"&gt;{&lt;/span&gt; &lt;span class="nv"&gt;node-version&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt; &lt;span class="s1"&gt;'&lt;/span&gt;&lt;span class="s"&gt;20'&lt;/span&gt; &lt;span class="pi"&gt;}&lt;/span&gt;
      &lt;span class="pi"&gt;-&lt;/span&gt; &lt;span class="na"&gt;run&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt; &lt;span class="s"&gt;npm ci&lt;/span&gt;
      &lt;span class="pi"&gt;-&lt;/span&gt; &lt;span class="na"&gt;run&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt; &lt;span class="s"&gt;npm run lint&lt;/span&gt;
      &lt;span class="pi"&gt;-&lt;/span&gt; &lt;span class="na"&gt;run&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt; &lt;span class="s"&gt;npm run test&lt;/span&gt;
      &lt;span class="pi"&gt;-&lt;/span&gt; &lt;span class="na"&gt;run&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt; &lt;span class="s"&gt;npm run build&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;h2&gt;
  
  
  Why This Stack
&lt;/h2&gt;

&lt;ul&gt;
&lt;li&gt;
&lt;strong&gt;Monorepo&lt;/strong&gt;: One repo, one CI/CD, type sharing, easier refactoring&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Three packages&lt;/strong&gt;: &lt;code&gt;web&lt;/code&gt; deploys to Vercel, &lt;code&gt;api&lt;/code&gt; deploys anywhere, &lt;code&gt;shared&lt;/code&gt; is the contract between them&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;TypeScript everywhere&lt;/strong&gt;: Catches errors at build time, not runtime&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Turbo&lt;/strong&gt;: Parallel builds, smart caching&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Docker locally&lt;/strong&gt;: Dev matches prod&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;Day 1 setup: monorepo + docker + shared types + CI/CD + 5 integration tests. Costs one week. Saves months.&lt;/p&gt;

&lt;p&gt;&lt;a href="https://zunain.com" rel="noopener noreferrer"&gt;zunain.com&lt;/a&gt;&lt;/p&gt;

</description>
      <category>fullstack</category>
      <category>architecture</category>
      <category>webdev</category>
      <category>programming</category>
    </item>
    <item>
      <title>Building Inference-Optimized Emaibeddings: Why Smaller Models Beat Bigger Ones</title>
      <dc:creator>Muhammad Zulqarnain</dc:creator>
      <pubDate>Sun, 17 May 2026 20:57:34 +0000</pubDate>
      <link>https://dev.to/mzunain/building-inference-optimized-emaibeddings-why-smaller-models-beat-bigger-ones-35hj</link>
      <guid>https://dev.to/mzunain/building-inference-optimized-emaibeddings-why-smaller-models-beat-bigger-ones-35hj</guid>
      <description></description>
      <category>embeddingsmachine</category>
      <category>ai</category>
      <category>i</category>
      <category>spent</category>
    </item>
    <item>
      <title>How to Build a RAG Evaluation Framework That Catches Real Problems</title>
      <dc:creator>Muhammad Zulqarnain</dc:creator>
      <pubDate>Sun, 17 May 2026 19:38:58 +0000</pubDate>
      <link>https://dev.to/mzunain/how-to-build-a-rag-evaluation-framework-that-catches-real-problems-32hd</link>
      <guid>https://dev.to/mzunain/how-to-build-a-rag-evaluation-framework-that-catches-real-problems-32hd</guid>
      <description>&lt;p&gt;Six months into running a production RAG system, I had a problem: my users kept complaining about wrong answers, but my evaluation metrics looked fine. Retrieval accuracy: 87%. User satisfaction: 82%. Everything looked good on paper.&lt;/p&gt;

&lt;p&gt;Then I sat with users for a week and watched them interact with the system.&lt;/p&gt;

&lt;p&gt;The real problems were invisible to my metrics.&lt;/p&gt;

&lt;h2&gt;
  
  
  The Metrics You're Probably Using (And Why They're Not Enough)
&lt;/h2&gt;

&lt;p&gt;Most RAG evaluation setups check:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Retrieval recall: "Did we get the relevant documents?"&lt;/li&gt;
&lt;li&gt;Answer faithfulness: "Is the answer based on the retrieved content?"&lt;/li&gt;
&lt;li&gt;Answer relevance: "Does the answer address the question?"&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;These are fine starting points. But they miss the failure modes that actually hurt users.&lt;/p&gt;

&lt;h2&gt;
  
  
  The Failure Modes Nobody Talks About
&lt;/h2&gt;

&lt;p&gt;&lt;strong&gt;1. Temporal Confusion&lt;/strong&gt;&lt;/p&gt;

&lt;p&gt;Your system retrieved a correct document from 8 months ago. The policy changed 3 months ago. Your answer is confidently wrong.&lt;/p&gt;

&lt;p&gt;Standard metrics: pass. Users: furious.&lt;/p&gt;

&lt;p&gt;Fix: Track document age in your evaluation. Flag answers based on documents older than 30 days for manual review. Update your vector store on a schedule.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;2. Partial Retrieval&lt;/strong&gt;&lt;/p&gt;

&lt;p&gt;User asks a compound question: "What are the refund limits for premium vs standard accounts?"&lt;/p&gt;

&lt;p&gt;Your system retrieves the premium refund policy perfectly. Misses the standard account policy entirely.&lt;/p&gt;

&lt;p&gt;Your retrieval recall: 50%. Your answer: half wrong. Your metrics: don't catch this.&lt;/p&gt;

&lt;p&gt;Fix: Decompose compound questions before evaluation. Each sub-question needs its own retrieval check.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;3. Confidence Calibration&lt;/strong&gt;&lt;/p&gt;

&lt;p&gt;Your model says "Based on the policy, the limit is $500." The document actually says "approximately $400-600 depending on account history."&lt;/p&gt;

&lt;p&gt;The model collapsed a range into a specific number. Confidently. Wrongly.&lt;/p&gt;

&lt;p&gt;Fix: Add a confidence calibration metric. Compare model confidence signals against actual document specificity.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;4. The Hallucination in Context&lt;/strong&gt;&lt;/p&gt;

&lt;p&gt;This one is subtle. The document says X. The model retrieves it correctly. But the model's response adds context from its training data that contradicts X.&lt;/p&gt;

&lt;p&gt;Faithfulness metrics typically catch obvious hallucinations. They miss when the model supplements retrieved content with training data.&lt;/p&gt;

&lt;p&gt;Fix: Run a "context sufficiency" check. If the retrieved documents don't fully answer the question, flag it rather than letting the model fill gaps.&lt;/p&gt;

&lt;h2&gt;
  
  
  Building the Evaluation Framework
&lt;/h2&gt;

&lt;p&gt;Here's what actually works in production:&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Layer 1: Automated Metrics (Fast, Cheap)&lt;/strong&gt;&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Retrieval recall against a golden dataset&lt;/li&gt;
&lt;li&gt;BERTScore for semantic similarity&lt;/li&gt;
&lt;li&gt;Answer length relative to question complexity&lt;/li&gt;
&lt;li&gt;Response time (slow = bad UX = user churn)&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;Cost: ~$0.002 per query to evaluate&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Layer 2: LLM-as-Judge (Medium Cost, Medium Accuracy)&lt;/strong&gt;&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Have GPT-4 evaluate faithfulness and relevance&lt;/li&gt;
&lt;li&gt;Check for temporal issues by passing document metadata&lt;/li&gt;
&lt;li&gt;Flag compound question handling&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;Cost: ~$0.08 per query to evaluate (use on 10% of traffic)&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Layer 3: Human Review (Expensive, Ground Truth)&lt;/strong&gt;&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Weekly review of 50 random flagged queries&lt;/li&gt;
&lt;li&gt;Monthly review of edge cases identified by Layer 2&lt;/li&gt;
&lt;li&gt;Quarterly user satisfaction interviews&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;Cost: $800-2,000/month in person-hours&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;The Feedback Loop That Matters&lt;/strong&gt;&lt;/p&gt;

&lt;p&gt;Every failed evaluation goes into a "failure taxonomy." Not just "wrong answer" — but why it was wrong.&lt;/p&gt;

&lt;p&gt;After 3 months of this taxonomy, our top failure modes were:&lt;/p&gt;

&lt;ol&gt;
&lt;li&gt;Temporal confusion: 34% of failures&lt;/li&gt;
&lt;li&gt;Compound question partial retrieval: 28%&lt;/li&gt;
&lt;li&gt;Confidence miscalibration: 19%&lt;/li&gt;
&lt;li&gt;Hallucination in context: 14%&lt;/li&gt;
&lt;li&gt;Other: 5%&lt;/li&gt;
&lt;/ol&gt;

&lt;p&gt;This told us exactly where to invest engineering time.&lt;/p&gt;

&lt;h2&gt;
  
  
  The Numbers
&lt;/h2&gt;

&lt;p&gt;Before framework: Users reported wrong answers 3-4 times per week.&lt;/p&gt;

&lt;p&gt;After 3 months with the framework:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Temporal failures: down 71%&lt;/li&gt;
&lt;li&gt;Compound question failures: down 58%&lt;/li&gt;
&lt;li&gt;Overall wrong answer reports: down 64%&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;The evaluation framework cost us 40 engineering hours to build and $800/month to run. The reduction in support tickets saved $3,000/month.&lt;/p&gt;

&lt;h2&gt;
  
  
  What to Build First
&lt;/h2&gt;

&lt;p&gt;If you're starting from scratch: build Layer 1 first. Get automated metrics running on every query. Create a golden dataset of 200 question-answer pairs that represent your most common use cases.&lt;/p&gt;

&lt;p&gt;Run that for 30 days. The failure patterns will tell you what to build next.&lt;/p&gt;

&lt;p&gt;Don't let perfect be the enemy of good. An imperfect evaluation framework running consistently beats a perfect one you're still designing.&lt;/p&gt;

&lt;p&gt;Build the thing. Measure. Iterate.&lt;/p&gt;

</description>
      <category>ai</category>
      <category>machinelearningpython</category>
      <category>rag</category>
    </item>
    <item>
      <title>Embedding Quality Over Speed: Why Your Search Results Are Mediocre</title>
      <dc:creator>Muhammad Zulqarnain</dc:creator>
      <pubDate>Sun, 17 May 2026 05:28:42 +0000</pubDate>
      <link>https://dev.to/mzunain/embedding-quality-over-speed-why-your-search-results-are-mediocre-j2k</link>
      <guid>https://dev.to/mzunain/embedding-quality-over-speed-why-your-search-results-are-mediocre-j2k</guid>
      <description></description>
    </item>
    <item>
      <title>The Node.js Patterns I Wish I Knew 3 Years Ago</title>
      <dc:creator>Muhammad Zulqarnain</dc:creator>
      <pubDate>Fri, 08 May 2026 06:00:00 +0000</pubDate>
      <link>https://dev.to/mzunain/the-nodejs-patterns-i-wish-i-knew-3-years-ago-36oc</link>
      <guid>https://dev.to/mzunain/the-nodejs-patterns-i-wish-i-knew-3-years-ago-36oc</guid>
      <description>&lt;p&gt;Node.js is amazing for shipping fast. It's also amazing at letting you ship broken things fast.&lt;/p&gt;

&lt;p&gt;I've built a lot of broken things. Here are the patterns that make Node.js systems not break.&lt;/p&gt;

&lt;h2&gt;
  
  
  Pattern 1: EventEmitter for Internal Pub/Sub
&lt;/h2&gt;



&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight javascript"&gt;&lt;code&gt;&lt;span class="k"&gt;import&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt; &lt;span class="nx"&gt;EventEmitter&lt;/span&gt; &lt;span class="p"&gt;}&lt;/span&gt; &lt;span class="k"&gt;from&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;events&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
&lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;orderEvents&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="k"&gt;new&lt;/span&gt; &lt;span class="nc"&gt;EventEmitter&lt;/span&gt;&lt;span class="p"&gt;();&lt;/span&gt;

&lt;span class="k"&gt;async&lt;/span&gt; &lt;span class="kd"&gt;function&lt;/span&gt; &lt;span class="nf"&gt;createOrder&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;userId&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;items&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;order&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nx"&gt;db&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;orders&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;create&lt;/span&gt;&lt;span class="p"&gt;({&lt;/span&gt; &lt;span class="nx"&gt;userId&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;items&lt;/span&gt; &lt;span class="p"&gt;});&lt;/span&gt;
  &lt;span class="nx"&gt;orderEvents&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;emit&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;order:created&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;order&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
  &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="nx"&gt;order&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;

&lt;span class="nx"&gt;orderEvents&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;on&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;order:created&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="k"&gt;async &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;order&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nf"&gt;sendConfirmationEmail&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;order&lt;/span&gt;&lt;span class="p"&gt;).&lt;/span&gt;&lt;span class="k"&gt;catch&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;err&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="nx"&gt;logger&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;error&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;Email failed&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;err&lt;/span&gt;&lt;span class="p"&gt;));&lt;/span&gt;
&lt;span class="p"&gt;});&lt;/span&gt;

&lt;span class="nx"&gt;orderEvents&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;on&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;order:created&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="k"&gt;async &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;order&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nf"&gt;logAnalytics&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;order_created&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;order&lt;/span&gt;&lt;span class="p"&gt;).&lt;/span&gt;&lt;span class="k"&gt;catch&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;err&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="nx"&gt;logger&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;error&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;Analytics failed&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;err&lt;/span&gt;&lt;span class="p"&gt;));&lt;/span&gt;
&lt;span class="p"&gt;});&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;&lt;strong&gt;Result:&lt;/strong&gt; Order creation returns in 50ms. Side effects async. If email fails, order still succeeded.&lt;/p&gt;

&lt;h2&gt;
  
  
  Pattern 2: Worker Threads for CPU-Bound Tasks
&lt;/h2&gt;



&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight javascript"&gt;&lt;code&gt;&lt;span class="k"&gt;import&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt; &lt;span class="nx"&gt;Worker&lt;/span&gt; &lt;span class="p"&gt;}&lt;/span&gt; &lt;span class="k"&gt;from&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;worker_threads&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
&lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;workers&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nc"&gt;Array&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;os&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;cpus&lt;/span&gt;&lt;span class="p"&gt;().&lt;/span&gt;&lt;span class="nx"&gt;length&lt;/span&gt;&lt;span class="p"&gt;).&lt;/span&gt;&lt;span class="nf"&gt;fill&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="kc"&gt;null&lt;/span&gt;&lt;span class="p"&gt;).&lt;/span&gt;&lt;span class="nf"&gt;map&lt;/span&gt;&lt;span class="p"&gt;(()&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="k"&gt;new&lt;/span&gt; &lt;span class="nc"&gt;Worker&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;./worker.js&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;));&lt;/span&gt;
&lt;span class="kd"&gt;let&lt;/span&gt; &lt;span class="nx"&gt;currentWorker&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="mi"&gt;0&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;

&lt;span class="kd"&gt;function&lt;/span&gt; &lt;span class="nf"&gt;processImage&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;imageBuffer&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="k"&gt;new&lt;/span&gt; &lt;span class="nc"&gt;Promise&lt;/span&gt;&lt;span class="p"&gt;((&lt;/span&gt;&lt;span class="nx"&gt;resolve&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;reject&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
    &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;worker&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nx"&gt;workers&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="nx"&gt;currentWorker&lt;/span&gt;&lt;span class="p"&gt;];&lt;/span&gt;
    &lt;span class="nx"&gt;currentWorker&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;currentWorker&lt;/span&gt; &lt;span class="o"&gt;+&lt;/span&gt; &lt;span class="mi"&gt;1&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="o"&gt;%&lt;/span&gt; &lt;span class="nx"&gt;workers&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;length&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
    &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;timer&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nf"&gt;setTimeout&lt;/span&gt;&lt;span class="p"&gt;(()&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="nf"&gt;reject&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="k"&gt;new&lt;/span&gt; &lt;span class="nc"&gt;Error&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;Worker timeout&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;)),&lt;/span&gt; &lt;span class="mi"&gt;30000&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
    &lt;span class="nx"&gt;worker&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;once&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;message&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;result&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt; &lt;span class="nf"&gt;clearTimeout&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;timer&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt; &lt;span class="nf"&gt;resolve&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;result&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt; &lt;span class="p"&gt;});&lt;/span&gt;
    &lt;span class="nx"&gt;worker&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;once&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;error&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;reject&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
    &lt;span class="nx"&gt;worker&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;postMessage&lt;/span&gt;&lt;span class="p"&gt;({&lt;/span&gt; &lt;span class="nx"&gt;imageBuffer&lt;/span&gt; &lt;span class="p"&gt;});&lt;/span&gt;
  &lt;span class="p"&gt;});&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;&lt;strong&gt;Result:&lt;/strong&gt; Heavy computation doesn't block the main thread.&lt;/p&gt;

&lt;h2&gt;
  
  
  Pattern 3: Domain-Specific Error Classes
&lt;/h2&gt;



&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight javascript"&gt;&lt;code&gt;&lt;span class="kd"&gt;class&lt;/span&gt; &lt;span class="nc"&gt;AppError&lt;/span&gt; &lt;span class="kd"&gt;extends&lt;/span&gt; &lt;span class="nc"&gt;Error&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="nf"&gt;constructor&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;message&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;statusCode&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;code&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
    &lt;span class="k"&gt;super&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;message&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
    &lt;span class="k"&gt;this&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;statusCode&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nx"&gt;statusCode&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
    &lt;span class="k"&gt;this&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;code&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nx"&gt;code&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
  &lt;span class="p"&gt;}&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;

&lt;span class="kd"&gt;class&lt;/span&gt; &lt;span class="nc"&gt;NotFoundError&lt;/span&gt; &lt;span class="kd"&gt;extends&lt;/span&gt; &lt;span class="nc"&gt;AppError&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="nf"&gt;constructor&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;message&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt; &lt;span class="k"&gt;super&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;message&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="mi"&gt;404&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;NOT_FOUND&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt; &lt;span class="p"&gt;}&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;

&lt;span class="kd"&gt;class&lt;/span&gt; &lt;span class="nc"&gt;ValidationError&lt;/span&gt; &lt;span class="kd"&gt;extends&lt;/span&gt; &lt;span class="nc"&gt;AppError&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="nf"&gt;constructor&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;message&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt; &lt;span class="k"&gt;super&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;message&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="mi"&gt;400&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;VALIDATION_ERROR&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt; &lt;span class="p"&gt;}&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;

&lt;span class="nx"&gt;app&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;get&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;/users/:id&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="k"&gt;async &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;req&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;res&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;next&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="k"&gt;try&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
    &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;user&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nf"&gt;getUser&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;req&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;params&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;id&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
    &lt;span class="nx"&gt;res&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;json&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;user&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
  &lt;span class="p"&gt;}&lt;/span&gt; &lt;span class="k"&gt;catch &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;err&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
    &lt;span class="k"&gt;if &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;err&lt;/span&gt; &lt;span class="k"&gt;instanceof&lt;/span&gt; &lt;span class="nx"&gt;AppError&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
      &lt;span class="nx"&gt;res&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;status&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;err&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;statusCode&lt;/span&gt;&lt;span class="p"&gt;).&lt;/span&gt;&lt;span class="nf"&gt;json&lt;/span&gt;&lt;span class="p"&gt;({&lt;/span&gt; &lt;span class="na"&gt;error&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;err&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;message&lt;/span&gt; &lt;span class="p"&gt;});&lt;/span&gt;
    &lt;span class="p"&gt;}&lt;/span&gt; &lt;span class="k"&gt;else&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
      &lt;span class="nx"&gt;res&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;status&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="mi"&gt;500&lt;/span&gt;&lt;span class="p"&gt;).&lt;/span&gt;&lt;span class="nf"&gt;json&lt;/span&gt;&lt;span class="p"&gt;({&lt;/span&gt; &lt;span class="na"&gt;error&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;Internal server error&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt; &lt;span class="p"&gt;});&lt;/span&gt;
    &lt;span class="p"&gt;}&lt;/span&gt;
  &lt;span class="p"&gt;}&lt;/span&gt;
&lt;span class="p"&gt;});&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;h2&gt;
  
  
  Pattern 4: Connection Pooling
&lt;/h2&gt;



&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight javascript"&gt;&lt;code&gt;&lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;pool&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="k"&gt;new&lt;/span&gt; &lt;span class="nx"&gt;pg&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nc"&gt;Pool&lt;/span&gt;&lt;span class="p"&gt;({&lt;/span&gt;
  &lt;span class="na"&gt;max&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="mi"&gt;20&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
  &lt;span class="na"&gt;idleTimeoutMillis&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="mi"&gt;30000&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
  &lt;span class="na"&gt;connectionTimeoutMillis&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="mi"&gt;2000&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
&lt;span class="p"&gt;});&lt;/span&gt;

&lt;span class="k"&gt;async&lt;/span&gt; &lt;span class="kd"&gt;function&lt;/span&gt; &lt;span class="nf"&gt;getUser&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;id&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;result&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nx"&gt;pool&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;query&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;SELECT * FROM users WHERE id = $1&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="nx"&gt;id&lt;/span&gt;&lt;span class="p"&gt;]);&lt;/span&gt;
  &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="nx"&gt;result&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;rows&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="mi"&gt;0&lt;/span&gt;&lt;span class="p"&gt;];&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;h2&gt;
  
  
  Pattern 5: Circuit Breaker
&lt;/h2&gt;



&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight javascript"&gt;&lt;code&gt;&lt;span class="kd"&gt;class&lt;/span&gt; &lt;span class="nc"&gt;CircuitBreaker&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="nf"&gt;constructor&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;fn&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;options&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="p"&gt;{})&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
    &lt;span class="k"&gt;this&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;fn&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nx"&gt;fn&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
    &lt;span class="k"&gt;this&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;failureThreshold&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nx"&gt;options&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;failureThreshold&lt;/span&gt; &lt;span class="o"&gt;||&lt;/span&gt; &lt;span class="mi"&gt;5&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
    &lt;span class="k"&gt;this&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;resetTimeout&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nx"&gt;options&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;resetTimeout&lt;/span&gt; &lt;span class="o"&gt;||&lt;/span&gt; &lt;span class="mi"&gt;60000&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
    &lt;span class="k"&gt;this&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;state&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;CLOSED&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
    &lt;span class="k"&gt;this&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;failureCount&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="mi"&gt;0&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
    &lt;span class="k"&gt;this&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;lastFailureTime&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="kc"&gt;null&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
  &lt;span class="p"&gt;}&lt;/span&gt;

  &lt;span class="k"&gt;async&lt;/span&gt; &lt;span class="nf"&gt;call&lt;/span&gt;&lt;span class="p"&gt;(...&lt;/span&gt;&lt;span class="nx"&gt;args&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
    &lt;span class="k"&gt;if &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="k"&gt;this&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;state&lt;/span&gt; &lt;span class="o"&gt;===&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;OPEN&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
      &lt;span class="k"&gt;if &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nb"&gt;Date&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;now&lt;/span&gt;&lt;span class="p"&gt;()&lt;/span&gt; &lt;span class="o"&gt;-&lt;/span&gt; &lt;span class="k"&gt;this&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;lastFailureTime&lt;/span&gt; &lt;span class="o"&gt;&amp;gt;&lt;/span&gt; &lt;span class="k"&gt;this&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;resetTimeout&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
        &lt;span class="k"&gt;this&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;state&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;HALF_OPEN&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
      &lt;span class="p"&gt;}&lt;/span&gt; &lt;span class="k"&gt;else&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
        &lt;span class="k"&gt;throw&lt;/span&gt; &lt;span class="k"&gt;new&lt;/span&gt; &lt;span class="nc"&gt;Error&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;Circuit breaker is OPEN&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
      &lt;span class="p"&gt;}&lt;/span&gt;
    &lt;span class="p"&gt;}&lt;/span&gt;
    &lt;span class="k"&gt;try&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
      &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;result&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="k"&gt;this&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;fn&lt;/span&gt;&lt;span class="p"&gt;(...&lt;/span&gt;&lt;span class="nx"&gt;args&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
      &lt;span class="k"&gt;this&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;failureCount&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="mi"&gt;0&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
      &lt;span class="k"&gt;this&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;state&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;CLOSED&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
      &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="nx"&gt;result&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
    &lt;span class="p"&gt;}&lt;/span&gt; &lt;span class="k"&gt;catch &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;err&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
      &lt;span class="k"&gt;this&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;failureCount&lt;/span&gt;&lt;span class="o"&gt;++&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
      &lt;span class="k"&gt;this&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;lastFailureTime&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nb"&gt;Date&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;now&lt;/span&gt;&lt;span class="p"&gt;();&lt;/span&gt;
      &lt;span class="k"&gt;if &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="k"&gt;this&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;failureCount&lt;/span&gt; &lt;span class="o"&gt;&amp;gt;=&lt;/span&gt; &lt;span class="k"&gt;this&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;failureThreshold&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="k"&gt;this&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;state&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;OPEN&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
      &lt;span class="k"&gt;throw&lt;/span&gt; &lt;span class="nx"&gt;err&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
    &lt;span class="p"&gt;}&lt;/span&gt;
  &lt;span class="p"&gt;}&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;h2&gt;
  
  
  Pattern 6: Graceful Shutdown
&lt;/h2&gt;



&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight javascript"&gt;&lt;code&gt;&lt;span class="k"&gt;async&lt;/span&gt; &lt;span class="kd"&gt;function&lt;/span&gt; &lt;span class="nf"&gt;gracefulShutdown&lt;/span&gt;&lt;span class="p"&gt;()&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="nx"&gt;server&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;close&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="k"&gt;async &lt;/span&gt;&lt;span class="p"&gt;()&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
    &lt;span class="nx"&gt;logger&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;info&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;Server closed&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
  &lt;span class="p"&gt;});&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;forceShutdown&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nf"&gt;setTimeout&lt;/span&gt;&lt;span class="p"&gt;(()&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="nx"&gt;process&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;exit&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="mi"&gt;1&lt;/span&gt;&lt;span class="p"&gt;),&lt;/span&gt; &lt;span class="mi"&gt;30000&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
  &lt;span class="k"&gt;try&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
    &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nx"&gt;db&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;close&lt;/span&gt;&lt;span class="p"&gt;();&lt;/span&gt;
    &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nx"&gt;redis&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;quit&lt;/span&gt;&lt;span class="p"&gt;();&lt;/span&gt;
    &lt;span class="nf"&gt;clearTimeout&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;forceShutdown&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
    &lt;span class="nx"&gt;process&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;exit&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="mi"&gt;0&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
  &lt;span class="p"&gt;}&lt;/span&gt; &lt;span class="k"&gt;catch &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;err&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
    &lt;span class="nx"&gt;process&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;exit&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="mi"&gt;1&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
  &lt;span class="p"&gt;}&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;
&lt;span class="nx"&gt;process&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;on&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;SIGTERM&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;gracefulShutdown&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
&lt;span class="nx"&gt;process&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;on&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;SIGINT&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;gracefulShutdown&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;h2&gt;
  
  
  The Meta Pattern: Async Error Handling
&lt;/h2&gt;



&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight javascript"&gt;&lt;code&gt;&lt;span class="kd"&gt;function&lt;/span&gt; &lt;span class="nf"&gt;asyncHandler&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;fn&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="k"&gt;return &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;req&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;res&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;next&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="nb"&gt;Promise&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;resolve&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nf"&gt;fn&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;req&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;res&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;next&lt;/span&gt;&lt;span class="p"&gt;)).&lt;/span&gt;&lt;span class="k"&gt;catch&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;next&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;

&lt;span class="nx"&gt;app&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;get&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;/users/:id&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nf"&gt;asyncHandler&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="k"&gt;async &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;req&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;res&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;user&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nx"&gt;db&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;getUser&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;req&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;params&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;id&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
  &lt;span class="nx"&gt;res&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;json&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;user&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
&lt;span class="p"&gt;}));&lt;/span&gt;

&lt;span class="nx"&gt;app&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;use&lt;/span&gt;&lt;span class="p"&gt;((&lt;/span&gt;&lt;span class="nx"&gt;err&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;req&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;res&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;next&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="nx"&gt;logger&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;error&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;Unhandled error&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;err&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
  &lt;span class="nx"&gt;res&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;status&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="mi"&gt;500&lt;/span&gt;&lt;span class="p"&gt;).&lt;/span&gt;&lt;span class="nf"&gt;json&lt;/span&gt;&lt;span class="p"&gt;({&lt;/span&gt; &lt;span class="na"&gt;error&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;Internal server error&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt; &lt;span class="p"&gt;});&lt;/span&gt;
&lt;span class="p"&gt;});&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;These aren't fancy patterns. They're the ones that keep systems running at 3am without pages.&lt;/p&gt;

&lt;p&gt;&lt;a href="https://zunain.com" rel="noopener noreferrer"&gt;zunain.com&lt;/a&gt;&lt;/p&gt;

</description>
      <category>node</category>
      <category>javascript</category>
      <category>backend</category>
      <category>programming</category>
    </item>
    <item>
      <title>The Node.js Patterns I Wish I Knew 3 Years Ago</title>
      <dc:creator>Muhammad Zulqarnain</dc:creator>
      <pubDate>Fri, 08 May 2026 05:06:33 +0000</pubDate>
      <link>https://dev.to/mzunain/the-nodejs-patterns-i-wish-i-knew-3-years-ago-3h6e</link>
      <guid>https://dev.to/mzunain/the-nodejs-patterns-i-wish-i-knew-3-years-ago-3h6e</guid>
      <description>&lt;p&gt;Event loop mastery, streams, worker threads, graceful shutdown, and the design patterns that separate senior Node.js engineers from the rest.&lt;/p&gt;

</description>
      <category>node</category>
      <category>javascript</category>
      <category>backend</category>
      <category>architecture</category>
    </item>
    <item>
      <title>Building Production-Ready RAG: A Complete Architecture Guide</title>
      <dc:creator>Muhammad Zulqarnain</dc:creator>
      <pubDate>Mon, 27 Apr 2026 16:39:13 +0000</pubDate>
      <link>https://dev.to/mzunain/building-production-ready-rag-a-complete-architecture-guide-4nfj</link>
      <guid>https://dev.to/mzunain/building-production-ready-rag-a-complete-architecture-guide-4nfj</guid>
      <description>&lt;h1&gt;
  
  
  Building Production-Ready RAG: A Complete Architecture Guide
&lt;/h1&gt;

&lt;p&gt;Most RAG tutorials show you how to stuff documents into a vector database and query them with an LLM. That gets you a demo. It does not get you a production system.&lt;/p&gt;

&lt;p&gt;I've built RAG pipelines that serve real users. The gap between "it works in a notebook" and "it works at 2 AM when the on-call gets paged" is massive. This guide covers what actually matters when you ship RAG to production.&lt;/p&gt;

&lt;h2&gt;
  
  
  Why Most RAG Demos Fail
&lt;/h2&gt;

&lt;p&gt;The standard RAG tutorial looks like this: load PDFs, chunk them, embed them, store in Pinecone, query with OpenAI. Five lines of LangChain. Done.&lt;/p&gt;

&lt;p&gt;Then you deploy it and discover:&lt;/p&gt;

&lt;p&gt;The chunking strategy matters more than the model. Bad chunks produce bad retrieval. Bad retrieval produces hallucinated answers. Users lose trust fast.&lt;/p&gt;

&lt;p&gt;Latency kills user experience. A 6-second response time is fine in a demo. Real users expect sub-2 seconds. You need caching, streaming, and careful orchestration.&lt;/p&gt;

&lt;p&gt;Cost scales faster than you expect. Every query hits an embedding model and an LLM. At 10K queries per day, your monthly bill will surprise you.&lt;/p&gt;

&lt;p&gt;Evaluation is almost impossible without infrastructure. "Does this answer look right?" is not a test strategy.&lt;/p&gt;

&lt;p&gt;Let's fix each of these.&lt;/p&gt;

&lt;h2&gt;
  
  
  The Ingestion Pipeline
&lt;/h2&gt;

&lt;p&gt;Ingestion is where most RAG systems quietly break. Bad data in, bad answers out.&lt;/p&gt;

&lt;h3&gt;
  
  
  Document Processing
&lt;/h3&gt;

&lt;p&gt;Before you chunk anything, clean your documents. Strip headers, footers, navigation elements, and boilerplate. Extract tables separately because they chunk terribly as plain text.&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight typescript"&gt;&lt;code&gt;&lt;span class="kr"&gt;interface&lt;/span&gt; &lt;span class="nx"&gt;ProcessedDocument&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="nl"&gt;id&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;string&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
  &lt;span class="nl"&gt;content&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;string&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
  &lt;span class="nl"&gt;metadata&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
    &lt;span class="na"&gt;source&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;string&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
    &lt;span class="nl"&gt;title&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;string&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
    &lt;span class="nl"&gt;section&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;string&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
    &lt;span class="nl"&gt;lastUpdated&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nb"&gt;Date&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
    &lt;span class="nl"&gt;documentType&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;article&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt; &lt;span class="o"&gt;|&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;api-doc&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt; &lt;span class="o"&gt;|&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;tutorial&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt; &lt;span class="o"&gt;|&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;faq&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
  &lt;span class="p"&gt;};&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;

&lt;span class="k"&gt;async&lt;/span&gt; &lt;span class="kd"&gt;function&lt;/span&gt; &lt;span class="nf"&gt;processDocument&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;raw&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;RawDocument&lt;/span&gt;&lt;span class="p"&gt;):&lt;/span&gt; &lt;span class="nb"&gt;Promise&lt;/span&gt;&lt;span class="o"&gt;&amp;lt;&lt;/span&gt;&lt;span class="nx"&gt;ProcessedDocument&lt;/span&gt;&lt;span class="p"&gt;[]&lt;/span&gt;&lt;span class="o"&gt;&amp;gt;&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="c1"&gt;// Split by semantic sections, not arbitrary character counts&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;sections&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nf"&gt;extractSections&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;raw&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;html&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;

  &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="nx"&gt;sections&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;map&lt;/span&gt;&lt;span class="p"&gt;((&lt;/span&gt;&lt;span class="nx"&gt;section&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;index&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="p"&gt;({&lt;/span&gt;
    &lt;span class="na"&gt;id&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="s2"&gt;`&lt;/span&gt;&lt;span class="p"&gt;${&lt;/span&gt;&lt;span class="nx"&gt;raw&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;id&lt;/span&gt;&lt;span class="p"&gt;}&lt;/span&gt;&lt;span class="s2"&gt;-section-&lt;/span&gt;&lt;span class="p"&gt;${&lt;/span&gt;&lt;span class="nx"&gt;index&lt;/span&gt;&lt;span class="p"&gt;}&lt;/span&gt;&lt;span class="s2"&gt;`&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="na"&gt;content&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nf"&gt;cleanMarkdown&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;section&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;content&lt;/span&gt;&lt;span class="p"&gt;),&lt;/span&gt;
    &lt;span class="na"&gt;metadata&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
      &lt;span class="na"&gt;source&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;raw&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;url&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
      &lt;span class="na"&gt;title&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;raw&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;title&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
      &lt;span class="na"&gt;section&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;section&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;heading&lt;/span&gt; &lt;span class="o"&gt;||&lt;/span&gt; &lt;span class="s2"&gt;`Section &lt;/span&gt;&lt;span class="p"&gt;${&lt;/span&gt;&lt;span class="nx"&gt;index&lt;/span&gt; &lt;span class="o"&gt;+&lt;/span&gt; &lt;span class="mi"&gt;1&lt;/span&gt;&lt;span class="p"&gt;}&lt;/span&gt;&lt;span class="s2"&gt;`&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
      &lt;span class="na"&gt;lastUpdated&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;raw&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;updatedAt&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
      &lt;span class="na"&gt;documentType&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nf"&gt;classifyDocument&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;raw&lt;/span&gt;&lt;span class="p"&gt;),&lt;/span&gt;
    &lt;span class="p"&gt;},&lt;/span&gt;
  &lt;span class="p"&gt;}));&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;h3&gt;
  
  
  Chunking Strategy
&lt;/h3&gt;

&lt;p&gt;Forget fixed-size chunking. It splits sentences mid-thought and produces chunks that lack context.&lt;/p&gt;

&lt;p&gt;Use semantic chunking: split on paragraph boundaries, heading boundaries, and logical breaks. Each chunk should be a self-contained thought that makes sense in isolation.&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight typescript"&gt;&lt;code&gt;&lt;span class="kd"&gt;function&lt;/span&gt; &lt;span class="nf"&gt;semanticChunk&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
  &lt;span class="nb"&gt;document&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;ProcessedDocument&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
  &lt;span class="nx"&gt;options&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt; &lt;span class="nl"&gt;maxTokens&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;number&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt; &lt;span class="nl"&gt;overlap&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;number&lt;/span&gt; &lt;span class="p"&gt;}&lt;/span&gt;
&lt;span class="p"&gt;):&lt;/span&gt; &lt;span class="nx"&gt;Chunk&lt;/span&gt;&lt;span class="p"&gt;[]&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;paragraphs&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nb"&gt;document&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;content&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;split&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="sr"&gt;/&lt;/span&gt;&lt;span class="se"&gt;\n\n&lt;/span&gt;&lt;span class="sr"&gt;+/&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;chunks&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;Chunk&lt;/span&gt;&lt;span class="p"&gt;[]&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="p"&gt;[];&lt;/span&gt;
  &lt;span class="kd"&gt;let&lt;/span&gt; &lt;span class="nx"&gt;currentChunk&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;string&lt;/span&gt;&lt;span class="p"&gt;[]&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="p"&gt;[];&lt;/span&gt;
  &lt;span class="kd"&gt;let&lt;/span&gt; &lt;span class="nx"&gt;currentTokens&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="mi"&gt;0&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;

  &lt;span class="k"&gt;for &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;paragraph&lt;/span&gt; &lt;span class="k"&gt;of&lt;/span&gt; &lt;span class="nx"&gt;paragraphs&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
    &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;paragraphTokens&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nf"&gt;estimateTokens&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;paragraph&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;

    &lt;span class="k"&gt;if &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;currentTokens&lt;/span&gt; &lt;span class="o"&gt;+&lt;/span&gt; &lt;span class="nx"&gt;paragraphTokens&lt;/span&gt; &lt;span class="o"&gt;&amp;gt;&lt;/span&gt; &lt;span class="nx"&gt;options&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;maxTokens&lt;/span&gt; &lt;span class="o"&gt;&amp;amp;&amp;amp;&lt;/span&gt; &lt;span class="nx"&gt;currentChunk&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;length&lt;/span&gt; &lt;span class="o"&gt;&amp;gt;&lt;/span&gt; &lt;span class="mi"&gt;0&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
      &lt;span class="nx"&gt;chunks&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;push&lt;/span&gt;&lt;span class="p"&gt;({&lt;/span&gt;
        &lt;span class="na"&gt;content&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;currentChunk&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;join&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="se"&gt;\n\n&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;),&lt;/span&gt;
        &lt;span class="na"&gt;tokenCount&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;currentTokens&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
        &lt;span class="na"&gt;metadata&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nb"&gt;document&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;metadata&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
      &lt;span class="p"&gt;});&lt;/span&gt;

      &lt;span class="c1"&gt;// Keep last paragraph for overlap/context continuity&lt;/span&gt;
      &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;overlapParagraphs&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nx"&gt;currentChunk&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;slice&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="o"&gt;-&lt;/span&gt;&lt;span class="mi"&gt;1&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
      &lt;span class="nx"&gt;currentChunk&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="p"&gt;[...&lt;/span&gt;&lt;span class="nx"&gt;overlapParagraphs&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;paragraph&lt;/span&gt;&lt;span class="p"&gt;];&lt;/span&gt;
      &lt;span class="nx"&gt;currentTokens&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nf"&gt;estimateTokens&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;overlapParagraphs&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;join&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="se"&gt;\n\n&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;))&lt;/span&gt; &lt;span class="o"&gt;+&lt;/span&gt; &lt;span class="nx"&gt;paragraphTokens&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
    &lt;span class="p"&gt;}&lt;/span&gt; &lt;span class="k"&gt;else&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
      &lt;span class="nx"&gt;currentChunk&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;push&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;paragraph&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
      &lt;span class="nx"&gt;currentTokens&lt;/span&gt; &lt;span class="o"&gt;+=&lt;/span&gt; &lt;span class="nx"&gt;paragraphTokens&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
    &lt;span class="p"&gt;}&lt;/span&gt;
  &lt;span class="p"&gt;}&lt;/span&gt;

  &lt;span class="k"&gt;if &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;currentChunk&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;length&lt;/span&gt; &lt;span class="o"&gt;&amp;gt;&lt;/span&gt; &lt;span class="mi"&gt;0&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
    &lt;span class="nx"&gt;chunks&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;push&lt;/span&gt;&lt;span class="p"&gt;({&lt;/span&gt;
      &lt;span class="na"&gt;content&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;currentChunk&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;join&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="se"&gt;\n\n&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;),&lt;/span&gt;
      &lt;span class="na"&gt;tokenCount&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;currentTokens&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
      &lt;span class="na"&gt;metadata&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nb"&gt;document&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;metadata&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="p"&gt;});&lt;/span&gt;
  &lt;span class="p"&gt;}&lt;/span&gt;

  &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="nx"&gt;chunks&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;Target chunk size: 200-500 tokens. Smaller chunks give more precise retrieval. Larger chunks give more context. I've found 300-400 tokens to be the sweet spot for most documentation use cases.&lt;/p&gt;

&lt;h3&gt;
  
  
  Embedding Models
&lt;/h3&gt;

&lt;p&gt;For production, I use OpenAI's &lt;code&gt;text-embedding-3-small&lt;/code&gt; for most cases. It's cheap ($0.02 per 1M tokens), fast, and the quality is good enough for 90% of use cases.&lt;/p&gt;

&lt;p&gt;If you need better retrieval quality and can afford the latency, &lt;code&gt;text-embedding-3-large&lt;/code&gt; with dimension reduction to 1024 gives excellent results at a reasonable cost.&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight typescript"&gt;&lt;code&gt;&lt;span class="k"&gt;async&lt;/span&gt; &lt;span class="kd"&gt;function&lt;/span&gt; &lt;span class="nf"&gt;embedChunks&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;chunks&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;Chunk&lt;/span&gt;&lt;span class="p"&gt;[]):&lt;/span&gt; &lt;span class="nb"&gt;Promise&lt;/span&gt;&lt;span class="o"&gt;&amp;lt;&lt;/span&gt;&lt;span class="nx"&gt;EmbeddedChunk&lt;/span&gt;&lt;span class="p"&gt;[]&lt;/span&gt;&lt;span class="o"&gt;&amp;gt;&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="c1"&gt;// Batch embeddings for efficiency (max 2048 per request)&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;batchSize&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="mi"&gt;2048&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="na"&gt;results&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;EmbeddedChunk&lt;/span&gt;&lt;span class="p"&gt;[]&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="p"&gt;[];&lt;/span&gt;

  &lt;span class="k"&gt;for &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="kd"&gt;let&lt;/span&gt; &lt;span class="nx"&gt;i&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="mi"&gt;0&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt; &lt;span class="nx"&gt;i&lt;/span&gt; &lt;span class="o"&gt;&amp;lt;&lt;/span&gt; &lt;span class="nx"&gt;chunks&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;length&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt; &lt;span class="nx"&gt;i&lt;/span&gt; &lt;span class="o"&gt;+=&lt;/span&gt; &lt;span class="nx"&gt;batchSize&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
    &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;batch&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nx"&gt;chunks&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;slice&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;i&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;i&lt;/span&gt; &lt;span class="o"&gt;+&lt;/span&gt; &lt;span class="nx"&gt;batchSize&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
    &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;response&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nx"&gt;openai&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;embeddings&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;create&lt;/span&gt;&lt;span class="p"&gt;({&lt;/span&gt;
      &lt;span class="na"&gt;model&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;text-embedding-3-small&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
      &lt;span class="na"&gt;input&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;batch&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;map&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;c&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="nx"&gt;c&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;content&lt;/span&gt;&lt;span class="p"&gt;),&lt;/span&gt;
    &lt;span class="p"&gt;});&lt;/span&gt;

    &lt;span class="k"&gt;for &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="kd"&gt;let&lt;/span&gt; &lt;span class="nx"&gt;j&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="mi"&gt;0&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt; &lt;span class="nx"&gt;j&lt;/span&gt; &lt;span class="o"&gt;&amp;lt;&lt;/span&gt; &lt;span class="nx"&gt;batch&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;length&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt; &lt;span class="nx"&gt;j&lt;/span&gt;&lt;span class="o"&gt;++&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
      &lt;span class="nx"&gt;results&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;push&lt;/span&gt;&lt;span class="p"&gt;({&lt;/span&gt;
        &lt;span class="p"&gt;...&lt;/span&gt;&lt;span class="nx"&gt;batch&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="nx"&gt;j&lt;/span&gt;&lt;span class="p"&gt;],&lt;/span&gt;
        &lt;span class="na"&gt;embedding&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;response&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;data&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="nx"&gt;j&lt;/span&gt;&lt;span class="p"&gt;].&lt;/span&gt;&lt;span class="nx"&gt;embedding&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
      &lt;span class="p"&gt;});&lt;/span&gt;
    &lt;span class="p"&gt;}&lt;/span&gt;
  &lt;span class="p"&gt;}&lt;/span&gt;

  &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="nx"&gt;results&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;h2&gt;
  
  
  The Retrieval Layer
&lt;/h2&gt;

&lt;p&gt;Vector similarity search alone is not enough. You need hybrid search and re-ranking.&lt;/p&gt;

&lt;h3&gt;
  
  
  Hybrid Search with pgvector
&lt;/h3&gt;

&lt;p&gt;If you're already running PostgreSQL, use pgvector. One less service to manage. Combine vector similarity with full-text search for hybrid retrieval.&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight typescript"&gt;&lt;code&gt;&lt;span class="k"&gt;async&lt;/span&gt; &lt;span class="kd"&gt;function&lt;/span&gt; &lt;span class="nf"&gt;hybridSearch&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
  &lt;span class="nx"&gt;query&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;string&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
  &lt;span class="nx"&gt;queryEmbedding&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;number&lt;/span&gt;&lt;span class="p"&gt;[],&lt;/span&gt;
  &lt;span class="nx"&gt;options&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt; &lt;span class="nl"&gt;limit&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;number&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt; &lt;span class="nl"&gt;minScore&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;number&lt;/span&gt; &lt;span class="p"&gt;}&lt;/span&gt;
&lt;span class="p"&gt;):&lt;/span&gt; &lt;span class="nb"&gt;Promise&lt;/span&gt;&lt;span class="o"&gt;&amp;lt;&lt;/span&gt;&lt;span class="nx"&gt;SearchResult&lt;/span&gt;&lt;span class="p"&gt;[]&lt;/span&gt;&lt;span class="o"&gt;&amp;gt;&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;results&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nx"&gt;db&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;query&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="s2"&gt;`
    WITH vector_results AS (
      SELECT id, content, metadata,
             1 - (embedding &amp;lt;=&amp;gt; $1::vector) AS vector_score
      FROM chunks
      WHERE 1 - (embedding &amp;lt;=&amp;gt; $1::vector) &amp;gt; $3
      ORDER BY embedding &amp;lt;=&amp;gt; $1::vector
      LIMIT $2 * 2
    ),
    text_results AS (
      SELECT id, content, metadata,
             ts_rank(search_vector, plainto_tsquery('english', $4)) AS text_score
      FROM chunks
      WHERE search_vector @@ plainto_tsquery('english', $4)
      ORDER BY text_score DESC
      LIMIT $2 * 2
    )
    SELECT COALESCE(v.id, t.id) AS id,
           COALESCE(v.content, t.content) AS content,
           COALESCE(v.metadata, t.metadata) AS metadata,
           COALESCE(v.vector_score, 0) * 0.7 + COALESCE(t.text_score, 0) * 0.3 AS combined_score
    FROM vector_results v
    FULL OUTER JOIN text_results t ON v.id = t.id
    ORDER BY combined_score DESC
    LIMIT $2
  `&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="nx"&gt;queryEmbedding&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;options&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;limit&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;options&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;minScore&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;query&lt;/span&gt;&lt;span class="p"&gt;]);&lt;/span&gt;

  &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="nx"&gt;results&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;rows&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;The 0.7/0.3 weighting between vector and text scores works well as a starting point. Tune it based on your evaluation results.&lt;/p&gt;

&lt;h3&gt;
  
  
  Re-ranking
&lt;/h3&gt;

&lt;p&gt;After retrieval, re-rank the results. A cross-encoder model gives much better relevance scores than cosine similarity alone. Cohere's re-rank API is the easiest option.&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight typescript"&gt;&lt;code&gt;&lt;span class="k"&gt;async&lt;/span&gt; &lt;span class="kd"&gt;function&lt;/span&gt; &lt;span class="nf"&gt;rerankResults&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
  &lt;span class="nx"&gt;query&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;string&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
  &lt;span class="nx"&gt;results&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;SearchResult&lt;/span&gt;&lt;span class="p"&gt;[],&lt;/span&gt;
  &lt;span class="nx"&gt;topK&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;number&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="mi"&gt;5&lt;/span&gt;
&lt;span class="p"&gt;):&lt;/span&gt; &lt;span class="nb"&gt;Promise&lt;/span&gt;&lt;span class="o"&gt;&amp;lt;&lt;/span&gt;&lt;span class="nx"&gt;SearchResult&lt;/span&gt;&lt;span class="p"&gt;[]&lt;/span&gt;&lt;span class="o"&gt;&amp;gt;&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;response&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nx"&gt;cohere&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;rerank&lt;/span&gt;&lt;span class="p"&gt;({&lt;/span&gt;
    &lt;span class="na"&gt;model&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;rerank-english-v3.0&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="nx"&gt;query&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="na"&gt;documents&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;results&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;map&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;r&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="nx"&gt;r&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;content&lt;/span&gt;&lt;span class="p"&gt;),&lt;/span&gt;
    &lt;span class="na"&gt;topN&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;topK&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
  &lt;span class="p"&gt;});&lt;/span&gt;

  &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="nx"&gt;response&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;results&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;map&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;r&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="p"&gt;({&lt;/span&gt;
    &lt;span class="p"&gt;...&lt;/span&gt;&lt;span class="nx"&gt;results&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="nx"&gt;r&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;index&lt;/span&gt;&lt;span class="p"&gt;],&lt;/span&gt;
    &lt;span class="na"&gt;relevanceScore&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;r&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;relevanceScore&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
  &lt;span class="p"&gt;}));&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;This step alone improved our answer quality by about 15% in our evaluations.&lt;/p&gt;

&lt;h2&gt;
  
  
  The Generation Layer
&lt;/h2&gt;

&lt;h3&gt;
  
  
  Prompt Design
&lt;/h3&gt;

&lt;p&gt;Your system prompt should be specific about the task, the constraints, and the expected format. Vague prompts produce vague answers.&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight typescript"&gt;&lt;code&gt;&lt;span class="kd"&gt;function&lt;/span&gt; &lt;span class="nf"&gt;buildPrompt&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;query&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;string&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;context&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;SearchResult&lt;/span&gt;&lt;span class="p"&gt;[]):&lt;/span&gt; &lt;span class="kr"&gt;string&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;contextBlock&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nx"&gt;context&lt;/span&gt;
    &lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;map&lt;/span&gt;&lt;span class="p"&gt;((&lt;/span&gt;&lt;span class="nx"&gt;c&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;i&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="s2"&gt;`[Source &lt;/span&gt;&lt;span class="p"&gt;${&lt;/span&gt;&lt;span class="nx"&gt;i&lt;/span&gt; &lt;span class="o"&gt;+&lt;/span&gt; &lt;span class="mi"&gt;1&lt;/span&gt;&lt;span class="p"&gt;}&lt;/span&gt;&lt;span class="s2"&gt;: &lt;/span&gt;&lt;span class="p"&gt;${&lt;/span&gt;&lt;span class="nx"&gt;c&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;metadata&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;title&lt;/span&gt;&lt;span class="p"&gt;}&lt;/span&gt;&lt;span class="s2"&gt;]\n&lt;/span&gt;&lt;span class="p"&gt;${&lt;/span&gt;&lt;span class="nx"&gt;c&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;content&lt;/span&gt;&lt;span class="p"&gt;}&lt;/span&gt;&lt;span class="s2"&gt;`&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;
    &lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;join&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="se"&gt;\n\n&lt;/span&gt;&lt;span class="s1"&gt;---&lt;/span&gt;&lt;span class="se"&gt;\n\n&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;

  &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="s2"&gt;`You are a technical documentation assistant. Answer the user's question using ONLY the provided context. If the context doesn't contain enough information to answer fully, say so clearly.

Rules:
- Cite sources using [Source N] format
- If you're unsure, say "I'm not confident about this" rather than guessing
- Keep answers concise and technical
- Include code examples when relevant

Context:
&lt;/span&gt;&lt;span class="p"&gt;${&lt;/span&gt;&lt;span class="nx"&gt;contextBlock&lt;/span&gt;&lt;span class="p"&gt;}&lt;/span&gt;&lt;span class="s2"&gt;

Question: &lt;/span&gt;&lt;span class="p"&gt;${&lt;/span&gt;&lt;span class="nx"&gt;query&lt;/span&gt;&lt;span class="p"&gt;}&lt;/span&gt;&lt;span class="s2"&gt;`&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;h3&gt;
  
  
  Streaming Responses
&lt;/h3&gt;

&lt;p&gt;Users should see the answer forming in real time. Stream tokens as they arrive.&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight typescript"&gt;&lt;code&gt;&lt;span class="k"&gt;async&lt;/span&gt; &lt;span class="kd"&gt;function&lt;/span&gt;&lt;span class="o"&gt;*&lt;/span&gt; &lt;span class="nf"&gt;generateAnswer&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
  &lt;span class="nx"&gt;query&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;string&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
  &lt;span class="nx"&gt;context&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;SearchResult&lt;/span&gt;&lt;span class="p"&gt;[]&lt;/span&gt;
&lt;span class="p"&gt;):&lt;/span&gt; &lt;span class="nx"&gt;AsyncGenerator&lt;/span&gt;&lt;span class="o"&gt;&amp;lt;&lt;/span&gt;&lt;span class="kr"&gt;string&lt;/span&gt;&lt;span class="o"&gt;&amp;gt;&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;prompt&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nf"&gt;buildPrompt&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;query&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;context&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;

  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;stream&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nx"&gt;openai&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;chat&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;completions&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;create&lt;/span&gt;&lt;span class="p"&gt;({&lt;/span&gt;
    &lt;span class="na"&gt;model&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;gpt-4o-mini&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="na"&gt;messages&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;[{&lt;/span&gt; &lt;span class="na"&gt;role&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;user&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="na"&gt;content&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;prompt&lt;/span&gt; &lt;span class="p"&gt;}],&lt;/span&gt;
    &lt;span class="na"&gt;stream&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kc"&gt;true&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="na"&gt;temperature&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="mf"&gt;0.1&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="c1"&gt;// Low temperature for factual answers&lt;/span&gt;
    &lt;span class="na"&gt;max_tokens&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="mi"&gt;1000&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
  &lt;span class="p"&gt;});&lt;/span&gt;

  &lt;span class="k"&gt;for&lt;/span&gt; &lt;span class="k"&gt;await &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;chunk&lt;/span&gt; &lt;span class="k"&gt;of&lt;/span&gt; &lt;span class="nx"&gt;stream&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
    &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;content&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nx"&gt;chunk&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;choices&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="mi"&gt;0&lt;/span&gt;&lt;span class="p"&gt;]?.&lt;/span&gt;&lt;span class="nx"&gt;delta&lt;/span&gt;&lt;span class="p"&gt;?.&lt;/span&gt;&lt;span class="nx"&gt;content&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
    &lt;span class="k"&gt;if &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;content&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="k"&gt;yield&lt;/span&gt; &lt;span class="nx"&gt;content&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
  &lt;span class="p"&gt;}&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;h3&gt;
  
  
  Hallucination Prevention
&lt;/h3&gt;

&lt;p&gt;Three layers of defense:&lt;/p&gt;

&lt;ol&gt;
&lt;li&gt;&lt;p&gt;&lt;strong&gt;Confidence scoring&lt;/strong&gt;: If no retrieved chunk scores above 0.75 relevance, don't generate an answer. Tell the user you couldn't find relevant information.&lt;/p&gt;&lt;/li&gt;
&lt;li&gt;&lt;p&gt;&lt;strong&gt;Source grounding&lt;/strong&gt;: The prompt explicitly limits answers to the provided context. This doesn't eliminate hallucinations but reduces them significantly.&lt;/p&gt;&lt;/li&gt;
&lt;li&gt;&lt;p&gt;&lt;strong&gt;Citation verification&lt;/strong&gt;: Post-process the response. If the model cites [Source 3] but Source 3 doesn't contain the claimed information, flag it.&lt;br&gt;
&lt;/p&gt;&lt;/li&gt;
&lt;/ol&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight typescript"&gt;&lt;code&gt;&lt;span class="kd"&gt;function&lt;/span&gt; &lt;span class="nf"&gt;validateCitations&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
  &lt;span class="nx"&gt;answer&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;string&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
  &lt;span class="nx"&gt;sources&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;SearchResult&lt;/span&gt;&lt;span class="p"&gt;[]&lt;/span&gt;
&lt;span class="p"&gt;):&lt;/span&gt; &lt;span class="nx"&gt;ValidationResult&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;citationPattern&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="sr"&gt;/&lt;/span&gt;&lt;span class="se"&gt;\[&lt;/span&gt;&lt;span class="sr"&gt;Source &lt;/span&gt;&lt;span class="se"&gt;(\d&lt;/span&gt;&lt;span class="sr"&gt;+&lt;/span&gt;&lt;span class="se"&gt;)\]&lt;/span&gt;&lt;span class="sr"&gt;/g&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;citations&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="p"&gt;[...&lt;/span&gt;&lt;span class="nx"&gt;answer&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;matchAll&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;citationPattern&lt;/span&gt;&lt;span class="p"&gt;)];&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;issues&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;string&lt;/span&gt;&lt;span class="p"&gt;[]&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="p"&gt;[];&lt;/span&gt;

  &lt;span class="k"&gt;for &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;citation&lt;/span&gt; &lt;span class="k"&gt;of&lt;/span&gt; &lt;span class="nx"&gt;citations&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
    &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;sourceIndex&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nf"&gt;parseInt&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;citation&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="mi"&gt;1&lt;/span&gt;&lt;span class="p"&gt;])&lt;/span&gt; &lt;span class="o"&gt;-&lt;/span&gt; &lt;span class="mi"&gt;1&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
    &lt;span class="k"&gt;if &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;sourceIndex&lt;/span&gt; &lt;span class="o"&gt;&amp;gt;=&lt;/span&gt; &lt;span class="nx"&gt;sources&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;length&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
      &lt;span class="nx"&gt;issues&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;push&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="s2"&gt;`Citation [Source &lt;/span&gt;&lt;span class="p"&gt;${&lt;/span&gt;&lt;span class="nx"&gt;citation&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="mi"&gt;1&lt;/span&gt;&lt;span class="p"&gt;]}&lt;/span&gt;&lt;span class="s2"&gt;] references non-existent source`&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
    &lt;span class="p"&gt;}&lt;/span&gt;
  &lt;span class="p"&gt;}&lt;/span&gt;

  &lt;span class="c1"&gt;// Check if answer contains claims without any citations&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;sentences&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nx"&gt;answer&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;split&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="sr"&gt;/&lt;/span&gt;&lt;span class="se"&gt;[&lt;/span&gt;&lt;span class="sr"&gt;.!?&lt;/span&gt;&lt;span class="se"&gt;]&lt;/span&gt;&lt;span class="sr"&gt;+/&lt;/span&gt;&lt;span class="p"&gt;).&lt;/span&gt;&lt;span class="nf"&gt;filter&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;s&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="nx"&gt;s&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;trim&lt;/span&gt;&lt;span class="p"&gt;().&lt;/span&gt;&lt;span class="nx"&gt;length&lt;/span&gt; &lt;span class="o"&gt;&amp;gt;&lt;/span&gt; &lt;span class="mi"&gt;20&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;uncitedClaims&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nx"&gt;sentences&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;filter&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
    &lt;span class="nx"&gt;s&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="o"&gt;!&lt;/span&gt;&lt;span class="nx"&gt;s&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;includes&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;[Source&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="o"&gt;&amp;amp;&amp;amp;&lt;/span&gt; &lt;span class="nf"&gt;looksLikeFactualClaim&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;s&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;
  &lt;span class="p"&gt;);&lt;/span&gt;

  &lt;span class="k"&gt;if &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;uncitedClaims&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;length&lt;/span&gt; &lt;span class="o"&gt;&amp;gt;&lt;/span&gt; &lt;span class="nx"&gt;sentences&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;length&lt;/span&gt; &lt;span class="o"&gt;*&lt;/span&gt; &lt;span class="mf"&gt;0.5&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
    &lt;span class="nx"&gt;issues&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;push&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;More than half of factual claims lack citations&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
  &lt;span class="p"&gt;}&lt;/span&gt;

  &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
    &lt;span class="na"&gt;valid&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;issues&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;length&lt;/span&gt; &lt;span class="o"&gt;===&lt;/span&gt; &lt;span class="mi"&gt;0&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="nx"&gt;issues&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="na"&gt;citationCount&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;citations&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;length&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
  &lt;span class="p"&gt;};&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;h2&gt;
  
  
  Evaluation
&lt;/h2&gt;

&lt;p&gt;You cannot improve what you cannot measure. Build an evaluation pipeline from day one.&lt;/p&gt;

&lt;h3&gt;
  
  
  Test Set Construction
&lt;/h3&gt;

&lt;p&gt;Create a test set of 50-100 question/answer pairs. Include edge cases: questions with no answer in the docs, ambiguous questions, questions requiring information from multiple sources.&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight typescript"&gt;&lt;code&gt;&lt;span class="kr"&gt;interface&lt;/span&gt; &lt;span class="nx"&gt;EvalCase&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="nl"&gt;query&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;string&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
  &lt;span class="nl"&gt;expectedAnswer&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;string&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
  &lt;span class="nl"&gt;relevantDocIds&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;string&lt;/span&gt;&lt;span class="p"&gt;[];&lt;/span&gt;
  &lt;span class="nl"&gt;difficulty&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;easy&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt; &lt;span class="o"&gt;|&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;medium&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt; &lt;span class="o"&gt;|&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;hard&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
  &lt;span class="nl"&gt;category&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;factual&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt; &lt;span class="o"&gt;|&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;procedural&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt; &lt;span class="o"&gt;|&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;conceptual&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt; &lt;span class="o"&gt;|&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;unanswerable&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;

&lt;span class="k"&gt;async&lt;/span&gt; &lt;span class="kd"&gt;function&lt;/span&gt; &lt;span class="nf"&gt;evaluateRAG&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;testCases&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;EvalCase&lt;/span&gt;&lt;span class="p"&gt;[]):&lt;/span&gt; &lt;span class="nb"&gt;Promise&lt;/span&gt;&lt;span class="o"&gt;&amp;lt;&lt;/span&gt;&lt;span class="nx"&gt;EvalReport&lt;/span&gt;&lt;span class="o"&gt;&amp;gt;&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;results&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nb"&gt;Promise&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;all&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
    &lt;span class="nx"&gt;testCases&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;map&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="k"&gt;async &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;tc&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
      &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;retrieved&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nf"&gt;hybridSearch&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;tc&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;query&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nf"&gt;embed&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;tc&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;query&lt;/span&gt;&lt;span class="p"&gt;),&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt; &lt;span class="na"&gt;limit&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="mi"&gt;5&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="na"&gt;minScore&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="mf"&gt;0.3&lt;/span&gt; &lt;span class="p"&gt;});&lt;/span&gt;
      &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;answer&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nf"&gt;generateFullAnswer&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;tc&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;query&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;retrieved&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;

      &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
        &lt;span class="na"&gt;query&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;tc&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;query&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
        &lt;span class="na"&gt;retrievalRecall&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nf"&gt;calculateRecall&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;retrieved&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;tc&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;relevantDocIds&lt;/span&gt;&lt;span class="p"&gt;),&lt;/span&gt;
        &lt;span class="na"&gt;answerRelevance&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nf"&gt;scoreRelevance&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;answer&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;tc&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;expectedAnswer&lt;/span&gt;&lt;span class="p"&gt;),&lt;/span&gt;
        &lt;span class="na"&gt;latencyMs&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;answer&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;latencyMs&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
        &lt;span class="na"&gt;tokenCost&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;answer&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;totalTokens&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
      &lt;span class="p"&gt;};&lt;/span&gt;
    &lt;span class="p"&gt;})&lt;/span&gt;
  &lt;span class="p"&gt;);&lt;/span&gt;

  &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
    &lt;span class="na"&gt;avgRetrievalRecall&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nf"&gt;avg&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;results&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;map&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;r&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="nx"&gt;r&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;retrievalRecall&lt;/span&gt;&lt;span class="p"&gt;)),&lt;/span&gt;
    &lt;span class="na"&gt;avgAnswerRelevance&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nf"&gt;avg&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;results&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;map&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;r&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="nx"&gt;r&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;answerRelevance&lt;/span&gt;&lt;span class="p"&gt;)),&lt;/span&gt;
    &lt;span class="na"&gt;avgLatencyMs&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nf"&gt;avg&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;results&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;map&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;r&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="nx"&gt;r&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;latencyMs&lt;/span&gt;&lt;span class="p"&gt;)),&lt;/span&gt;
    &lt;span class="na"&gt;totalCost&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nf"&gt;sum&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;results&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;map&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;r&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="nx"&gt;r&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;tokenCost&lt;/span&gt;&lt;span class="p"&gt;)),&lt;/span&gt;
    &lt;span class="nx"&gt;results&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
  &lt;span class="p"&gt;};&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;h3&gt;
  
  
  Key Metrics
&lt;/h3&gt;

&lt;p&gt;Track these in production:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;
&lt;strong&gt;Retrieval recall@5&lt;/strong&gt;: What percentage of relevant documents appear in the top 5 results? Target: &amp;gt;85%.&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Answer relevance&lt;/strong&gt;: LLM-as-judge scoring against expected answers. Target: &amp;gt;4.0/5.0.&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Latency P95&lt;/strong&gt;: Time from query to first streamed token. Target: &amp;lt;2 seconds.&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;User feedback rate&lt;/strong&gt;: Thumbs up/down ratio. Target: &amp;gt;80% positive.&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Hallucination rate&lt;/strong&gt;: Flagged by citation validation. Target: &amp;lt;5%.&lt;/li&gt;
&lt;/ul&gt;

&lt;h2&gt;
  
  
  Cost Optimization
&lt;/h2&gt;

&lt;p&gt;At scale, RAG costs add up fast. Three strategies that made the biggest difference for us:&lt;/p&gt;

&lt;h3&gt;
  
  
  Semantic Caching
&lt;/h3&gt;

&lt;p&gt;Cache answers for semantically similar queries. If someone asks "how do I deploy to production?" and another person asks "what's the deployment process?", they should get the same cached answer.&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight typescript"&gt;&lt;code&gt;&lt;span class="k"&gt;async&lt;/span&gt; &lt;span class="kd"&gt;function&lt;/span&gt; &lt;span class="nf"&gt;getCachedOrGenerate&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;query&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;string&lt;/span&gt;&lt;span class="p"&gt;):&lt;/span&gt; &lt;span class="nb"&gt;Promise&lt;/span&gt;&lt;span class="o"&gt;&amp;lt;&lt;/span&gt;&lt;span class="nx"&gt;RAGResponse&lt;/span&gt;&lt;span class="o"&gt;&amp;gt;&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;queryEmbedding&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nf"&gt;embed&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;query&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;

  &lt;span class="c1"&gt;// Check cache for semantically similar queries&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;cached&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nx"&gt;db&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;query&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="s2"&gt;`
    SELECT response, query
    FROM response_cache
    WHERE 1 - (query_embedding &amp;lt;=&amp;gt; $1::vector) &amp;gt; 0.95
    AND created_at &amp;gt; NOW() - INTERVAL '24 hours'
    ORDER BY query_embedding &amp;lt;=&amp;gt; $1::vector
    LIMIT 1
  `&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="nx"&gt;queryEmbedding&lt;/span&gt;&lt;span class="p"&gt;]);&lt;/span&gt;

  &lt;span class="k"&gt;if &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;cached&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;rows&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;length&lt;/span&gt; &lt;span class="o"&gt;&amp;gt;&lt;/span&gt; &lt;span class="mi"&gt;0&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
    &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt; &lt;span class="p"&gt;...&lt;/span&gt;&lt;span class="nx"&gt;cached&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;rows&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="mi"&gt;0&lt;/span&gt;&lt;span class="p"&gt;].&lt;/span&gt;&lt;span class="nx"&gt;response&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="na"&gt;fromCache&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kc"&gt;true&lt;/span&gt; &lt;span class="p"&gt;};&lt;/span&gt;
  &lt;span class="p"&gt;}&lt;/span&gt;

  &lt;span class="c1"&gt;// Generate fresh response&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;response&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nf"&gt;generateRAGResponse&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;query&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;queryEmbedding&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;

  &lt;span class="c1"&gt;// Cache it&lt;/span&gt;
  &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nx"&gt;db&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;query&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
    &lt;span class="s2"&gt;`INSERT INTO response_cache (query, query_embedding, response) VALUES ($1, $2, $3)`&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="nx"&gt;query&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;queryEmbedding&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;response&lt;/span&gt;&lt;span class="p"&gt;]&lt;/span&gt;
  &lt;span class="p"&gt;);&lt;/span&gt;

  &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="nx"&gt;response&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;This alone cut our LLM costs by about 40%.&lt;/p&gt;

&lt;h3&gt;
  
  
  Model Selection
&lt;/h3&gt;

&lt;p&gt;Use &lt;code&gt;gpt-4o-mini&lt;/code&gt; for most queries. It's 15x cheaper than &lt;code&gt;gpt-4o&lt;/code&gt; and handles straightforward documentation questions just fine. Route complex queries (multi-hop reasoning, comparison questions) to &lt;code&gt;gpt-4o&lt;/code&gt;.&lt;/p&gt;

&lt;h3&gt;
  
  
  Embedding Cost
&lt;/h3&gt;

&lt;p&gt;Pre-compute and store embeddings. Never re-embed a document that hasn't changed. Use a hash of the content to detect changes during re-indexing.&lt;/p&gt;

&lt;h2&gt;
  
  
  Real-World Performance Numbers
&lt;/h2&gt;

&lt;p&gt;From our production system serving ~5K queries per day:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Average retrieval latency: 45ms (pgvector with HNSW index)&lt;/li&gt;
&lt;li&gt;Average generation latency: 1.2s (gpt-4o-mini, streaming)&lt;/li&gt;
&lt;li&gt;Time to first token: 380ms&lt;/li&gt;
&lt;li&gt;Retrieval recall@5: 88%&lt;/li&gt;
&lt;li&gt;User satisfaction (thumbs up): 84%&lt;/li&gt;
&lt;li&gt;Monthly cost: ~$280 (embeddings + LLM + infrastructure)&lt;/li&gt;
&lt;li&gt;Cache hit rate: 38%&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;These numbers are realistic for a mid-scale documentation search system. Your numbers will vary based on document corpus size, query complexity, and model choice.&lt;/p&gt;

&lt;h2&gt;
  
  
  Wrapping Up
&lt;/h2&gt;

&lt;p&gt;Production RAG is not about picking the right model. It's about building a system with good data pipelines, smart retrieval, proper evaluation, and cost controls.&lt;/p&gt;

&lt;p&gt;Start with the simplest version that works. Measure everything. Improve the pieces that matter most based on your metrics.&lt;/p&gt;

&lt;p&gt;The code examples in this article are simplified but functional. Adapt them to your stack and your data.&lt;/p&gt;

&lt;p&gt;If you're building RAG right now, I'd love to hear what challenges you're hitting. Drop a comment or reach out.&lt;/p&gt;

</description>
      <category>ai</category>
      <category>rag</category>
      <category>typescript</category>
      <category>architecture</category>
    </item>
    <item>
      <title>Stop Building AI Wrappers. Start Building AI Products.</title>
      <dc:creator>Muhammad Zulqarnain</dc:creator>
      <pubDate>Fri, 24 Apr 2026 06:00:00 +0000</pubDate>
      <link>https://dev.to/mzunain/stop-building-ai-wrappers-start-building-ai-products-28mk</link>
      <guid>https://dev.to/mzunain/stop-building-ai-wrappers-start-building-ai-products-28mk</guid>
      <description>&lt;p&gt;Everyone and their brother is building an AI wrapper right now.&lt;/p&gt;

&lt;p&gt;You know the type: "I built a ChatGPT UI with a prompt." "I wrapped Claude and added a database." "I made an AI that does X by just calling the API."&lt;/p&gt;

&lt;p&gt;These are not products. They're proofs of concept that will be dead in 6 months.&lt;/p&gt;

&lt;p&gt;Here's the difference between an AI wrapper and an AI product, and why it matters.&lt;/p&gt;

&lt;h2&gt;
  
  
  What's an AI Wrapper?
&lt;/h2&gt;



&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight plaintext"&gt;&lt;code&gt;Wrapper = LLM API + UI/UX
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;ul&gt;
&lt;li&gt;"Prompt builder for GPT" — still just calling GPT&lt;/li&gt;
&lt;li&gt;"AI email writer" — just Claude, but for emails&lt;/li&gt;
&lt;li&gt;"Chat interface that remembers context" — ChatGPT but persistent&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;These have zero defensibility. When OpenAI releases ChatGPT Plus with the same features, your wrapper dies.&lt;/p&gt;

&lt;h2&gt;
  
  
  What's an AI Product?
&lt;/h2&gt;



&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight plaintext"&gt;&lt;code&gt;AI Product = (Proprietary Data + Specialized Model + Workflow Integration + User Loop)
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;ul&gt;
&lt;li&gt;
&lt;strong&gt;Cursor&lt;/strong&gt;: Code editor that understands your specific codebase. Remove AI, product breaks.&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Perplexity&lt;/strong&gt;: Web search + AI reasoning over sources + citations. The synthesis is the product.&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Replit Agent&lt;/strong&gt;: AI that executes code, sees errors, iterates. The feedback loop is the product.&lt;/li&gt;
&lt;/ul&gt;

&lt;h2&gt;
  
  
  The 5 Differences
&lt;/h2&gt;

&lt;p&gt;&lt;strong&gt;1. Proprietary Data&lt;/strong&gt;&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Wrapper: Uses public information&lt;/li&gt;
&lt;li&gt;Product: Has a data moat&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;&lt;strong&gt;2. Specialized vs. General&lt;/strong&gt;&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Wrapper: Uses a general LLM&lt;/li&gt;
&lt;li&gt;Product: Fine-tunes for specific task&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;&lt;strong&gt;3. Workflow Integration&lt;/strong&gt;&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Wrapper: Standalone tool&lt;/li&gt;
&lt;li&gt;Product: Integrated into how users work&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;&lt;strong&gt;4. Feedback Loop&lt;/strong&gt;&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Wrapper: Fire and forget&lt;/li&gt;
&lt;li&gt;Product: Learns from user behavior&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;&lt;strong&gt;5. Defensibility&lt;/strong&gt;&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Wrapper: Dead when the LLM vendor ships the same feature&lt;/li&gt;
&lt;li&gt;Product: Moat that gets wider with users&lt;/li&gt;
&lt;/ul&gt;

&lt;h2&gt;
  
  
  How to Build an AI Product
&lt;/h2&gt;

&lt;ol&gt;
&lt;li&gt;Start with a specific, narrow problem&lt;/li&gt;
&lt;li&gt;Identify your data advantage — if the answer is "none," you're building a wrapper&lt;/li&gt;
&lt;li&gt;Build the feedback loop from day 1 — capture accept/reject/edit signals&lt;/li&gt;
&lt;li&gt;Integrate into user workflow&lt;/li&gt;
&lt;li&gt;Fine-tune or specialize your model&lt;/li&gt;
&lt;li&gt;Think about the data flywheel&lt;/li&gt;
&lt;/ol&gt;

&lt;h2&gt;
  
  
  The Investment Thesis
&lt;/h2&gt;

&lt;ul&gt;
&lt;li&gt;Wrapper: "We built a UI for Claude." — VCs pass&lt;/li&gt;
&lt;li&gt;Product: "We trained a model on your domain data and it handles 60% of your support tickets." — VCs listen&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;Build for defensibility. Not demo impressiveness.&lt;/p&gt;

&lt;p&gt;More at &lt;a href="https://zunain.com" rel="noopener noreferrer"&gt;zunain.com&lt;/a&gt;.&lt;/p&gt;

</description>
      <category>ai</category>
      <category>llm</category>
      <category>webdev</category>
      <category>programming</category>
    </item>
    <item>
      <title>From $200/Month to Free: Running OpenClaw with Local AI Models</title>
      <dc:creator>Muhammad Zulqarnain</dc:creator>
      <pubDate>Sun, 19 Apr 2026 18:15:45 +0000</pubDate>
      <link>https://dev.to/mzunain/from-200month-to-free-running-openclaw-with-local-ai-models-37n9</link>
      <guid>https://dev.to/mzunain/from-200month-to-free-running-openclaw-with-local-ai-models-37n9</guid>
      <description>&lt;p&gt;&lt;em&gt;This is a submission for the &lt;a href="https://dev.to/challenges/openclaw-2026-04-16"&gt;OpenClaw Writing Challenge&lt;/a&gt;&lt;/em&gt;&lt;/p&gt;

&lt;h2&gt;
  
  
  The Problem: AI Assistant Costs Are Skyrocketing
&lt;/h2&gt;

&lt;p&gt;If you're running OpenClaw with cloud-hosted LLMs like Claude or GPT-4, you know the pain. Premium API access can easily cost $200/month or more, and that's assuming moderate usage. For developers, founders, or anyone automating workflows extensively, those costs compound fast.&lt;/p&gt;

&lt;p&gt;But here's the thing: &lt;strong&gt;OpenClaw doesn't require cloud AI&lt;/strong&gt;. You can run it entirely locally with open-source models—and in many cases, get comparable results for &lt;strong&gt;$0/month in API fees&lt;/strong&gt;.&lt;/p&gt;

&lt;p&gt;This guide walks through three deployment tiers, from completely free to budget-friendly, showing you how to cut your OpenClaw costs to zero while maintaining functionality.&lt;/p&gt;

&lt;h2&gt;
  
  
  Understanding Your Options
&lt;/h2&gt;

&lt;h3&gt;
  
  
  Tier 1: Completely Free (Ollama + Local Models)
&lt;/h3&gt;

&lt;p&gt;&lt;strong&gt;Cost&lt;/strong&gt;: $0/month&lt;br&gt;&lt;br&gt;
&lt;strong&gt;Hardware&lt;/strong&gt;: Any spare laptop/desktop with 8GB+ RAM&lt;br&gt;&lt;br&gt;
&lt;strong&gt;Best For&lt;/strong&gt;: Personal automation, learning, experimentation&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;How it works&lt;/strong&gt;:&lt;br&gt;&lt;br&gt;
Ollama lets you run powerful open-source models like Qwen 2.5 (7B/14B), Llama 3, or Mistral locally. These models are surprisingly capable for most automation tasks—code generation, data extraction, text summarization, and workflow orchestration.&lt;/p&gt;

&lt;p&gt;OpenClaw connects to Ollama as a model provider, treating your local instance like any cloud API.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Setup Steps&lt;/strong&gt;:&lt;/p&gt;

&lt;ol&gt;
&lt;li&gt;
&lt;strong&gt;Install Ollama&lt;/strong&gt; (Mac/Linux/Windows):
&lt;/li&gt;
&lt;/ol&gt;
&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight shell"&gt;&lt;code&gt;   curl &lt;span class="nt"&gt;-fsSL&lt;/span&gt; https://ollama.com/install.sh | sh
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;


&lt;ol&gt;
&lt;li&gt;
&lt;strong&gt;Pull a capable model&lt;/strong&gt;:
&lt;/li&gt;
&lt;/ol&gt;
&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight shell"&gt;&lt;code&gt;   ollama pull qwen2.5:14b
   &lt;span class="c"&gt;# or for lower-end hardware:&lt;/span&gt;
   ollama pull qwen2.5:7b
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;


&lt;ol&gt;
&lt;li&gt;&lt;p&gt;&lt;strong&gt;Configure OpenClaw&lt;/strong&gt;:&lt;br&gt;&lt;br&gt;
In your OpenClaw settings, switch the model provider to &lt;code&gt;ollama&lt;/code&gt; and point it to &lt;code&gt;http://localhost:11434&lt;/code&gt;.&lt;/p&gt;&lt;/li&gt;
&lt;li&gt;&lt;p&gt;&lt;strong&gt;Test your setup&lt;/strong&gt;:&lt;br&gt;&lt;br&gt;
Create a simple skill (e.g., "Summarize my emails") and verify it works with your local model.&lt;/p&gt;&lt;/li&gt;
&lt;/ol&gt;

&lt;p&gt;&lt;strong&gt;Tradeoffs&lt;/strong&gt;:  &lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Your device needs to stay on 24/7 for skills to run
&lt;/li&gt;
&lt;li&gt;Slightly slower inference than cloud APIs
&lt;/li&gt;
&lt;li&gt;Smaller context windows (typically 8K-32K tokens vs 128K+ for cloud models)&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;&lt;strong&gt;Real savings&lt;/strong&gt;: If you were paying $200/month for Claude API access, that's &lt;strong&gt;$2,400/year&lt;/strong&gt; saved.&lt;/p&gt;


&lt;h3&gt;
  
  
  Tier 2: Budget Cloud ($10-30/month)
&lt;/h3&gt;

&lt;p&gt;&lt;strong&gt;Cost&lt;/strong&gt;: $10-30/month&lt;br&gt;&lt;br&gt;
&lt;strong&gt;Hardware&lt;/strong&gt;: None (cloud-hosted)&lt;br&gt;&lt;br&gt;
&lt;strong&gt;Best For&lt;/strong&gt;: Production workflows, team usage, 24/7 availability&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;How it works&lt;/strong&gt;:&lt;br&gt;&lt;br&gt;
If running a local device 24/7 isn't practical, you can deploy Ollama on a cheap VPS (Virtual Private Server) and point OpenClaw to it remotely.&lt;/p&gt;

&lt;p&gt;Alternatively, use budget-friendly cloud APIs like:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;
&lt;strong&gt;Minimax API&lt;/strong&gt;: ~$0.001 per 1K tokens (~$20-30/month for heavy use)
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Groq&lt;/strong&gt;: Fast inference, generous free tier
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Together AI&lt;/strong&gt;: Competitive pricing on open models&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;&lt;strong&gt;VPS Setup Example&lt;/strong&gt; (DigitalOcean/Hetzner):&lt;/p&gt;

&lt;ol&gt;
&lt;li&gt;
&lt;strong&gt;Spin up a VPS&lt;/strong&gt; (~$10-15/month for 8GB RAM):
&lt;/li&gt;
&lt;/ol&gt;
&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight shell"&gt;&lt;code&gt;   &lt;span class="c"&gt;# SSH into your VPS&lt;/span&gt;
   ssh user@your-vps-ip
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;


&lt;ol&gt;
&lt;li&gt;
&lt;strong&gt;Install Ollama&lt;/strong&gt;:
&lt;/li&gt;
&lt;/ol&gt;
&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight shell"&gt;&lt;code&gt;   curl &lt;span class="nt"&gt;-fsSL&lt;/span&gt; https://ollama.com/install.sh | sh
   ollama pull qwen2.5:14b
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;


&lt;ol&gt;
&lt;li&gt;
&lt;strong&gt;Expose Ollama&lt;/strong&gt; (use a reverse proxy like ngrok or Tailscale for secure access):
&lt;/li&gt;
&lt;/ol&gt;
&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight shell"&gt;&lt;code&gt;   ollama serve &lt;span class="nt"&gt;--host&lt;/span&gt; 0.0.0.0
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;


&lt;ol&gt;
&lt;li&gt;
&lt;strong&gt;Point OpenClaw&lt;/strong&gt; to &lt;code&gt;http://your-vps-ip:11434&lt;/code&gt;
&lt;/li&gt;
&lt;/ol&gt;

&lt;p&gt;&lt;strong&gt;Tradeoffs&lt;/strong&gt;:  &lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Small monthly cost but still 10x cheaper than Claude Max
&lt;/li&gt;
&lt;li&gt;Requires basic VPS management skills
&lt;/li&gt;
&lt;li&gt;Latency depends on VPS location&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;&lt;strong&gt;Real savings&lt;/strong&gt;: Instead of $200/month on cloud APIs, you're paying $15-30/month—saving &lt;strong&gt;$170-185/month&lt;/strong&gt; or &lt;strong&gt;$2,040-2,220/year&lt;/strong&gt;.&lt;/p&gt;


&lt;h3&gt;
  
  
  Tier 3: Hybrid Approach (Best of Both Worlds)
&lt;/h3&gt;

&lt;p&gt;&lt;strong&gt;Cost&lt;/strong&gt;: Variable ($0-50/month depending on usage)&lt;br&gt;&lt;br&gt;
&lt;strong&gt;Strategy&lt;/strong&gt;: Use local models for routine tasks, cloud APIs for complex reasoning&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;How it works&lt;/strong&gt;:&lt;br&gt;&lt;br&gt;
OpenClaw supports multiple model providers simultaneously. You can configure different skills to use different models:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;
&lt;strong&gt;Routine automation&lt;/strong&gt; (email filtering, data extraction) → Ollama (free)
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Complex reasoning&lt;/strong&gt; (code review, strategic planning) → Claude/GPT-4 (pay-per-use)&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;This hybrid approach optimizes for both cost and capability.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Configuration Example&lt;/strong&gt;:&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight yaml"&gt;&lt;code&gt;&lt;span class="na"&gt;skills&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt;
  &lt;span class="na"&gt;email_summarizer&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt;
    &lt;span class="na"&gt;model&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt; &lt;span class="s"&gt;ollama/qwen2.5:14b&lt;/span&gt;

  &lt;span class="na"&gt;code_reviewer&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt;
    &lt;span class="na"&gt;model&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt; &lt;span class="s"&gt;anthropic/claude-3-opus&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;&lt;strong&gt;Real savings&lt;/strong&gt;: If 80% of your tasks run locally and 20% use cloud APIs, you're looking at ~$40/month instead of $200—saving &lt;strong&gt;$160/month&lt;/strong&gt; or &lt;strong&gt;$1,920/year&lt;/strong&gt;.&lt;/p&gt;




&lt;h2&gt;
  
  
  Choosing the Right Model
&lt;/h2&gt;

&lt;p&gt;Not all models are created equal. Here's what works well for OpenClaw:&lt;/p&gt;

&lt;div class="table-wrapper-paragraph"&gt;&lt;table&gt;
&lt;thead&gt;
&lt;tr&gt;
&lt;th&gt;Model&lt;/th&gt;
&lt;th&gt;Size&lt;/th&gt;
&lt;th&gt;Best For&lt;/th&gt;
&lt;th&gt;Context Window&lt;/th&gt;
&lt;/tr&gt;
&lt;/thead&gt;
&lt;tbody&gt;
&lt;tr&gt;
&lt;td&gt;&lt;strong&gt;Qwen 2.5&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;7B-14B&lt;/td&gt;
&lt;td&gt;General automation, coding&lt;/td&gt;
&lt;td&gt;32K tokens&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;&lt;strong&gt;Llama 3.1&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;8B-70B&lt;/td&gt;
&lt;td&gt;Reasoning, chat&lt;/td&gt;
&lt;td&gt;128K tokens&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;&lt;strong&gt;Mistral&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;7B-22B&lt;/td&gt;
&lt;td&gt;Fast inference, multilingual&lt;/td&gt;
&lt;td&gt;32K tokens&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;&lt;strong&gt;DeepSeek Coder&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;6.7B&lt;/td&gt;
&lt;td&gt;Code generation, debugging&lt;/td&gt;
&lt;td&gt;16K tokens&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;
&lt;/table&gt;&lt;/div&gt;

&lt;p&gt;For most users, &lt;strong&gt;Qwen 2.5 14B&lt;/strong&gt; offers the best balance of capability and resource requirements.&lt;/p&gt;




&lt;h2&gt;
  
  
  Real-World Example: My 5-Agent Setup
&lt;/h2&gt;

&lt;p&gt;I run 5 OpenClaw agents entirely on Ollama using a spare MacBook Air (16GB RAM):&lt;/p&gt;

&lt;ol&gt;
&lt;li&gt;
&lt;strong&gt;Email Assistant&lt;/strong&gt;: Filters, summarizes, drafts replies
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Code Helper&lt;/strong&gt;: Generates boilerplate, reviews PRs
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Research Agent&lt;/strong&gt;: Monitors RSS feeds, summarizes articles
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Data Extractor&lt;/strong&gt;: Pulls structured data from websites
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Task Scheduler&lt;/strong&gt;: Manages my Notion workspace&lt;/li&gt;
&lt;/ol&gt;

&lt;p&gt;&lt;strong&gt;Total monthly cost&lt;/strong&gt;: $0 (minus electricity, ~$2-3/month)&lt;br&gt;&lt;br&gt;
&lt;strong&gt;Previous cloud API cost&lt;/strong&gt;: ~$180/month&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Annual savings&lt;/strong&gt;: $2,160&lt;/p&gt;

&lt;p&gt;The MacBook runs 24/7, but I was going to keep it plugged in anyway. The agents paid for themselves in week one.&lt;/p&gt;


&lt;h2&gt;
  
  
  Getting Started: Your First Local OpenClaw Agent
&lt;/h2&gt;

&lt;p&gt;Here's a step-by-step walkthrough to create your first cost-free OpenClaw skill:&lt;/p&gt;
&lt;h3&gt;
  
  
  1. Install Prerequisites
&lt;/h3&gt;


&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight shell"&gt;&lt;code&gt;&lt;span class="c"&gt;# Install Ollama&lt;/span&gt;
curl &lt;span class="nt"&gt;-fsSL&lt;/span&gt; https://ollama.com/install.sh | sh

&lt;span class="c"&gt;# Pull a model&lt;/span&gt;
ollama pull qwen2.5:14b

&lt;span class="c"&gt;# Verify it's running&lt;/span&gt;
ollama list
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;

&lt;h3&gt;
  
  
  2. Configure OpenClaw
&lt;/h3&gt;

&lt;p&gt;In your OpenClaw instance:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Navigate to &lt;strong&gt;Settings&lt;/strong&gt; → &lt;strong&gt;Model Providers&lt;/strong&gt;
&lt;/li&gt;
&lt;li&gt;Add a new provider: &lt;code&gt;Ollama&lt;/code&gt;
&lt;/li&gt;
&lt;li&gt;Set endpoint: &lt;code&gt;http://localhost:11434&lt;/code&gt;
&lt;/li&gt;
&lt;li&gt;Test connection&lt;/li&gt;
&lt;/ul&gt;
&lt;h3&gt;
  
  
  3. Create a Simple Skill
&lt;/h3&gt;

&lt;p&gt;Let's build an &lt;strong&gt;Email Summarizer&lt;/strong&gt;:&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight python"&gt;&lt;code&gt;&lt;span class="c1"&gt;# Example skill configuration
&lt;/span&gt;&lt;span class="n"&gt;name&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;Daily Email Summary&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;
&lt;span class="n"&gt;trigger&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;cron: 0 8 * * *&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;  &lt;span class="c1"&gt;# Run at 8 AM daily
&lt;/span&gt;&lt;span class="n"&gt;model&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;ollama/qwen2.5:14b&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;

&lt;span class="n"&gt;prompt&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="o"&gt;|&lt;/span&gt;
  &lt;span class="n"&gt;Summarize&lt;/span&gt; &lt;span class="n"&gt;these&lt;/span&gt; &lt;span class="n"&gt;emails&lt;/span&gt; &lt;span class="n"&gt;into&lt;/span&gt; &lt;span class="n"&gt;a&lt;/span&gt; &lt;span class="n"&gt;concise&lt;/span&gt; &lt;span class="n"&gt;bullet&lt;/span&gt;&lt;span class="o"&gt;-&lt;/span&gt;&lt;span class="n"&gt;point&lt;/span&gt; &lt;span class="nb"&gt;list&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;
  &lt;span class="n"&gt;Focus&lt;/span&gt; &lt;span class="n"&gt;on&lt;/span&gt; &lt;span class="n"&gt;action&lt;/span&gt; &lt;span class="n"&gt;items&lt;/span&gt; &lt;span class="ow"&gt;and&lt;/span&gt; &lt;span class="n"&gt;key&lt;/span&gt; &lt;span class="n"&gt;information&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;

  &lt;span class="p"&gt;{&lt;/span&gt;&lt;span class="n"&gt;email_content&lt;/span&gt;&lt;span class="p"&gt;}&lt;/span&gt;

&lt;span class="n"&gt;output_format&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;markdown&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;
&lt;span class="n"&gt;notification&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;slack&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;h3&gt;
  
  
  4. Test &amp;amp; Iterate
&lt;/h3&gt;

&lt;p&gt;Run the skill manually first:&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight shell"&gt;&lt;code&gt;openclaw run email-summarizer &lt;span class="nt"&gt;--test&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;Once it works, let it run on schedule. Monitor performance and adjust the prompt as needed.&lt;/p&gt;




&lt;h2&gt;
  
  
  Tips for Optimizing Local Model Performance
&lt;/h2&gt;

&lt;ol&gt;
&lt;li&gt;
&lt;strong&gt;Use quantized models&lt;/strong&gt;: GGUF 4-bit quantization runs 2-3x faster with minimal quality loss&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Batch requests&lt;/strong&gt;: Process multiple items together to maximize throughput&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Cache responses&lt;/strong&gt;: For repetitive tasks, cache and reuse model outputs&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Monitor resources&lt;/strong&gt;: Use &lt;code&gt;htop&lt;/code&gt; or Activity Monitor to track CPU/GPU usage&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Upgrade RAM if needed&lt;/strong&gt;: 16GB is the sweet spot for running 14B models comfortably&lt;/li&gt;
&lt;/ol&gt;




&lt;h2&gt;
  
  
  When Cloud APIs Still Make Sense
&lt;/h2&gt;

&lt;p&gt;Local models aren't always the answer. Stick with cloud APIs when:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;
&lt;strong&gt;You need cutting-edge reasoning&lt;/strong&gt; (GPT-4o, Claude Opus for complex tasks)
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Context windows matter&lt;/strong&gt; (analyzing 100K+ token documents)
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Latency is critical&lt;/strong&gt; (sub-second response times)
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;You don't have suitable hardware&lt;/strong&gt; (less than 8GB RAM)&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;The hybrid approach (local for most tasks, cloud for special cases) often delivers the best ROI.&lt;/p&gt;




&lt;h2&gt;
  
  
  Conclusion: Take Control of Your AI Costs
&lt;/h2&gt;

&lt;p&gt;OpenClaw's flexibility means you're not locked into expensive cloud APIs. Whether you go fully local with Ollama, deploy a budget VPS, or use a hybrid strategy, you can dramatically reduce costs without sacrificing functionality.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Key takeaways&lt;/strong&gt;:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;✅ Local models (Ollama + Qwen/Llama) work for 80%+ of automation tasks
&lt;/li&gt;
&lt;li&gt;✅ VPS deployment costs $10-30/month vs $200+ for cloud APIs
&lt;/li&gt;
&lt;li&gt;✅ Hybrid approach balances cost and capability
&lt;/li&gt;
&lt;li&gt;✅ Annual savings of $1,920-2,400 are realistic&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;If you're spending over $100/month on AI API access, it's time to evaluate local options. OpenClaw makes it easy.&lt;/p&gt;




&lt;h2&gt;
  
  
  Resources
&lt;/h2&gt;

&lt;ul&gt;
&lt;li&gt;
&lt;strong&gt;OpenClaw Docs&lt;/strong&gt;: &lt;a href="https://docs.openclaw.ai" rel="noopener noreferrer"&gt;docs.openclaw.ai&lt;/a&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Ollama&lt;/strong&gt;: &lt;a href="https://ollama.com" rel="noopener noreferrer"&gt;ollama.com&lt;/a&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Qwen 2.5&lt;/strong&gt;: &lt;a href="https://huggingface.co/Qwen" rel="noopener noreferrer"&gt;huggingface.co/Qwen&lt;/a&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Budget VPS Providers&lt;/strong&gt;: DigitalOcean, Hetzner, Vultr&lt;/li&gt;
&lt;/ul&gt;




&lt;p&gt;&lt;em&gt;Have you switched to local models for OpenClaw? What's your setup? Drop a comment below!&lt;/em&gt;&lt;/p&gt;

</description>
      <category>devchallenge</category>
      <category>openclawchallenge</category>
    </item>
    <item>
      <title>EcoMind: AI Carbon Footprint Analyzer for Earth Day 2026</title>
      <dc:creator>Muhammad Zulqarnain</dc:creator>
      <pubDate>Sun, 19 Apr 2026 16:05:04 +0000</pubDate>
      <link>https://dev.to/mzunain/ecomind-ai-carbon-footprint-analyzer-for-earth-day-2026-571i</link>
      <guid>https://dev.to/mzunain/ecomind-ai-carbon-footprint-analyzer-for-earth-day-2026-571i</guid>
      <description>&lt;h2&gt;
  
  
  What I Built
&lt;/h2&gt;

&lt;p&gt;EcoMind is an AI-powered carbon footprint analyzer that helps individuals understand their environmental impact and receive personalized, actionable recommendations to reduce their carbon emissions.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Live Demo&lt;/strong&gt;: &lt;a href="https://ecomind-coral.vercel.app" rel="noopener noreferrer"&gt;https://ecomind-coral.vercel.app&lt;/a&gt;&lt;br&gt;
&lt;strong&gt;GitHub&lt;/strong&gt;: &lt;a href="https://github.com/mzunain/ecomind" rel="noopener noreferrer"&gt;https://github.com/mzunain/ecomind&lt;/a&gt;&lt;/p&gt;

&lt;h2&gt;
  
  
  Demo
&lt;/h2&gt;

&lt;p&gt;EcoMind provides:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;🌍 Instant carbon footprint analysis based on your lifestyle&lt;/li&gt;
&lt;li&gt;📊 Sustainability score (1-10) with detailed breakdown
&lt;/li&gt;
&lt;li&gt;💡 Top 5 personalized reduction actions ranked by impact&lt;/li&gt;
&lt;li&gt;🌱 Region-specific advice (optimized for Nordic countries)&lt;/li&gt;
&lt;li&gt;⚡ Fast, beautiful dark theme UI&lt;/li&gt;
&lt;/ul&gt;

&lt;h2&gt;
  
  
  Journey
&lt;/h2&gt;

&lt;h3&gt;
  
  
  Why Google Gemini?
&lt;/h3&gt;

&lt;p&gt;I chose Google Gemini for its structured JSON output capability, which is perfect for generating consistent, type-safe environmental data. The model excels at understanding complex lifestyle patterns and providing nuanced, regional advice.&lt;/p&gt;

&lt;h3&gt;
  
  
  Technical Implementation
&lt;/h3&gt;

&lt;p&gt;Using Next.js 15 with the App Router and Google's @google/generative-ai SDK, I implemented:&lt;/p&gt;

&lt;ol&gt;
&lt;li&gt;
&lt;strong&gt;Structured Schema&lt;/strong&gt;: Defined a TypeScript schema for carbon analysis&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Regional Context&lt;/strong&gt;: Added location-aware recommendations&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Difficulty Scoring&lt;/strong&gt;: Categorized actions by implementation difficulty&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Premium UI&lt;/strong&gt;: Modern glassmorphism design with Tailwind CSS&lt;/li&gt;
&lt;/ol&gt;

&lt;h3&gt;
  
  
  Challenges Faced
&lt;/h3&gt;

&lt;ul&gt;
&lt;li&gt;Model naming: Gemini API model versions required experimentation&lt;/li&gt;
&lt;li&gt;Structured output validation: Ensuring consistent JSON responses&lt;/li&gt;
&lt;li&gt;Regional accuracy: Balancing global averages with local context (e.g., Finland's clean energy grid)&lt;/li&gt;
&lt;/ul&gt;

&lt;h2&gt;
  
  
  Category Submission
&lt;/h2&gt;

&lt;p&gt;&lt;strong&gt;Best Use of Google Gemini&lt;/strong&gt; - EcoMind showcases Gemini's strength in structured data generation and contextual understanding for environmental impact analysis.&lt;/p&gt;

&lt;h2&gt;
  
  
  Additional Prize Categories
&lt;/h2&gt;

&lt;p&gt;N/A&lt;/p&gt;

</description>
      <category>weekendchallenge</category>
      <category>ai</category>
      <category>gemini</category>
      <category>environment</category>
    </item>
    <item>
      <title>EcoMind: AI-Powered Carbon Footprint Analyzer Using Google Gemini</title>
      <dc:creator>Muhammad Zulqarnain</dc:creator>
      <pubDate>Sun, 19 Apr 2026 15:31:31 +0000</pubDate>
      <link>https://dev.to/mzunain/ecomind-ai-powered-carbon-footprint-analyzer-using-google-gemini-2p7h</link>
      <guid>https://dev.to/mzunain/ecomind-ai-powered-carbon-footprint-analyzer-using-google-gemini-2p7h</guid>
      <description>&lt;p&gt;&lt;em&gt;This is a submission for &lt;a href="https://dev.to/challenges/weekend-2026-04-16"&gt;Weekend Challenge: Earth Day Edition&lt;/a&gt;&lt;/em&gt;&lt;/p&gt;

&lt;h2&gt;
  
  
  What I Built
&lt;/h2&gt;

&lt;p&gt;EcoMind is a Next.js web application powered by the Google Gemini API that helps users understand and reduce their personal carbon footprint. You describe your daily habits — commute, diet, home energy use, shopping patterns — and Gemini analyzes your lifestyle, estimates your CO₂ impact, and returns a personalized action plan with concrete steps ranked by impact.&lt;/p&gt;

&lt;p&gt;The goal was to make environmental awareness feel personal and actionable rather than abstract. Most carbon calculators spit out a number and leave you wondering "so what?" EcoMind closes that gap by combining Gemini's reasoning capabilities with structured output parsing to give context-aware recommendations tailored to your actual situation.&lt;/p&gt;

&lt;p&gt;Key features:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;
&lt;strong&gt;Conversational input&lt;/strong&gt;: Describe your habits in plain language — no rigid forms&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Gemini-powered analysis&lt;/strong&gt;: Uses &lt;code&gt;gemini-1.5-flash&lt;/code&gt; to parse lifestyle data, estimate emissions, and generate recommendations&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Structured JSON output&lt;/strong&gt;: Forced JSON schema response from Gemini for reliable rendering&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Actionable breakdown&lt;/strong&gt;: Each recommendation shows estimated CO₂ savings per year&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Finland-aware context&lt;/strong&gt;: Recommendations factor in northern European energy mix and public transit infrastructure&lt;/li&gt;
&lt;/ul&gt;

&lt;h2&gt;
  
  
  Demo
&lt;/h2&gt;

&lt;p&gt;🔗 &lt;strong&gt;Live demo&lt;/strong&gt;: &lt;a href="https://ecomind-coral.vercel.app" rel="noopener noreferrer"&gt;https://ecomind-coral.vercel.app&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;You can also run it locally — instructions in the README.&lt;/p&gt;

&lt;h2&gt;
  
  
  Code
&lt;/h2&gt;


&lt;div class="ltag-github-readme-tag"&gt;
  &lt;div class="readme-overview"&gt;
    &lt;h2&gt;
      &lt;img src="https://assets.dev.to/assets/github-logo-5a155e1f9a670af7944dd5e12375bc76ed542ea80224905ecaf878b9157cdefc.svg" alt="GitHub logo"&gt;
      &lt;a href="https://github.com/mzunain" rel="noopener noreferrer"&gt;
        mzunain
      &lt;/a&gt; / &lt;a href="https://github.com/mzunain/ecomind" rel="noopener noreferrer"&gt;
        ecomind
      &lt;/a&gt;
    &lt;/h2&gt;
    &lt;h3&gt;
      AI-powered carbon footprint analyzer using Google Gemini - DEV Weekend Challenge Earth Day Edition
    &lt;/h3&gt;
  &lt;/div&gt;
  &lt;div class="ltag-github-body"&gt;
    
&lt;div id="readme" class="md"&gt;
&lt;div class="markdown-heading"&gt;
&lt;h1 class="heading-element"&gt;EcoMind 🌍&lt;/h1&gt;
&lt;/div&gt;
&lt;p&gt;&lt;strong&gt;AI-powered carbon footprint analyzer using Google Gemini&lt;/strong&gt;&lt;/p&gt;
&lt;p&gt;Built for the &lt;a href="https://dev.to/challenges/weekend-2026-04-16" rel="nofollow"&gt;DEV Weekend Challenge: Earth Day Edition&lt;/a&gt;&lt;/p&gt;
&lt;div class="markdown-heading"&gt;
&lt;h2 class="heading-element"&gt;Live Demo&lt;/h2&gt;
&lt;/div&gt;
&lt;p&gt;🔗 &lt;a href="https://ecomind-gamma.vercel.app" rel="nofollow noopener noreferrer"&gt;https://ecomind-gamma.vercel.app&lt;/a&gt;&lt;/p&gt;
&lt;div class="markdown-heading"&gt;
&lt;h2 class="heading-element"&gt;What It Does&lt;/h2&gt;
&lt;/div&gt;
&lt;p&gt;EcoMind lets you describe your daily lifestyle in plain language and returns a personalised carbon footprint analysis powered by the Google Gemini API. No rigid forms — just describe your commute, diet, home heating, and travel habits, and get back:&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;
&lt;strong&gt;Estimated annual CO₂&lt;/strong&gt; in kg&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Sustainability score&lt;/strong&gt; (1–10)&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Top 5 reduction actions&lt;/strong&gt; ranked by impact with CO₂ savings per year&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Regional context&lt;/strong&gt; — advice tailored to your location (Finland-aware by default)&lt;/li&gt;
&lt;/ul&gt;
&lt;div class="markdown-heading"&gt;
&lt;h2 class="heading-element"&gt;Tech Stack&lt;/h2&gt;

&lt;/div&gt;
&lt;ul&gt;
&lt;li&gt;
&lt;strong&gt;Next.js 14&lt;/strong&gt; (App Router) + TypeScript&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Google Gemini API&lt;/strong&gt; (&lt;code&gt;gemini-1.5-flash&lt;/code&gt;) via &lt;code&gt;@google/generative-ai&lt;/code&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Structured JSON output&lt;/strong&gt; using Gemini's &lt;code&gt;responseSchema&lt;/code&gt; feature&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Tailwind CSS&lt;/strong&gt; for styling&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Vercel&lt;/strong&gt; for deployment&lt;/li&gt;
&lt;/ul&gt;
&lt;div class="markdown-heading"&gt;
&lt;h2 class="heading-element"&gt;Getting Started&lt;/h2&gt;

&lt;/div&gt;
&lt;div class="highlight highlight-source-shell notranslate position-relative overflow-auto js-code-highlight"&gt;
&lt;pre&gt;&lt;span class="pl-c"&gt;&lt;span class="pl-c"&gt;#&lt;/span&gt; Clone the repo&lt;/span&gt;
git clone https://github.com/mzunain/ecomind.git
&lt;span class="pl-c1"&gt;cd&lt;/span&gt; ecomind
&lt;span class="pl-c"&gt;&lt;span class="pl-c"&gt;#&lt;/span&gt; Install dependencies&lt;/span&gt;
npm install

&lt;span class="pl-c"&gt;&lt;span class="pl-c"&gt;#&lt;/span&gt; Add your Gemini API key&lt;/span&gt;
&lt;span class="pl-c1"&gt;echo&lt;/span&gt; &lt;span class="pl-s"&gt;&lt;span class="pl-pds"&gt;'&lt;/span&gt;&lt;/span&gt;&lt;/pre&gt;…
&lt;/div&gt;
&lt;/div&gt;
  &lt;/div&gt;
  &lt;div class="gh-btn-container"&gt;&lt;a class="gh-btn" href="https://github.com/mzunain/ecomind" rel="noopener noreferrer"&gt;View on GitHub&lt;/a&gt;&lt;/div&gt;
&lt;/div&gt;


&lt;h2&gt;
  
  
  How I Built It
&lt;/h2&gt;

&lt;p&gt;&lt;strong&gt;Tech Stack:&lt;/strong&gt;&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;
&lt;strong&gt;Next.js 14&lt;/strong&gt; (App Router) + TypeScript&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Google Gemini API&lt;/strong&gt; (&lt;code&gt;gemini-1.5-flash&lt;/code&gt;) via &lt;code&gt;@google/generative-ai&lt;/code&gt; SDK&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Tailwind CSS&lt;/strong&gt; for styling&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Vercel&lt;/strong&gt; for deployment&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;&lt;strong&gt;How Gemini is integrated:&lt;/strong&gt;&lt;/p&gt;

&lt;p&gt;The core of EcoMind is a single API route (&lt;code&gt;/api/analyze&lt;/code&gt;) that takes a user's lifestyle description and feeds it to Gemini with a carefully engineered system prompt:&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight typescript"&gt;&lt;code&gt;&lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;model&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nx"&gt;genAI&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;getGenerativeModel&lt;/span&gt;&lt;span class="p"&gt;({&lt;/span&gt;
  &lt;span class="na"&gt;model&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="dl"&gt;"&lt;/span&gt;&lt;span class="s2"&gt;gemini-1.5-flash&lt;/span&gt;&lt;span class="dl"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
  &lt;span class="na"&gt;generationConfig&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
    &lt;span class="na"&gt;responseMimeType&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="dl"&gt;"&lt;/span&gt;&lt;span class="s2"&gt;application/json&lt;/span&gt;&lt;span class="dl"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="na"&gt;responseSchema&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;ecoAnalysisSchema&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
  &lt;span class="p"&gt;},&lt;/span&gt;
&lt;span class="p"&gt;});&lt;/span&gt;

&lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;prompt&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="s2"&gt;`
You are an environmental impact expert. Analyze the following lifestyle description
and return a structured carbon footprint assessment.
Lifestyle: &lt;/span&gt;&lt;span class="p"&gt;${&lt;/span&gt;&lt;span class="nx"&gt;userInput&lt;/span&gt;&lt;span class="p"&gt;}&lt;/span&gt;&lt;span class="s2"&gt;
Provide: estimated annual CO2 in kg, top 5 reduction actions with CO2 savings,
and an overall sustainability score (1-10).
`&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;

&lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;result&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nx"&gt;model&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;generateContent&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;prompt&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;I used Gemini's &lt;strong&gt;structured output / JSON schema enforcement&lt;/strong&gt; feature to guarantee parseable responses — this was the most interesting technical decision. Instead of prompt-engineering my way to consistent JSON, I defined a Zod-like schema and passed it as &lt;code&gt;responseSchema&lt;/code&gt;. This made the frontend rendering completely reliable.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Structured output schema:&lt;/strong&gt;&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight typescript"&gt;&lt;code&gt;&lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;ecoAnalysisSchema&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="na"&gt;type&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="dl"&gt;"&lt;/span&gt;&lt;span class="s2"&gt;object&lt;/span&gt;&lt;span class="dl"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
  &lt;span class="na"&gt;properties&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
    &lt;span class="na"&gt;annualCO2Kg&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt; &lt;span class="na"&gt;type&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="dl"&gt;"&lt;/span&gt;&lt;span class="s2"&gt;number&lt;/span&gt;&lt;span class="dl"&gt;"&lt;/span&gt; &lt;span class="p"&gt;},&lt;/span&gt;
    &lt;span class="na"&gt;sustainabilityScore&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt; &lt;span class="na"&gt;type&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="dl"&gt;"&lt;/span&gt;&lt;span class="s2"&gt;number&lt;/span&gt;&lt;span class="dl"&gt;"&lt;/span&gt; &lt;span class="p"&gt;},&lt;/span&gt;
    &lt;span class="na"&gt;topActions&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
      &lt;span class="na"&gt;type&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="dl"&gt;"&lt;/span&gt;&lt;span class="s2"&gt;array&lt;/span&gt;&lt;span class="dl"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
      &lt;span class="na"&gt;items&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
        &lt;span class="na"&gt;type&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="dl"&gt;"&lt;/span&gt;&lt;span class="s2"&gt;object&lt;/span&gt;&lt;span class="dl"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
        &lt;span class="na"&gt;properties&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
          &lt;span class="na"&gt;action&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt; &lt;span class="na"&gt;type&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="dl"&gt;"&lt;/span&gt;&lt;span class="s2"&gt;string&lt;/span&gt;&lt;span class="dl"&gt;"&lt;/span&gt; &lt;span class="p"&gt;},&lt;/span&gt;
          &lt;span class="na"&gt;annualSavingKg&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt; &lt;span class="na"&gt;type&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="dl"&gt;"&lt;/span&gt;&lt;span class="s2"&gt;number&lt;/span&gt;&lt;span class="dl"&gt;"&lt;/span&gt; &lt;span class="p"&gt;},&lt;/span&gt;
          &lt;span class="na"&gt;difficulty&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt; &lt;span class="na"&gt;type&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="dl"&gt;"&lt;/span&gt;&lt;span class="s2"&gt;string&lt;/span&gt;&lt;span class="dl"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="na"&gt;enum&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="dl"&gt;"&lt;/span&gt;&lt;span class="s2"&gt;easy&lt;/span&gt;&lt;span class="dl"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="dl"&gt;"&lt;/span&gt;&lt;span class="s2"&gt;medium&lt;/span&gt;&lt;span class="dl"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="dl"&gt;"&lt;/span&gt;&lt;span class="s2"&gt;hard&lt;/span&gt;&lt;span class="dl"&gt;"&lt;/span&gt;&lt;span class="p"&gt;]&lt;/span&gt; &lt;span class="p"&gt;},&lt;/span&gt;
          &lt;span class="na"&gt;category&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt; &lt;span class="na"&gt;type&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="dl"&gt;"&lt;/span&gt;&lt;span class="s2"&gt;string&lt;/span&gt;&lt;span class="dl"&gt;"&lt;/span&gt; &lt;span class="p"&gt;},&lt;/span&gt;
        &lt;span class="p"&gt;},&lt;/span&gt;
      &lt;span class="p"&gt;},&lt;/span&gt;
    &lt;span class="p"&gt;},&lt;/span&gt;
    &lt;span class="na"&gt;summary&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt; &lt;span class="na"&gt;type&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="dl"&gt;"&lt;/span&gt;&lt;span class="s2"&gt;string&lt;/span&gt;&lt;span class="dl"&gt;"&lt;/span&gt; &lt;span class="p"&gt;},&lt;/span&gt;
  &lt;span class="p"&gt;},&lt;/span&gt;
&lt;span class="p"&gt;};&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;&lt;strong&gt;Why this matters for Earth Day:&lt;/strong&gt;&lt;br&gt;
Carbon literacy is one of the biggest blockers to individual climate action. People don't act because the data feels distant. By making it conversational and immediate — type a few sentences about your week and get back a personalized plan — EcoMind lowers that barrier significantly.&lt;/p&gt;

&lt;p&gt;I also added a Finland-specific context layer because I'm based in Turku and the sustainability advice for someone in the Nordics (excellent public transit, clean energy grid, cold climate heating costs) is genuinely different from generic global advice.&lt;/p&gt;

&lt;h2&gt;
  
  
  Prize Categories
&lt;/h2&gt;

&lt;p&gt;&lt;strong&gt;Best Use of Google Gemini&lt;/strong&gt; — Gemini is the core engine of EcoMind. The project uses:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;
&lt;code&gt;gemini-1.5-flash&lt;/code&gt; for fast, cost-effective inference&lt;/li&gt;
&lt;li&gt;Structured JSON output with response schema for reliable parsing&lt;/li&gt;
&lt;li&gt;A multi-part system prompt that grounds Gemini in environmental science context&lt;/li&gt;
&lt;li&gt;Dynamic prompt construction based on user input&lt;/li&gt;
&lt;/ul&gt;

</description>
      <category>devchallenge</category>
      <category>weekendchallenge</category>
    </item>
    <item>
      <title>5 Next.js Performance Patterns That Cut Our Load Time by 60%</title>
      <dc:creator>Muhammad Zulqarnain</dc:creator>
      <pubDate>Mon, 13 Apr 2026 06:00:00 +0000</pubDate>
      <link>https://dev.to/mzunain/5-nextjs-performance-patterns-that-cut-our-load-time-by-60-11gk</link>
      <guid>https://dev.to/mzunain/5-nextjs-performance-patterns-that-cut-our-load-time-by-60-11gk</guid>
      <description>&lt;p&gt;Performance is a feature.&lt;/p&gt;

&lt;p&gt;At Quran.com, we serve 50 million users across 200+ countries on varying networks. A 1-second delay for users on slow connections meant people gave up. Every millisecond mattered.&lt;/p&gt;

&lt;p&gt;Here are 5 patterns we implemented that cut load time by 60%. With code.&lt;/p&gt;

&lt;h2&gt;
  
  
  Pattern 1: Font Subsetting
&lt;/h2&gt;

&lt;p&gt;&lt;strong&gt;The Problem:&lt;/strong&gt; We served the full Amiri Arabic font (600KB) on every page. Users on 3G in Southeast Asia waited 4 seconds for the first render.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;The Pattern:&lt;/strong&gt; Subset fonts to only the Unicode ranges actually used on each page.&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight javascript"&gt;&lt;code&gt;&lt;span class="p"&gt;@&lt;/span&gt;&lt;span class="nd"&gt;font&lt;/span&gt;&lt;span class="o"&gt;-&lt;/span&gt;&lt;span class="nx"&gt;face&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="nx"&gt;font&lt;/span&gt;&lt;span class="o"&gt;-&lt;/span&gt;&lt;span class="nx"&gt;family&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;Amiri&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
  &lt;span class="nl"&gt;src&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nf"&gt;url&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;/fonts/amiri-subset.woff2&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="nf"&gt;format&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;woff2&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
  &lt;span class="nx"&gt;font&lt;/span&gt;&lt;span class="o"&gt;-&lt;/span&gt;&lt;span class="nx"&gt;display&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;swap&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
  &lt;span class="nx"&gt;unicode&lt;/span&gt;&lt;span class="o"&gt;-&lt;/span&gt;&lt;span class="nx"&gt;range&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;U&lt;/span&gt;&lt;span class="o"&gt;+&lt;/span&gt;&lt;span class="mi"&gt;0600&lt;/span&gt;&lt;span class="o"&gt;-&lt;/span&gt;&lt;span class="mi"&gt;06&lt;/span&gt;&lt;span class="nx"&gt;FF&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;U&lt;/span&gt;&lt;span class="o"&gt;+&lt;/span&gt;&lt;span class="mi"&gt;200&lt;/span&gt;&lt;span class="nx"&gt;C&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;U&lt;/span&gt;&lt;span class="o"&gt;+&lt;/span&gt;&lt;span class="mi"&gt;200&lt;/span&gt;&lt;span class="nx"&gt;D&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;&lt;strong&gt;The Result:&lt;/strong&gt; Font size: 600KB → 48KB. Load time improvement: 40% on mobile.&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight shell"&gt;&lt;code&gt;pyftsubset Amiri-Regular.ttf &lt;span class="nt"&gt;--unicodes&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;0600-06FF,200C,200D &lt;span class="nt"&gt;--output-file&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;amiri-subset.woff2
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;h2&gt;
  
  
  Pattern 2: Incremental Static Regeneration (ISR)
&lt;/h2&gt;

&lt;p&gt;&lt;strong&gt;The Problem:&lt;/strong&gt; 114 surahs with 200+ translations. Full site generation: 45 minutes. Deploys blocked.&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight typescript"&gt;&lt;code&gt;&lt;span class="k"&gt;export&lt;/span&gt; &lt;span class="k"&gt;async&lt;/span&gt; &lt;span class="kd"&gt;function&lt;/span&gt; &lt;span class="nf"&gt;getStaticProps&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;context&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;data&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nf"&gt;fetchSurahData&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;context&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;params&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;surah&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
  &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
    &lt;span class="na"&gt;props&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt; &lt;span class="nx"&gt;data&lt;/span&gt; &lt;span class="p"&gt;},&lt;/span&gt;
    &lt;span class="na"&gt;revalidate&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="mi"&gt;3600&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
  &lt;span class="p"&gt;};&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;

&lt;span class="k"&gt;export&lt;/span&gt; &lt;span class="k"&gt;async&lt;/span&gt; &lt;span class="kd"&gt;function&lt;/span&gt; &lt;span class="nf"&gt;getStaticPaths&lt;/span&gt;&lt;span class="p"&gt;()&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;paths&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nb"&gt;Array&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="k"&gt;from&lt;/span&gt;&lt;span class="p"&gt;({&lt;/span&gt; &lt;span class="na"&gt;length&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="mi"&gt;114&lt;/span&gt; &lt;span class="p"&gt;},&lt;/span&gt; &lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;_&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;i&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="p"&gt;({&lt;/span&gt;
    &lt;span class="na"&gt;params&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt; &lt;span class="na"&gt;surah&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nc"&gt;String&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;i&lt;/span&gt; &lt;span class="o"&gt;+&lt;/span&gt; &lt;span class="mi"&gt;1&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;},&lt;/span&gt;
  &lt;span class="p"&gt;}));&lt;/span&gt;
  &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt; &lt;span class="nx"&gt;paths&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="na"&gt;fallback&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;blocking&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt; &lt;span class="p"&gt;};&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;On-demand revalidation:&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight typescript"&gt;&lt;code&gt;&lt;span class="k"&gt;export&lt;/span&gt; &lt;span class="k"&gt;default&lt;/span&gt; &lt;span class="k"&gt;async&lt;/span&gt; &lt;span class="kd"&gt;function&lt;/span&gt; &lt;span class="nf"&gt;handler&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;req&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;res&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="k"&gt;if &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;req&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;query&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;secret&lt;/span&gt; &lt;span class="o"&gt;!==&lt;/span&gt; &lt;span class="nx"&gt;process&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;env&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;REVALIDATE_SECRET&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
    &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="nx"&gt;res&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;status&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="mi"&gt;401&lt;/span&gt;&lt;span class="p"&gt;).&lt;/span&gt;&lt;span class="nf"&gt;json&lt;/span&gt;&lt;span class="p"&gt;({&lt;/span&gt; &lt;span class="na"&gt;message&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;Invalid token&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt; &lt;span class="p"&gt;});&lt;/span&gt;
  &lt;span class="p"&gt;}&lt;/span&gt;
  &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nx"&gt;res&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;revalidate&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="s2"&gt;`/surah/&lt;/span&gt;&lt;span class="p"&gt;${&lt;/span&gt;&lt;span class="nx"&gt;req&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;query&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;surah&lt;/span&gt;&lt;span class="p"&gt;}&lt;/span&gt;&lt;span class="s2"&gt;`&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
  &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="nx"&gt;res&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;json&lt;/span&gt;&lt;span class="p"&gt;({&lt;/span&gt; &lt;span class="na"&gt;revalidated&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kc"&gt;true&lt;/span&gt; &lt;span class="p"&gt;});&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;&lt;strong&gt;The Result:&lt;/strong&gt; Pages cached on CDN, served in &amp;lt;100ms globally.&lt;/p&gt;

&lt;h2&gt;
  
  
  Pattern 3: Dynamic Imports for Heavy Components
&lt;/h2&gt;

&lt;p&gt;&lt;strong&gt;The Problem:&lt;/strong&gt; Quran player: 350KB bundled. Every user downloads it even if they never use audio.&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight typescript"&gt;&lt;code&gt;&lt;span class="k"&gt;import&lt;/span&gt; &lt;span class="nx"&gt;dynamic&lt;/span&gt; &lt;span class="k"&gt;from&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;next/dynamic&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;

&lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;QuranPlayer&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nf"&gt;dynamic&lt;/span&gt;&lt;span class="p"&gt;(()&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="k"&gt;import&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;@/components/QuranPlayer&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;),&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="na"&gt;loading&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;()&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="o"&gt;&amp;lt;&lt;/span&gt;&lt;span class="nx"&gt;div&lt;/span&gt;&lt;span class="o"&gt;&amp;gt;&lt;/span&gt;&lt;span class="nx"&gt;Loading&lt;/span&gt; &lt;span class="nx"&gt;player&lt;/span&gt;&lt;span class="p"&gt;...&lt;/span&gt;&lt;span class="o"&gt;&amp;lt;&lt;/span&gt;&lt;span class="sr"&gt;/div&amp;gt;&lt;/span&gt;&lt;span class="err"&gt;,
&lt;/span&gt;  &lt;span class="na"&gt;ssr&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kc"&gt;false&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
&lt;span class="p"&gt;});&lt;/span&gt;

&lt;span class="k"&gt;export&lt;/span&gt; &lt;span class="k"&gt;default&lt;/span&gt; &lt;span class="kd"&gt;function&lt;/span&gt; &lt;span class="nf"&gt;SurahPage&lt;/span&gt;&lt;span class="p"&gt;({&lt;/span&gt; &lt;span class="nx"&gt;data&lt;/span&gt; &lt;span class="p"&gt;})&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="nx"&gt;showPlayer&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;setShowPlayer&lt;/span&gt;&lt;span class="p"&gt;]&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nf"&gt;useState&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="kc"&gt;false&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
  &lt;span class="k"&gt;return &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
    &lt;span class="o"&gt;&amp;lt;&lt;/span&gt;&lt;span class="nx"&gt;div&lt;/span&gt;&lt;span class="o"&gt;&amp;gt;&lt;/span&gt;
      &lt;span class="o"&gt;&amp;lt;&lt;/span&gt;&lt;span class="nx"&gt;SurahText&lt;/span&gt; &lt;span class="nx"&gt;data&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="p"&gt;{&lt;/span&gt;&lt;span class="nx"&gt;data&lt;/span&gt;&lt;span class="p"&gt;}&lt;/span&gt; &lt;span class="sr"&gt;/&lt;/span&gt;&lt;span class="err"&gt;&amp;gt;
&lt;/span&gt;      &lt;span class="p"&gt;{&lt;/span&gt;&lt;span class="nx"&gt;showPlayer&lt;/span&gt; &lt;span class="o"&gt;&amp;amp;&amp;amp;&lt;/span&gt; &lt;span class="o"&gt;&amp;lt;&lt;/span&gt;&lt;span class="nx"&gt;QuranPlayer&lt;/span&gt; &lt;span class="nx"&gt;surah&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="p"&gt;{&lt;/span&gt;&lt;span class="nx"&gt;data&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;surah&lt;/span&gt;&lt;span class="p"&gt;}&lt;/span&gt; &lt;span class="sr"&gt;/&amp;gt;&lt;/span&gt;&lt;span class="err"&gt;}
&lt;/span&gt;      &lt;span class="o"&gt;&amp;lt;&lt;/span&gt;&lt;span class="nx"&gt;button&lt;/span&gt; &lt;span class="nx"&gt;onClick&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="p"&gt;{()&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="nf"&gt;setShowPlayer&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="kc"&gt;true&lt;/span&gt;&lt;span class="p"&gt;)}&lt;/span&gt;&lt;span class="o"&gt;&amp;gt;&lt;/span&gt;&lt;span class="nx"&gt;Play&lt;/span&gt; &lt;span class="nx"&gt;Audio&lt;/span&gt;&lt;span class="o"&gt;&amp;lt;&lt;/span&gt;&lt;span class="sr"&gt;/button&lt;/span&gt;&lt;span class="err"&gt;&amp;gt;
&lt;/span&gt;    &lt;span class="o"&gt;&amp;lt;&lt;/span&gt;&lt;span class="sr"&gt;/div&lt;/span&gt;&lt;span class="err"&gt;&amp;gt;
&lt;/span&gt;  &lt;span class="p"&gt;);&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;&lt;strong&gt;Result:&lt;/strong&gt; 68% of users never click play. They saved the download entirely.&lt;/p&gt;

&lt;h2&gt;
  
  
  Pattern 4: Image Optimization Pipeline
&lt;/h2&gt;

&lt;p&gt;&lt;strong&gt;The Problem:&lt;/strong&gt; Users upload 3-5MB JPEGs displayed at 120x120px.&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight typescript"&gt;&lt;code&gt;&lt;span class="k"&gt;import&lt;/span&gt; &lt;span class="nx"&gt;sharp&lt;/span&gt; &lt;span class="k"&gt;from&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;sharp&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;

&lt;span class="k"&gt;export&lt;/span&gt; &lt;span class="k"&gt;default&lt;/span&gt; &lt;span class="k"&gt;async&lt;/span&gt; &lt;span class="kd"&gt;function&lt;/span&gt; &lt;span class="nf"&gt;handler&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;req&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;res&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;buffer&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nx"&gt;Buffer&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="k"&gt;from&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;req&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;body&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;webpBuffer&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nf"&gt;sharp&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;buffer&lt;/span&gt;&lt;span class="p"&gt;).&lt;/span&gt;&lt;span class="nf"&gt;resize&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="mi"&gt;400&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="mi"&gt;400&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt; &lt;span class="na"&gt;fit&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;cover&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt; &lt;span class="p"&gt;}).&lt;/span&gt;&lt;span class="nf"&gt;webp&lt;/span&gt;&lt;span class="p"&gt;({&lt;/span&gt; &lt;span class="na"&gt;quality&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="mi"&gt;80&lt;/span&gt; &lt;span class="p"&gt;}).&lt;/span&gt;&lt;span class="nf"&gt;toBuffer&lt;/span&gt;&lt;span class="p"&gt;();&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;pngBuffer&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nf"&gt;sharp&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;buffer&lt;/span&gt;&lt;span class="p"&gt;).&lt;/span&gt;&lt;span class="nf"&gt;resize&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="mi"&gt;400&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="mi"&gt;400&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt; &lt;span class="na"&gt;fit&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;cover&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt; &lt;span class="p"&gt;}).&lt;/span&gt;&lt;span class="nf"&gt;png&lt;/span&gt;&lt;span class="p"&gt;({&lt;/span&gt; &lt;span class="na"&gt;quality&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="mi"&gt;80&lt;/span&gt; &lt;span class="p"&gt;}).&lt;/span&gt;&lt;span class="nf"&gt;toBuffer&lt;/span&gt;&lt;span class="p"&gt;();&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;webpUrl&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nf"&gt;uploadToS3&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;webpBuffer&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;image.webp&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;pngUrl&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nf"&gt;uploadToS3&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;pngBuffer&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;image.png&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
  &lt;span class="nx"&gt;res&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;json&lt;/span&gt;&lt;span class="p"&gt;({&lt;/span&gt; &lt;span class="nx"&gt;webpUrl&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;pngUrl&lt;/span&gt; &lt;span class="p"&gt;});&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;





&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight jsx"&gt;&lt;code&gt;&lt;span class="p"&gt;&amp;lt;&lt;/span&gt;&lt;span class="nt"&gt;picture&lt;/span&gt;&lt;span class="p"&gt;&amp;gt;&lt;/span&gt;
  &lt;span class="p"&gt;&amp;lt;&lt;/span&gt;&lt;span class="nt"&gt;source&lt;/span&gt; &lt;span class="na"&gt;srcSet&lt;/span&gt;&lt;span class="p"&gt;=&lt;/span&gt;&lt;span class="si"&gt;{&lt;/span&gt;&lt;span class="nx"&gt;image&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;webpUrl&lt;/span&gt;&lt;span class="si"&gt;}&lt;/span&gt; &lt;span class="na"&gt;type&lt;/span&gt;&lt;span class="p"&gt;=&lt;/span&gt;&lt;span class="s"&gt;"image/webp"&lt;/span&gt; &lt;span class="p"&gt;/&amp;gt;&lt;/span&gt;
  &lt;span class="p"&gt;&amp;lt;&lt;/span&gt;&lt;span class="nt"&gt;img&lt;/span&gt; &lt;span class="na"&gt;src&lt;/span&gt;&lt;span class="p"&gt;=&lt;/span&gt;&lt;span class="si"&gt;{&lt;/span&gt;&lt;span class="nx"&gt;image&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;pngUrl&lt;/span&gt;&lt;span class="si"&gt;}&lt;/span&gt; &lt;span class="na"&gt;alt&lt;/span&gt;&lt;span class="p"&gt;=&lt;/span&gt;&lt;span class="s"&gt;"User avatar"&lt;/span&gt; &lt;span class="na"&gt;width&lt;/span&gt;&lt;span class="p"&gt;=&lt;/span&gt;&lt;span class="si"&gt;{&lt;/span&gt;&lt;span class="mi"&gt;120&lt;/span&gt;&lt;span class="si"&gt;}&lt;/span&gt; &lt;span class="na"&gt;height&lt;/span&gt;&lt;span class="p"&gt;=&lt;/span&gt;&lt;span class="si"&gt;{&lt;/span&gt;&lt;span class="mi"&gt;120&lt;/span&gt;&lt;span class="si"&gt;}&lt;/span&gt; &lt;span class="p"&gt;/&amp;gt;&lt;/span&gt;
&lt;span class="p"&gt;&amp;lt;/&lt;/span&gt;&lt;span class="nt"&gt;picture&lt;/span&gt;&lt;span class="p"&gt;&amp;gt;&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;&lt;strong&gt;Result:&lt;/strong&gt; Images: 3-5MB → 45-80KB.&lt;/p&gt;

&lt;h2&gt;
  
  
  Pattern 5: Intelligent Prefetching
&lt;/h2&gt;



&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight typescript"&gt;&lt;code&gt;&lt;span class="k"&gt;export&lt;/span&gt; &lt;span class="kd"&gt;function&lt;/span&gt; &lt;span class="nf"&gt;SurahNavigation&lt;/span&gt;&lt;span class="p"&gt;({&lt;/span&gt; &lt;span class="nx"&gt;currentSurah&lt;/span&gt; &lt;span class="p"&gt;})&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;router&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nf"&gt;useRouter&lt;/span&gt;&lt;span class="p"&gt;();&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;handlePrefetch&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="p"&gt;()&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="nx"&gt;router&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;prefetch&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="s2"&gt;`/surah/&lt;/span&gt;&lt;span class="p"&gt;${&lt;/span&gt;&lt;span class="nx"&gt;currentSurah&lt;/span&gt; &lt;span class="o"&gt;+&lt;/span&gt; &lt;span class="mi"&gt;1&lt;/span&gt;&lt;span class="p"&gt;}&lt;/span&gt;&lt;span class="s2"&gt;`&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
  &lt;span class="k"&gt;return &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
    &lt;span class="o"&gt;&amp;lt;&lt;/span&gt;&lt;span class="nx"&gt;Link&lt;/span&gt; &lt;span class="nx"&gt;href&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="p"&gt;{&lt;/span&gt;&lt;span class="s2"&gt;`/surah/&lt;/span&gt;&lt;span class="p"&gt;${&lt;/span&gt;&lt;span class="nx"&gt;currentSurah&lt;/span&gt; &lt;span class="o"&gt;+&lt;/span&gt; &lt;span class="mi"&gt;1&lt;/span&gt;&lt;span class="p"&gt;}&lt;/span&gt;&lt;span class="s2"&gt;`&lt;/span&gt;&lt;span class="p"&gt;}&lt;/span&gt; &lt;span class="nx"&gt;onMouseEnter&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="p"&gt;{&lt;/span&gt;&lt;span class="nx"&gt;handlePrefetch&lt;/span&gt;&lt;span class="p"&gt;}&lt;/span&gt; &lt;span class="nx"&gt;onTouchStart&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="p"&gt;{&lt;/span&gt;&lt;span class="nx"&gt;handlePrefetch&lt;/span&gt;&lt;span class="p"&gt;}&lt;/span&gt;&lt;span class="o"&gt;&amp;gt;&lt;/span&gt;
      &lt;span class="nx"&gt;Next&lt;/span&gt; &lt;span class="nx"&gt;Surah&lt;/span&gt;
    &lt;span class="o"&gt;&amp;lt;&lt;/span&gt;&lt;span class="sr"&gt;/Link&lt;/span&gt;&lt;span class="err"&gt;&amp;gt;
&lt;/span&gt;  &lt;span class="p"&gt;);&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;&lt;strong&gt;Result:&lt;/strong&gt; Users feel instant navigation. Pages load in &amp;lt;200ms instead of 1-2s.&lt;/p&gt;

&lt;h2&gt;
  
  
  Before/After
&lt;/h2&gt;

&lt;div class="table-wrapper-paragraph"&gt;&lt;table&gt;
&lt;thead&gt;
&lt;tr&gt;
&lt;th&gt;Metric&lt;/th&gt;
&lt;th&gt;Before&lt;/th&gt;
&lt;th&gt;After&lt;/th&gt;
&lt;th&gt;Improvement&lt;/th&gt;
&lt;/tr&gt;
&lt;/thead&gt;
&lt;tbody&gt;
&lt;tr&gt;
&lt;td&gt;First Contentful Paint&lt;/td&gt;
&lt;td&gt;3.2s&lt;/td&gt;
&lt;td&gt;1.8s&lt;/td&gt;
&lt;td&gt;44%&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Largest Contentful Paint&lt;/td&gt;
&lt;td&gt;4.8s&lt;/td&gt;
&lt;td&gt;1.9s&lt;/td&gt;
&lt;td&gt;60%&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Time to Interactive&lt;/td&gt;
&lt;td&gt;6.2s&lt;/td&gt;
&lt;td&gt;2.4s&lt;/td&gt;
&lt;td&gt;61%&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Page Size&lt;/td&gt;
&lt;td&gt;2.8MB&lt;/td&gt;
&lt;td&gt;1.1MB&lt;/td&gt;
&lt;td&gt;61%&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Lighthouse Score&lt;/td&gt;
&lt;td&gt;42&lt;/td&gt;
&lt;td&gt;86&lt;/td&gt;
&lt;td&gt;+44 pts&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;
&lt;/table&gt;&lt;/div&gt;

&lt;p&gt;Performance work isn't about micro-optimizations. It's about architectural decisions that compound.&lt;/p&gt;

&lt;p&gt;Find me at &lt;a href="https://zunain.com" rel="noopener noreferrer"&gt;zunain.com&lt;/a&gt; if you're optimizing at scale.&lt;/p&gt;

</description>
      <category>performance</category>
      <category>webdev</category>
      <category>nextjs</category>
      <category>javascript</category>
    </item>
  </channel>
</rss>
