<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/">
  <channel>
    <title>DEV Community: Muhammad Zulqarnain</title>
    <description>The latest articles on DEV Community by Muhammad Zulqarnain (@mzunain).</description>
    <link>https://dev.to/mzunain</link>
    <image>
      <url>https://media2.dev.to/dynamic/image/width=90,height=90,fit=cover,gravity=auto,format=auto/https:%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Fuser%2Fprofile_image%2F2895711%2Fb8f9886a-a886-49c9-a751-c182b236abef.jpeg</url>
      <title>DEV Community: Muhammad Zulqarnain</title>
      <link>https://dev.to/mzunain</link>
    </image>
    <atom:link rel="self" type="application/rss+xml" href="https://dev.to/feed/mzunain"/>
    <language>en</language>
    <item>
      <title>The Node.js Patterns I Wish I Knew 3 Years Ago</title>
      <dc:creator>Muhammad Zulqarnain</dc:creator>
      <pubDate>Fri, 08 May 2026 06:00:00 +0000</pubDate>
      <link>https://dev.to/mzunain/the-nodejs-patterns-i-wish-i-knew-3-years-ago-36oc</link>
      <guid>https://dev.to/mzunain/the-nodejs-patterns-i-wish-i-knew-3-years-ago-36oc</guid>
      <description>&lt;p&gt;Node.js is amazing for shipping fast. It's also amazing at letting you ship broken things fast.&lt;/p&gt;

&lt;p&gt;I've built a lot of broken things. Here are the patterns that make Node.js systems not break.&lt;/p&gt;

&lt;h2&gt;
  
  
  Pattern 1: EventEmitter for Internal Pub/Sub
&lt;/h2&gt;



&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight javascript"&gt;&lt;code&gt;&lt;span class="k"&gt;import&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt; &lt;span class="nx"&gt;EventEmitter&lt;/span&gt; &lt;span class="p"&gt;}&lt;/span&gt; &lt;span class="k"&gt;from&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;events&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
&lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;orderEvents&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="k"&gt;new&lt;/span&gt; &lt;span class="nc"&gt;EventEmitter&lt;/span&gt;&lt;span class="p"&gt;();&lt;/span&gt;

&lt;span class="k"&gt;async&lt;/span&gt; &lt;span class="kd"&gt;function&lt;/span&gt; &lt;span class="nf"&gt;createOrder&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;userId&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;items&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;order&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nx"&gt;db&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;orders&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;create&lt;/span&gt;&lt;span class="p"&gt;({&lt;/span&gt; &lt;span class="nx"&gt;userId&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;items&lt;/span&gt; &lt;span class="p"&gt;});&lt;/span&gt;
  &lt;span class="nx"&gt;orderEvents&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;emit&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;order:created&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;order&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
  &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="nx"&gt;order&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;

&lt;span class="nx"&gt;orderEvents&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;on&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;order:created&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="k"&gt;async &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;order&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nf"&gt;sendConfirmationEmail&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;order&lt;/span&gt;&lt;span class="p"&gt;).&lt;/span&gt;&lt;span class="k"&gt;catch&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;err&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="nx"&gt;logger&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;error&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;Email failed&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;err&lt;/span&gt;&lt;span class="p"&gt;));&lt;/span&gt;
&lt;span class="p"&gt;});&lt;/span&gt;

&lt;span class="nx"&gt;orderEvents&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;on&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;order:created&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="k"&gt;async &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;order&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nf"&gt;logAnalytics&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;order_created&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;order&lt;/span&gt;&lt;span class="p"&gt;).&lt;/span&gt;&lt;span class="k"&gt;catch&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;err&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="nx"&gt;logger&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;error&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;Analytics failed&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;err&lt;/span&gt;&lt;span class="p"&gt;));&lt;/span&gt;
&lt;span class="p"&gt;});&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;&lt;strong&gt;Result:&lt;/strong&gt; Order creation returns in 50ms. Side effects async. If email fails, order still succeeded.&lt;/p&gt;

&lt;h2&gt;
  
  
  Pattern 2: Worker Threads for CPU-Bound Tasks
&lt;/h2&gt;



&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight javascript"&gt;&lt;code&gt;&lt;span class="k"&gt;import&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt; &lt;span class="nx"&gt;Worker&lt;/span&gt; &lt;span class="p"&gt;}&lt;/span&gt; &lt;span class="k"&gt;from&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;worker_threads&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
&lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;workers&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nc"&gt;Array&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;os&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;cpus&lt;/span&gt;&lt;span class="p"&gt;().&lt;/span&gt;&lt;span class="nx"&gt;length&lt;/span&gt;&lt;span class="p"&gt;).&lt;/span&gt;&lt;span class="nf"&gt;fill&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="kc"&gt;null&lt;/span&gt;&lt;span class="p"&gt;).&lt;/span&gt;&lt;span class="nf"&gt;map&lt;/span&gt;&lt;span class="p"&gt;(()&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="k"&gt;new&lt;/span&gt; &lt;span class="nc"&gt;Worker&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;./worker.js&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;));&lt;/span&gt;
&lt;span class="kd"&gt;let&lt;/span&gt; &lt;span class="nx"&gt;currentWorker&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="mi"&gt;0&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;

&lt;span class="kd"&gt;function&lt;/span&gt; &lt;span class="nf"&gt;processImage&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;imageBuffer&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="k"&gt;new&lt;/span&gt; &lt;span class="nc"&gt;Promise&lt;/span&gt;&lt;span class="p"&gt;((&lt;/span&gt;&lt;span class="nx"&gt;resolve&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;reject&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
    &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;worker&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nx"&gt;workers&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="nx"&gt;currentWorker&lt;/span&gt;&lt;span class="p"&gt;];&lt;/span&gt;
    &lt;span class="nx"&gt;currentWorker&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;currentWorker&lt;/span&gt; &lt;span class="o"&gt;+&lt;/span&gt; &lt;span class="mi"&gt;1&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="o"&gt;%&lt;/span&gt; &lt;span class="nx"&gt;workers&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;length&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
    &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;timer&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nf"&gt;setTimeout&lt;/span&gt;&lt;span class="p"&gt;(()&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="nf"&gt;reject&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="k"&gt;new&lt;/span&gt; &lt;span class="nc"&gt;Error&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;Worker timeout&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;)),&lt;/span&gt; &lt;span class="mi"&gt;30000&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
    &lt;span class="nx"&gt;worker&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;once&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;message&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;result&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt; &lt;span class="nf"&gt;clearTimeout&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;timer&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt; &lt;span class="nf"&gt;resolve&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;result&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt; &lt;span class="p"&gt;});&lt;/span&gt;
    &lt;span class="nx"&gt;worker&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;once&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;error&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;reject&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
    &lt;span class="nx"&gt;worker&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;postMessage&lt;/span&gt;&lt;span class="p"&gt;({&lt;/span&gt; &lt;span class="nx"&gt;imageBuffer&lt;/span&gt; &lt;span class="p"&gt;});&lt;/span&gt;
  &lt;span class="p"&gt;});&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;&lt;strong&gt;Result:&lt;/strong&gt; Heavy computation doesn't block the main thread.&lt;/p&gt;

&lt;h2&gt;
  
  
  Pattern 3: Domain-Specific Error Classes
&lt;/h2&gt;



&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight javascript"&gt;&lt;code&gt;&lt;span class="kd"&gt;class&lt;/span&gt; &lt;span class="nc"&gt;AppError&lt;/span&gt; &lt;span class="kd"&gt;extends&lt;/span&gt; &lt;span class="nc"&gt;Error&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="nf"&gt;constructor&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;message&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;statusCode&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;code&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
    &lt;span class="k"&gt;super&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;message&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
    &lt;span class="k"&gt;this&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;statusCode&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nx"&gt;statusCode&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
    &lt;span class="k"&gt;this&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;code&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nx"&gt;code&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
  &lt;span class="p"&gt;}&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;

&lt;span class="kd"&gt;class&lt;/span&gt; &lt;span class="nc"&gt;NotFoundError&lt;/span&gt; &lt;span class="kd"&gt;extends&lt;/span&gt; &lt;span class="nc"&gt;AppError&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="nf"&gt;constructor&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;message&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt; &lt;span class="k"&gt;super&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;message&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="mi"&gt;404&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;NOT_FOUND&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt; &lt;span class="p"&gt;}&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;

&lt;span class="kd"&gt;class&lt;/span&gt; &lt;span class="nc"&gt;ValidationError&lt;/span&gt; &lt;span class="kd"&gt;extends&lt;/span&gt; &lt;span class="nc"&gt;AppError&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="nf"&gt;constructor&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;message&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt; &lt;span class="k"&gt;super&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;message&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="mi"&gt;400&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;VALIDATION_ERROR&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt; &lt;span class="p"&gt;}&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;

&lt;span class="nx"&gt;app&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;get&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;/users/:id&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="k"&gt;async &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;req&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;res&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;next&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="k"&gt;try&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
    &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;user&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nf"&gt;getUser&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;req&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;params&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;id&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
    &lt;span class="nx"&gt;res&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;json&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;user&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
  &lt;span class="p"&gt;}&lt;/span&gt; &lt;span class="k"&gt;catch &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;err&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
    &lt;span class="k"&gt;if &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;err&lt;/span&gt; &lt;span class="k"&gt;instanceof&lt;/span&gt; &lt;span class="nx"&gt;AppError&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
      &lt;span class="nx"&gt;res&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;status&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;err&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;statusCode&lt;/span&gt;&lt;span class="p"&gt;).&lt;/span&gt;&lt;span class="nf"&gt;json&lt;/span&gt;&lt;span class="p"&gt;({&lt;/span&gt; &lt;span class="na"&gt;error&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;err&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;message&lt;/span&gt; &lt;span class="p"&gt;});&lt;/span&gt;
    &lt;span class="p"&gt;}&lt;/span&gt; &lt;span class="k"&gt;else&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
      &lt;span class="nx"&gt;res&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;status&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="mi"&gt;500&lt;/span&gt;&lt;span class="p"&gt;).&lt;/span&gt;&lt;span class="nf"&gt;json&lt;/span&gt;&lt;span class="p"&gt;({&lt;/span&gt; &lt;span class="na"&gt;error&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;Internal server error&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt; &lt;span class="p"&gt;});&lt;/span&gt;
    &lt;span class="p"&gt;}&lt;/span&gt;
  &lt;span class="p"&gt;}&lt;/span&gt;
&lt;span class="p"&gt;});&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;h2&gt;
  
  
  Pattern 4: Connection Pooling
&lt;/h2&gt;



&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight javascript"&gt;&lt;code&gt;&lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;pool&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="k"&gt;new&lt;/span&gt; &lt;span class="nx"&gt;pg&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nc"&gt;Pool&lt;/span&gt;&lt;span class="p"&gt;({&lt;/span&gt;
  &lt;span class="na"&gt;max&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="mi"&gt;20&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
  &lt;span class="na"&gt;idleTimeoutMillis&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="mi"&gt;30000&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
  &lt;span class="na"&gt;connectionTimeoutMillis&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="mi"&gt;2000&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
&lt;span class="p"&gt;});&lt;/span&gt;

&lt;span class="k"&gt;async&lt;/span&gt; &lt;span class="kd"&gt;function&lt;/span&gt; &lt;span class="nf"&gt;getUser&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;id&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;result&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nx"&gt;pool&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;query&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;SELECT * FROM users WHERE id = $1&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="nx"&gt;id&lt;/span&gt;&lt;span class="p"&gt;]);&lt;/span&gt;
  &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="nx"&gt;result&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;rows&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="mi"&gt;0&lt;/span&gt;&lt;span class="p"&gt;];&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;h2&gt;
  
  
  Pattern 5: Circuit Breaker
&lt;/h2&gt;



&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight javascript"&gt;&lt;code&gt;&lt;span class="kd"&gt;class&lt;/span&gt; &lt;span class="nc"&gt;CircuitBreaker&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="nf"&gt;constructor&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;fn&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;options&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="p"&gt;{})&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
    &lt;span class="k"&gt;this&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;fn&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nx"&gt;fn&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
    &lt;span class="k"&gt;this&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;failureThreshold&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nx"&gt;options&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;failureThreshold&lt;/span&gt; &lt;span class="o"&gt;||&lt;/span&gt; &lt;span class="mi"&gt;5&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
    &lt;span class="k"&gt;this&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;resetTimeout&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nx"&gt;options&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;resetTimeout&lt;/span&gt; &lt;span class="o"&gt;||&lt;/span&gt; &lt;span class="mi"&gt;60000&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
    &lt;span class="k"&gt;this&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;state&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;CLOSED&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
    &lt;span class="k"&gt;this&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;failureCount&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="mi"&gt;0&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
    &lt;span class="k"&gt;this&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;lastFailureTime&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="kc"&gt;null&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
  &lt;span class="p"&gt;}&lt;/span&gt;

  &lt;span class="k"&gt;async&lt;/span&gt; &lt;span class="nf"&gt;call&lt;/span&gt;&lt;span class="p"&gt;(...&lt;/span&gt;&lt;span class="nx"&gt;args&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
    &lt;span class="k"&gt;if &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="k"&gt;this&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;state&lt;/span&gt; &lt;span class="o"&gt;===&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;OPEN&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
      &lt;span class="k"&gt;if &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nb"&gt;Date&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;now&lt;/span&gt;&lt;span class="p"&gt;()&lt;/span&gt; &lt;span class="o"&gt;-&lt;/span&gt; &lt;span class="k"&gt;this&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;lastFailureTime&lt;/span&gt; &lt;span class="o"&gt;&amp;gt;&lt;/span&gt; &lt;span class="k"&gt;this&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;resetTimeout&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
        &lt;span class="k"&gt;this&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;state&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;HALF_OPEN&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
      &lt;span class="p"&gt;}&lt;/span&gt; &lt;span class="k"&gt;else&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
        &lt;span class="k"&gt;throw&lt;/span&gt; &lt;span class="k"&gt;new&lt;/span&gt; &lt;span class="nc"&gt;Error&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;Circuit breaker is OPEN&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
      &lt;span class="p"&gt;}&lt;/span&gt;
    &lt;span class="p"&gt;}&lt;/span&gt;
    &lt;span class="k"&gt;try&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
      &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;result&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="k"&gt;this&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;fn&lt;/span&gt;&lt;span class="p"&gt;(...&lt;/span&gt;&lt;span class="nx"&gt;args&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
      &lt;span class="k"&gt;this&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;failureCount&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="mi"&gt;0&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
      &lt;span class="k"&gt;this&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;state&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;CLOSED&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
      &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="nx"&gt;result&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
    &lt;span class="p"&gt;}&lt;/span&gt; &lt;span class="k"&gt;catch &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;err&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
      &lt;span class="k"&gt;this&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;failureCount&lt;/span&gt;&lt;span class="o"&gt;++&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
      &lt;span class="k"&gt;this&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;lastFailureTime&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nb"&gt;Date&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;now&lt;/span&gt;&lt;span class="p"&gt;();&lt;/span&gt;
      &lt;span class="k"&gt;if &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="k"&gt;this&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;failureCount&lt;/span&gt; &lt;span class="o"&gt;&amp;gt;=&lt;/span&gt; &lt;span class="k"&gt;this&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;failureThreshold&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="k"&gt;this&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;state&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;OPEN&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
      &lt;span class="k"&gt;throw&lt;/span&gt; &lt;span class="nx"&gt;err&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
    &lt;span class="p"&gt;}&lt;/span&gt;
  &lt;span class="p"&gt;}&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;h2&gt;
  
  
  Pattern 6: Graceful Shutdown
&lt;/h2&gt;



&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight javascript"&gt;&lt;code&gt;&lt;span class="k"&gt;async&lt;/span&gt; &lt;span class="kd"&gt;function&lt;/span&gt; &lt;span class="nf"&gt;gracefulShutdown&lt;/span&gt;&lt;span class="p"&gt;()&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="nx"&gt;server&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;close&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="k"&gt;async &lt;/span&gt;&lt;span class="p"&gt;()&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
    &lt;span class="nx"&gt;logger&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;info&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;Server closed&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
  &lt;span class="p"&gt;});&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;forceShutdown&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nf"&gt;setTimeout&lt;/span&gt;&lt;span class="p"&gt;(()&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="nx"&gt;process&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;exit&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="mi"&gt;1&lt;/span&gt;&lt;span class="p"&gt;),&lt;/span&gt; &lt;span class="mi"&gt;30000&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
  &lt;span class="k"&gt;try&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
    &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nx"&gt;db&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;close&lt;/span&gt;&lt;span class="p"&gt;();&lt;/span&gt;
    &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nx"&gt;redis&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;quit&lt;/span&gt;&lt;span class="p"&gt;();&lt;/span&gt;
    &lt;span class="nf"&gt;clearTimeout&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;forceShutdown&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
    &lt;span class="nx"&gt;process&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;exit&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="mi"&gt;0&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
  &lt;span class="p"&gt;}&lt;/span&gt; &lt;span class="k"&gt;catch &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;err&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
    &lt;span class="nx"&gt;process&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;exit&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="mi"&gt;1&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
  &lt;span class="p"&gt;}&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;
&lt;span class="nx"&gt;process&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;on&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;SIGTERM&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;gracefulShutdown&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
&lt;span class="nx"&gt;process&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;on&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;SIGINT&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;gracefulShutdown&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;h2&gt;
  
  
  The Meta Pattern: Async Error Handling
&lt;/h2&gt;



&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight javascript"&gt;&lt;code&gt;&lt;span class="kd"&gt;function&lt;/span&gt; &lt;span class="nf"&gt;asyncHandler&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;fn&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="k"&gt;return &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;req&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;res&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;next&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="nb"&gt;Promise&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;resolve&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nf"&gt;fn&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;req&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;res&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;next&lt;/span&gt;&lt;span class="p"&gt;)).&lt;/span&gt;&lt;span class="k"&gt;catch&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;next&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;

&lt;span class="nx"&gt;app&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;get&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;/users/:id&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nf"&gt;asyncHandler&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="k"&gt;async &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;req&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;res&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;user&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nx"&gt;db&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;getUser&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;req&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;params&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;id&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
  &lt;span class="nx"&gt;res&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;json&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;user&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
&lt;span class="p"&gt;}));&lt;/span&gt;

&lt;span class="nx"&gt;app&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;use&lt;/span&gt;&lt;span class="p"&gt;((&lt;/span&gt;&lt;span class="nx"&gt;err&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;req&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;res&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;next&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="nx"&gt;logger&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;error&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;Unhandled error&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;err&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
  &lt;span class="nx"&gt;res&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;status&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="mi"&gt;500&lt;/span&gt;&lt;span class="p"&gt;).&lt;/span&gt;&lt;span class="nf"&gt;json&lt;/span&gt;&lt;span class="p"&gt;({&lt;/span&gt; &lt;span class="na"&gt;error&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;Internal server error&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt; &lt;span class="p"&gt;});&lt;/span&gt;
&lt;span class="p"&gt;});&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;These aren't fancy patterns. They're the ones that keep systems running at 3am without pages.&lt;/p&gt;

&lt;p&gt;&lt;a href="https://zunain.com" rel="noopener noreferrer"&gt;zunain.com&lt;/a&gt;&lt;/p&gt;

</description>
      <category>node</category>
      <category>javascript</category>
      <category>backend</category>
      <category>programming</category>
    </item>
    <item>
      <title>The Node.js Patterns I Wish I Knew 3 Years Ago</title>
      <dc:creator>Muhammad Zulqarnain</dc:creator>
      <pubDate>Fri, 08 May 2026 05:06:33 +0000</pubDate>
      <link>https://dev.to/mzunain/the-nodejs-patterns-i-wish-i-knew-3-years-ago-3h6e</link>
      <guid>https://dev.to/mzunain/the-nodejs-patterns-i-wish-i-knew-3-years-ago-3h6e</guid>
      <description>&lt;p&gt;Event loop mastery, streams, worker threads, graceful shutdown, and the design patterns that separate senior Node.js engineers from the rest.&lt;/p&gt;

</description>
      <category>node</category>
      <category>javascript</category>
      <category>backend</category>
      <category>architecture</category>
    </item>
    <item>
      <title>Building Production-Ready RAG: A Complete Architecture Guide</title>
      <dc:creator>Muhammad Zulqarnain</dc:creator>
      <pubDate>Mon, 27 Apr 2026 16:39:13 +0000</pubDate>
      <link>https://dev.to/mzunain/building-production-ready-rag-a-complete-architecture-guide-4nfj</link>
      <guid>https://dev.to/mzunain/building-production-ready-rag-a-complete-architecture-guide-4nfj</guid>
      <description>&lt;h1&gt;
  
  
  Building Production-Ready RAG: A Complete Architecture Guide
&lt;/h1&gt;

&lt;p&gt;Most RAG tutorials show you how to stuff documents into a vector database and query them with an LLM. That gets you a demo. It does not get you a production system.&lt;/p&gt;

&lt;p&gt;I've built RAG pipelines that serve real users. The gap between "it works in a notebook" and "it works at 2 AM when the on-call gets paged" is massive. This guide covers what actually matters when you ship RAG to production.&lt;/p&gt;

&lt;h2&gt;
  
  
  Why Most RAG Demos Fail
&lt;/h2&gt;

&lt;p&gt;The standard RAG tutorial looks like this: load PDFs, chunk them, embed them, store in Pinecone, query with OpenAI. Five lines of LangChain. Done.&lt;/p&gt;

&lt;p&gt;Then you deploy it and discover:&lt;/p&gt;

&lt;p&gt;The chunking strategy matters more than the model. Bad chunks produce bad retrieval. Bad retrieval produces hallucinated answers. Users lose trust fast.&lt;/p&gt;

&lt;p&gt;Latency kills user experience. A 6-second response time is fine in a demo. Real users expect sub-2 seconds. You need caching, streaming, and careful orchestration.&lt;/p&gt;

&lt;p&gt;Cost scales faster than you expect. Every query hits an embedding model and an LLM. At 10K queries per day, your monthly bill will surprise you.&lt;/p&gt;

&lt;p&gt;Evaluation is almost impossible without infrastructure. "Does this answer look right?" is not a test strategy.&lt;/p&gt;

&lt;p&gt;Let's fix each of these.&lt;/p&gt;

&lt;h2&gt;
  
  
  The Ingestion Pipeline
&lt;/h2&gt;

&lt;p&gt;Ingestion is where most RAG systems quietly break. Bad data in, bad answers out.&lt;/p&gt;

&lt;h3&gt;
  
  
  Document Processing
&lt;/h3&gt;

&lt;p&gt;Before you chunk anything, clean your documents. Strip headers, footers, navigation elements, and boilerplate. Extract tables separately because they chunk terribly as plain text.&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight typescript"&gt;&lt;code&gt;&lt;span class="kr"&gt;interface&lt;/span&gt; &lt;span class="nx"&gt;ProcessedDocument&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="nl"&gt;id&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;string&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
  &lt;span class="nl"&gt;content&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;string&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
  &lt;span class="nl"&gt;metadata&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
    &lt;span class="na"&gt;source&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;string&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
    &lt;span class="nl"&gt;title&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;string&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
    &lt;span class="nl"&gt;section&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;string&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
    &lt;span class="nl"&gt;lastUpdated&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nb"&gt;Date&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
    &lt;span class="nl"&gt;documentType&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;article&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt; &lt;span class="o"&gt;|&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;api-doc&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt; &lt;span class="o"&gt;|&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;tutorial&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt; &lt;span class="o"&gt;|&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;faq&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
  &lt;span class="p"&gt;};&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;

&lt;span class="k"&gt;async&lt;/span&gt; &lt;span class="kd"&gt;function&lt;/span&gt; &lt;span class="nf"&gt;processDocument&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;raw&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;RawDocument&lt;/span&gt;&lt;span class="p"&gt;):&lt;/span&gt; &lt;span class="nb"&gt;Promise&lt;/span&gt;&lt;span class="o"&gt;&amp;lt;&lt;/span&gt;&lt;span class="nx"&gt;ProcessedDocument&lt;/span&gt;&lt;span class="p"&gt;[]&lt;/span&gt;&lt;span class="o"&gt;&amp;gt;&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="c1"&gt;// Split by semantic sections, not arbitrary character counts&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;sections&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nf"&gt;extractSections&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;raw&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;html&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;

  &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="nx"&gt;sections&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;map&lt;/span&gt;&lt;span class="p"&gt;((&lt;/span&gt;&lt;span class="nx"&gt;section&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;index&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="p"&gt;({&lt;/span&gt;
    &lt;span class="na"&gt;id&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="s2"&gt;`&lt;/span&gt;&lt;span class="p"&gt;${&lt;/span&gt;&lt;span class="nx"&gt;raw&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;id&lt;/span&gt;&lt;span class="p"&gt;}&lt;/span&gt;&lt;span class="s2"&gt;-section-&lt;/span&gt;&lt;span class="p"&gt;${&lt;/span&gt;&lt;span class="nx"&gt;index&lt;/span&gt;&lt;span class="p"&gt;}&lt;/span&gt;&lt;span class="s2"&gt;`&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="na"&gt;content&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nf"&gt;cleanMarkdown&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;section&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;content&lt;/span&gt;&lt;span class="p"&gt;),&lt;/span&gt;
    &lt;span class="na"&gt;metadata&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
      &lt;span class="na"&gt;source&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;raw&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;url&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
      &lt;span class="na"&gt;title&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;raw&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;title&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
      &lt;span class="na"&gt;section&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;section&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;heading&lt;/span&gt; &lt;span class="o"&gt;||&lt;/span&gt; &lt;span class="s2"&gt;`Section &lt;/span&gt;&lt;span class="p"&gt;${&lt;/span&gt;&lt;span class="nx"&gt;index&lt;/span&gt; &lt;span class="o"&gt;+&lt;/span&gt; &lt;span class="mi"&gt;1&lt;/span&gt;&lt;span class="p"&gt;}&lt;/span&gt;&lt;span class="s2"&gt;`&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
      &lt;span class="na"&gt;lastUpdated&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;raw&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;updatedAt&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
      &lt;span class="na"&gt;documentType&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nf"&gt;classifyDocument&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;raw&lt;/span&gt;&lt;span class="p"&gt;),&lt;/span&gt;
    &lt;span class="p"&gt;},&lt;/span&gt;
  &lt;span class="p"&gt;}));&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;h3&gt;
  
  
  Chunking Strategy
&lt;/h3&gt;

&lt;p&gt;Forget fixed-size chunking. It splits sentences mid-thought and produces chunks that lack context.&lt;/p&gt;

&lt;p&gt;Use semantic chunking: split on paragraph boundaries, heading boundaries, and logical breaks. Each chunk should be a self-contained thought that makes sense in isolation.&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight typescript"&gt;&lt;code&gt;&lt;span class="kd"&gt;function&lt;/span&gt; &lt;span class="nf"&gt;semanticChunk&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
  &lt;span class="nb"&gt;document&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;ProcessedDocument&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
  &lt;span class="nx"&gt;options&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt; &lt;span class="nl"&gt;maxTokens&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;number&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt; &lt;span class="nl"&gt;overlap&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;number&lt;/span&gt; &lt;span class="p"&gt;}&lt;/span&gt;
&lt;span class="p"&gt;):&lt;/span&gt; &lt;span class="nx"&gt;Chunk&lt;/span&gt;&lt;span class="p"&gt;[]&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;paragraphs&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nb"&gt;document&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;content&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;split&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="sr"&gt;/&lt;/span&gt;&lt;span class="se"&gt;\n\n&lt;/span&gt;&lt;span class="sr"&gt;+/&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;chunks&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;Chunk&lt;/span&gt;&lt;span class="p"&gt;[]&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="p"&gt;[];&lt;/span&gt;
  &lt;span class="kd"&gt;let&lt;/span&gt; &lt;span class="nx"&gt;currentChunk&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;string&lt;/span&gt;&lt;span class="p"&gt;[]&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="p"&gt;[];&lt;/span&gt;
  &lt;span class="kd"&gt;let&lt;/span&gt; &lt;span class="nx"&gt;currentTokens&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="mi"&gt;0&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;

  &lt;span class="k"&gt;for &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;paragraph&lt;/span&gt; &lt;span class="k"&gt;of&lt;/span&gt; &lt;span class="nx"&gt;paragraphs&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
    &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;paragraphTokens&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nf"&gt;estimateTokens&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;paragraph&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;

    &lt;span class="k"&gt;if &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;currentTokens&lt;/span&gt; &lt;span class="o"&gt;+&lt;/span&gt; &lt;span class="nx"&gt;paragraphTokens&lt;/span&gt; &lt;span class="o"&gt;&amp;gt;&lt;/span&gt; &lt;span class="nx"&gt;options&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;maxTokens&lt;/span&gt; &lt;span class="o"&gt;&amp;amp;&amp;amp;&lt;/span&gt; &lt;span class="nx"&gt;currentChunk&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;length&lt;/span&gt; &lt;span class="o"&gt;&amp;gt;&lt;/span&gt; &lt;span class="mi"&gt;0&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
      &lt;span class="nx"&gt;chunks&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;push&lt;/span&gt;&lt;span class="p"&gt;({&lt;/span&gt;
        &lt;span class="na"&gt;content&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;currentChunk&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;join&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="se"&gt;\n\n&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;),&lt;/span&gt;
        &lt;span class="na"&gt;tokenCount&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;currentTokens&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
        &lt;span class="na"&gt;metadata&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nb"&gt;document&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;metadata&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
      &lt;span class="p"&gt;});&lt;/span&gt;

      &lt;span class="c1"&gt;// Keep last paragraph for overlap/context continuity&lt;/span&gt;
      &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;overlapParagraphs&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nx"&gt;currentChunk&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;slice&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="o"&gt;-&lt;/span&gt;&lt;span class="mi"&gt;1&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
      &lt;span class="nx"&gt;currentChunk&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="p"&gt;[...&lt;/span&gt;&lt;span class="nx"&gt;overlapParagraphs&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;paragraph&lt;/span&gt;&lt;span class="p"&gt;];&lt;/span&gt;
      &lt;span class="nx"&gt;currentTokens&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nf"&gt;estimateTokens&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;overlapParagraphs&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;join&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="se"&gt;\n\n&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;))&lt;/span&gt; &lt;span class="o"&gt;+&lt;/span&gt; &lt;span class="nx"&gt;paragraphTokens&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
    &lt;span class="p"&gt;}&lt;/span&gt; &lt;span class="k"&gt;else&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
      &lt;span class="nx"&gt;currentChunk&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;push&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;paragraph&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
      &lt;span class="nx"&gt;currentTokens&lt;/span&gt; &lt;span class="o"&gt;+=&lt;/span&gt; &lt;span class="nx"&gt;paragraphTokens&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
    &lt;span class="p"&gt;}&lt;/span&gt;
  &lt;span class="p"&gt;}&lt;/span&gt;

  &lt;span class="k"&gt;if &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;currentChunk&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;length&lt;/span&gt; &lt;span class="o"&gt;&amp;gt;&lt;/span&gt; &lt;span class="mi"&gt;0&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
    &lt;span class="nx"&gt;chunks&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;push&lt;/span&gt;&lt;span class="p"&gt;({&lt;/span&gt;
      &lt;span class="na"&gt;content&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;currentChunk&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;join&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="se"&gt;\n\n&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;),&lt;/span&gt;
      &lt;span class="na"&gt;tokenCount&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;currentTokens&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
      &lt;span class="na"&gt;metadata&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nb"&gt;document&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;metadata&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="p"&gt;});&lt;/span&gt;
  &lt;span class="p"&gt;}&lt;/span&gt;

  &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="nx"&gt;chunks&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;Target chunk size: 200-500 tokens. Smaller chunks give more precise retrieval. Larger chunks give more context. I've found 300-400 tokens to be the sweet spot for most documentation use cases.&lt;/p&gt;

&lt;h3&gt;
  
  
  Embedding Models
&lt;/h3&gt;

&lt;p&gt;For production, I use OpenAI's &lt;code&gt;text-embedding-3-small&lt;/code&gt; for most cases. It's cheap ($0.02 per 1M tokens), fast, and the quality is good enough for 90% of use cases.&lt;/p&gt;

&lt;p&gt;If you need better retrieval quality and can afford the latency, &lt;code&gt;text-embedding-3-large&lt;/code&gt; with dimension reduction to 1024 gives excellent results at a reasonable cost.&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight typescript"&gt;&lt;code&gt;&lt;span class="k"&gt;async&lt;/span&gt; &lt;span class="kd"&gt;function&lt;/span&gt; &lt;span class="nf"&gt;embedChunks&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;chunks&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;Chunk&lt;/span&gt;&lt;span class="p"&gt;[]):&lt;/span&gt; &lt;span class="nb"&gt;Promise&lt;/span&gt;&lt;span class="o"&gt;&amp;lt;&lt;/span&gt;&lt;span class="nx"&gt;EmbeddedChunk&lt;/span&gt;&lt;span class="p"&gt;[]&lt;/span&gt;&lt;span class="o"&gt;&amp;gt;&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="c1"&gt;// Batch embeddings for efficiency (max 2048 per request)&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;batchSize&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="mi"&gt;2048&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="na"&gt;results&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;EmbeddedChunk&lt;/span&gt;&lt;span class="p"&gt;[]&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="p"&gt;[];&lt;/span&gt;

  &lt;span class="k"&gt;for &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="kd"&gt;let&lt;/span&gt; &lt;span class="nx"&gt;i&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="mi"&gt;0&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt; &lt;span class="nx"&gt;i&lt;/span&gt; &lt;span class="o"&gt;&amp;lt;&lt;/span&gt; &lt;span class="nx"&gt;chunks&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;length&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt; &lt;span class="nx"&gt;i&lt;/span&gt; &lt;span class="o"&gt;+=&lt;/span&gt; &lt;span class="nx"&gt;batchSize&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
    &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;batch&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nx"&gt;chunks&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;slice&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;i&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;i&lt;/span&gt; &lt;span class="o"&gt;+&lt;/span&gt; &lt;span class="nx"&gt;batchSize&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
    &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;response&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nx"&gt;openai&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;embeddings&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;create&lt;/span&gt;&lt;span class="p"&gt;({&lt;/span&gt;
      &lt;span class="na"&gt;model&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;text-embedding-3-small&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
      &lt;span class="na"&gt;input&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;batch&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;map&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;c&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="nx"&gt;c&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;content&lt;/span&gt;&lt;span class="p"&gt;),&lt;/span&gt;
    &lt;span class="p"&gt;});&lt;/span&gt;

    &lt;span class="k"&gt;for &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="kd"&gt;let&lt;/span&gt; &lt;span class="nx"&gt;j&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="mi"&gt;0&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt; &lt;span class="nx"&gt;j&lt;/span&gt; &lt;span class="o"&gt;&amp;lt;&lt;/span&gt; &lt;span class="nx"&gt;batch&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;length&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt; &lt;span class="nx"&gt;j&lt;/span&gt;&lt;span class="o"&gt;++&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
      &lt;span class="nx"&gt;results&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;push&lt;/span&gt;&lt;span class="p"&gt;({&lt;/span&gt;
        &lt;span class="p"&gt;...&lt;/span&gt;&lt;span class="nx"&gt;batch&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="nx"&gt;j&lt;/span&gt;&lt;span class="p"&gt;],&lt;/span&gt;
        &lt;span class="na"&gt;embedding&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;response&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;data&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="nx"&gt;j&lt;/span&gt;&lt;span class="p"&gt;].&lt;/span&gt;&lt;span class="nx"&gt;embedding&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
      &lt;span class="p"&gt;});&lt;/span&gt;
    &lt;span class="p"&gt;}&lt;/span&gt;
  &lt;span class="p"&gt;}&lt;/span&gt;

  &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="nx"&gt;results&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;h2&gt;
  
  
  The Retrieval Layer
&lt;/h2&gt;

&lt;p&gt;Vector similarity search alone is not enough. You need hybrid search and re-ranking.&lt;/p&gt;

&lt;h3&gt;
  
  
  Hybrid Search with pgvector
&lt;/h3&gt;

&lt;p&gt;If you're already running PostgreSQL, use pgvector. One less service to manage. Combine vector similarity with full-text search for hybrid retrieval.&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight typescript"&gt;&lt;code&gt;&lt;span class="k"&gt;async&lt;/span&gt; &lt;span class="kd"&gt;function&lt;/span&gt; &lt;span class="nf"&gt;hybridSearch&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
  &lt;span class="nx"&gt;query&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;string&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
  &lt;span class="nx"&gt;queryEmbedding&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;number&lt;/span&gt;&lt;span class="p"&gt;[],&lt;/span&gt;
  &lt;span class="nx"&gt;options&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt; &lt;span class="nl"&gt;limit&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;number&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt; &lt;span class="nl"&gt;minScore&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;number&lt;/span&gt; &lt;span class="p"&gt;}&lt;/span&gt;
&lt;span class="p"&gt;):&lt;/span&gt; &lt;span class="nb"&gt;Promise&lt;/span&gt;&lt;span class="o"&gt;&amp;lt;&lt;/span&gt;&lt;span class="nx"&gt;SearchResult&lt;/span&gt;&lt;span class="p"&gt;[]&lt;/span&gt;&lt;span class="o"&gt;&amp;gt;&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;results&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nx"&gt;db&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;query&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="s2"&gt;`
    WITH vector_results AS (
      SELECT id, content, metadata,
             1 - (embedding &amp;lt;=&amp;gt; $1::vector) AS vector_score
      FROM chunks
      WHERE 1 - (embedding &amp;lt;=&amp;gt; $1::vector) &amp;gt; $3
      ORDER BY embedding &amp;lt;=&amp;gt; $1::vector
      LIMIT $2 * 2
    ),
    text_results AS (
      SELECT id, content, metadata,
             ts_rank(search_vector, plainto_tsquery('english', $4)) AS text_score
      FROM chunks
      WHERE search_vector @@ plainto_tsquery('english', $4)
      ORDER BY text_score DESC
      LIMIT $2 * 2
    )
    SELECT COALESCE(v.id, t.id) AS id,
           COALESCE(v.content, t.content) AS content,
           COALESCE(v.metadata, t.metadata) AS metadata,
           COALESCE(v.vector_score, 0) * 0.7 + COALESCE(t.text_score, 0) * 0.3 AS combined_score
    FROM vector_results v
    FULL OUTER JOIN text_results t ON v.id = t.id
    ORDER BY combined_score DESC
    LIMIT $2
  `&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="nx"&gt;queryEmbedding&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;options&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;limit&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;options&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;minScore&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;query&lt;/span&gt;&lt;span class="p"&gt;]);&lt;/span&gt;

  &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="nx"&gt;results&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;rows&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;The 0.7/0.3 weighting between vector and text scores works well as a starting point. Tune it based on your evaluation results.&lt;/p&gt;

&lt;h3&gt;
  
  
  Re-ranking
&lt;/h3&gt;

&lt;p&gt;After retrieval, re-rank the results. A cross-encoder model gives much better relevance scores than cosine similarity alone. Cohere's re-rank API is the easiest option.&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight typescript"&gt;&lt;code&gt;&lt;span class="k"&gt;async&lt;/span&gt; &lt;span class="kd"&gt;function&lt;/span&gt; &lt;span class="nf"&gt;rerankResults&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
  &lt;span class="nx"&gt;query&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;string&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
  &lt;span class="nx"&gt;results&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;SearchResult&lt;/span&gt;&lt;span class="p"&gt;[],&lt;/span&gt;
  &lt;span class="nx"&gt;topK&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;number&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="mi"&gt;5&lt;/span&gt;
&lt;span class="p"&gt;):&lt;/span&gt; &lt;span class="nb"&gt;Promise&lt;/span&gt;&lt;span class="o"&gt;&amp;lt;&lt;/span&gt;&lt;span class="nx"&gt;SearchResult&lt;/span&gt;&lt;span class="p"&gt;[]&lt;/span&gt;&lt;span class="o"&gt;&amp;gt;&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;response&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nx"&gt;cohere&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;rerank&lt;/span&gt;&lt;span class="p"&gt;({&lt;/span&gt;
    &lt;span class="na"&gt;model&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;rerank-english-v3.0&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="nx"&gt;query&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="na"&gt;documents&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;results&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;map&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;r&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="nx"&gt;r&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;content&lt;/span&gt;&lt;span class="p"&gt;),&lt;/span&gt;
    &lt;span class="na"&gt;topN&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;topK&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
  &lt;span class="p"&gt;});&lt;/span&gt;

  &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="nx"&gt;response&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;results&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;map&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;r&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="p"&gt;({&lt;/span&gt;
    &lt;span class="p"&gt;...&lt;/span&gt;&lt;span class="nx"&gt;results&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="nx"&gt;r&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;index&lt;/span&gt;&lt;span class="p"&gt;],&lt;/span&gt;
    &lt;span class="na"&gt;relevanceScore&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;r&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;relevanceScore&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
  &lt;span class="p"&gt;}));&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;This step alone improved our answer quality by about 15% in our evaluations.&lt;/p&gt;

&lt;h2&gt;
  
  
  The Generation Layer
&lt;/h2&gt;

&lt;h3&gt;
  
  
  Prompt Design
&lt;/h3&gt;

&lt;p&gt;Your system prompt should be specific about the task, the constraints, and the expected format. Vague prompts produce vague answers.&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight typescript"&gt;&lt;code&gt;&lt;span class="kd"&gt;function&lt;/span&gt; &lt;span class="nf"&gt;buildPrompt&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;query&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;string&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;context&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;SearchResult&lt;/span&gt;&lt;span class="p"&gt;[]):&lt;/span&gt; &lt;span class="kr"&gt;string&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;contextBlock&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nx"&gt;context&lt;/span&gt;
    &lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;map&lt;/span&gt;&lt;span class="p"&gt;((&lt;/span&gt;&lt;span class="nx"&gt;c&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;i&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="s2"&gt;`[Source &lt;/span&gt;&lt;span class="p"&gt;${&lt;/span&gt;&lt;span class="nx"&gt;i&lt;/span&gt; &lt;span class="o"&gt;+&lt;/span&gt; &lt;span class="mi"&gt;1&lt;/span&gt;&lt;span class="p"&gt;}&lt;/span&gt;&lt;span class="s2"&gt;: &lt;/span&gt;&lt;span class="p"&gt;${&lt;/span&gt;&lt;span class="nx"&gt;c&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;metadata&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;title&lt;/span&gt;&lt;span class="p"&gt;}&lt;/span&gt;&lt;span class="s2"&gt;]\n&lt;/span&gt;&lt;span class="p"&gt;${&lt;/span&gt;&lt;span class="nx"&gt;c&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;content&lt;/span&gt;&lt;span class="p"&gt;}&lt;/span&gt;&lt;span class="s2"&gt;`&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;
    &lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;join&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="se"&gt;\n\n&lt;/span&gt;&lt;span class="s1"&gt;---&lt;/span&gt;&lt;span class="se"&gt;\n\n&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;

  &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="s2"&gt;`You are a technical documentation assistant. Answer the user's question using ONLY the provided context. If the context doesn't contain enough information to answer fully, say so clearly.

Rules:
- Cite sources using [Source N] format
- If you're unsure, say "I'm not confident about this" rather than guessing
- Keep answers concise and technical
- Include code examples when relevant

Context:
&lt;/span&gt;&lt;span class="p"&gt;${&lt;/span&gt;&lt;span class="nx"&gt;contextBlock&lt;/span&gt;&lt;span class="p"&gt;}&lt;/span&gt;&lt;span class="s2"&gt;

Question: &lt;/span&gt;&lt;span class="p"&gt;${&lt;/span&gt;&lt;span class="nx"&gt;query&lt;/span&gt;&lt;span class="p"&gt;}&lt;/span&gt;&lt;span class="s2"&gt;`&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;h3&gt;
  
  
  Streaming Responses
&lt;/h3&gt;

&lt;p&gt;Users should see the answer forming in real time. Stream tokens as they arrive.&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight typescript"&gt;&lt;code&gt;&lt;span class="k"&gt;async&lt;/span&gt; &lt;span class="kd"&gt;function&lt;/span&gt;&lt;span class="o"&gt;*&lt;/span&gt; &lt;span class="nf"&gt;generateAnswer&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
  &lt;span class="nx"&gt;query&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;string&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
  &lt;span class="nx"&gt;context&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;SearchResult&lt;/span&gt;&lt;span class="p"&gt;[]&lt;/span&gt;
&lt;span class="p"&gt;):&lt;/span&gt; &lt;span class="nx"&gt;AsyncGenerator&lt;/span&gt;&lt;span class="o"&gt;&amp;lt;&lt;/span&gt;&lt;span class="kr"&gt;string&lt;/span&gt;&lt;span class="o"&gt;&amp;gt;&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;prompt&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nf"&gt;buildPrompt&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;query&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;context&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;

  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;stream&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nx"&gt;openai&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;chat&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;completions&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;create&lt;/span&gt;&lt;span class="p"&gt;({&lt;/span&gt;
    &lt;span class="na"&gt;model&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;gpt-4o-mini&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="na"&gt;messages&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;[{&lt;/span&gt; &lt;span class="na"&gt;role&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;user&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="na"&gt;content&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;prompt&lt;/span&gt; &lt;span class="p"&gt;}],&lt;/span&gt;
    &lt;span class="na"&gt;stream&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kc"&gt;true&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="na"&gt;temperature&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="mf"&gt;0.1&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="c1"&gt;// Low temperature for factual answers&lt;/span&gt;
    &lt;span class="na"&gt;max_tokens&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="mi"&gt;1000&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
  &lt;span class="p"&gt;});&lt;/span&gt;

  &lt;span class="k"&gt;for&lt;/span&gt; &lt;span class="k"&gt;await &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;chunk&lt;/span&gt; &lt;span class="k"&gt;of&lt;/span&gt; &lt;span class="nx"&gt;stream&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
    &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;content&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nx"&gt;chunk&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;choices&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="mi"&gt;0&lt;/span&gt;&lt;span class="p"&gt;]?.&lt;/span&gt;&lt;span class="nx"&gt;delta&lt;/span&gt;&lt;span class="p"&gt;?.&lt;/span&gt;&lt;span class="nx"&gt;content&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
    &lt;span class="k"&gt;if &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;content&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="k"&gt;yield&lt;/span&gt; &lt;span class="nx"&gt;content&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
  &lt;span class="p"&gt;}&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;h3&gt;
  
  
  Hallucination Prevention
&lt;/h3&gt;

&lt;p&gt;Three layers of defense:&lt;/p&gt;

&lt;ol&gt;
&lt;li&gt;&lt;p&gt;&lt;strong&gt;Confidence scoring&lt;/strong&gt;: If no retrieved chunk scores above 0.75 relevance, don't generate an answer. Tell the user you couldn't find relevant information.&lt;/p&gt;&lt;/li&gt;
&lt;li&gt;&lt;p&gt;&lt;strong&gt;Source grounding&lt;/strong&gt;: The prompt explicitly limits answers to the provided context. This doesn't eliminate hallucinations but reduces them significantly.&lt;/p&gt;&lt;/li&gt;
&lt;li&gt;&lt;p&gt;&lt;strong&gt;Citation verification&lt;/strong&gt;: Post-process the response. If the model cites [Source 3] but Source 3 doesn't contain the claimed information, flag it.&lt;br&gt;
&lt;/p&gt;&lt;/li&gt;
&lt;/ol&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight typescript"&gt;&lt;code&gt;&lt;span class="kd"&gt;function&lt;/span&gt; &lt;span class="nf"&gt;validateCitations&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
  &lt;span class="nx"&gt;answer&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;string&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
  &lt;span class="nx"&gt;sources&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;SearchResult&lt;/span&gt;&lt;span class="p"&gt;[]&lt;/span&gt;
&lt;span class="p"&gt;):&lt;/span&gt; &lt;span class="nx"&gt;ValidationResult&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;citationPattern&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="sr"&gt;/&lt;/span&gt;&lt;span class="se"&gt;\[&lt;/span&gt;&lt;span class="sr"&gt;Source &lt;/span&gt;&lt;span class="se"&gt;(\d&lt;/span&gt;&lt;span class="sr"&gt;+&lt;/span&gt;&lt;span class="se"&gt;)\]&lt;/span&gt;&lt;span class="sr"&gt;/g&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;citations&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="p"&gt;[...&lt;/span&gt;&lt;span class="nx"&gt;answer&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;matchAll&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;citationPattern&lt;/span&gt;&lt;span class="p"&gt;)];&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;issues&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;string&lt;/span&gt;&lt;span class="p"&gt;[]&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="p"&gt;[];&lt;/span&gt;

  &lt;span class="k"&gt;for &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;citation&lt;/span&gt; &lt;span class="k"&gt;of&lt;/span&gt; &lt;span class="nx"&gt;citations&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
    &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;sourceIndex&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nf"&gt;parseInt&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;citation&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="mi"&gt;1&lt;/span&gt;&lt;span class="p"&gt;])&lt;/span&gt; &lt;span class="o"&gt;-&lt;/span&gt; &lt;span class="mi"&gt;1&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
    &lt;span class="k"&gt;if &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;sourceIndex&lt;/span&gt; &lt;span class="o"&gt;&amp;gt;=&lt;/span&gt; &lt;span class="nx"&gt;sources&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;length&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
      &lt;span class="nx"&gt;issues&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;push&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="s2"&gt;`Citation [Source &lt;/span&gt;&lt;span class="p"&gt;${&lt;/span&gt;&lt;span class="nx"&gt;citation&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="mi"&gt;1&lt;/span&gt;&lt;span class="p"&gt;]}&lt;/span&gt;&lt;span class="s2"&gt;] references non-existent source`&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
    &lt;span class="p"&gt;}&lt;/span&gt;
  &lt;span class="p"&gt;}&lt;/span&gt;

  &lt;span class="c1"&gt;// Check if answer contains claims without any citations&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;sentences&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nx"&gt;answer&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;split&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="sr"&gt;/&lt;/span&gt;&lt;span class="se"&gt;[&lt;/span&gt;&lt;span class="sr"&gt;.!?&lt;/span&gt;&lt;span class="se"&gt;]&lt;/span&gt;&lt;span class="sr"&gt;+/&lt;/span&gt;&lt;span class="p"&gt;).&lt;/span&gt;&lt;span class="nf"&gt;filter&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;s&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="nx"&gt;s&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;trim&lt;/span&gt;&lt;span class="p"&gt;().&lt;/span&gt;&lt;span class="nx"&gt;length&lt;/span&gt; &lt;span class="o"&gt;&amp;gt;&lt;/span&gt; &lt;span class="mi"&gt;20&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;uncitedClaims&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nx"&gt;sentences&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;filter&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
    &lt;span class="nx"&gt;s&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="o"&gt;!&lt;/span&gt;&lt;span class="nx"&gt;s&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;includes&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;[Source&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="o"&gt;&amp;amp;&amp;amp;&lt;/span&gt; &lt;span class="nf"&gt;looksLikeFactualClaim&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;s&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;
  &lt;span class="p"&gt;);&lt;/span&gt;

  &lt;span class="k"&gt;if &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;uncitedClaims&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;length&lt;/span&gt; &lt;span class="o"&gt;&amp;gt;&lt;/span&gt; &lt;span class="nx"&gt;sentences&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;length&lt;/span&gt; &lt;span class="o"&gt;*&lt;/span&gt; &lt;span class="mf"&gt;0.5&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
    &lt;span class="nx"&gt;issues&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;push&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;More than half of factual claims lack citations&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
  &lt;span class="p"&gt;}&lt;/span&gt;

  &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
    &lt;span class="na"&gt;valid&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;issues&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;length&lt;/span&gt; &lt;span class="o"&gt;===&lt;/span&gt; &lt;span class="mi"&gt;0&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="nx"&gt;issues&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="na"&gt;citationCount&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;citations&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;length&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
  &lt;span class="p"&gt;};&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;h2&gt;
  
  
  Evaluation
&lt;/h2&gt;

&lt;p&gt;You cannot improve what you cannot measure. Build an evaluation pipeline from day one.&lt;/p&gt;

&lt;h3&gt;
  
  
  Test Set Construction
&lt;/h3&gt;

&lt;p&gt;Create a test set of 50-100 question/answer pairs. Include edge cases: questions with no answer in the docs, ambiguous questions, questions requiring information from multiple sources.&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight typescript"&gt;&lt;code&gt;&lt;span class="kr"&gt;interface&lt;/span&gt; &lt;span class="nx"&gt;EvalCase&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="nl"&gt;query&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;string&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
  &lt;span class="nl"&gt;expectedAnswer&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;string&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
  &lt;span class="nl"&gt;relevantDocIds&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;string&lt;/span&gt;&lt;span class="p"&gt;[];&lt;/span&gt;
  &lt;span class="nl"&gt;difficulty&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;easy&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt; &lt;span class="o"&gt;|&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;medium&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt; &lt;span class="o"&gt;|&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;hard&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
  &lt;span class="nl"&gt;category&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;factual&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt; &lt;span class="o"&gt;|&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;procedural&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt; &lt;span class="o"&gt;|&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;conceptual&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt; &lt;span class="o"&gt;|&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;unanswerable&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;

&lt;span class="k"&gt;async&lt;/span&gt; &lt;span class="kd"&gt;function&lt;/span&gt; &lt;span class="nf"&gt;evaluateRAG&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;testCases&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;EvalCase&lt;/span&gt;&lt;span class="p"&gt;[]):&lt;/span&gt; &lt;span class="nb"&gt;Promise&lt;/span&gt;&lt;span class="o"&gt;&amp;lt;&lt;/span&gt;&lt;span class="nx"&gt;EvalReport&lt;/span&gt;&lt;span class="o"&gt;&amp;gt;&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;results&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nb"&gt;Promise&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;all&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
    &lt;span class="nx"&gt;testCases&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;map&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="k"&gt;async &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;tc&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
      &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;retrieved&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nf"&gt;hybridSearch&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;tc&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;query&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nf"&gt;embed&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;tc&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;query&lt;/span&gt;&lt;span class="p"&gt;),&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt; &lt;span class="na"&gt;limit&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="mi"&gt;5&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="na"&gt;minScore&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="mf"&gt;0.3&lt;/span&gt; &lt;span class="p"&gt;});&lt;/span&gt;
      &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;answer&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nf"&gt;generateFullAnswer&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;tc&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;query&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;retrieved&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;

      &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
        &lt;span class="na"&gt;query&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;tc&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;query&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
        &lt;span class="na"&gt;retrievalRecall&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nf"&gt;calculateRecall&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;retrieved&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;tc&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;relevantDocIds&lt;/span&gt;&lt;span class="p"&gt;),&lt;/span&gt;
        &lt;span class="na"&gt;answerRelevance&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nf"&gt;scoreRelevance&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;answer&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;tc&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;expectedAnswer&lt;/span&gt;&lt;span class="p"&gt;),&lt;/span&gt;
        &lt;span class="na"&gt;latencyMs&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;answer&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;latencyMs&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
        &lt;span class="na"&gt;tokenCost&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;answer&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;totalTokens&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
      &lt;span class="p"&gt;};&lt;/span&gt;
    &lt;span class="p"&gt;})&lt;/span&gt;
  &lt;span class="p"&gt;);&lt;/span&gt;

  &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
    &lt;span class="na"&gt;avgRetrievalRecall&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nf"&gt;avg&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;results&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;map&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;r&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="nx"&gt;r&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;retrievalRecall&lt;/span&gt;&lt;span class="p"&gt;)),&lt;/span&gt;
    &lt;span class="na"&gt;avgAnswerRelevance&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nf"&gt;avg&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;results&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;map&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;r&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="nx"&gt;r&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;answerRelevance&lt;/span&gt;&lt;span class="p"&gt;)),&lt;/span&gt;
    &lt;span class="na"&gt;avgLatencyMs&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nf"&gt;avg&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;results&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;map&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;r&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="nx"&gt;r&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;latencyMs&lt;/span&gt;&lt;span class="p"&gt;)),&lt;/span&gt;
    &lt;span class="na"&gt;totalCost&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nf"&gt;sum&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;results&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;map&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;r&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="nx"&gt;r&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;tokenCost&lt;/span&gt;&lt;span class="p"&gt;)),&lt;/span&gt;
    &lt;span class="nx"&gt;results&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
  &lt;span class="p"&gt;};&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;h3&gt;
  
  
  Key Metrics
&lt;/h3&gt;

&lt;p&gt;Track these in production:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;
&lt;strong&gt;Retrieval recall@5&lt;/strong&gt;: What percentage of relevant documents appear in the top 5 results? Target: &amp;gt;85%.&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Answer relevance&lt;/strong&gt;: LLM-as-judge scoring against expected answers. Target: &amp;gt;4.0/5.0.&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Latency P95&lt;/strong&gt;: Time from query to first streamed token. Target: &amp;lt;2 seconds.&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;User feedback rate&lt;/strong&gt;: Thumbs up/down ratio. Target: &amp;gt;80% positive.&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Hallucination rate&lt;/strong&gt;: Flagged by citation validation. Target: &amp;lt;5%.&lt;/li&gt;
&lt;/ul&gt;

&lt;h2&gt;
  
  
  Cost Optimization
&lt;/h2&gt;

&lt;p&gt;At scale, RAG costs add up fast. Three strategies that made the biggest difference for us:&lt;/p&gt;

&lt;h3&gt;
  
  
  Semantic Caching
&lt;/h3&gt;

&lt;p&gt;Cache answers for semantically similar queries. If someone asks "how do I deploy to production?" and another person asks "what's the deployment process?", they should get the same cached answer.&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight typescript"&gt;&lt;code&gt;&lt;span class="k"&gt;async&lt;/span&gt; &lt;span class="kd"&gt;function&lt;/span&gt; &lt;span class="nf"&gt;getCachedOrGenerate&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;query&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;string&lt;/span&gt;&lt;span class="p"&gt;):&lt;/span&gt; &lt;span class="nb"&gt;Promise&lt;/span&gt;&lt;span class="o"&gt;&amp;lt;&lt;/span&gt;&lt;span class="nx"&gt;RAGResponse&lt;/span&gt;&lt;span class="o"&gt;&amp;gt;&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;queryEmbedding&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nf"&gt;embed&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;query&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;

  &lt;span class="c1"&gt;// Check cache for semantically similar queries&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;cached&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nx"&gt;db&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;query&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="s2"&gt;`
    SELECT response, query
    FROM response_cache
    WHERE 1 - (query_embedding &amp;lt;=&amp;gt; $1::vector) &amp;gt; 0.95
    AND created_at &amp;gt; NOW() - INTERVAL '24 hours'
    ORDER BY query_embedding &amp;lt;=&amp;gt; $1::vector
    LIMIT 1
  `&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="nx"&gt;queryEmbedding&lt;/span&gt;&lt;span class="p"&gt;]);&lt;/span&gt;

  &lt;span class="k"&gt;if &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;cached&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;rows&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;length&lt;/span&gt; &lt;span class="o"&gt;&amp;gt;&lt;/span&gt; &lt;span class="mi"&gt;0&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
    &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt; &lt;span class="p"&gt;...&lt;/span&gt;&lt;span class="nx"&gt;cached&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;rows&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="mi"&gt;0&lt;/span&gt;&lt;span class="p"&gt;].&lt;/span&gt;&lt;span class="nx"&gt;response&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="na"&gt;fromCache&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kc"&gt;true&lt;/span&gt; &lt;span class="p"&gt;};&lt;/span&gt;
  &lt;span class="p"&gt;}&lt;/span&gt;

  &lt;span class="c1"&gt;// Generate fresh response&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;response&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nf"&gt;generateRAGResponse&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;query&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;queryEmbedding&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;

  &lt;span class="c1"&gt;// Cache it&lt;/span&gt;
  &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nx"&gt;db&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;query&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
    &lt;span class="s2"&gt;`INSERT INTO response_cache (query, query_embedding, response) VALUES ($1, $2, $3)`&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="nx"&gt;query&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;queryEmbedding&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;response&lt;/span&gt;&lt;span class="p"&gt;]&lt;/span&gt;
  &lt;span class="p"&gt;);&lt;/span&gt;

  &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="nx"&gt;response&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;This alone cut our LLM costs by about 40%.&lt;/p&gt;

&lt;h3&gt;
  
  
  Model Selection
&lt;/h3&gt;

&lt;p&gt;Use &lt;code&gt;gpt-4o-mini&lt;/code&gt; for most queries. It's 15x cheaper than &lt;code&gt;gpt-4o&lt;/code&gt; and handles straightforward documentation questions just fine. Route complex queries (multi-hop reasoning, comparison questions) to &lt;code&gt;gpt-4o&lt;/code&gt;.&lt;/p&gt;

&lt;h3&gt;
  
  
  Embedding Cost
&lt;/h3&gt;

&lt;p&gt;Pre-compute and store embeddings. Never re-embed a document that hasn't changed. Use a hash of the content to detect changes during re-indexing.&lt;/p&gt;

&lt;h2&gt;
  
  
  Real-World Performance Numbers
&lt;/h2&gt;

&lt;p&gt;From our production system serving ~5K queries per day:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Average retrieval latency: 45ms (pgvector with HNSW index)&lt;/li&gt;
&lt;li&gt;Average generation latency: 1.2s (gpt-4o-mini, streaming)&lt;/li&gt;
&lt;li&gt;Time to first token: 380ms&lt;/li&gt;
&lt;li&gt;Retrieval recall@5: 88%&lt;/li&gt;
&lt;li&gt;User satisfaction (thumbs up): 84%&lt;/li&gt;
&lt;li&gt;Monthly cost: ~$280 (embeddings + LLM + infrastructure)&lt;/li&gt;
&lt;li&gt;Cache hit rate: 38%&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;These numbers are realistic for a mid-scale documentation search system. Your numbers will vary based on document corpus size, query complexity, and model choice.&lt;/p&gt;

&lt;h2&gt;
  
  
  Wrapping Up
&lt;/h2&gt;

&lt;p&gt;Production RAG is not about picking the right model. It's about building a system with good data pipelines, smart retrieval, proper evaluation, and cost controls.&lt;/p&gt;

&lt;p&gt;Start with the simplest version that works. Measure everything. Improve the pieces that matter most based on your metrics.&lt;/p&gt;

&lt;p&gt;The code examples in this article are simplified but functional. Adapt them to your stack and your data.&lt;/p&gt;

&lt;p&gt;If you're building RAG right now, I'd love to hear what challenges you're hitting. Drop a comment or reach out.&lt;/p&gt;

</description>
      <category>ai</category>
      <category>rag</category>
      <category>typescript</category>
      <category>architecture</category>
    </item>
    <item>
      <title>Stop Building AI Wrappers. Start Building AI Products.</title>
      <dc:creator>Muhammad Zulqarnain</dc:creator>
      <pubDate>Fri, 24 Apr 2026 06:00:00 +0000</pubDate>
      <link>https://dev.to/mzunain/stop-building-ai-wrappers-start-building-ai-products-28mk</link>
      <guid>https://dev.to/mzunain/stop-building-ai-wrappers-start-building-ai-products-28mk</guid>
      <description>&lt;p&gt;Everyone and their brother is building an AI wrapper right now.&lt;/p&gt;

&lt;p&gt;You know the type: "I built a ChatGPT UI with a prompt." "I wrapped Claude and added a database." "I made an AI that does X by just calling the API."&lt;/p&gt;

&lt;p&gt;These are not products. They're proofs of concept that will be dead in 6 months.&lt;/p&gt;

&lt;p&gt;Here's the difference between an AI wrapper and an AI product, and why it matters.&lt;/p&gt;

&lt;h2&gt;
  
  
  What's an AI Wrapper?
&lt;/h2&gt;



&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight plaintext"&gt;&lt;code&gt;Wrapper = LLM API + UI/UX
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;ul&gt;
&lt;li&gt;"Prompt builder for GPT" — still just calling GPT&lt;/li&gt;
&lt;li&gt;"AI email writer" — just Claude, but for emails&lt;/li&gt;
&lt;li&gt;"Chat interface that remembers context" — ChatGPT but persistent&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;These have zero defensibility. When OpenAI releases ChatGPT Plus with the same features, your wrapper dies.&lt;/p&gt;

&lt;h2&gt;
  
  
  What's an AI Product?
&lt;/h2&gt;



&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight plaintext"&gt;&lt;code&gt;AI Product = (Proprietary Data + Specialized Model + Workflow Integration + User Loop)
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;ul&gt;
&lt;li&gt;
&lt;strong&gt;Cursor&lt;/strong&gt;: Code editor that understands your specific codebase. Remove AI, product breaks.&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Perplexity&lt;/strong&gt;: Web search + AI reasoning over sources + citations. The synthesis is the product.&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Replit Agent&lt;/strong&gt;: AI that executes code, sees errors, iterates. The feedback loop is the product.&lt;/li&gt;
&lt;/ul&gt;

&lt;h2&gt;
  
  
  The 5 Differences
&lt;/h2&gt;

&lt;p&gt;&lt;strong&gt;1. Proprietary Data&lt;/strong&gt;&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Wrapper: Uses public information&lt;/li&gt;
&lt;li&gt;Product: Has a data moat&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;&lt;strong&gt;2. Specialized vs. General&lt;/strong&gt;&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Wrapper: Uses a general LLM&lt;/li&gt;
&lt;li&gt;Product: Fine-tunes for specific task&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;&lt;strong&gt;3. Workflow Integration&lt;/strong&gt;&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Wrapper: Standalone tool&lt;/li&gt;
&lt;li&gt;Product: Integrated into how users work&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;&lt;strong&gt;4. Feedback Loop&lt;/strong&gt;&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Wrapper: Fire and forget&lt;/li&gt;
&lt;li&gt;Product: Learns from user behavior&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;&lt;strong&gt;5. Defensibility&lt;/strong&gt;&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Wrapper: Dead when the LLM vendor ships the same feature&lt;/li&gt;
&lt;li&gt;Product: Moat that gets wider with users&lt;/li&gt;
&lt;/ul&gt;

&lt;h2&gt;
  
  
  How to Build an AI Product
&lt;/h2&gt;

&lt;ol&gt;
&lt;li&gt;Start with a specific, narrow problem&lt;/li&gt;
&lt;li&gt;Identify your data advantage — if the answer is "none," you're building a wrapper&lt;/li&gt;
&lt;li&gt;Build the feedback loop from day 1 — capture accept/reject/edit signals&lt;/li&gt;
&lt;li&gt;Integrate into user workflow&lt;/li&gt;
&lt;li&gt;Fine-tune or specialize your model&lt;/li&gt;
&lt;li&gt;Think about the data flywheel&lt;/li&gt;
&lt;/ol&gt;

&lt;h2&gt;
  
  
  The Investment Thesis
&lt;/h2&gt;

&lt;ul&gt;
&lt;li&gt;Wrapper: "We built a UI for Claude." — VCs pass&lt;/li&gt;
&lt;li&gt;Product: "We trained a model on your domain data and it handles 60% of your support tickets." — VCs listen&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;Build for defensibility. Not demo impressiveness.&lt;/p&gt;

&lt;p&gt;More at &lt;a href="https://zunain.com" rel="noopener noreferrer"&gt;zunain.com&lt;/a&gt;.&lt;/p&gt;

</description>
      <category>ai</category>
      <category>llm</category>
      <category>webdev</category>
      <category>programming</category>
    </item>
    <item>
      <title>From $200/Month to Free: Running OpenClaw with Local AI Models</title>
      <dc:creator>Muhammad Zulqarnain</dc:creator>
      <pubDate>Sun, 19 Apr 2026 18:15:45 +0000</pubDate>
      <link>https://dev.to/mzunain/from-200month-to-free-running-openclaw-with-local-ai-models-37n9</link>
      <guid>https://dev.to/mzunain/from-200month-to-free-running-openclaw-with-local-ai-models-37n9</guid>
      <description>&lt;p&gt;&lt;em&gt;This is a submission for the &lt;a href="https://dev.to/challenges/openclaw-2026-04-16"&gt;OpenClaw Writing Challenge&lt;/a&gt;&lt;/em&gt;&lt;/p&gt;

&lt;h2&gt;
  
  
  The Problem: AI Assistant Costs Are Skyrocketing
&lt;/h2&gt;

&lt;p&gt;If you're running OpenClaw with cloud-hosted LLMs like Claude or GPT-4, you know the pain. Premium API access can easily cost $200/month or more, and that's assuming moderate usage. For developers, founders, or anyone automating workflows extensively, those costs compound fast.&lt;/p&gt;

&lt;p&gt;But here's the thing: &lt;strong&gt;OpenClaw doesn't require cloud AI&lt;/strong&gt;. You can run it entirely locally with open-source models—and in many cases, get comparable results for &lt;strong&gt;$0/month in API fees&lt;/strong&gt;.&lt;/p&gt;

&lt;p&gt;This guide walks through three deployment tiers, from completely free to budget-friendly, showing you how to cut your OpenClaw costs to zero while maintaining functionality.&lt;/p&gt;

&lt;h2&gt;
  
  
  Understanding Your Options
&lt;/h2&gt;

&lt;h3&gt;
  
  
  Tier 1: Completely Free (Ollama + Local Models)
&lt;/h3&gt;

&lt;p&gt;&lt;strong&gt;Cost&lt;/strong&gt;: $0/month&lt;br&gt;&lt;br&gt;
&lt;strong&gt;Hardware&lt;/strong&gt;: Any spare laptop/desktop with 8GB+ RAM&lt;br&gt;&lt;br&gt;
&lt;strong&gt;Best For&lt;/strong&gt;: Personal automation, learning, experimentation&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;How it works&lt;/strong&gt;:&lt;br&gt;&lt;br&gt;
Ollama lets you run powerful open-source models like Qwen 2.5 (7B/14B), Llama 3, or Mistral locally. These models are surprisingly capable for most automation tasks—code generation, data extraction, text summarization, and workflow orchestration.&lt;/p&gt;

&lt;p&gt;OpenClaw connects to Ollama as a model provider, treating your local instance like any cloud API.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Setup Steps&lt;/strong&gt;:&lt;/p&gt;

&lt;ol&gt;
&lt;li&gt;
&lt;strong&gt;Install Ollama&lt;/strong&gt; (Mac/Linux/Windows):
&lt;/li&gt;
&lt;/ol&gt;
&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight shell"&gt;&lt;code&gt;   curl &lt;span class="nt"&gt;-fsSL&lt;/span&gt; https://ollama.com/install.sh | sh
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;


&lt;ol&gt;
&lt;li&gt;
&lt;strong&gt;Pull a capable model&lt;/strong&gt;:
&lt;/li&gt;
&lt;/ol&gt;
&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight shell"&gt;&lt;code&gt;   ollama pull qwen2.5:14b
   &lt;span class="c"&gt;# or for lower-end hardware:&lt;/span&gt;
   ollama pull qwen2.5:7b
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;


&lt;ol&gt;
&lt;li&gt;&lt;p&gt;&lt;strong&gt;Configure OpenClaw&lt;/strong&gt;:&lt;br&gt;&lt;br&gt;
In your OpenClaw settings, switch the model provider to &lt;code&gt;ollama&lt;/code&gt; and point it to &lt;code&gt;http://localhost:11434&lt;/code&gt;.&lt;/p&gt;&lt;/li&gt;
&lt;li&gt;&lt;p&gt;&lt;strong&gt;Test your setup&lt;/strong&gt;:&lt;br&gt;&lt;br&gt;
Create a simple skill (e.g., "Summarize my emails") and verify it works with your local model.&lt;/p&gt;&lt;/li&gt;
&lt;/ol&gt;

&lt;p&gt;&lt;strong&gt;Tradeoffs&lt;/strong&gt;:  &lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Your device needs to stay on 24/7 for skills to run
&lt;/li&gt;
&lt;li&gt;Slightly slower inference than cloud APIs
&lt;/li&gt;
&lt;li&gt;Smaller context windows (typically 8K-32K tokens vs 128K+ for cloud models)&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;&lt;strong&gt;Real savings&lt;/strong&gt;: If you were paying $200/month for Claude API access, that's &lt;strong&gt;$2,400/year&lt;/strong&gt; saved.&lt;/p&gt;


&lt;h3&gt;
  
  
  Tier 2: Budget Cloud ($10-30/month)
&lt;/h3&gt;

&lt;p&gt;&lt;strong&gt;Cost&lt;/strong&gt;: $10-30/month&lt;br&gt;&lt;br&gt;
&lt;strong&gt;Hardware&lt;/strong&gt;: None (cloud-hosted)&lt;br&gt;&lt;br&gt;
&lt;strong&gt;Best For&lt;/strong&gt;: Production workflows, team usage, 24/7 availability&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;How it works&lt;/strong&gt;:&lt;br&gt;&lt;br&gt;
If running a local device 24/7 isn't practical, you can deploy Ollama on a cheap VPS (Virtual Private Server) and point OpenClaw to it remotely.&lt;/p&gt;

&lt;p&gt;Alternatively, use budget-friendly cloud APIs like:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;
&lt;strong&gt;Minimax API&lt;/strong&gt;: ~$0.001 per 1K tokens (~$20-30/month for heavy use)
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Groq&lt;/strong&gt;: Fast inference, generous free tier
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Together AI&lt;/strong&gt;: Competitive pricing on open models&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;&lt;strong&gt;VPS Setup Example&lt;/strong&gt; (DigitalOcean/Hetzner):&lt;/p&gt;

&lt;ol&gt;
&lt;li&gt;
&lt;strong&gt;Spin up a VPS&lt;/strong&gt; (~$10-15/month for 8GB RAM):
&lt;/li&gt;
&lt;/ol&gt;
&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight shell"&gt;&lt;code&gt;   &lt;span class="c"&gt;# SSH into your VPS&lt;/span&gt;
   ssh user@your-vps-ip
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;


&lt;ol&gt;
&lt;li&gt;
&lt;strong&gt;Install Ollama&lt;/strong&gt;:
&lt;/li&gt;
&lt;/ol&gt;
&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight shell"&gt;&lt;code&gt;   curl &lt;span class="nt"&gt;-fsSL&lt;/span&gt; https://ollama.com/install.sh | sh
   ollama pull qwen2.5:14b
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;


&lt;ol&gt;
&lt;li&gt;
&lt;strong&gt;Expose Ollama&lt;/strong&gt; (use a reverse proxy like ngrok or Tailscale for secure access):
&lt;/li&gt;
&lt;/ol&gt;
&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight shell"&gt;&lt;code&gt;   ollama serve &lt;span class="nt"&gt;--host&lt;/span&gt; 0.0.0.0
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;


&lt;ol&gt;
&lt;li&gt;
&lt;strong&gt;Point OpenClaw&lt;/strong&gt; to &lt;code&gt;http://your-vps-ip:11434&lt;/code&gt;
&lt;/li&gt;
&lt;/ol&gt;

&lt;p&gt;&lt;strong&gt;Tradeoffs&lt;/strong&gt;:  &lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Small monthly cost but still 10x cheaper than Claude Max
&lt;/li&gt;
&lt;li&gt;Requires basic VPS management skills
&lt;/li&gt;
&lt;li&gt;Latency depends on VPS location&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;&lt;strong&gt;Real savings&lt;/strong&gt;: Instead of $200/month on cloud APIs, you're paying $15-30/month—saving &lt;strong&gt;$170-185/month&lt;/strong&gt; or &lt;strong&gt;$2,040-2,220/year&lt;/strong&gt;.&lt;/p&gt;


&lt;h3&gt;
  
  
  Tier 3: Hybrid Approach (Best of Both Worlds)
&lt;/h3&gt;

&lt;p&gt;&lt;strong&gt;Cost&lt;/strong&gt;: Variable ($0-50/month depending on usage)&lt;br&gt;&lt;br&gt;
&lt;strong&gt;Strategy&lt;/strong&gt;: Use local models for routine tasks, cloud APIs for complex reasoning&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;How it works&lt;/strong&gt;:&lt;br&gt;&lt;br&gt;
OpenClaw supports multiple model providers simultaneously. You can configure different skills to use different models:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;
&lt;strong&gt;Routine automation&lt;/strong&gt; (email filtering, data extraction) → Ollama (free)
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Complex reasoning&lt;/strong&gt; (code review, strategic planning) → Claude/GPT-4 (pay-per-use)&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;This hybrid approach optimizes for both cost and capability.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Configuration Example&lt;/strong&gt;:&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight yaml"&gt;&lt;code&gt;&lt;span class="na"&gt;skills&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt;
  &lt;span class="na"&gt;email_summarizer&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt;
    &lt;span class="na"&gt;model&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt; &lt;span class="s"&gt;ollama/qwen2.5:14b&lt;/span&gt;

  &lt;span class="na"&gt;code_reviewer&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt;
    &lt;span class="na"&gt;model&lt;/span&gt;&lt;span class="pi"&gt;:&lt;/span&gt; &lt;span class="s"&gt;anthropic/claude-3-opus&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;&lt;strong&gt;Real savings&lt;/strong&gt;: If 80% of your tasks run locally and 20% use cloud APIs, you're looking at ~$40/month instead of $200—saving &lt;strong&gt;$160/month&lt;/strong&gt; or &lt;strong&gt;$1,920/year&lt;/strong&gt;.&lt;/p&gt;




&lt;h2&gt;
  
  
  Choosing the Right Model
&lt;/h2&gt;

&lt;p&gt;Not all models are created equal. Here's what works well for OpenClaw:&lt;/p&gt;

&lt;div class="table-wrapper-paragraph"&gt;&lt;table&gt;
&lt;thead&gt;
&lt;tr&gt;
&lt;th&gt;Model&lt;/th&gt;
&lt;th&gt;Size&lt;/th&gt;
&lt;th&gt;Best For&lt;/th&gt;
&lt;th&gt;Context Window&lt;/th&gt;
&lt;/tr&gt;
&lt;/thead&gt;
&lt;tbody&gt;
&lt;tr&gt;
&lt;td&gt;&lt;strong&gt;Qwen 2.5&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;7B-14B&lt;/td&gt;
&lt;td&gt;General automation, coding&lt;/td&gt;
&lt;td&gt;32K tokens&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;&lt;strong&gt;Llama 3.1&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;8B-70B&lt;/td&gt;
&lt;td&gt;Reasoning, chat&lt;/td&gt;
&lt;td&gt;128K tokens&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;&lt;strong&gt;Mistral&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;7B-22B&lt;/td&gt;
&lt;td&gt;Fast inference, multilingual&lt;/td&gt;
&lt;td&gt;32K tokens&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;&lt;strong&gt;DeepSeek Coder&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;6.7B&lt;/td&gt;
&lt;td&gt;Code generation, debugging&lt;/td&gt;
&lt;td&gt;16K tokens&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;
&lt;/table&gt;&lt;/div&gt;

&lt;p&gt;For most users, &lt;strong&gt;Qwen 2.5 14B&lt;/strong&gt; offers the best balance of capability and resource requirements.&lt;/p&gt;




&lt;h2&gt;
  
  
  Real-World Example: My 5-Agent Setup
&lt;/h2&gt;

&lt;p&gt;I run 5 OpenClaw agents entirely on Ollama using a spare MacBook Air (16GB RAM):&lt;/p&gt;

&lt;ol&gt;
&lt;li&gt;
&lt;strong&gt;Email Assistant&lt;/strong&gt;: Filters, summarizes, drafts replies
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Code Helper&lt;/strong&gt;: Generates boilerplate, reviews PRs
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Research Agent&lt;/strong&gt;: Monitors RSS feeds, summarizes articles
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Data Extractor&lt;/strong&gt;: Pulls structured data from websites
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Task Scheduler&lt;/strong&gt;: Manages my Notion workspace&lt;/li&gt;
&lt;/ol&gt;

&lt;p&gt;&lt;strong&gt;Total monthly cost&lt;/strong&gt;: $0 (minus electricity, ~$2-3/month)&lt;br&gt;&lt;br&gt;
&lt;strong&gt;Previous cloud API cost&lt;/strong&gt;: ~$180/month&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Annual savings&lt;/strong&gt;: $2,160&lt;/p&gt;

&lt;p&gt;The MacBook runs 24/7, but I was going to keep it plugged in anyway. The agents paid for themselves in week one.&lt;/p&gt;


&lt;h2&gt;
  
  
  Getting Started: Your First Local OpenClaw Agent
&lt;/h2&gt;

&lt;p&gt;Here's a step-by-step walkthrough to create your first cost-free OpenClaw skill:&lt;/p&gt;
&lt;h3&gt;
  
  
  1. Install Prerequisites
&lt;/h3&gt;


&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight shell"&gt;&lt;code&gt;&lt;span class="c"&gt;# Install Ollama&lt;/span&gt;
curl &lt;span class="nt"&gt;-fsSL&lt;/span&gt; https://ollama.com/install.sh | sh

&lt;span class="c"&gt;# Pull a model&lt;/span&gt;
ollama pull qwen2.5:14b

&lt;span class="c"&gt;# Verify it's running&lt;/span&gt;
ollama list
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;

&lt;h3&gt;
  
  
  2. Configure OpenClaw
&lt;/h3&gt;

&lt;p&gt;In your OpenClaw instance:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Navigate to &lt;strong&gt;Settings&lt;/strong&gt; → &lt;strong&gt;Model Providers&lt;/strong&gt;
&lt;/li&gt;
&lt;li&gt;Add a new provider: &lt;code&gt;Ollama&lt;/code&gt;
&lt;/li&gt;
&lt;li&gt;Set endpoint: &lt;code&gt;http://localhost:11434&lt;/code&gt;
&lt;/li&gt;
&lt;li&gt;Test connection&lt;/li&gt;
&lt;/ul&gt;
&lt;h3&gt;
  
  
  3. Create a Simple Skill
&lt;/h3&gt;

&lt;p&gt;Let's build an &lt;strong&gt;Email Summarizer&lt;/strong&gt;:&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight python"&gt;&lt;code&gt;&lt;span class="c1"&gt;# Example skill configuration
&lt;/span&gt;&lt;span class="n"&gt;name&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;Daily Email Summary&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;
&lt;span class="n"&gt;trigger&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;cron: 0 8 * * *&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;  &lt;span class="c1"&gt;# Run at 8 AM daily
&lt;/span&gt;&lt;span class="n"&gt;model&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;ollama/qwen2.5:14b&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;

&lt;span class="n"&gt;prompt&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="o"&gt;|&lt;/span&gt;
  &lt;span class="n"&gt;Summarize&lt;/span&gt; &lt;span class="n"&gt;these&lt;/span&gt; &lt;span class="n"&gt;emails&lt;/span&gt; &lt;span class="n"&gt;into&lt;/span&gt; &lt;span class="n"&gt;a&lt;/span&gt; &lt;span class="n"&gt;concise&lt;/span&gt; &lt;span class="n"&gt;bullet&lt;/span&gt;&lt;span class="o"&gt;-&lt;/span&gt;&lt;span class="n"&gt;point&lt;/span&gt; &lt;span class="nb"&gt;list&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;
  &lt;span class="n"&gt;Focus&lt;/span&gt; &lt;span class="n"&gt;on&lt;/span&gt; &lt;span class="n"&gt;action&lt;/span&gt; &lt;span class="n"&gt;items&lt;/span&gt; &lt;span class="ow"&gt;and&lt;/span&gt; &lt;span class="n"&gt;key&lt;/span&gt; &lt;span class="n"&gt;information&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;

  &lt;span class="p"&gt;{&lt;/span&gt;&lt;span class="n"&gt;email_content&lt;/span&gt;&lt;span class="p"&gt;}&lt;/span&gt;

&lt;span class="n"&gt;output_format&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;markdown&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;
&lt;span class="n"&gt;notification&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;slack&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;h3&gt;
  
  
  4. Test &amp;amp; Iterate
&lt;/h3&gt;

&lt;p&gt;Run the skill manually first:&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight shell"&gt;&lt;code&gt;openclaw run email-summarizer &lt;span class="nt"&gt;--test&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;Once it works, let it run on schedule. Monitor performance and adjust the prompt as needed.&lt;/p&gt;




&lt;h2&gt;
  
  
  Tips for Optimizing Local Model Performance
&lt;/h2&gt;

&lt;ol&gt;
&lt;li&gt;
&lt;strong&gt;Use quantized models&lt;/strong&gt;: GGUF 4-bit quantization runs 2-3x faster with minimal quality loss&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Batch requests&lt;/strong&gt;: Process multiple items together to maximize throughput&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Cache responses&lt;/strong&gt;: For repetitive tasks, cache and reuse model outputs&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Monitor resources&lt;/strong&gt;: Use &lt;code&gt;htop&lt;/code&gt; or Activity Monitor to track CPU/GPU usage&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Upgrade RAM if needed&lt;/strong&gt;: 16GB is the sweet spot for running 14B models comfortably&lt;/li&gt;
&lt;/ol&gt;




&lt;h2&gt;
  
  
  When Cloud APIs Still Make Sense
&lt;/h2&gt;

&lt;p&gt;Local models aren't always the answer. Stick with cloud APIs when:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;
&lt;strong&gt;You need cutting-edge reasoning&lt;/strong&gt; (GPT-4o, Claude Opus for complex tasks)
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Context windows matter&lt;/strong&gt; (analyzing 100K+ token documents)
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Latency is critical&lt;/strong&gt; (sub-second response times)
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;You don't have suitable hardware&lt;/strong&gt; (less than 8GB RAM)&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;The hybrid approach (local for most tasks, cloud for special cases) often delivers the best ROI.&lt;/p&gt;




&lt;h2&gt;
  
  
  Conclusion: Take Control of Your AI Costs
&lt;/h2&gt;

&lt;p&gt;OpenClaw's flexibility means you're not locked into expensive cloud APIs. Whether you go fully local with Ollama, deploy a budget VPS, or use a hybrid strategy, you can dramatically reduce costs without sacrificing functionality.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Key takeaways&lt;/strong&gt;:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;✅ Local models (Ollama + Qwen/Llama) work for 80%+ of automation tasks
&lt;/li&gt;
&lt;li&gt;✅ VPS deployment costs $10-30/month vs $200+ for cloud APIs
&lt;/li&gt;
&lt;li&gt;✅ Hybrid approach balances cost and capability
&lt;/li&gt;
&lt;li&gt;✅ Annual savings of $1,920-2,400 are realistic&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;If you're spending over $100/month on AI API access, it's time to evaluate local options. OpenClaw makes it easy.&lt;/p&gt;




&lt;h2&gt;
  
  
  Resources
&lt;/h2&gt;

&lt;ul&gt;
&lt;li&gt;
&lt;strong&gt;OpenClaw Docs&lt;/strong&gt;: &lt;a href="https://docs.openclaw.ai" rel="noopener noreferrer"&gt;docs.openclaw.ai&lt;/a&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Ollama&lt;/strong&gt;: &lt;a href="https://ollama.com" rel="noopener noreferrer"&gt;ollama.com&lt;/a&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Qwen 2.5&lt;/strong&gt;: &lt;a href="https://huggingface.co/Qwen" rel="noopener noreferrer"&gt;huggingface.co/Qwen&lt;/a&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Budget VPS Providers&lt;/strong&gt;: DigitalOcean, Hetzner, Vultr&lt;/li&gt;
&lt;/ul&gt;




&lt;p&gt;&lt;em&gt;Have you switched to local models for OpenClaw? What's your setup? Drop a comment below!&lt;/em&gt;&lt;/p&gt;

</description>
      <category>devchallenge</category>
      <category>openclawchallenge</category>
    </item>
    <item>
      <title>EcoMind: AI Carbon Footprint Analyzer for Earth Day 2026</title>
      <dc:creator>Muhammad Zulqarnain</dc:creator>
      <pubDate>Sun, 19 Apr 2026 16:05:04 +0000</pubDate>
      <link>https://dev.to/mzunain/ecomind-ai-carbon-footprint-analyzer-for-earth-day-2026-571i</link>
      <guid>https://dev.to/mzunain/ecomind-ai-carbon-footprint-analyzer-for-earth-day-2026-571i</guid>
      <description>&lt;h2&gt;
  
  
  What I Built
&lt;/h2&gt;

&lt;p&gt;EcoMind is an AI-powered carbon footprint analyzer that helps individuals understand their environmental impact and receive personalized, actionable recommendations to reduce their carbon emissions.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Live Demo&lt;/strong&gt;: &lt;a href="https://ecomind-coral.vercel.app" rel="noopener noreferrer"&gt;https://ecomind-coral.vercel.app&lt;/a&gt;&lt;br&gt;
&lt;strong&gt;GitHub&lt;/strong&gt;: &lt;a href="https://github.com/mzunain/ecomind" rel="noopener noreferrer"&gt;https://github.com/mzunain/ecomind&lt;/a&gt;&lt;/p&gt;

&lt;h2&gt;
  
  
  Demo
&lt;/h2&gt;

&lt;p&gt;EcoMind provides:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;🌍 Instant carbon footprint analysis based on your lifestyle&lt;/li&gt;
&lt;li&gt;📊 Sustainability score (1-10) with detailed breakdown
&lt;/li&gt;
&lt;li&gt;💡 Top 5 personalized reduction actions ranked by impact&lt;/li&gt;
&lt;li&gt;🌱 Region-specific advice (optimized for Nordic countries)&lt;/li&gt;
&lt;li&gt;⚡ Fast, beautiful dark theme UI&lt;/li&gt;
&lt;/ul&gt;

&lt;h2&gt;
  
  
  Journey
&lt;/h2&gt;

&lt;h3&gt;
  
  
  Why Google Gemini?
&lt;/h3&gt;

&lt;p&gt;I chose Google Gemini for its structured JSON output capability, which is perfect for generating consistent, type-safe environmental data. The model excels at understanding complex lifestyle patterns and providing nuanced, regional advice.&lt;/p&gt;

&lt;h3&gt;
  
  
  Technical Implementation
&lt;/h3&gt;

&lt;p&gt;Using Next.js 15 with the App Router and Google's @google/generative-ai SDK, I implemented:&lt;/p&gt;

&lt;ol&gt;
&lt;li&gt;
&lt;strong&gt;Structured Schema&lt;/strong&gt;: Defined a TypeScript schema for carbon analysis&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Regional Context&lt;/strong&gt;: Added location-aware recommendations&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Difficulty Scoring&lt;/strong&gt;: Categorized actions by implementation difficulty&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Premium UI&lt;/strong&gt;: Modern glassmorphism design with Tailwind CSS&lt;/li&gt;
&lt;/ol&gt;

&lt;h3&gt;
  
  
  Challenges Faced
&lt;/h3&gt;

&lt;ul&gt;
&lt;li&gt;Model naming: Gemini API model versions required experimentation&lt;/li&gt;
&lt;li&gt;Structured output validation: Ensuring consistent JSON responses&lt;/li&gt;
&lt;li&gt;Regional accuracy: Balancing global averages with local context (e.g., Finland's clean energy grid)&lt;/li&gt;
&lt;/ul&gt;

&lt;h2&gt;
  
  
  Category Submission
&lt;/h2&gt;

&lt;p&gt;&lt;strong&gt;Best Use of Google Gemini&lt;/strong&gt; - EcoMind showcases Gemini's strength in structured data generation and contextual understanding for environmental impact analysis.&lt;/p&gt;

&lt;h2&gt;
  
  
  Additional Prize Categories
&lt;/h2&gt;

&lt;p&gt;N/A&lt;/p&gt;

</description>
      <category>weekendchallenge</category>
      <category>ai</category>
      <category>gemini</category>
      <category>environment</category>
    </item>
    <item>
      <title>EcoMind: AI-Powered Carbon Footprint Analyzer Using Google Gemini</title>
      <dc:creator>Muhammad Zulqarnain</dc:creator>
      <pubDate>Sun, 19 Apr 2026 15:31:31 +0000</pubDate>
      <link>https://dev.to/mzunain/ecomind-ai-powered-carbon-footprint-analyzer-using-google-gemini-2p7h</link>
      <guid>https://dev.to/mzunain/ecomind-ai-powered-carbon-footprint-analyzer-using-google-gemini-2p7h</guid>
      <description>&lt;p&gt;&lt;em&gt;This is a submission for &lt;a href="https://dev.to/challenges/weekend-2026-04-16"&gt;Weekend Challenge: Earth Day Edition&lt;/a&gt;&lt;/em&gt;&lt;/p&gt;

&lt;h2&gt;
  
  
  What I Built
&lt;/h2&gt;

&lt;p&gt;EcoMind is a Next.js web application powered by the Google Gemini API that helps users understand and reduce their personal carbon footprint. You describe your daily habits — commute, diet, home energy use, shopping patterns — and Gemini analyzes your lifestyle, estimates your CO₂ impact, and returns a personalized action plan with concrete steps ranked by impact.&lt;/p&gt;

&lt;p&gt;The goal was to make environmental awareness feel personal and actionable rather than abstract. Most carbon calculators spit out a number and leave you wondering "so what?" EcoMind closes that gap by combining Gemini's reasoning capabilities with structured output parsing to give context-aware recommendations tailored to your actual situation.&lt;/p&gt;

&lt;p&gt;Key features:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;
&lt;strong&gt;Conversational input&lt;/strong&gt;: Describe your habits in plain language — no rigid forms&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Gemini-powered analysis&lt;/strong&gt;: Uses &lt;code&gt;gemini-1.5-flash&lt;/code&gt; to parse lifestyle data, estimate emissions, and generate recommendations&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Structured JSON output&lt;/strong&gt;: Forced JSON schema response from Gemini for reliable rendering&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Actionable breakdown&lt;/strong&gt;: Each recommendation shows estimated CO₂ savings per year&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Finland-aware context&lt;/strong&gt;: Recommendations factor in northern European energy mix and public transit infrastructure&lt;/li&gt;
&lt;/ul&gt;

&lt;h2&gt;
  
  
  Demo
&lt;/h2&gt;

&lt;p&gt;🔗 &lt;strong&gt;Live demo&lt;/strong&gt;: &lt;a href="https://ecomind-coral.vercel.app" rel="noopener noreferrer"&gt;https://ecomind-coral.vercel.app&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;You can also run it locally — instructions in the README.&lt;/p&gt;

&lt;h2&gt;
  
  
  Code
&lt;/h2&gt;


&lt;div class="ltag-github-readme-tag"&gt;
  &lt;div class="readme-overview"&gt;
    &lt;h2&gt;
      &lt;img src="https://assets.dev.to/assets/github-logo-5a155e1f9a670af7944dd5e12375bc76ed542ea80224905ecaf878b9157cdefc.svg" alt="GitHub logo"&gt;
      &lt;a href="https://github.com/mzunain" rel="noopener noreferrer"&gt;
        mzunain
      &lt;/a&gt; / &lt;a href="https://github.com/mzunain/ecomind" rel="noopener noreferrer"&gt;
        ecomind
      &lt;/a&gt;
    &lt;/h2&gt;
    &lt;h3&gt;
      AI-powered carbon footprint analyzer using Google Gemini - DEV Weekend Challenge Earth Day Edition
    &lt;/h3&gt;
  &lt;/div&gt;
  &lt;div class="ltag-github-body"&gt;
    
&lt;div id="readme" class="md"&gt;
&lt;div class="markdown-heading"&gt;
&lt;h1 class="heading-element"&gt;EcoMind 🌍&lt;/h1&gt;
&lt;/div&gt;
&lt;p&gt;&lt;strong&gt;AI-powered carbon footprint analyzer using Google Gemini&lt;/strong&gt;&lt;/p&gt;
&lt;p&gt;Built for the &lt;a href="https://dev.to/challenges/weekend-2026-04-16" rel="nofollow"&gt;DEV Weekend Challenge: Earth Day Edition&lt;/a&gt;&lt;/p&gt;
&lt;div class="markdown-heading"&gt;
&lt;h2 class="heading-element"&gt;Live Demo&lt;/h2&gt;
&lt;/div&gt;
&lt;p&gt;🔗 &lt;a href="https://ecomind-gamma.vercel.app" rel="nofollow noopener noreferrer"&gt;https://ecomind-gamma.vercel.app&lt;/a&gt;&lt;/p&gt;
&lt;div class="markdown-heading"&gt;
&lt;h2 class="heading-element"&gt;What It Does&lt;/h2&gt;
&lt;/div&gt;
&lt;p&gt;EcoMind lets you describe your daily lifestyle in plain language and returns a personalised carbon footprint analysis powered by the Google Gemini API. No rigid forms — just describe your commute, diet, home heating, and travel habits, and get back:&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;
&lt;strong&gt;Estimated annual CO₂&lt;/strong&gt; in kg&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Sustainability score&lt;/strong&gt; (1–10)&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Top 5 reduction actions&lt;/strong&gt; ranked by impact with CO₂ savings per year&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Regional context&lt;/strong&gt; — advice tailored to your location (Finland-aware by default)&lt;/li&gt;
&lt;/ul&gt;
&lt;div class="markdown-heading"&gt;
&lt;h2 class="heading-element"&gt;Tech Stack&lt;/h2&gt;

&lt;/div&gt;
&lt;ul&gt;
&lt;li&gt;
&lt;strong&gt;Next.js 14&lt;/strong&gt; (App Router) + TypeScript&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Google Gemini API&lt;/strong&gt; (&lt;code&gt;gemini-1.5-flash&lt;/code&gt;) via &lt;code&gt;@google/generative-ai&lt;/code&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Structured JSON output&lt;/strong&gt; using Gemini's &lt;code&gt;responseSchema&lt;/code&gt; feature&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Tailwind CSS&lt;/strong&gt; for styling&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Vercel&lt;/strong&gt; for deployment&lt;/li&gt;
&lt;/ul&gt;
&lt;div class="markdown-heading"&gt;
&lt;h2 class="heading-element"&gt;Getting Started&lt;/h2&gt;

&lt;/div&gt;
&lt;div class="highlight highlight-source-shell notranslate position-relative overflow-auto js-code-highlight"&gt;
&lt;pre&gt;&lt;span class="pl-c"&gt;&lt;span class="pl-c"&gt;#&lt;/span&gt; Clone the repo&lt;/span&gt;
git clone https://github.com/mzunain/ecomind.git
&lt;span class="pl-c1"&gt;cd&lt;/span&gt; ecomind
&lt;span class="pl-c"&gt;&lt;span class="pl-c"&gt;#&lt;/span&gt; Install dependencies&lt;/span&gt;
npm install

&lt;span class="pl-c"&gt;&lt;span class="pl-c"&gt;#&lt;/span&gt; Add your Gemini API key&lt;/span&gt;
&lt;span class="pl-c1"&gt;echo&lt;/span&gt; &lt;span class="pl-s"&gt;&lt;span class="pl-pds"&gt;'&lt;/span&gt;&lt;/span&gt;&lt;/pre&gt;…
&lt;/div&gt;
&lt;/div&gt;
  &lt;/div&gt;
  &lt;div class="gh-btn-container"&gt;&lt;a class="gh-btn" href="https://github.com/mzunain/ecomind" rel="noopener noreferrer"&gt;View on GitHub&lt;/a&gt;&lt;/div&gt;
&lt;/div&gt;


&lt;h2&gt;
  
  
  How I Built It
&lt;/h2&gt;

&lt;p&gt;&lt;strong&gt;Tech Stack:&lt;/strong&gt;&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;
&lt;strong&gt;Next.js 14&lt;/strong&gt; (App Router) + TypeScript&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Google Gemini API&lt;/strong&gt; (&lt;code&gt;gemini-1.5-flash&lt;/code&gt;) via &lt;code&gt;@google/generative-ai&lt;/code&gt; SDK&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Tailwind CSS&lt;/strong&gt; for styling&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Vercel&lt;/strong&gt; for deployment&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;&lt;strong&gt;How Gemini is integrated:&lt;/strong&gt;&lt;/p&gt;

&lt;p&gt;The core of EcoMind is a single API route (&lt;code&gt;/api/analyze&lt;/code&gt;) that takes a user's lifestyle description and feeds it to Gemini with a carefully engineered system prompt:&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight typescript"&gt;&lt;code&gt;&lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;model&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nx"&gt;genAI&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;getGenerativeModel&lt;/span&gt;&lt;span class="p"&gt;({&lt;/span&gt;
  &lt;span class="na"&gt;model&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="dl"&gt;"&lt;/span&gt;&lt;span class="s2"&gt;gemini-1.5-flash&lt;/span&gt;&lt;span class="dl"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
  &lt;span class="na"&gt;generationConfig&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
    &lt;span class="na"&gt;responseMimeType&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="dl"&gt;"&lt;/span&gt;&lt;span class="s2"&gt;application/json&lt;/span&gt;&lt;span class="dl"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="na"&gt;responseSchema&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;ecoAnalysisSchema&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
  &lt;span class="p"&gt;},&lt;/span&gt;
&lt;span class="p"&gt;});&lt;/span&gt;

&lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;prompt&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="s2"&gt;`
You are an environmental impact expert. Analyze the following lifestyle description
and return a structured carbon footprint assessment.
Lifestyle: &lt;/span&gt;&lt;span class="p"&gt;${&lt;/span&gt;&lt;span class="nx"&gt;userInput&lt;/span&gt;&lt;span class="p"&gt;}&lt;/span&gt;&lt;span class="s2"&gt;
Provide: estimated annual CO2 in kg, top 5 reduction actions with CO2 savings,
and an overall sustainability score (1-10).
`&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;

&lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;result&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nx"&gt;model&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;generateContent&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;prompt&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;I used Gemini's &lt;strong&gt;structured output / JSON schema enforcement&lt;/strong&gt; feature to guarantee parseable responses — this was the most interesting technical decision. Instead of prompt-engineering my way to consistent JSON, I defined a Zod-like schema and passed it as &lt;code&gt;responseSchema&lt;/code&gt;. This made the frontend rendering completely reliable.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Structured output schema:&lt;/strong&gt;&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight typescript"&gt;&lt;code&gt;&lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;ecoAnalysisSchema&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="na"&gt;type&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="dl"&gt;"&lt;/span&gt;&lt;span class="s2"&gt;object&lt;/span&gt;&lt;span class="dl"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
  &lt;span class="na"&gt;properties&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
    &lt;span class="na"&gt;annualCO2Kg&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt; &lt;span class="na"&gt;type&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="dl"&gt;"&lt;/span&gt;&lt;span class="s2"&gt;number&lt;/span&gt;&lt;span class="dl"&gt;"&lt;/span&gt; &lt;span class="p"&gt;},&lt;/span&gt;
    &lt;span class="na"&gt;sustainabilityScore&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt; &lt;span class="na"&gt;type&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="dl"&gt;"&lt;/span&gt;&lt;span class="s2"&gt;number&lt;/span&gt;&lt;span class="dl"&gt;"&lt;/span&gt; &lt;span class="p"&gt;},&lt;/span&gt;
    &lt;span class="na"&gt;topActions&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
      &lt;span class="na"&gt;type&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="dl"&gt;"&lt;/span&gt;&lt;span class="s2"&gt;array&lt;/span&gt;&lt;span class="dl"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
      &lt;span class="na"&gt;items&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
        &lt;span class="na"&gt;type&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="dl"&gt;"&lt;/span&gt;&lt;span class="s2"&gt;object&lt;/span&gt;&lt;span class="dl"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
        &lt;span class="na"&gt;properties&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
          &lt;span class="na"&gt;action&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt; &lt;span class="na"&gt;type&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="dl"&gt;"&lt;/span&gt;&lt;span class="s2"&gt;string&lt;/span&gt;&lt;span class="dl"&gt;"&lt;/span&gt; &lt;span class="p"&gt;},&lt;/span&gt;
          &lt;span class="na"&gt;annualSavingKg&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt; &lt;span class="na"&gt;type&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="dl"&gt;"&lt;/span&gt;&lt;span class="s2"&gt;number&lt;/span&gt;&lt;span class="dl"&gt;"&lt;/span&gt; &lt;span class="p"&gt;},&lt;/span&gt;
          &lt;span class="na"&gt;difficulty&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt; &lt;span class="na"&gt;type&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="dl"&gt;"&lt;/span&gt;&lt;span class="s2"&gt;string&lt;/span&gt;&lt;span class="dl"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="na"&gt;enum&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="dl"&gt;"&lt;/span&gt;&lt;span class="s2"&gt;easy&lt;/span&gt;&lt;span class="dl"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="dl"&gt;"&lt;/span&gt;&lt;span class="s2"&gt;medium&lt;/span&gt;&lt;span class="dl"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="dl"&gt;"&lt;/span&gt;&lt;span class="s2"&gt;hard&lt;/span&gt;&lt;span class="dl"&gt;"&lt;/span&gt;&lt;span class="p"&gt;]&lt;/span&gt; &lt;span class="p"&gt;},&lt;/span&gt;
          &lt;span class="na"&gt;category&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt; &lt;span class="na"&gt;type&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="dl"&gt;"&lt;/span&gt;&lt;span class="s2"&gt;string&lt;/span&gt;&lt;span class="dl"&gt;"&lt;/span&gt; &lt;span class="p"&gt;},&lt;/span&gt;
        &lt;span class="p"&gt;},&lt;/span&gt;
      &lt;span class="p"&gt;},&lt;/span&gt;
    &lt;span class="p"&gt;},&lt;/span&gt;
    &lt;span class="na"&gt;summary&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt; &lt;span class="na"&gt;type&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="dl"&gt;"&lt;/span&gt;&lt;span class="s2"&gt;string&lt;/span&gt;&lt;span class="dl"&gt;"&lt;/span&gt; &lt;span class="p"&gt;},&lt;/span&gt;
  &lt;span class="p"&gt;},&lt;/span&gt;
&lt;span class="p"&gt;};&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;&lt;strong&gt;Why this matters for Earth Day:&lt;/strong&gt;&lt;br&gt;
Carbon literacy is one of the biggest blockers to individual climate action. People don't act because the data feels distant. By making it conversational and immediate — type a few sentences about your week and get back a personalized plan — EcoMind lowers that barrier significantly.&lt;/p&gt;

&lt;p&gt;I also added a Finland-specific context layer because I'm based in Turku and the sustainability advice for someone in the Nordics (excellent public transit, clean energy grid, cold climate heating costs) is genuinely different from generic global advice.&lt;/p&gt;

&lt;h2&gt;
  
  
  Prize Categories
&lt;/h2&gt;

&lt;p&gt;&lt;strong&gt;Best Use of Google Gemini&lt;/strong&gt; — Gemini is the core engine of EcoMind. The project uses:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;
&lt;code&gt;gemini-1.5-flash&lt;/code&gt; for fast, cost-effective inference&lt;/li&gt;
&lt;li&gt;Structured JSON output with response schema for reliable parsing&lt;/li&gt;
&lt;li&gt;A multi-part system prompt that grounds Gemini in environmental science context&lt;/li&gt;
&lt;li&gt;Dynamic prompt construction based on user input&lt;/li&gt;
&lt;/ul&gt;

</description>
      <category>devchallenge</category>
      <category>weekendchallenge</category>
    </item>
    <item>
      <title>5 Next.js Performance Patterns That Cut Our Load Time by 60%</title>
      <dc:creator>Muhammad Zulqarnain</dc:creator>
      <pubDate>Mon, 13 Apr 2026 06:00:00 +0000</pubDate>
      <link>https://dev.to/mzunain/5-nextjs-performance-patterns-that-cut-our-load-time-by-60-11gk</link>
      <guid>https://dev.to/mzunain/5-nextjs-performance-patterns-that-cut-our-load-time-by-60-11gk</guid>
      <description>&lt;p&gt;Performance is a feature.&lt;/p&gt;

&lt;p&gt;At Quran.com, we serve 50 million users across 200+ countries on varying networks. A 1-second delay for users on slow connections meant people gave up. Every millisecond mattered.&lt;/p&gt;

&lt;p&gt;Here are 5 patterns we implemented that cut load time by 60%. With code.&lt;/p&gt;

&lt;h2&gt;
  
  
  Pattern 1: Font Subsetting
&lt;/h2&gt;

&lt;p&gt;&lt;strong&gt;The Problem:&lt;/strong&gt; We served the full Amiri Arabic font (600KB) on every page. Users on 3G in Southeast Asia waited 4 seconds for the first render.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;The Pattern:&lt;/strong&gt; Subset fonts to only the Unicode ranges actually used on each page.&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight javascript"&gt;&lt;code&gt;&lt;span class="p"&gt;@&lt;/span&gt;&lt;span class="nd"&gt;font&lt;/span&gt;&lt;span class="o"&gt;-&lt;/span&gt;&lt;span class="nx"&gt;face&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="nx"&gt;font&lt;/span&gt;&lt;span class="o"&gt;-&lt;/span&gt;&lt;span class="nx"&gt;family&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;Amiri&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
  &lt;span class="nl"&gt;src&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nf"&gt;url&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;/fonts/amiri-subset.woff2&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="nf"&gt;format&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;woff2&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
  &lt;span class="nx"&gt;font&lt;/span&gt;&lt;span class="o"&gt;-&lt;/span&gt;&lt;span class="nx"&gt;display&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;swap&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
  &lt;span class="nx"&gt;unicode&lt;/span&gt;&lt;span class="o"&gt;-&lt;/span&gt;&lt;span class="nx"&gt;range&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;U&lt;/span&gt;&lt;span class="o"&gt;+&lt;/span&gt;&lt;span class="mi"&gt;0600&lt;/span&gt;&lt;span class="o"&gt;-&lt;/span&gt;&lt;span class="mi"&gt;06&lt;/span&gt;&lt;span class="nx"&gt;FF&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;U&lt;/span&gt;&lt;span class="o"&gt;+&lt;/span&gt;&lt;span class="mi"&gt;200&lt;/span&gt;&lt;span class="nx"&gt;C&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;U&lt;/span&gt;&lt;span class="o"&gt;+&lt;/span&gt;&lt;span class="mi"&gt;200&lt;/span&gt;&lt;span class="nx"&gt;D&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;&lt;strong&gt;The Result:&lt;/strong&gt; Font size: 600KB → 48KB. Load time improvement: 40% on mobile.&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight shell"&gt;&lt;code&gt;pyftsubset Amiri-Regular.ttf &lt;span class="nt"&gt;--unicodes&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;0600-06FF,200C,200D &lt;span class="nt"&gt;--output-file&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;amiri-subset.woff2
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;h2&gt;
  
  
  Pattern 2: Incremental Static Regeneration (ISR)
&lt;/h2&gt;

&lt;p&gt;&lt;strong&gt;The Problem:&lt;/strong&gt; 114 surahs with 200+ translations. Full site generation: 45 minutes. Deploys blocked.&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight typescript"&gt;&lt;code&gt;&lt;span class="k"&gt;export&lt;/span&gt; &lt;span class="k"&gt;async&lt;/span&gt; &lt;span class="kd"&gt;function&lt;/span&gt; &lt;span class="nf"&gt;getStaticProps&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;context&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;data&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nf"&gt;fetchSurahData&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;context&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;params&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;surah&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
  &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
    &lt;span class="na"&gt;props&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt; &lt;span class="nx"&gt;data&lt;/span&gt; &lt;span class="p"&gt;},&lt;/span&gt;
    &lt;span class="na"&gt;revalidate&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="mi"&gt;3600&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
  &lt;span class="p"&gt;};&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;

&lt;span class="k"&gt;export&lt;/span&gt; &lt;span class="k"&gt;async&lt;/span&gt; &lt;span class="kd"&gt;function&lt;/span&gt; &lt;span class="nf"&gt;getStaticPaths&lt;/span&gt;&lt;span class="p"&gt;()&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;paths&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nb"&gt;Array&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="k"&gt;from&lt;/span&gt;&lt;span class="p"&gt;({&lt;/span&gt; &lt;span class="na"&gt;length&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="mi"&gt;114&lt;/span&gt; &lt;span class="p"&gt;},&lt;/span&gt; &lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;_&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;i&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="p"&gt;({&lt;/span&gt;
    &lt;span class="na"&gt;params&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt; &lt;span class="na"&gt;surah&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nc"&gt;String&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;i&lt;/span&gt; &lt;span class="o"&gt;+&lt;/span&gt; &lt;span class="mi"&gt;1&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;},&lt;/span&gt;
  &lt;span class="p"&gt;}));&lt;/span&gt;
  &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt; &lt;span class="nx"&gt;paths&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="na"&gt;fallback&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;blocking&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt; &lt;span class="p"&gt;};&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;On-demand revalidation:&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight typescript"&gt;&lt;code&gt;&lt;span class="k"&gt;export&lt;/span&gt; &lt;span class="k"&gt;default&lt;/span&gt; &lt;span class="k"&gt;async&lt;/span&gt; &lt;span class="kd"&gt;function&lt;/span&gt; &lt;span class="nf"&gt;handler&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;req&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;res&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="k"&gt;if &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;req&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;query&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;secret&lt;/span&gt; &lt;span class="o"&gt;!==&lt;/span&gt; &lt;span class="nx"&gt;process&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;env&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;REVALIDATE_SECRET&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
    &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="nx"&gt;res&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;status&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="mi"&gt;401&lt;/span&gt;&lt;span class="p"&gt;).&lt;/span&gt;&lt;span class="nf"&gt;json&lt;/span&gt;&lt;span class="p"&gt;({&lt;/span&gt; &lt;span class="na"&gt;message&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;Invalid token&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt; &lt;span class="p"&gt;});&lt;/span&gt;
  &lt;span class="p"&gt;}&lt;/span&gt;
  &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nx"&gt;res&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;revalidate&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="s2"&gt;`/surah/&lt;/span&gt;&lt;span class="p"&gt;${&lt;/span&gt;&lt;span class="nx"&gt;req&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;query&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;surah&lt;/span&gt;&lt;span class="p"&gt;}&lt;/span&gt;&lt;span class="s2"&gt;`&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
  &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="nx"&gt;res&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;json&lt;/span&gt;&lt;span class="p"&gt;({&lt;/span&gt; &lt;span class="na"&gt;revalidated&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kc"&gt;true&lt;/span&gt; &lt;span class="p"&gt;});&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;&lt;strong&gt;The Result:&lt;/strong&gt; Pages cached on CDN, served in &amp;lt;100ms globally.&lt;/p&gt;

&lt;h2&gt;
  
  
  Pattern 3: Dynamic Imports for Heavy Components
&lt;/h2&gt;

&lt;p&gt;&lt;strong&gt;The Problem:&lt;/strong&gt; Quran player: 350KB bundled. Every user downloads it even if they never use audio.&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight typescript"&gt;&lt;code&gt;&lt;span class="k"&gt;import&lt;/span&gt; &lt;span class="nx"&gt;dynamic&lt;/span&gt; &lt;span class="k"&gt;from&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;next/dynamic&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;

&lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;QuranPlayer&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nf"&gt;dynamic&lt;/span&gt;&lt;span class="p"&gt;(()&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="k"&gt;import&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;@/components/QuranPlayer&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;),&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="na"&gt;loading&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;()&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="o"&gt;&amp;lt;&lt;/span&gt;&lt;span class="nx"&gt;div&lt;/span&gt;&lt;span class="o"&gt;&amp;gt;&lt;/span&gt;&lt;span class="nx"&gt;Loading&lt;/span&gt; &lt;span class="nx"&gt;player&lt;/span&gt;&lt;span class="p"&gt;...&lt;/span&gt;&lt;span class="o"&gt;&amp;lt;&lt;/span&gt;&lt;span class="sr"&gt;/div&amp;gt;&lt;/span&gt;&lt;span class="err"&gt;,
&lt;/span&gt;  &lt;span class="na"&gt;ssr&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kc"&gt;false&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
&lt;span class="p"&gt;});&lt;/span&gt;

&lt;span class="k"&gt;export&lt;/span&gt; &lt;span class="k"&gt;default&lt;/span&gt; &lt;span class="kd"&gt;function&lt;/span&gt; &lt;span class="nf"&gt;SurahPage&lt;/span&gt;&lt;span class="p"&gt;({&lt;/span&gt; &lt;span class="nx"&gt;data&lt;/span&gt; &lt;span class="p"&gt;})&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="nx"&gt;showPlayer&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;setShowPlayer&lt;/span&gt;&lt;span class="p"&gt;]&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nf"&gt;useState&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="kc"&gt;false&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
  &lt;span class="k"&gt;return &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
    &lt;span class="o"&gt;&amp;lt;&lt;/span&gt;&lt;span class="nx"&gt;div&lt;/span&gt;&lt;span class="o"&gt;&amp;gt;&lt;/span&gt;
      &lt;span class="o"&gt;&amp;lt;&lt;/span&gt;&lt;span class="nx"&gt;SurahText&lt;/span&gt; &lt;span class="nx"&gt;data&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="p"&gt;{&lt;/span&gt;&lt;span class="nx"&gt;data&lt;/span&gt;&lt;span class="p"&gt;}&lt;/span&gt; &lt;span class="sr"&gt;/&lt;/span&gt;&lt;span class="err"&gt;&amp;gt;
&lt;/span&gt;      &lt;span class="p"&gt;{&lt;/span&gt;&lt;span class="nx"&gt;showPlayer&lt;/span&gt; &lt;span class="o"&gt;&amp;amp;&amp;amp;&lt;/span&gt; &lt;span class="o"&gt;&amp;lt;&lt;/span&gt;&lt;span class="nx"&gt;QuranPlayer&lt;/span&gt; &lt;span class="nx"&gt;surah&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="p"&gt;{&lt;/span&gt;&lt;span class="nx"&gt;data&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;surah&lt;/span&gt;&lt;span class="p"&gt;}&lt;/span&gt; &lt;span class="sr"&gt;/&amp;gt;&lt;/span&gt;&lt;span class="err"&gt;}
&lt;/span&gt;      &lt;span class="o"&gt;&amp;lt;&lt;/span&gt;&lt;span class="nx"&gt;button&lt;/span&gt; &lt;span class="nx"&gt;onClick&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="p"&gt;{()&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="nf"&gt;setShowPlayer&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="kc"&gt;true&lt;/span&gt;&lt;span class="p"&gt;)}&lt;/span&gt;&lt;span class="o"&gt;&amp;gt;&lt;/span&gt;&lt;span class="nx"&gt;Play&lt;/span&gt; &lt;span class="nx"&gt;Audio&lt;/span&gt;&lt;span class="o"&gt;&amp;lt;&lt;/span&gt;&lt;span class="sr"&gt;/button&lt;/span&gt;&lt;span class="err"&gt;&amp;gt;
&lt;/span&gt;    &lt;span class="o"&gt;&amp;lt;&lt;/span&gt;&lt;span class="sr"&gt;/div&lt;/span&gt;&lt;span class="err"&gt;&amp;gt;
&lt;/span&gt;  &lt;span class="p"&gt;);&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;&lt;strong&gt;Result:&lt;/strong&gt; 68% of users never click play. They saved the download entirely.&lt;/p&gt;

&lt;h2&gt;
  
  
  Pattern 4: Image Optimization Pipeline
&lt;/h2&gt;

&lt;p&gt;&lt;strong&gt;The Problem:&lt;/strong&gt; Users upload 3-5MB JPEGs displayed at 120x120px.&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight typescript"&gt;&lt;code&gt;&lt;span class="k"&gt;import&lt;/span&gt; &lt;span class="nx"&gt;sharp&lt;/span&gt; &lt;span class="k"&gt;from&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;sharp&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;

&lt;span class="k"&gt;export&lt;/span&gt; &lt;span class="k"&gt;default&lt;/span&gt; &lt;span class="k"&gt;async&lt;/span&gt; &lt;span class="kd"&gt;function&lt;/span&gt; &lt;span class="nf"&gt;handler&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;req&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;res&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;buffer&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nx"&gt;Buffer&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="k"&gt;from&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;req&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;body&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;webpBuffer&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nf"&gt;sharp&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;buffer&lt;/span&gt;&lt;span class="p"&gt;).&lt;/span&gt;&lt;span class="nf"&gt;resize&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="mi"&gt;400&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="mi"&gt;400&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt; &lt;span class="na"&gt;fit&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;cover&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt; &lt;span class="p"&gt;}).&lt;/span&gt;&lt;span class="nf"&gt;webp&lt;/span&gt;&lt;span class="p"&gt;({&lt;/span&gt; &lt;span class="na"&gt;quality&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="mi"&gt;80&lt;/span&gt; &lt;span class="p"&gt;}).&lt;/span&gt;&lt;span class="nf"&gt;toBuffer&lt;/span&gt;&lt;span class="p"&gt;();&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;pngBuffer&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nf"&gt;sharp&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;buffer&lt;/span&gt;&lt;span class="p"&gt;).&lt;/span&gt;&lt;span class="nf"&gt;resize&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="mi"&gt;400&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="mi"&gt;400&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt; &lt;span class="na"&gt;fit&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;cover&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt; &lt;span class="p"&gt;}).&lt;/span&gt;&lt;span class="nf"&gt;png&lt;/span&gt;&lt;span class="p"&gt;({&lt;/span&gt; &lt;span class="na"&gt;quality&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="mi"&gt;80&lt;/span&gt; &lt;span class="p"&gt;}).&lt;/span&gt;&lt;span class="nf"&gt;toBuffer&lt;/span&gt;&lt;span class="p"&gt;();&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;webpUrl&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nf"&gt;uploadToS3&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;webpBuffer&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;image.webp&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;pngUrl&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nf"&gt;uploadToS3&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;pngBuffer&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;image.png&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
  &lt;span class="nx"&gt;res&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;json&lt;/span&gt;&lt;span class="p"&gt;({&lt;/span&gt; &lt;span class="nx"&gt;webpUrl&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;pngUrl&lt;/span&gt; &lt;span class="p"&gt;});&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;





&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight jsx"&gt;&lt;code&gt;&lt;span class="p"&gt;&amp;lt;&lt;/span&gt;&lt;span class="nt"&gt;picture&lt;/span&gt;&lt;span class="p"&gt;&amp;gt;&lt;/span&gt;
  &lt;span class="p"&gt;&amp;lt;&lt;/span&gt;&lt;span class="nt"&gt;source&lt;/span&gt; &lt;span class="na"&gt;srcSet&lt;/span&gt;&lt;span class="p"&gt;=&lt;/span&gt;&lt;span class="si"&gt;{&lt;/span&gt;&lt;span class="nx"&gt;image&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;webpUrl&lt;/span&gt;&lt;span class="si"&gt;}&lt;/span&gt; &lt;span class="na"&gt;type&lt;/span&gt;&lt;span class="p"&gt;=&lt;/span&gt;&lt;span class="s"&gt;"image/webp"&lt;/span&gt; &lt;span class="p"&gt;/&amp;gt;&lt;/span&gt;
  &lt;span class="p"&gt;&amp;lt;&lt;/span&gt;&lt;span class="nt"&gt;img&lt;/span&gt; &lt;span class="na"&gt;src&lt;/span&gt;&lt;span class="p"&gt;=&lt;/span&gt;&lt;span class="si"&gt;{&lt;/span&gt;&lt;span class="nx"&gt;image&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;pngUrl&lt;/span&gt;&lt;span class="si"&gt;}&lt;/span&gt; &lt;span class="na"&gt;alt&lt;/span&gt;&lt;span class="p"&gt;=&lt;/span&gt;&lt;span class="s"&gt;"User avatar"&lt;/span&gt; &lt;span class="na"&gt;width&lt;/span&gt;&lt;span class="p"&gt;=&lt;/span&gt;&lt;span class="si"&gt;{&lt;/span&gt;&lt;span class="mi"&gt;120&lt;/span&gt;&lt;span class="si"&gt;}&lt;/span&gt; &lt;span class="na"&gt;height&lt;/span&gt;&lt;span class="p"&gt;=&lt;/span&gt;&lt;span class="si"&gt;{&lt;/span&gt;&lt;span class="mi"&gt;120&lt;/span&gt;&lt;span class="si"&gt;}&lt;/span&gt; &lt;span class="p"&gt;/&amp;gt;&lt;/span&gt;
&lt;span class="p"&gt;&amp;lt;/&lt;/span&gt;&lt;span class="nt"&gt;picture&lt;/span&gt;&lt;span class="p"&gt;&amp;gt;&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;&lt;strong&gt;Result:&lt;/strong&gt; Images: 3-5MB → 45-80KB.&lt;/p&gt;

&lt;h2&gt;
  
  
  Pattern 5: Intelligent Prefetching
&lt;/h2&gt;



&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight typescript"&gt;&lt;code&gt;&lt;span class="k"&gt;export&lt;/span&gt; &lt;span class="kd"&gt;function&lt;/span&gt; &lt;span class="nf"&gt;SurahNavigation&lt;/span&gt;&lt;span class="p"&gt;({&lt;/span&gt; &lt;span class="nx"&gt;currentSurah&lt;/span&gt; &lt;span class="p"&gt;})&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;router&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nf"&gt;useRouter&lt;/span&gt;&lt;span class="p"&gt;();&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;handlePrefetch&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="p"&gt;()&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="nx"&gt;router&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;prefetch&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="s2"&gt;`/surah/&lt;/span&gt;&lt;span class="p"&gt;${&lt;/span&gt;&lt;span class="nx"&gt;currentSurah&lt;/span&gt; &lt;span class="o"&gt;+&lt;/span&gt; &lt;span class="mi"&gt;1&lt;/span&gt;&lt;span class="p"&gt;}&lt;/span&gt;&lt;span class="s2"&gt;`&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
  &lt;span class="k"&gt;return &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
    &lt;span class="o"&gt;&amp;lt;&lt;/span&gt;&lt;span class="nx"&gt;Link&lt;/span&gt; &lt;span class="nx"&gt;href&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="p"&gt;{&lt;/span&gt;&lt;span class="s2"&gt;`/surah/&lt;/span&gt;&lt;span class="p"&gt;${&lt;/span&gt;&lt;span class="nx"&gt;currentSurah&lt;/span&gt; &lt;span class="o"&gt;+&lt;/span&gt; &lt;span class="mi"&gt;1&lt;/span&gt;&lt;span class="p"&gt;}&lt;/span&gt;&lt;span class="s2"&gt;`&lt;/span&gt;&lt;span class="p"&gt;}&lt;/span&gt; &lt;span class="nx"&gt;onMouseEnter&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="p"&gt;{&lt;/span&gt;&lt;span class="nx"&gt;handlePrefetch&lt;/span&gt;&lt;span class="p"&gt;}&lt;/span&gt; &lt;span class="nx"&gt;onTouchStart&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="p"&gt;{&lt;/span&gt;&lt;span class="nx"&gt;handlePrefetch&lt;/span&gt;&lt;span class="p"&gt;}&lt;/span&gt;&lt;span class="o"&gt;&amp;gt;&lt;/span&gt;
      &lt;span class="nx"&gt;Next&lt;/span&gt; &lt;span class="nx"&gt;Surah&lt;/span&gt;
    &lt;span class="o"&gt;&amp;lt;&lt;/span&gt;&lt;span class="sr"&gt;/Link&lt;/span&gt;&lt;span class="err"&gt;&amp;gt;
&lt;/span&gt;  &lt;span class="p"&gt;);&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;&lt;strong&gt;Result:&lt;/strong&gt; Users feel instant navigation. Pages load in &amp;lt;200ms instead of 1-2s.&lt;/p&gt;

&lt;h2&gt;
  
  
  Before/After
&lt;/h2&gt;

&lt;div class="table-wrapper-paragraph"&gt;&lt;table&gt;
&lt;thead&gt;
&lt;tr&gt;
&lt;th&gt;Metric&lt;/th&gt;
&lt;th&gt;Before&lt;/th&gt;
&lt;th&gt;After&lt;/th&gt;
&lt;th&gt;Improvement&lt;/th&gt;
&lt;/tr&gt;
&lt;/thead&gt;
&lt;tbody&gt;
&lt;tr&gt;
&lt;td&gt;First Contentful Paint&lt;/td&gt;
&lt;td&gt;3.2s&lt;/td&gt;
&lt;td&gt;1.8s&lt;/td&gt;
&lt;td&gt;44%&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Largest Contentful Paint&lt;/td&gt;
&lt;td&gt;4.8s&lt;/td&gt;
&lt;td&gt;1.9s&lt;/td&gt;
&lt;td&gt;60%&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Time to Interactive&lt;/td&gt;
&lt;td&gt;6.2s&lt;/td&gt;
&lt;td&gt;2.4s&lt;/td&gt;
&lt;td&gt;61%&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Page Size&lt;/td&gt;
&lt;td&gt;2.8MB&lt;/td&gt;
&lt;td&gt;1.1MB&lt;/td&gt;
&lt;td&gt;61%&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Lighthouse Score&lt;/td&gt;
&lt;td&gt;42&lt;/td&gt;
&lt;td&gt;86&lt;/td&gt;
&lt;td&gt;+44 pts&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;
&lt;/table&gt;&lt;/div&gt;

&lt;p&gt;Performance work isn't about micro-optimizations. It's about architectural decisions that compound.&lt;/p&gt;

&lt;p&gt;Find me at &lt;a href="https://zunain.com" rel="noopener noreferrer"&gt;zunain.com&lt;/a&gt; if you're optimizing at scale.&lt;/p&gt;

</description>
      <category>performance</category>
      <category>webdev</category>
      <category>nextjs</category>
      <category>javascript</category>
    </item>
    <item>
      <title>How We Scaled Quran.com to 50M Monthly Users: Architecture Lessons From the Inside</title>
      <dc:creator>Muhammad Zulqarnain</dc:creator>
      <pubDate>Sun, 05 Apr 2026 23:21:10 +0000</pubDate>
      <link>https://dev.to/mzunain/how-we-scaled-qurancom-to-50m-monthly-users-architecture-lessons-from-the-inside-i33</link>
      <guid>https://dev.to/mzunain/how-we-scaled-qurancom-to-50m-monthly-users-architecture-lessons-from-the-inside-i33</guid>
      <description>&lt;h1&gt;
  
  
  How We Scaled Quran.com to 50M Monthly Users: Architecture Lessons From the Inside
&lt;/h1&gt;

&lt;p&gt;Scaling a web application to serve 50 million monthly users is not just a technical challenge — it is an architectural one. At Quran.com, one of the world's most visited Islamic resources, I led the frontend engineering effort that took the platform from struggling under load to reliably serving tens of millions of users globally.&lt;/p&gt;

&lt;p&gt;Here's what I learned.&lt;/p&gt;

&lt;h2&gt;
  
  
  Start With the Right Foundation: Next.js and ISR
&lt;/h2&gt;

&lt;p&gt;When I joined the team, Quran.com was running a single-page React app with client-side rendering. Every page load hit the server, every user got the same slow first paint. The fix was a full migration to Next.js with Incremental Static Regeneration (ISR).&lt;/p&gt;

&lt;p&gt;ISR lets you pre-render pages at build time and revalidate them in the background on a schedule. For content like Surah pages — which rarely change — we set revalidation to 3600 seconds (1 hour). This meant most users hit pre-rendered HTML instantly instead of waiting for server execution.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Result:&lt;/strong&gt; Time to first byte dropped by over 70% for users in Asia and the Middle East, where our traffic is heaviest.&lt;/p&gt;

&lt;h2&gt;
  
  
  Edge Caching: The Second Layer
&lt;/h2&gt;

&lt;p&gt;ISR alone was not enough. We layered Cloudflare in front of Vercel, configured to cache ISR-generated pages at the CDN edge. Users in Riyadh, Karachi, Jakarta — wherever they are — now get pages served from a node close to them.&lt;/p&gt;

&lt;p&gt;The configuration that made this work:&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight javascript"&gt;&lt;code&gt;&lt;span class="c1"&gt;// next.config.js&lt;/span&gt;
&lt;span class="nx"&gt;module&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;exports&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="k"&gt;async&lt;/span&gt; &lt;span class="nf"&gt;headers&lt;/span&gt;&lt;span class="p"&gt;()&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
    &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="p"&gt;[&lt;/span&gt;
      &lt;span class="p"&gt;{&lt;/span&gt;
        &lt;span class="na"&gt;source&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;/surah/:id*&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
        &lt;span class="na"&gt;headers&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;[&lt;/span&gt;
          &lt;span class="p"&gt;{&lt;/span&gt;
            &lt;span class="na"&gt;key&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;Cache-Control&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
            &lt;span class="na"&gt;value&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;public, s-maxage=3600, stale-while-revalidate=86400&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;
          &lt;span class="p"&gt;}&lt;/span&gt;
        &lt;span class="p"&gt;]&lt;/span&gt;
      &lt;span class="p"&gt;}&lt;/span&gt;
    &lt;span class="p"&gt;]&lt;/span&gt;
  &lt;span class="p"&gt;}&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;The &lt;code&gt;stale-while-revalidate&lt;/code&gt; header is the key. It tells the CDN: serve the cached version immediately, and revalidate in the background. Users never wait.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Result:&lt;/strong&gt; CDN cache hit rate went from ~40% to 92%.&lt;/p&gt;

&lt;h2&gt;
  
  
  Code Splitting and Bundle Optimization
&lt;/h2&gt;

&lt;p&gt;A 50M-user app cannot afford bloated JavaScript bundles. We audited every dependency and applied aggressive code splitting:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Dynamic imports for heavy components (audio player, translation viewer)&lt;/li&gt;
&lt;li&gt;Route-based splitting via Next.js dynamic routing&lt;/li&gt;
&lt;li&gt;Moving user preferences (theme, language settings) to client-side state to avoid cache busting on every personalized request&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;&lt;strong&gt;Result:&lt;/strong&gt; Initial JS bundle size reduced by ~45%.&lt;/p&gt;

&lt;h2&gt;
  
  
  What Scaling Actually Teaches You
&lt;/h2&gt;

&lt;p&gt;The biggest lesson from working at this scale: &lt;strong&gt;the architecture decisions that seem like premature optimization are often the decisions that matter most.&lt;/strong&gt;&lt;/p&gt;

&lt;p&gt;ISR is not a performance trick — it is a scalability strategy. At 50 million users, every millisecond you shave off the critical path compounds. A 100ms improvement in TTFB means millions of users per day load faster, engage more, and return more often.&lt;/p&gt;

&lt;p&gt;If you are building a content-heavy Next.js application, do not wait until you have scale problems to think about ISR and edge caching. Build it right the first time.&lt;/p&gt;

&lt;h2&gt;
  
  
  The Stack at a Glance
&lt;/h2&gt;

&lt;ul&gt;
&lt;li&gt;
&lt;strong&gt;Framework:&lt;/strong&gt; Next.js 14 (App Router, ISR)&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;CDN:&lt;/strong&gt; Cloudflare&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Deployment:&lt;/strong&gt; Vercel&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;State:&lt;/strong&gt; React Query + Zustand&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Languages:&lt;/strong&gt; TypeScript throughout&lt;/li&gt;
&lt;/ul&gt;




&lt;p&gt;If you have questions about scaling Next.js in production or want to talk architecture, reach out at zunain.com.&lt;/p&gt;

</description>
      <category>nextjs</category>
      <category>webdev</category>
      <category>architecture</category>
      <category>performance</category>
    </item>
    <item>
      <title>Building a Production RAG Pipeline: Lessons from Real-World AI Apps</title>
      <dc:creator>Muhammad Zulqarnain</dc:creator>
      <pubDate>Sun, 05 Apr 2026 19:23:37 +0000</pubDate>
      <link>https://dev.to/mzunain/building-a-production-rag-pipeline-lessons-from-real-world-ai-apps-4nmj</link>
      <guid>https://dev.to/mzunain/building-a-production-rag-pipeline-lessons-from-real-world-ai-apps-4nmj</guid>
      <description>&lt;h1&gt;
  
  
  Building a Production RAG Pipeline: Lessons from Real-World AI Apps
&lt;/h1&gt;

&lt;p&gt;RAG (Retrieval-Augmented Generation) sounds simple on paper — embed your documents, store them in a vector DB, retrieve the relevant chunks, and pass them to an LLM. In practice, getting a RAG pipeline to production quality is significantly harder.&lt;/p&gt;

&lt;p&gt;Here's what I learned building RAG pipelines for real SaaS products.&lt;/p&gt;

&lt;h2&gt;
  
  
  The Naive Implementation
&lt;/h2&gt;

&lt;p&gt;Most tutorials show you this flow:&lt;/p&gt;

&lt;ol&gt;
&lt;li&gt;Chunk your documents&lt;/li&gt;
&lt;li&gt;Embed them with OpenAI&lt;/li&gt;
&lt;li&gt;Store in Pinecone&lt;/li&gt;
&lt;li&gt;Retrieve top-k chunks&lt;/li&gt;
&lt;li&gt;Pass to GPT-4&lt;/li&gt;
&lt;/ol&gt;

&lt;p&gt;This works fine in demos. It fails in production for a few key reasons.&lt;/p&gt;

&lt;h2&gt;
  
  
  Problem 1: Chunking Strategy Kills Retrieval Quality
&lt;/h2&gt;

&lt;p&gt;Naive fixed-size chunking (every 512 tokens) destroys semantic context. A paragraph about "authentication" gets split mid-sentence, and your retrieval picks up half-relevant chunks.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;What works:&lt;/strong&gt; Semantic chunking — split at natural sentence and paragraph boundaries, and use overlapping windows so context carries across chunk boundaries.&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight python"&gt;&lt;code&gt;&lt;span class="k"&gt;def&lt;/span&gt; &lt;span class="nf"&gt;semantic_chunk&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;text&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;chunk_size&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="mi"&gt;500&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;overlap&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="mi"&gt;50&lt;/span&gt;&lt;span class="p"&gt;):&lt;/span&gt;
    &lt;span class="n"&gt;sentences&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nf"&gt;split_into_sentences&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;text&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;
    &lt;span class="n"&gt;chunks&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="p"&gt;[]&lt;/span&gt;
    &lt;span class="n"&gt;current_chunk&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="p"&gt;[]&lt;/span&gt;
    &lt;span class="n"&gt;current_length&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="mi"&gt;0&lt;/span&gt;

    &lt;span class="k"&gt;for&lt;/span&gt; &lt;span class="n"&gt;sentence&lt;/span&gt; &lt;span class="ow"&gt;in&lt;/span&gt; &lt;span class="n"&gt;sentences&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;
        &lt;span class="k"&gt;if&lt;/span&gt; &lt;span class="n"&gt;current_length&lt;/span&gt; &lt;span class="o"&gt;+&lt;/span&gt; &lt;span class="nf"&gt;len&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;sentence&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="o"&gt;&amp;gt;&lt;/span&gt; &lt;span class="n"&gt;chunk_size&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;
            &lt;span class="n"&gt;chunks&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;append&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="sh"&gt;'&lt;/span&gt;&lt;span class="s"&gt; &lt;/span&gt;&lt;span class="sh"&gt;'&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;join&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;current_chunk&lt;/span&gt;&lt;span class="p"&gt;))&lt;/span&gt;
            &lt;span class="c1"&gt;# Keep last N tokens as overlap
&lt;/span&gt;            &lt;span class="n"&gt;current_chunk&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="n"&gt;current_chunk&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="o"&gt;-&lt;/span&gt;&lt;span class="n"&gt;overlap&lt;/span&gt;&lt;span class="p"&gt;:]&lt;/span&gt;
            &lt;span class="n"&gt;current_length&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nf"&gt;sum&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nf"&gt;len&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;s&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="k"&gt;for&lt;/span&gt; &lt;span class="n"&gt;s&lt;/span&gt; &lt;span class="ow"&gt;in&lt;/span&gt; &lt;span class="n"&gt;current_chunk&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;
        &lt;span class="n"&gt;current_chunk&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;append&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;sentence&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;
        &lt;span class="n"&gt;current_length&lt;/span&gt; &lt;span class="o"&gt;+=&lt;/span&gt; &lt;span class="nf"&gt;len&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;sentence&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;

    &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="n"&gt;chunks&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;h2&gt;
  
  
  Problem 2: Top-K Retrieval Without Re-ranking
&lt;/h2&gt;

&lt;p&gt;Retrieving the top 5 chunks by cosine similarity often misses the point. The 6th most similar chunk might be the most relevant one in context.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;What works:&lt;/strong&gt; Two-stage retrieval — retrieve top 20 by vector similarity, then re-rank with a cross-encoder model (or GPT-4 itself) to get the final top 5.&lt;/p&gt;

&lt;h2&gt;
  
  
  Problem 3: No Caching = Expensive at Scale
&lt;/h2&gt;

&lt;p&gt;Every query hitting your vector DB and LLM is a cost. At scale, you'll see the same semantic queries repeatedly.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;What works:&lt;/strong&gt; Semantic caching — hash the embedding of each query and cache results for similar queries (cosine similarity &amp;gt; 0.97).&lt;/p&gt;

&lt;h2&gt;
  
  
  The Production Stack That Works
&lt;/h2&gt;

&lt;ul&gt;
&lt;li&gt;
&lt;strong&gt;Embeddings:&lt;/strong&gt; OpenAI text-embedding-3-large (best quality/cost)&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Vector DB:&lt;/strong&gt; Pinecone (managed, scales easily) or Weaviate (self-hosted, more control)&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Re-ranking:&lt;/strong&gt; Cohere Rerank API or a local cross-encoder&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Caching:&lt;/strong&gt; Redis with embedding-based similarity check&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;LLM:&lt;/strong&gt; GPT-4o for quality, GPT-4o-mini for speed/cost tradeoff&lt;/li&gt;
&lt;/ul&gt;

&lt;h2&gt;
  
  
  Key Takeaway
&lt;/h2&gt;

&lt;p&gt;Production RAG is 20% architecture and 80% data quality + retrieval tuning. The chunking and retrieval strategy matter more than the LLM choice.&lt;/p&gt;

&lt;p&gt;What's been your biggest challenge with RAG in production? Drop it in the comments.&lt;/p&gt;

</description>
      <category>ai</category>
      <category>machinelearning</category>
      <category>python</category>
      <category>webdev</category>
    </item>
    <item>
      <title>How We Scaled Quran.com to 50M+ Monthly Users: Architecture Lessons from the Inside</title>
      <dc:creator>Muhammad Zulqarnain</dc:creator>
      <pubDate>Sat, 04 Apr 2026 20:08:55 +0000</pubDate>
      <link>https://dev.to/mzunain/how-we-scaled-qurancom-to-50m-monthly-users-architecture-lessons-from-the-inside-cbi</link>
      <guid>https://dev.to/mzunain/how-we-scaled-qurancom-to-50m-monthly-users-architecture-lessons-from-the-inside-cbi</guid>
      <description>&lt;p&gt;Quran.com is one of the most-visited Islamic websites in the world. During my time there as a Full Stack Engineer, the platform crossed 50 million monthly active users — a milestone that forced us to rethink nearly every assumption we'd made about architecture, data, and performance.&lt;/p&gt;

&lt;p&gt;This post covers the technical decisions that got us there, the tradeoffs we made, and what I'd do differently.&lt;/p&gt;

&lt;h2&gt;
  
  
  The Scale Reality
&lt;/h2&gt;

&lt;p&gt;50M MAU sounds like an abstract number. In practice it means spikes of hundreds of thousands of concurrent users during Ramadan, Quran audio files being streamed from every continent simultaneously, prayer time calculations for locations across the entire globe, and serving both high-bandwidth users in the West and users on 2G/3G connections in South Asia and Sub-Saharan Africa.&lt;/p&gt;

&lt;p&gt;That last point shaped almost every technical decision. You can't optimize purely for fast connections. You have to think about the user opening the site on a 2G connection in rural Pakistan at 3am for Fajr prayer.&lt;/p&gt;

&lt;h2&gt;
  
  
  Frontend: Next.js as the Foundation
&lt;/h2&gt;

&lt;p&gt;We built on Next.js. Server-side rendering was critical for two reasons: SEO (the Quran text needs to be indexable) and first-load performance on slow connections. Each Surah page (114 in total) is statically generated at build time — meaning the first HTML response is near-instant, no waiting for JS to hydrate before the user sees text.&lt;/p&gt;

&lt;p&gt;We were aggressive about code splitting: audio player logic only loads when the user actually interacts with audio. Next.js Image with AVIF/WebP fallback cut image payloads significantly while staying compatible with older devices.&lt;/p&gt;

&lt;h2&gt;
  
  
  Offline Audio Streaming
&lt;/h2&gt;

&lt;p&gt;One of the most technically interesting problems: how do you let a user listen to Quran recitations with no internet?&lt;/p&gt;

&lt;p&gt;Quran audio is broken into individual ayah (verse) recordings — up to 6,236 of them per reciter. You can't cache all of that upfront. Our solution: when a user starts playing a Surah, we silently prefetch the next 10 ayahs in the background using Service Workers and the Cache API. If connectivity drops, playback continues seamlessly. We also gave users an explicit "Download for offline" option for Surahs they listen to regularly.&lt;/p&gt;

&lt;h2&gt;
  
  
  Geospatial: Prayer Times at Scale
&lt;/h2&gt;

&lt;p&gt;Prayer times are calculated based on GPS coordinates — simple until you consider DST rules that differ by country, multiple calculation methodologies (Hanafi, Shafi'i, MWL, ISNA), and users who want accurate times regardless of whether they share their location.&lt;/p&gt;

&lt;p&gt;We moved prayer time calculation to the client side. The astronomy math runs in under a millisecond on any modern device, requires no server round-trip, and stores no user location data.&lt;/p&gt;

&lt;p&gt;For map-based features — nearby mosques, Qibla direction — we used Mapbox GL JS. The vector tile approach is ideal: compact, cacheable, and sharp at any zoom level.&lt;/p&gt;

&lt;h2&gt;
  
  
  Database and Infrastructure
&lt;/h2&gt;

&lt;p&gt;PostgreSQL was our primary data store. The Quran text is relational in nature — verses have translations, tafsirs, word-by-word breakdowns, audio timestamps, and cross-references. A relational model maps cleanly to this structure.&lt;/p&gt;

&lt;p&gt;For our read-heavy endpoints (almost all of them), we leaned on read replicas to distribute query load, Redis for caching frequently-accessed data, and aggressive CDN caching at the edge for static API responses.&lt;/p&gt;

&lt;p&gt;Infrastructure ran on AWS: ECS for containerized services, RDS for PostgreSQL, ElastiCache for Redis, CloudFront as CDN.&lt;/p&gt;

&lt;h2&gt;
  
  
  What I'd Do Differently
&lt;/h2&gt;

&lt;p&gt;&lt;strong&gt;Invest in observability earlier.&lt;/strong&gt; We added distributed tracing later than we should have. When you're debugging a latency spike that only affects users in a specific region, you want granular trace data — not logs.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Be more aggressive with edge caching.&lt;/strong&gt; Many API responses could have been fully cached at the CDN layer, eliminating server load for the vast majority of requests. We were conservative about this early and paid the price during Ramadan traffic spikes.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Design for low-bandwidth from day one.&lt;/strong&gt; We retrofitted a lot of optimizations later. Starting with that constraint leads to better decisions upfront — smaller bundles, progressive loading, offline-first thinking baked in from the beginning.&lt;/p&gt;

&lt;h2&gt;
  
  
  Closing Thoughts
&lt;/h2&gt;

&lt;p&gt;Working on a platform where the content matters deeply to hundreds of millions of people sharpens your instincts. Performance isn't vanity. A 3-second improvement in load time is the difference between someone completing Fajr prayer with the app or giving up.&lt;/p&gt;

&lt;p&gt;If you're working on similar challenges — high-scale web apps, geospatial features, or offline-capable PWAs — find me at &lt;a href="https://zunain.com" rel="noopener noreferrer"&gt;zunain.com&lt;/a&gt; or on GitHub at &lt;a href="https://github.com/mzulqarnain118" rel="noopener noreferrer"&gt;mzulqarnain118&lt;/a&gt;.&lt;/p&gt;

</description>
      <category>nextjs</category>
      <category>webdev</category>
      <category>architecture</category>
      <category>javascript</category>
    </item>
    <item>
      <title>Next.js Production Apps: Server Components and API Routes</title>
      <dc:creator>Muhammad Zulqarnain</dc:creator>
      <pubDate>Tue, 20 Jan 2026 08:00:00 +0000</pubDate>
      <link>https://dev.to/mzunain/nextjs-production-apps-server-components-and-api-routes-5a00</link>
      <guid>https://dev.to/mzunain/nextjs-production-apps-server-components-and-api-routes-5a00</guid>
      <description>&lt;p&gt;Next.js has become my framework of choice for building production-ready full-stack applications. Here's how I leverage its powerful features.&lt;/p&gt;

&lt;h2&gt;
  
  
  Why Next.js?
&lt;/h2&gt;

&lt;p&gt;&lt;strong&gt;Complete Full-Stack Solution&lt;/strong&gt;&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Server and client components&lt;/li&gt;
&lt;li&gt;Built-in API routes&lt;/li&gt;
&lt;li&gt;Automatic code splitting&lt;/li&gt;
&lt;li&gt;Optimized image handling&lt;/li&gt;
&lt;li&gt;SEO-friendly by default&lt;/li&gt;
&lt;/ul&gt;

&lt;h2&gt;
  
  
  Server Components (App Router)
&lt;/h2&gt;



&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight typescript"&gt;&lt;code&gt;&lt;span class="c1"&gt;// app/page.tsx&lt;/span&gt;
&lt;span class="k"&gt;import&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt; &lt;span class="nx"&gt;getProducts&lt;/span&gt; &lt;span class="p"&gt;}&lt;/span&gt; &lt;span class="k"&gt;from&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;@/lib/data&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;

&lt;span class="k"&gt;export&lt;/span&gt; &lt;span class="k"&gt;default&lt;/span&gt; &lt;span class="k"&gt;async&lt;/span&gt; &lt;span class="kd"&gt;function&lt;/span&gt; &lt;span class="nf"&gt;HomePage&lt;/span&gt;&lt;span class="p"&gt;()&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;products&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nf"&gt;getProducts&lt;/span&gt;&lt;span class="p"&gt;();&lt;/span&gt;

  &lt;span class="k"&gt;return &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
    &lt;span class="o"&gt;&amp;lt;&lt;/span&gt;&lt;span class="nx"&gt;main&lt;/span&gt;&lt;span class="o"&gt;&amp;gt;&lt;/span&gt;
      &lt;span class="o"&gt;&amp;lt;&lt;/span&gt;&lt;span class="nx"&gt;h1&lt;/span&gt;&lt;span class="o"&gt;&amp;gt;&lt;/span&gt;&lt;span class="nx"&gt;Products&lt;/span&gt;&lt;span class="o"&gt;&amp;lt;&lt;/span&gt;&lt;span class="sr"&gt;/h1&lt;/span&gt;&lt;span class="err"&gt;&amp;gt;
&lt;/span&gt;      &lt;span class="p"&gt;{&lt;/span&gt;&lt;span class="nx"&gt;products&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;map&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;product&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="p"&gt;(&lt;/span&gt;
        &lt;span class="o"&gt;&amp;lt;&lt;/span&gt;&lt;span class="nx"&gt;ProductCard&lt;/span&gt; &lt;span class="nx"&gt;key&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="p"&gt;{&lt;/span&gt;&lt;span class="nx"&gt;product&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;id&lt;/span&gt;&lt;span class="p"&gt;}&lt;/span&gt; &lt;span class="nx"&gt;product&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="p"&gt;{&lt;/span&gt;&lt;span class="nx"&gt;product&lt;/span&gt;&lt;span class="p"&gt;}&lt;/span&gt; &lt;span class="sr"&gt;/&lt;/span&gt;&lt;span class="err"&gt;&amp;gt;
&lt;/span&gt;      &lt;span class="p"&gt;))}&lt;/span&gt;
    &lt;span class="o"&gt;&amp;lt;&lt;/span&gt;&lt;span class="sr"&gt;/main&lt;/span&gt;&lt;span class="err"&gt;&amp;gt;
&lt;/span&gt;  &lt;span class="p"&gt;);&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;h2&gt;
  
  
  API Routes
&lt;/h2&gt;



&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight typescript"&gt;&lt;code&gt;&lt;span class="c1"&gt;// app/api/products/route.ts&lt;/span&gt;
&lt;span class="k"&gt;import&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt; &lt;span class="nx"&gt;NextResponse&lt;/span&gt; &lt;span class="p"&gt;}&lt;/span&gt; &lt;span class="k"&gt;from&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;next/server&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
&lt;span class="k"&gt;import&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt; &lt;span class="nx"&gt;db&lt;/span&gt; &lt;span class="p"&gt;}&lt;/span&gt; &lt;span class="k"&gt;from&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;@/lib/db&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;

&lt;span class="k"&gt;export&lt;/span&gt; &lt;span class="k"&gt;async&lt;/span&gt; &lt;span class="kd"&gt;function&lt;/span&gt; &lt;span class="nf"&gt;GET&lt;/span&gt;&lt;span class="p"&gt;()&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="k"&gt;try&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
    &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;products&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nx"&gt;db&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;product&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;findMany&lt;/span&gt;&lt;span class="p"&gt;();&lt;/span&gt;
    &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="nx"&gt;NextResponse&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;json&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;products&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
  &lt;span class="p"&gt;}&lt;/span&gt; &lt;span class="k"&gt;catch &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;error&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
    &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="nx"&gt;NextResponse&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;json&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
      &lt;span class="p"&gt;{&lt;/span&gt; &lt;span class="na"&gt;error&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;Failed to fetch products&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt; &lt;span class="p"&gt;},&lt;/span&gt;
      &lt;span class="p"&gt;{&lt;/span&gt; &lt;span class="na"&gt;status&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="mi"&gt;500&lt;/span&gt; &lt;span class="p"&gt;}&lt;/span&gt;
    &lt;span class="p"&gt;);&lt;/span&gt;
  &lt;span class="p"&gt;}&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;

&lt;span class="k"&gt;export&lt;/span&gt; &lt;span class="k"&gt;async&lt;/span&gt; &lt;span class="kd"&gt;function&lt;/span&gt; &lt;span class="nf"&gt;POST&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;request&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;Request&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;body&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nx"&gt;request&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;json&lt;/span&gt;&lt;span class="p"&gt;();&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;product&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nx"&gt;db&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;product&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;create&lt;/span&gt;&lt;span class="p"&gt;({&lt;/span&gt; &lt;span class="na"&gt;data&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;body&lt;/span&gt; &lt;span class="p"&gt;});&lt;/span&gt;
  &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="nx"&gt;NextResponse&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;json&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;product&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;h2&gt;
  
  
  Data Fetching Patterns
&lt;/h2&gt;



&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight typescript"&gt;&lt;code&gt;&lt;span class="c1"&gt;// Server Component - Direct database access&lt;/span&gt;
&lt;span class="k"&gt;async&lt;/span&gt; &lt;span class="kd"&gt;function&lt;/span&gt; &lt;span class="nf"&gt;getUser&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;id&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kr"&gt;string&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;user&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="nx"&gt;db&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;user&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;findUnique&lt;/span&gt;&lt;span class="p"&gt;({&lt;/span&gt; &lt;span class="na"&gt;where&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt; &lt;span class="nx"&gt;id&lt;/span&gt; &lt;span class="p"&gt;}&lt;/span&gt; &lt;span class="p"&gt;});&lt;/span&gt;
  &lt;span class="k"&gt;return&lt;/span&gt; &lt;span class="nx"&gt;user&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;

&lt;span class="c1"&gt;// With caching&lt;/span&gt;
&lt;span class="k"&gt;import&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt; &lt;span class="nx"&gt;unstable_cache&lt;/span&gt; &lt;span class="p"&gt;}&lt;/span&gt; &lt;span class="k"&gt;from&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;next/cache&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;

&lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;getCachedProducts&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nf"&gt;unstable_cache&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
  &lt;span class="k"&gt;async &lt;/span&gt;&lt;span class="p"&gt;()&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="nx"&gt;db&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;product&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;findMany&lt;/span&gt;&lt;span class="p"&gt;(),&lt;/span&gt;
  &lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;products&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;],&lt;/span&gt;
  &lt;span class="p"&gt;{&lt;/span&gt; &lt;span class="na"&gt;revalidate&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="mi"&gt;3600&lt;/span&gt; &lt;span class="p"&gt;}&lt;/span&gt;
&lt;span class="p"&gt;);&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;h2&gt;
  
  
  My Production Setup
&lt;/h2&gt;

&lt;p&gt;✅ &lt;strong&gt;Performance&lt;/strong&gt;&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Image optimization with next/image&lt;/li&gt;
&lt;li&gt;Font optimization&lt;/li&gt;
&lt;li&gt;Bundle analyzer&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;✅ &lt;strong&gt;SEO&lt;/strong&gt;&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Metadata API&lt;/li&gt;
&lt;li&gt;Sitemap generation&lt;/li&gt;
&lt;li&gt;robots.txt&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;✅ &lt;strong&gt;Developer Experience&lt;/strong&gt;&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;TypeScript throughout&lt;/li&gt;
&lt;li&gt;ESLint &amp;amp; Prettier&lt;/li&gt;
&lt;li&gt;Tailwind CSS&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;Next.js simplifies full-stack development while maintaining excellent performance and SEO. The App Router and Server Components have transformed how I build web applications.&lt;/p&gt;

</description>
      <category>programming</category>
    </item>
  </channel>
</rss>
