<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/">
  <channel>
    <title>DEV Community: Kshitij Sharma</title>
    <description>The latest articles on DEV Community by Kshitij Sharma (@kshitij_sharma_fd33fdb032).</description>
    <link>https://dev.to/kshitij_sharma_fd33fdb032</link>
    <image>
      <url>https://media2.dev.to/dynamic/image/width=90,height=90,fit=cover,gravity=auto,format=auto/https:%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Fuser%2Fprofile_image%2F3892871%2Fb33f228c-f438-4022-9db2-6b287297b3c4.png</url>
      <title>DEV Community: Kshitij Sharma</title>
      <link>https://dev.to/kshitij_sharma_fd33fdb032</link>
    </image>
    <atom:link rel="self" type="application/rss+xml" href="https://dev.to/feed/kshitij_sharma_fd33fdb032"/>
    <language>en</language>
    <item>
      <title>Understand the mechanism behind the API failing randomly</title>
      <dc:creator>Kshitij Sharma</dc:creator>
      <pubDate>Wed, 22 Apr 2026 17:41:09 +0000</pubDate>
      <link>https://dev.to/kshitij_sharma_fd33fdb032/understand-the-mechanism-behind-the-api-failing-randomly-2mci</link>
      <guid>https://dev.to/kshitij_sharma_fd33fdb032/understand-the-mechanism-behind-the-api-failing-randomly-2mci</guid>
      <description>&lt;div class="ltag__link--embedded"&gt;
  &lt;div class="crayons-story "&gt;
  &lt;a href="https://dev.to/kshitij_sharma_fd33fdb032/when-your-api-randomly-starts-timing-out-6a3" class="crayons-story__hidden-navigation-link"&gt;When Your API “Randomly” Starts Timing Out&lt;/a&gt;


  &lt;div class="crayons-story__body crayons-story__body-full_post"&gt;
    &lt;div class="crayons-story__top"&gt;
      &lt;div class="crayons-story__meta"&gt;
        &lt;div class="crayons-story__author-pic"&gt;

          &lt;a href="/kshitij_sharma_fd33fdb032" class="crayons-avatar  crayons-avatar--l  "&gt;
            &lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Fuser%2Fprofile_image%2F3892871%2Fb33f228c-f438-4022-9db2-6b287297b3c4.png" alt="kshitij_sharma_fd33fdb032 profile" class="crayons-avatar__image" width="96" height="96"&gt;
          &lt;/a&gt;
        &lt;/div&gt;
        &lt;div&gt;
          &lt;div&gt;
            &lt;a href="/kshitij_sharma_fd33fdb032" class="crayons-story__secondary fw-medium m:hidden"&gt;
              Kshitij Sharma
            &lt;/a&gt;
            &lt;div class="profile-preview-card relative mb-4 s:mb-0 fw-medium hidden m:inline-block"&gt;
              
                Kshitij Sharma
                
              
              &lt;div id="story-author-preview-content-3537617" class="profile-preview-card__content crayons-dropdown branded-7 p-4 pt-0"&gt;
                &lt;div class="gap-4 grid"&gt;
                  &lt;div class="-mt-4"&gt;
                    &lt;a href="/kshitij_sharma_fd33fdb032" class="flex"&gt;
                      &lt;span class="crayons-avatar crayons-avatar--xl mr-2 shrink-0"&gt;
                        &lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Fuser%2Fprofile_image%2F3892871%2Fb33f228c-f438-4022-9db2-6b287297b3c4.png" class="crayons-avatar__image" alt="" width="96" height="96"&gt;
                      &lt;/span&gt;
                      &lt;span class="crayons-link crayons-subtitle-2 mt-5"&gt;Kshitij Sharma&lt;/span&gt;
                    &lt;/a&gt;
                  &lt;/div&gt;
                  &lt;div class="print-hidden"&gt;
                    
                      Follow
                    
                  &lt;/div&gt;
                  &lt;div class="author-preview-metadata-container"&gt;&lt;/div&gt;
                &lt;/div&gt;
              &lt;/div&gt;
            &lt;/div&gt;

          &lt;/div&gt;
          &lt;a href="https://dev.to/kshitij_sharma_fd33fdb032/when-your-api-randomly-starts-timing-out-6a3" class="crayons-story__tertiary fs-xs"&gt;&lt;time&gt;Apr 22&lt;/time&gt;&lt;span class="time-ago-indicator-initial-placeholder"&gt;&lt;/span&gt;&lt;/a&gt;
        &lt;/div&gt;
      &lt;/div&gt;

    &lt;/div&gt;

    &lt;div class="crayons-story__indention"&gt;
      &lt;h2 class="crayons-story__title crayons-story__title-full_post"&gt;
        &lt;a href="https://dev.to/kshitij_sharma_fd33fdb032/when-your-api-randomly-starts-timing-out-6a3" id="article-link-3537617"&gt;
          When Your API “Randomly” Starts Timing Out
        &lt;/a&gt;
      &lt;/h2&gt;
        &lt;div class="crayons-story__tags"&gt;
            &lt;a class="crayons-tag  crayons-tag--monochrome " href="/t/backend"&gt;&lt;span class="crayons-tag__prefix"&gt;#&lt;/span&gt;backend&lt;/a&gt;
            &lt;a class="crayons-tag  crayons-tag--monochrome " href="/t/webdev"&gt;&lt;span class="crayons-tag__prefix"&gt;#&lt;/span&gt;webdev&lt;/a&gt;
            &lt;a class="crayons-tag  crayons-tag--monochrome " href="/t/networking"&gt;&lt;span class="crayons-tag__prefix"&gt;#&lt;/span&gt;networking&lt;/a&gt;
            &lt;a class="crayons-tag  crayons-tag--monochrome " href="/t/distributedsystems"&gt;&lt;span class="crayons-tag__prefix"&gt;#&lt;/span&gt;distributedsystems&lt;/a&gt;
        &lt;/div&gt;
      &lt;div class="crayons-story__bottom"&gt;
        &lt;div class="crayons-story__details"&gt;
          &lt;a href="https://dev.to/kshitij_sharma_fd33fdb032/when-your-api-randomly-starts-timing-out-6a3" class="crayons-btn crayons-btn--s crayons-btn--ghost crayons-btn--icon-left"&gt;
            &lt;div class="multiple_reactions_aggregate"&gt;
              &lt;span class="multiple_reactions_icons_container"&gt;
                  &lt;span class="crayons_icon_container"&gt;
                    &lt;img src="https://assets.dev.to/assets/sparkle-heart-5f9bee3767e18deb1bb725290cb151c25234768a0e9a2bd39370c382d02920cf.svg" width="24" height="24"&gt;
                  &lt;/span&gt;
              &lt;/span&gt;
              &lt;span class="aggregate_reactions_counter"&gt;1&lt;span class="hidden s:inline"&gt; reaction&lt;/span&gt;&lt;/span&gt;
            &lt;/div&gt;
          &lt;/a&gt;
            &lt;a href="https://dev.to/kshitij_sharma_fd33fdb032/when-your-api-randomly-starts-timing-out-6a3#comments" class="crayons-btn crayons-btn--s crayons-btn--ghost crayons-btn--icon-left flex items-center"&gt;
              Comments


              &lt;span class="hidden s:inline"&gt;Add Comment&lt;/span&gt;
            &lt;/a&gt;
        &lt;/div&gt;
        &lt;div class="crayons-story__save"&gt;
          &lt;small class="crayons-story__tertiary fs-xs mr-2"&gt;
            4 min read
          &lt;/small&gt;
            
              &lt;span class="bm-initial"&gt;
                

              &lt;/span&gt;
              &lt;span class="bm-success"&gt;
                

              &lt;/span&gt;
            
        &lt;/div&gt;
      &lt;/div&gt;
    &lt;/div&gt;
  &lt;/div&gt;
&lt;/div&gt;

&lt;/div&gt;


</description>
    </item>
    <item>
      <title>When Your API “Randomly” Starts Timing Out</title>
      <dc:creator>Kshitij Sharma</dc:creator>
      <pubDate>Wed, 22 Apr 2026 17:30:48 +0000</pubDate>
      <link>https://dev.to/kshitij_sharma_fd33fdb032/when-your-api-randomly-starts-timing-out-6a3</link>
      <guid>https://dev.to/kshitij_sharma_fd33fdb032/when-your-api-randomly-starts-timing-out-6a3</guid>
      <description>&lt;p&gt;You deploy a perfectly fine service. Load tests passed. Latency looked clean. Then production hits—and suddenly:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;P95 latency spikes&lt;/li&gt;
&lt;li&gt;Requests hang without logs&lt;/li&gt;
&lt;li&gt;CPU is fine, memory is fine… but users are screaming&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;This isn’t a “bug.” This is you not understanding the &lt;strong&gt;&lt;em&gt;actual HTTP request lifecycle&lt;/em&gt;&lt;/strong&gt; beyond the textbook diagram.&lt;/p&gt;

&lt;p&gt;If you don’t know what &lt;strong&gt;&lt;em&gt;really&lt;/em&gt;&lt;/strong&gt; happens between a client sending a request and your handler returning a response, you’re flying blind.&lt;/p&gt;

&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2F3sn82cip1cu0vf98te3b.jpg" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2F3sn82cip1cu0vf98te3b.jpg" alt=" " width="800" height="725"&gt;&lt;/a&gt;&lt;/p&gt;

&lt;h2&gt;
  
  
  HTTP Request Lifecycle — What Actually Happens Under the Hood
&lt;/h2&gt;

&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fxlemttnwnohmubuj150r.jpg" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fxlemttnwnohmubuj150r.jpg" alt=" " width="775" height="708"&gt;&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;Forget diagrams like &lt;strong&gt;&lt;em&gt;Client → Server → Response&lt;/em&gt;&lt;/strong&gt;. That’s marketing-level abstraction.&lt;/p&gt;

&lt;p&gt;A real request goes through:&lt;/p&gt;

&lt;h3&gt;
  
  
  1. Connection Establishment
&lt;/h3&gt;

&lt;ul&gt;
&lt;li&gt;DNS resolution&lt;/li&gt;
&lt;li&gt;TCP handshake (3-way)&lt;/li&gt;
&lt;li&gt;TLS handshake (if HTTPS)&lt;/li&gt;
&lt;li&gt;Connection pooling / reuse (keep-alive)&lt;/li&gt;
&lt;/ul&gt;

&lt;h3&gt;
  
  
  2. Kernel → User Space Transition
&lt;/h3&gt;

&lt;ul&gt;
&lt;li&gt;NIC receives packet → kernel buffer&lt;/li&gt;
&lt;li&gt;Socket read readiness via epoll/kqueue&lt;/li&gt;
&lt;li&gt;Data copied into user space buffers&lt;/li&gt;
&lt;/ul&gt;

&lt;h3&gt;
  
  
  3. HTTP Parsing
&lt;/h3&gt;

&lt;ul&gt;
&lt;li&gt;Raw bytes → protocol parsing (headers, method, path)&lt;/li&gt;
&lt;li&gt;Chunked decoding / content-length validation&lt;/li&gt;
&lt;li&gt;Header normalization&lt;/li&gt;
&lt;/ul&gt;

&lt;h3&gt;
  
  
  4. Routing &amp;amp; Middleware Chain
&lt;/h3&gt;

&lt;ul&gt;
&lt;li&gt;Path matching (often regex or trie-based)&lt;/li&gt;
&lt;li&gt;Middleware execution (auth, logging, rate limiting)&lt;/li&gt;
&lt;/ul&gt;

&lt;h3&gt;
  
  
  5. Business Logic Execution
&lt;/h3&gt;

&lt;ul&gt;
&lt;li&gt;DB calls&lt;/li&gt;
&lt;li&gt;External APIs&lt;/li&gt;
&lt;li&gt;CPU-bound work&lt;/li&gt;
&lt;/ul&gt;

&lt;h3&gt;
  
  
  6. Response Construction
&lt;/h3&gt;

&lt;ul&gt;
&lt;li&gt;Serialization (JSON, protobuf, etc.)&lt;/li&gt;
&lt;li&gt;Compression (gzip, brotli)&lt;/li&gt;
&lt;/ul&gt;

&lt;h3&gt;
  
  
  7. Write Back to Socket
&lt;/h3&gt;

&lt;ul&gt;
&lt;li&gt;Kernel send buffer&lt;/li&gt;
&lt;li&gt;TCP congestion control&lt;/li&gt;
&lt;li&gt;Potential partial writes&lt;/li&gt;
&lt;/ul&gt;

&lt;h3&gt;
  
  
  8. Connection Lifecycle Decision
&lt;/h3&gt;

&lt;ul&gt;
&lt;li&gt;Keep-alive reuse vs close&lt;/li&gt;
&lt;li&gt;Idle timeout tracking&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;Miss any one of these layers, and you’ll misdiagnose production issues.&lt;/p&gt;




&lt;h1&gt;
  
  
  Where Systems Actually Break
&lt;/h1&gt;

&lt;p&gt;Let’s cut the theory. Real failures:&lt;/p&gt;

&lt;h3&gt;
  
  
  🔴 1. Head-of-Line Blocking in Connection Pools
&lt;/h3&gt;

&lt;p&gt;You think you're async, but your HTTP client pool is exhausted.&lt;/p&gt;

&lt;p&gt;Result:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Requests queue waiting for a free connection&lt;/li&gt;
&lt;li&gt;Latency explodes without CPU increase&lt;/li&gt;
&lt;/ul&gt;




&lt;h3&gt;
  
  
  🔴 2. Slow Clients = Resource Leaks
&lt;/h3&gt;

&lt;p&gt;If a client reads slowly:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Your server keeps buffers open&lt;/li&gt;
&lt;li&gt;Threads/event-loop slots remain occupied&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;This is &lt;strong&gt;&lt;em&gt;classic slowloris territory&lt;/em&gt;&lt;/strong&gt;.&lt;/p&gt;




&lt;h3&gt;
  
  
  🔴 3. Middleware Abuse
&lt;/h3&gt;

&lt;p&gt;Stacking 10 middlewares sounds clean.&lt;/p&gt;

&lt;p&gt;Reality:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Each adds latency&lt;/li&gt;
&lt;li&gt;Each may block (logging, auth calls)&lt;/li&gt;
&lt;li&gt;Hard to reason about ordering&lt;/li&gt;
&lt;/ul&gt;




&lt;h3&gt;
  
  
  🔴 4. TLS Handshake Overhead
&lt;/h3&gt;

&lt;p&gt;Without reuse:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Every request pays ~1–2 RTT extra&lt;/li&gt;
&lt;li&gt;CPU spikes due to crypto&lt;/li&gt;
&lt;/ul&gt;




&lt;h3&gt;
  
  
  🔴 5. Kernel Buffer Backpressure
&lt;/h3&gt;

&lt;p&gt;Your app “sent” the response.&lt;/p&gt;

&lt;p&gt;Kernel says:&lt;/p&gt;

&lt;blockquote&gt;
&lt;p&gt;Nope, buffer full. Try later.&lt;/p&gt;
&lt;/blockquote&gt;

&lt;p&gt;If you ignore this:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Writes block&lt;/li&gt;
&lt;li&gt;Event loop stalls&lt;/li&gt;
&lt;li&gt;Throughput collapses&lt;/li&gt;
&lt;/ul&gt;




&lt;h1&gt;
  
  
  Architecture Decisions That Actually Matter
&lt;/h1&gt;

&lt;h2&gt;
  
  
  1. Thread-per-request vs Event Loop
&lt;/h2&gt;

&lt;h3&gt;
  
  
  Thread-per-request (e.g., classic Java)
&lt;/h3&gt;

&lt;p&gt;Pros:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Simpler mental model&lt;/li&gt;
&lt;li&gt;Blocking code is fine&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;Cons:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Context switching overhead&lt;/li&gt;
&lt;li&gt;Memory per thread (~1MB stack)&lt;/li&gt;
&lt;/ul&gt;




&lt;h3&gt;
  
  
  Event-driven (Node.js, Netty, Go runtime hybrid)
&lt;/h3&gt;

&lt;p&gt;Pros:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;High concurrency&lt;/li&gt;
&lt;li&gt;Efficient IO&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;Cons:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Blocking = catastrophic&lt;/li&gt;
&lt;li&gt;Debugging harder&lt;/li&gt;
&lt;/ul&gt;




&lt;h2&gt;
  
  
  2. Reverse Proxy in Front (NGINX / Envoy)
&lt;/h2&gt;

&lt;p&gt;You &lt;strong&gt;&lt;em&gt;should not&lt;/em&gt;&lt;/strong&gt; expose your app server directly.&lt;/p&gt;

&lt;p&gt;Why:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Handles TLS termination&lt;/li&gt;
&lt;li&gt;Absorbs slow clients&lt;/li&gt;
&lt;li&gt;Better connection management&lt;/li&gt;
&lt;/ul&gt;




&lt;h2&gt;
  
  
  3. Connection Reuse Strategy
&lt;/h2&gt;

&lt;p&gt;Bad:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;New TCP per request&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;Better:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;HTTP/1.1 keep-alive&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;Best:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;HTTP/2 multiplexing&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;Trade-off:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;HTTP/2 is still exposed to head-of-line blocking at the TCP layer: all streams share one connection, so a single lost packet stalls every stream&lt;/li&gt;
&lt;li&gt;QUIC (HTTP/3) fixes it but adds complexity&lt;/li&gt;
&lt;/ul&gt;




&lt;h1&gt;
  
  
  Implementation: What This Looks Like in Code
&lt;/h1&gt;

&lt;h2&gt;
  
  
  Example: Minimal HTTP Server (Node.js — showing lifecycle touchpoints)
&lt;/h2&gt;



&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight javascript"&gt;&lt;code&gt;&lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;http&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nf"&gt;require&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;http&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;

&lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;server&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nx"&gt;http&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;createServer&lt;/span&gt;&lt;span class="p"&gt;((&lt;/span&gt;&lt;span class="nx"&gt;req&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;res&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="c1"&gt;// 1. Request received (already parsed by Node's HTTP parser)&lt;/span&gt;

  &lt;span class="c1"&gt;// 2. Middleware simulation&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;start&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nb"&gt;Date&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;now&lt;/span&gt;&lt;span class="p"&gt;();&lt;/span&gt;

  &lt;span class="k"&gt;if &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;req&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;headers&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;x-block&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;])&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
    &lt;span class="c1"&gt;// simulate bad middleware&lt;/span&gt;
    &lt;span class="k"&gt;while &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nb"&gt;Date&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;now&lt;/span&gt;&lt;span class="p"&gt;()&lt;/span&gt; &lt;span class="o"&gt;-&lt;/span&gt; &lt;span class="nx"&gt;start&lt;/span&gt; &lt;span class="o"&gt;&amp;lt;&lt;/span&gt; &lt;span class="mi"&gt;100&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{}&lt;/span&gt;
  &lt;span class="p"&gt;}&lt;/span&gt;

  &lt;span class="c1"&gt;// 3. Business logic&lt;/span&gt;
  &lt;span class="nf"&gt;setTimeout&lt;/span&gt;&lt;span class="p"&gt;(()&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
    &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;responseBody&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nx"&gt;JSON&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;stringify&lt;/span&gt;&lt;span class="p"&gt;({&lt;/span&gt; &lt;span class="na"&gt;ok&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="kc"&gt;true&lt;/span&gt; &lt;span class="p"&gt;});&lt;/span&gt;

    &lt;span class="c1"&gt;// 4. Response write&lt;/span&gt;
    &lt;span class="nx"&gt;res&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;setHeader&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;Content-Type&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;application/json&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
    &lt;span class="nx"&gt;res&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;setHeader&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;Content-Length&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;Buffer&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;byteLength&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;responseBody&lt;/span&gt;&lt;span class="p"&gt;));&lt;/span&gt;

    &lt;span class="nx"&gt;res&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;write&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;responseBody&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;

    &lt;span class="c1"&gt;// 5. End response (flush to kernel)&lt;/span&gt;
    &lt;span class="nx"&gt;res&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;end&lt;/span&gt;&lt;span class="p"&gt;();&lt;/span&gt;
  &lt;span class="p"&gt;},&lt;/span&gt; &lt;span class="mi"&gt;10&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
&lt;span class="p"&gt;});&lt;/span&gt;

&lt;span class="c1"&gt;// 6. Connection-level tuning&lt;/span&gt;
&lt;span class="nx"&gt;server&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;keepAliveTimeout&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="mi"&gt;5000&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;
&lt;span class="nx"&gt;server&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nx"&gt;headersTimeout&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="mi"&gt;6000&lt;/span&gt;&lt;span class="p"&gt;;&lt;/span&gt;

&lt;span class="nx"&gt;server&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;listen&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="mi"&gt;3000&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;






&lt;h2&gt;
  
  
  Where This Code Lies to You
&lt;/h2&gt;

&lt;ul&gt;
&lt;li&gt;You don’t see TCP&lt;/li&gt;
&lt;li&gt;You don’t see kernel buffers&lt;/li&gt;
&lt;li&gt;You don’t control backpressure explicitly&lt;/li&gt;
&lt;li&gt;You don’t see partial writes&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;That abstraction is convenient—and dangerous.&lt;/p&gt;




&lt;h1&gt;
  
  
  Advanced Concern: Backpressure Handling
&lt;/h1&gt;

&lt;p&gt;Most people ignore this. That’s why systems collapse under load.&lt;/p&gt;

&lt;h3&gt;
  
  
  Example (Node.js stream backpressure):
&lt;/h3&gt;



&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight javascript"&gt;&lt;code&gt;&lt;span class="kd"&gt;function&lt;/span&gt; &lt;span class="nf"&gt;writeResponse&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;res&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="nx"&gt;data&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
  &lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;canContinue&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nx"&gt;res&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;write&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="nx"&gt;data&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;

  &lt;span class="k"&gt;if &lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="o"&gt;!&lt;/span&gt;&lt;span class="nx"&gt;canContinue&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
    &lt;span class="c1"&gt;// Write buffer full (backpressure) — wait for 'drain'&lt;/span&gt;
    &lt;span class="nx"&gt;res&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;once&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;drain&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="p"&gt;()&lt;/span&gt; &lt;span class="o"&gt;=&amp;gt;&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
      &lt;span class="nx"&gt;console&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;log&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="s1"&gt;Resumed writing&lt;/span&gt;&lt;span class="dl"&gt;'&lt;/span&gt;&lt;span class="p"&gt;);&lt;/span&gt;
    &lt;span class="p"&gt;});&lt;/span&gt;
  &lt;span class="p"&gt;}&lt;/span&gt;
&lt;span class="p"&gt;}&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;If you ignore this:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Memory spikes&lt;/li&gt;
&lt;li&gt;Latency spikes&lt;/li&gt;
&lt;li&gt;Eventually crashes&lt;/li&gt;
&lt;/ul&gt;




&lt;h1&gt;
  
  
  Failure Case: Timeout Mismatch Hell
&lt;/h1&gt;

&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fcpzt0mk2bfgd217l2zpq.jpg" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fcpzt0mk2bfgd217l2zpq.jpg" alt=" " width="637" height="467"&gt;&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;You configure:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Load balancer timeout: 60s&lt;/li&gt;
&lt;li&gt;App server timeout: 30s&lt;/li&gt;
&lt;li&gt;DB timeout: 10s&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;What happens?&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;DB times out → app retries&lt;/li&gt;
&lt;li&gt;App still running → LB kills connection&lt;/li&gt;
&lt;li&gt;Client retries → duplicate work&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;Result:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Cascade failure&lt;/li&gt;
&lt;/ul&gt;




&lt;h1&gt;
  
  
  Trade-offs You Can’t Avoid
&lt;/h1&gt;

&lt;h2&gt;
  
  
  Latency vs Throughput
&lt;/h2&gt;

&lt;ul&gt;
&lt;li&gt;Small buffers → lower latency, more syscalls&lt;/li&gt;
&lt;li&gt;Large buffers → better throughput, worse tail latency&lt;/li&gt;
&lt;/ul&gt;




&lt;h2&gt;
  
  
  Simplicity vs Control
&lt;/h2&gt;

&lt;ul&gt;
&lt;li&gt;Frameworks hide complexity&lt;/li&gt;
&lt;li&gt;
&lt;p&gt;But you lose control over:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;connection reuse&lt;/li&gt;
&lt;li&gt;backpressure&lt;/li&gt;
&lt;li&gt;parsing behavior&lt;/li&gt;
&lt;/ul&gt;


&lt;/li&gt;

&lt;/ul&gt;




&lt;h2&gt;
  
  
  CPU vs Network Efficiency
&lt;/h2&gt;

&lt;ul&gt;
&lt;li&gt;Compression saves bandwidth&lt;/li&gt;
&lt;li&gt;Costs CPU&lt;/li&gt;
&lt;li&gt;Under load, CPU becomes bottleneck&lt;/li&gt;
&lt;/ul&gt;




&lt;h2&gt;
  
  
  Keep-Alive vs Resource Locking
&lt;/h2&gt;

&lt;ul&gt;
&lt;li&gt;Keep-alive reduces handshake overhead&lt;/li&gt;
&lt;li&gt;But holds connections longer&lt;/li&gt;
&lt;li&gt;Risk: connection pool exhaustion&lt;/li&gt;
&lt;/ul&gt;




&lt;h2&gt;
  
  
  Final System Design (What Actually Works in Production)
&lt;/h2&gt;

&lt;p&gt;A sane architecture:&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight plaintext"&gt;&lt;code&gt;Client
  ↓
CDN (optional)
  ↓
Reverse Proxy (NGINX / Envoy)
  ↓
App Server (stateless, event-driven)
  ↓
Service Layer
  ↓
Database / Cache
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;Key rules:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Terminate TLS early&lt;/li&gt;
&lt;li&gt;Enforce timeouts at every layer&lt;/li&gt;
&lt;li&gt;Use connection pooling aggressively&lt;/li&gt;
&lt;li&gt;Monitor queueing, not just CPU&lt;/li&gt;
&lt;/ul&gt;

&lt;h2&gt;
  
  
  Key Takeaways (No Fluff)
&lt;/h2&gt;

&lt;ul&gt;
&lt;li&gt;HTTP lifecycle is mostly &lt;em&gt;not&lt;/em&gt; in your code—it’s in the kernel and network stack&lt;/li&gt;
&lt;li&gt;Most latency issues are queueing problems, not computation problems&lt;/li&gt;
&lt;li&gt;Backpressure is real; ignoring it will kill your system&lt;/li&gt;
&lt;li&gt;Middleware is not free—treat it like production code, not decoration&lt;/li&gt;
&lt;li&gt;Timeouts must be aligned across layers or you create cascading failures&lt;/li&gt;
&lt;li&gt;Keep-alive and pooling are double-edged swords&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;If you still think HTTP is just “request comes in, response goes out,” you’re not ready to debug production systems.&lt;/p&gt;

</description>
      <category>backend</category>
      <category>webdev</category>
      <category>networking</category>
      <category>distributedsystems</category>
    </item>
  </channel>
</rss>
