<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/">
  <channel>
    <title>DEV Community: Kavish Kartha</title>
    <description>The latest articles on DEV Community by Kavish Kartha (@kkartha31).</description>
    <link>https://dev.to/kkartha31</link>
    <image>
      <url>https://media2.dev.to/dynamic/image/width=90,height=90,fit=cover,gravity=auto,format=auto/https:%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Fuser%2Fprofile_image%2F3840787%2F74dc5b3e-b03d-496c-9bd5-6d2d080aed61.jpg</url>
      <title>DEV Community: Kavish Kartha</title>
      <link>https://dev.to/kkartha31</link>
    </image>
    <atom:link rel="self" type="application/rss+xml" href="https://dev.to/feed/kkartha31"/>
    <language>en</language>
    <item>
      <title>5 minute read, check it out!</title>
      <dc:creator>Kavish Kartha</dc:creator>
      <pubDate>Tue, 24 Mar 2026 00:12:08 +0000</pubDate>
      <link>https://dev.to/kkartha31/5-minute-read-check-it-out-4c49</link>
      <guid>https://dev.to/kkartha31/5-minute-read-check-it-out-4c49</guid>
      <description>&lt;div class="ltag__link--embedded"&gt;
  &lt;div class="crayons-story "&gt;
  &lt;a href="https://dev.to/kkartha31/i-built-a-load-tester-with-an-ai-diagnosis-layer-because-no-existing-tool-does-both-2opf" class="crayons-story__hidden-navigation-link"&gt;I built a load tester with an AI diagnosis layer—because no existing tool does both&lt;/a&gt;


  &lt;div class="crayons-story__body crayons-story__body-full_post"&gt;
    &lt;div class="crayons-story__top"&gt;
      &lt;div class="crayons-story__meta"&gt;
        &lt;div class="crayons-story__author-pic"&gt;

          &lt;a href="/kkartha31" class="crayons-avatar  crayons-avatar--l  "&gt;
            &lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Fuser%2Fprofile_image%2F3840787%2F74dc5b3e-b03d-496c-9bd5-6d2d080aed61.jpg" alt="kkartha31 profile" class="crayons-avatar__image" width="96" height="96"&gt;
          &lt;/a&gt;
        &lt;/div&gt;
        &lt;div&gt;
          &lt;div&gt;
            &lt;a href="/kkartha31" class="crayons-story__secondary fw-medium m:hidden"&gt;
              Kavish Kartha
            &lt;/a&gt;
            &lt;div class="profile-preview-card relative mb-4 s:mb-0 fw-medium hidden m:inline-block"&gt;
              
                Kavish Kartha
                
              
              &lt;div id="story-author-preview-content-3391940" class="profile-preview-card__content crayons-dropdown branded-7 p-4 pt-0"&gt;
                &lt;div class="gap-4 grid"&gt;
                  &lt;div class="-mt-4"&gt;
                    &lt;a href="/kkartha31" class="flex"&gt;
                      &lt;span class="crayons-avatar crayons-avatar--xl mr-2 shrink-0"&gt;
                        &lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Fuser%2Fprofile_image%2F3840787%2F74dc5b3e-b03d-496c-9bd5-6d2d080aed61.jpg" class="crayons-avatar__image" alt="" width="96" height="96"&gt;
                      &lt;/span&gt;
                      &lt;span class="crayons-link crayons-subtitle-2 mt-5"&gt;Kavish Kartha&lt;/span&gt;
                    &lt;/a&gt;
                  &lt;/div&gt;
                  &lt;div class="print-hidden"&gt;
                    
                      Follow
                    
                  &lt;/div&gt;
                  &lt;div class="author-preview-metadata-container"&gt;&lt;/div&gt;
                &lt;/div&gt;
              &lt;/div&gt;
            &lt;/div&gt;

          &lt;/div&gt;
          &lt;a href="https://dev.to/kkartha31/i-built-a-load-tester-with-an-ai-diagnosis-layer-because-no-existing-tool-does-both-2opf" class="crayons-story__tertiary fs-xs"&gt;&lt;time&gt;Mar 24&lt;/time&gt;&lt;span class="time-ago-indicator-initial-placeholder"&gt;&lt;/span&gt;&lt;/a&gt;
        &lt;/div&gt;
      &lt;/div&gt;

    &lt;/div&gt;

    &lt;div class="crayons-story__indention"&gt;
      &lt;h2 class="crayons-story__title crayons-story__title-full_post"&gt;
        &lt;a href="https://dev.to/kkartha31/i-built-a-load-tester-with-an-ai-diagnosis-layer-because-no-existing-tool-does-both-2opf" id="article-link-3391940"&gt;
          I built a load tester with an AI diagnosis layer—because no existing tool does both
        &lt;/a&gt;
      &lt;/h2&gt;
        &lt;div class="crayons-story__tags"&gt;
            &lt;a class="crayons-tag  crayons-tag--monochrome " href="/t/opensource"&gt;&lt;span class="crayons-tag__prefix"&gt;#&lt;/span&gt;opensource&lt;/a&gt;
            &lt;a class="crayons-tag  crayons-tag--monochrome " href="/t/langchain"&gt;&lt;span class="crayons-tag__prefix"&gt;#&lt;/span&gt;langchain&lt;/a&gt;
            &lt;a class="crayons-tag  crayons-tag--monochrome " href="/t/webdev"&gt;&lt;span class="crayons-tag__prefix"&gt;#&lt;/span&gt;webdev&lt;/a&gt;
            &lt;a class="crayons-tag  crayons-tag--monochrome " href="/t/ai"&gt;&lt;span class="crayons-tag__prefix"&gt;#&lt;/span&gt;ai&lt;/a&gt;
        &lt;/div&gt;
      &lt;div class="crayons-story__bottom"&gt;
        &lt;div class="crayons-story__details"&gt;
            &lt;a href="https://dev.to/kkartha31/i-built-a-load-tester-with-an-ai-diagnosis-layer-because-no-existing-tool-does-both-2opf#comments" class="crayons-btn crayons-btn--s crayons-btn--ghost crayons-btn--icon-left flex items-center"&gt;
              Comments


              &lt;span class="hidden s:inline"&gt;Add Comment&lt;/span&gt;
            &lt;/a&gt;
        &lt;/div&gt;
        &lt;div class="crayons-story__save"&gt;
          &lt;small class="crayons-story__tertiary fs-xs mr-2"&gt;
            3 min read
          &lt;/small&gt;
            
              &lt;span class="bm-initial"&gt;
                

              &lt;/span&gt;
              &lt;span class="bm-success"&gt;
                

              &lt;/span&gt;
            
        &lt;/div&gt;
      &lt;/div&gt;
    &lt;/div&gt;
  &lt;/div&gt;
&lt;/div&gt;

&lt;/div&gt;


</description>
      <category>opensource</category>
      <category>langchain</category>
      <category>webdev</category>
      <category>ai</category>
    </item>
    <item>
      <title>I built a load tester with an AI diagnosis layer—because no existing tool does both</title>
      <dc:creator>Kavish Kartha</dc:creator>
      <pubDate>Tue, 24 Mar 2026 00:06:35 +0000</pubDate>
      <link>https://dev.to/kkartha31/i-built-a-load-tester-with-an-ai-diagnosis-layer-because-no-existing-tool-does-both-2opf</link>
      <guid>https://dev.to/kkartha31/i-built-a-load-tester-with-an-ai-diagnosis-layer-because-no-existing-tool-does-both-2opf</guid>
      <description>&lt;p&gt;Load testing and LLM observability are two separate categories of tools. Nobody has combined them.&lt;/p&gt;

&lt;p&gt;So I built something that does. It's called &lt;a href="https://github.com/kavishkartha05/QueryScope" rel="noopener noreferrer"&gt;QueryScope&lt;/a&gt;.&lt;/p&gt;




&lt;h2&gt;
  
  
  The problem
&lt;/h2&gt;

&lt;p&gt;k6, JMeter, and Locust are great tools. They fire requests, measure latency, and produce a report. But the report just tells you what happened. P99 spiked. Error rate went up. It doesn't tell you &lt;em&gt;why&lt;/em&gt;. LangSmith and Langfuse are also great. But they monitor AI apps passively. They don't run load tests.&lt;/p&gt;

&lt;p&gt;If you want to benchmark an endpoint AND ask "why did tail latency get worse after my last deploy?", you're stitching together multiple tools manually. &lt;/p&gt;

&lt;p&gt;&lt;em&gt;You&lt;/em&gt; are still the workflow engine. And that was the part that bothered me.&lt;/p&gt;




&lt;h2&gt;
  
  
  What QueryScope does
&lt;/h2&gt;

&lt;p&gt;Users can point QueryScope at any REST or LLM endpoint. They can configure requests and concurrency and get real p50/p95/p99 (percentiles), throughput, and error rate in a live dashboard.&lt;/p&gt;

&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2F8rkky6k9d3exnckajm36.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2F8rkky6k9d3exnckajm36.png" alt="Latency distribution chart showing p50/p95/p99 across multiple runs" width="800" height="312"&gt;&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;Now that's just the load testing layer. &lt;em&gt;Here's the interesting part&lt;/em&gt;:&lt;/p&gt;

&lt;p&gt;Every completed run gets embedded via OpenAI &lt;code&gt;text-embedding-3-small&lt;/code&gt; and indexed into Azure AI Search via LlamaIndex. When you ask, "explain the latest benchmark on the Y Combinator page", a LangChain LCEL retrieval chain finds semantically similar historical runs, injects your 5 most recent runs from Postgres as ground truth, and GPT-4o-mini generates a grounded diagnosis.&lt;/p&gt;

&lt;p&gt;&lt;em&gt;Not a hallucination&lt;/em&gt;. An explanation grounded in your actual benchmark data.&lt;/p&gt;

&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2F88afnirkpzxdezfrv565.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2F88afnirkpzxdezfrv565.png" alt="AI diagnosis panel showing a grounded root cause analysis" width="800" height="437"&gt;&lt;/a&gt;&lt;/p&gt;




&lt;h2&gt;
  
  
  The MCP server
&lt;/h2&gt;

&lt;p&gt;This is the component I'm most proud of. I created a Node.js MCP server that exposes two tools: &lt;code&gt;run_benchmark&lt;/code&gt; and &lt;code&gt;query_runs&lt;/code&gt;. Users can connect it to Claude Desktop and prompt it, "benchmark this endpoint with 50 requests at concurrency 5", and Claude will call the tool, fire the actual HTTP requests, and analyze the results autonomously.&lt;/p&gt;

&lt;p&gt;&lt;em&gt;No UI needed&lt;/em&gt;; this feature enables agentic behavior as Claude is driving execution, not just answering a question. &lt;/p&gt;

&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fwunw5te4u9r356uhbgxo.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fwunw5te4u9r356uhbgxo.png" alt="MCP server tool registration for run_benchmark using the Model Context Protocol SDK" width="800" height="636"&gt;&lt;/a&gt;&lt;/p&gt;




&lt;h2&gt;
  
  
  How the RAG pipeline works end to end
&lt;/h2&gt;

&lt;p&gt;This is the piece I spent the most time on:&lt;br&gt;
&lt;strong&gt;Indexing&lt;/strong&gt; - after every benchmark completes, the indexer builds a plain-text summary:&lt;/p&gt;

&lt;blockquote&gt;
&lt;p&gt;"Benchmark run {id} against {url} ({method}) with {n} requests. p50={p50}ms p95={p95}ms p99={p99}ms throughput={tps}req/s error_rate={err}" &lt;/p&gt;
&lt;/blockquote&gt;

&lt;p&gt;Since embedding models are trained on natural language, I decided to use plain text over JSON: "p99 spiked to 582ms" carries more semantic signal than &lt;code&gt;{"p99": 582}&lt;/code&gt;. That summary gets embedded and upserted into Azure AI Search via LlamaIndex.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Retrieval&lt;/strong&gt; - when you ask a question, two things happen in parallel: &lt;/p&gt;

&lt;ol&gt;
&lt;li&gt;The question gets embedded and Azure AI Search does a vector similarity search, returning the top 5 semantically relevant runs&lt;/li&gt;
&lt;li&gt;The 5 most recent runs get fetched directly from Postgres as ground truth&lt;/li&gt;
&lt;/ol&gt;

&lt;p&gt;Both get injected into a LangChain LCEL prompt alongside your question. GPT-4o-mini generates the diagnosis grounded in both sources: &lt;em&gt;semantic relevance AND recency&lt;/em&gt;.&lt;/p&gt;

&lt;p&gt;The recency injection was a fix I had to implement, because pure vector search doesn't understand queries like "my last two runs"; it finds semantically similar runs regardless of time. Injecting recent runs from Postgres directly solved that.&lt;/p&gt;




&lt;h2&gt;
  
  
  The full stack
&lt;/h2&gt;

&lt;ul&gt;
&lt;li&gt;
&lt;strong&gt;FastAPI + async SQLAlchemy + asyncpg&lt;/strong&gt; → benchmark runner and REST API&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;LlamaIndex + Azure AI Search&lt;/strong&gt; → indexing and vector retrieval&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;LangChain LCEL&lt;/strong&gt; → RCA chain with Postgres context injection&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;React + Recharts&lt;/strong&gt; → live polling dashboard&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Node.js&lt;/strong&gt; + &lt;code&gt;@modelcontextprotocol/sdk&lt;/code&gt; → MCP server&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Docker Compose&lt;/strong&gt; → one command local setup&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Kubernetes&lt;/strong&gt; with HPA → scales benchmark workers under load&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;MySQL adapter&lt;/strong&gt; → &lt;code&gt;sa.JSON&lt;/code&gt; replaces Postgres-native &lt;code&gt;ARRAY&lt;/code&gt; for cross-DB compatibility&lt;/li&gt;
&lt;/ul&gt;




&lt;h2&gt;
  
  
  Try it
&lt;/h2&gt;

&lt;p&gt;Self-hostable, open source, runs with one command:&lt;/p&gt;

&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fr5lptxckvjpya35u3r5m.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fr5lptxckvjpya35u3r5m.png" alt="Setup" width="800" height="368"&gt;&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Full demo walkthrough&lt;/strong&gt;: &lt;a href="https://www.loom.com/share/aa0458b3b73849f4b8c731217b443b6f" rel="noopener noreferrer"&gt;https://www.loom.com/share/aa0458b3b73849f4b8c731217b443b6f&lt;/a&gt; &lt;/p&gt;

&lt;p&gt;&lt;strong&gt;GitHub&lt;/strong&gt;: &lt;a href="https://github.com/kavishkartha05/QueryScope" rel="noopener noreferrer"&gt;https://github.com/kavishkartha05/QueryScope&lt;/a&gt; &lt;/p&gt;

&lt;p&gt;Happy to hear feedback or answer questions about the RAG pipeline, MCP integration, or anything else in the comments.&lt;/p&gt;

</description>
      <category>opensource</category>
      <category>langchain</category>
      <category>webdev</category>
      <category>ai</category>
    </item>
  </channel>
</rss>
