<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/">
  <channel>
    <title>DEV Community: clemra</title>
    <description>The latest articles on DEV Community by clemra (@clemra).</description>
    <link>https://dev.to/clemra</link>
    <image>
      <url>https://media2.dev.to/dynamic/image/width=90,height=90,fit=cover,gravity=auto,format=auto/https:%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Fuser%2Fprofile_image%2F1150622%2F94e677bd-2802-48b6-ac4f-f81d0311ac90.png</url>
      <title>DEV Community: clemra</title>
      <link>https://dev.to/clemra</link>
    </image>
    <atom:link rel="self" type="application/rss+xml" href="https://dev.to/feed/clemra"/>
    <language>en</language>
    <item>
      <title>[Boost]</title>
      <dc:creator>clemra</dc:creator>
      <pubDate>Wed, 01 Oct 2025 16:00:07 +0000</pubDate>
      <link>https://dev.to/clemra/-52j0</link>
      <guid>https://dev.to/clemra/-52j0</guid>
      <description>&lt;div class="ltag__link"&gt;
  &lt;a href="/margarita_sliachina" class="ltag__link__link"&gt;
    &lt;div class="ltag__link__pic"&gt;
      &lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Fuser%2Fprofile_image%2F3517901%2F9cf559b7-bf45-4232-90ee-f04ecd21e2c5.png" alt="margarita_sliachina"&gt;
    &lt;/div&gt;
  &lt;/a&gt;
  &lt;a href="https://dev.to/margarita_sliachina/llm-observability-debugging-my-journaling-agent-457m" class="ltag__link__link"&gt;
    &lt;div class="ltag__link__content"&gt;
      &lt;h2&gt;LLM Observability: Debugging My Journaling Agent&lt;/h2&gt;
      &lt;h3&gt;Margarita Sliachina ・ Sep 20&lt;/h3&gt;
      &lt;div class="ltag__link__taglist"&gt;
        &lt;span class="ltag__link__tag"&gt;#ai&lt;/span&gt;
        &lt;span class="ltag__link__tag"&gt;#llm&lt;/span&gt;
        &lt;span class="ltag__link__tag"&gt;#opensource&lt;/span&gt;
        &lt;span class="ltag__link__tag"&gt;#langfuse&lt;/span&gt;
      &lt;/div&gt;
    &lt;/div&gt;
  &lt;/a&gt;
&lt;/div&gt;


</description>
      <category>ai</category>
      <category>llm</category>
      <category>opensource</category>
      <category>langfuse</category>
    </item>
    <item>
      <title>Excited about this!</title>
      <dc:creator>clemra</dc:creator>
      <pubDate>Fri, 06 Jun 2025 13:05:01 +0000</pubDate>
      <link>https://dev.to/clemra/excited-about-this-143o</link>
      <guid>https://dev.to/clemra/excited-about-this-143o</guid>
      <description>&lt;div class="ltag__link--embedded"&gt;
  &lt;div class="crayons-story "&gt;
  &lt;a href="https://dev.to/clemra/all-langfuse-product-features-now-free-open-source-4140" class="crayons-story__hidden-navigation-link"&gt;All Langfuse Product Features now Free Open-Source&lt;/a&gt;


  &lt;div class="crayons-story__body crayons-story__body-full_post"&gt;
    &lt;div class="crayons-story__top"&gt;
      &lt;div class="crayons-story__meta"&gt;
        &lt;div class="crayons-story__author-pic"&gt;

          &lt;a href="/clemra" class="crayons-avatar  crayons-avatar--l  "&gt;
            &lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Fuser%2Fprofile_image%2F1150622%2F94e677bd-2802-48b6-ac4f-f81d0311ac90.png" alt="clemra profile" class="crayons-avatar__image"&gt;
          &lt;/a&gt;
        &lt;/div&gt;
        &lt;div&gt;
          &lt;div&gt;
            &lt;a href="/clemra" class="crayons-story__secondary fw-medium m:hidden"&gt;
              clemra
            &lt;/a&gt;
            &lt;div class="profile-preview-card relative mb-4 s:mb-0 fw-medium hidden m:inline-block"&gt;
              
                clemra
                
              
              &lt;div id="story-author-preview-content-2569296" class="profile-preview-card__content crayons-dropdown branded-7 p-4 pt-0"&gt;
                &lt;div class="gap-4 grid"&gt;
                  &lt;div class="-mt-4"&gt;
                    &lt;a href="/clemra" class="flex"&gt;
                      &lt;span class="crayons-avatar crayons-avatar--xl mr-2 shrink-0"&gt;
                        &lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Fuser%2Fprofile_image%2F1150622%2F94e677bd-2802-48b6-ac4f-f81d0311ac90.png" class="crayons-avatar__image" alt=""&gt;
                      &lt;/span&gt;
                      &lt;span class="crayons-link crayons-subtitle-2 mt-5"&gt;clemra&lt;/span&gt;
                    &lt;/a&gt;
                  &lt;/div&gt;
                  &lt;div class="print-hidden"&gt;
                    
                      Follow
                    
                  &lt;/div&gt;
                  &lt;div class="author-preview-metadata-container"&gt;&lt;/div&gt;
                &lt;/div&gt;
              &lt;/div&gt;
            &lt;/div&gt;

          &lt;/div&gt;
          &lt;a href="https://dev.to/clemra/all-langfuse-product-features-now-free-open-source-4140" class="crayons-story__tertiary fs-xs"&gt;&lt;time&gt;Jun 6 '25&lt;/time&gt;&lt;span class="time-ago-indicator-initial-placeholder"&gt;&lt;/span&gt;&lt;/a&gt;
        &lt;/div&gt;
      &lt;/div&gt;

    &lt;/div&gt;

    &lt;div class="crayons-story__indention"&gt;
      &lt;h2 class="crayons-story__title crayons-story__title-full_post"&gt;
        &lt;a href="https://dev.to/clemra/all-langfuse-product-features-now-free-open-source-4140" id="article-link-2569296"&gt;
          All Langfuse Product Features now Free Open-Source
        &lt;/a&gt;
      &lt;/h2&gt;
        &lt;div class="crayons-story__tags"&gt;
            &lt;a class="crayons-tag  crayons-tag--monochrome " href="/t/opensource"&gt;&lt;span class="crayons-tag__prefix"&gt;#&lt;/span&gt;opensource&lt;/a&gt;
            &lt;a class="crayons-tag  crayons-tag--monochrome " href="/t/llm"&gt;&lt;span class="crayons-tag__prefix"&gt;#&lt;/span&gt;llm&lt;/a&gt;
            &lt;a class="crayons-tag  crayons-tag--monochrome " href="/t/devtools"&gt;&lt;span class="crayons-tag__prefix"&gt;#&lt;/span&gt;devtools&lt;/a&gt;
            &lt;a class="crayons-tag  crayons-tag--monochrome " href="/t/langfuse"&gt;&lt;span class="crayons-tag__prefix"&gt;#&lt;/span&gt;langfuse&lt;/a&gt;
        &lt;/div&gt;
      &lt;div class="crayons-story__bottom"&gt;
        &lt;div class="crayons-story__details"&gt;
          &lt;a href="https://dev.to/clemra/all-langfuse-product-features-now-free-open-source-4140" class="crayons-btn crayons-btn--s crayons-btn--ghost crayons-btn--icon-left"&gt;
            &lt;div class="multiple_reactions_aggregate"&gt;
              &lt;span class="multiple_reactions_icons_container"&gt;
                  &lt;span class="crayons_icon_container"&gt;
                    &lt;img src="https://assets.dev.to/assets/fire-f60e7a582391810302117f987b22a8ef04a2fe0df7e3258a5f49332df1cec71e.svg" width="18" height="18"&gt;
                  &lt;/span&gt;
                  &lt;span class="crayons_icon_container"&gt;
                    &lt;img src="https://assets.dev.to/assets/exploding-head-daceb38d627e6ae9b730f36a1e390fca556a4289d5a41abb2c35068ad3e2c4b5.svg" width="18" height="18"&gt;
                  &lt;/span&gt;
                  &lt;span class="crayons_icon_container"&gt;
                    &lt;img src="https://assets.dev.to/assets/sparkle-heart-5f9bee3767e18deb1bb725290cb151c25234768a0e9a2bd39370c382d02920cf.svg" width="18" height="18"&gt;
                  &lt;/span&gt;
              &lt;/span&gt;
              &lt;span class="aggregate_reactions_counter"&gt;13&lt;span class="hidden s:inline"&gt; reactions&lt;/span&gt;&lt;/span&gt;
            &lt;/div&gt;
          &lt;/a&gt;
            &lt;a href="https://dev.to/clemra/all-langfuse-product-features-now-free-open-source-4140#comments" class="crayons-btn crayons-btn--s crayons-btn--ghost crayons-btn--icon-left flex items-center"&gt;
              Comments


              3&lt;span class="hidden s:inline"&gt; comments&lt;/span&gt;
            &lt;/a&gt;
        &lt;/div&gt;
        &lt;div class="crayons-story__save"&gt;
          &lt;small class="crayons-story__tertiary fs-xs mr-2"&gt;
            2 min read
          &lt;/small&gt;
            
              &lt;span class="bm-initial"&gt;
                

              &lt;/span&gt;
              &lt;span class="bm-success"&gt;
                

              &lt;/span&gt;
            
        &lt;/div&gt;
      &lt;/div&gt;
    &lt;/div&gt;
  &lt;/div&gt;
&lt;/div&gt;

&lt;/div&gt;


</description>
      <category>opensource</category>
      <category>llm</category>
      <category>devtools</category>
      <category>langfuse</category>
    </item>
    <item>
      <title>All Langfuse Product Features now Free Open-Source</title>
      <dc:creator>clemra</dc:creator>
      <pubDate>Fri, 06 Jun 2025 13:02:00 +0000</pubDate>
      <link>https://dev.to/clemra/all-langfuse-product-features-now-free-open-source-4140</link>
      <guid>https://dev.to/clemra/all-langfuse-product-features-now-free-open-source-4140</guid>
      <description>&lt;p&gt;Today, we’re excited to share a big milestone: &lt;strong&gt;all Langfuse product features are now open source under the MIT license&lt;/strong&gt;.&lt;/p&gt;

&lt;p&gt;The LLM landscape is evolving rapidly - and so are the tools and workflows that developers use to build and improve their LLM applications. To empower our community and invite deeper collaboration, we’ve decided to open source all previously commercial features of Langfuse.&lt;/p&gt;

&lt;p&gt;That means you can now self-host features like:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;
&lt;a href="https://langfuse.com/docs/scores/model-based-evals" rel="noopener noreferrer"&gt;Managed LLM-as-a-Judge evaluations&lt;/a&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;a href="https://langfuse.com/docs/scores/annotation" rel="noopener noreferrer"&gt;Annotation queues&lt;/a&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;a href="https://langfuse.com/docs/datasets/prompt-experiments" rel="noopener noreferrer"&gt;Prompt experiments&lt;/a&gt;
&lt;/li&gt;
&lt;li&gt;&lt;a href="https://langfuse.com/docs/playground" rel="noopener noreferrer"&gt;LLM playground&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;All fully open under the MIT license.&lt;/p&gt;

&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2F9pteo40w7qeiotzbs0g4.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2F9pteo40w7qeiotzbs0g4.png" alt="Langfuse Open Source Project" width="800" height="542"&gt;&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;If you're already self-hosting Langfuse, &lt;a href="https://langfuse.com/self-hosting/upgrade" rel="noopener noreferrer"&gt;upgrade to the latest version&lt;/a&gt; to unlock these new features.&lt;/p&gt;




&lt;h2&gt;
  
  
  &lt;strong&gt;Why we’re doing this&lt;/strong&gt;
&lt;/h2&gt;

&lt;p&gt;Langfuse is building the &lt;strong&gt;open source LLM engineering platform&lt;/strong&gt; - a platform to observe, evaluate, and improve LLM applications.&lt;/p&gt;

&lt;p&gt;To truly serve the developer community, we believe the &lt;strong&gt;full dev cycle&lt;/strong&gt; needs to be covered by open source. Features like evals and prompt experiments are table stakes in today’s LLMOps world. These shouldn’t be behind a paywall.&lt;/p&gt;

&lt;p&gt;By removing commercial restrictions from key product features, we’re doubling down on transparency, adoption, and community collaboration. It lets us move faster together.&lt;/p&gt;




&lt;h2&gt;
  
  
  &lt;strong&gt;Our Open Source Journey&lt;/strong&gt;
&lt;/h2&gt;

&lt;p&gt;Langfuse &lt;a href="https://langfuse.com/blog/product-analytics-for-LLM-apps" rel="noopener noreferrer"&gt;started as an open source project&lt;/a&gt;, grounded in a few core principles:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Developers should fully own and access their data
&lt;/li&gt;
&lt;li&gt;Langfuse must integrate with any stack, framework, or model
&lt;/li&gt;
&lt;li&gt;Teams should have the freedom to customize the platform for their workflows&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;That philosophy still guides us today - now with an even wider open-source surface.&lt;/p&gt;

&lt;p&gt;While we continue to support enterprise needs (like SCIM, Audit Logs, and custom data retention policies), &lt;strong&gt;the core of Langfuse is now entirely OSS&lt;/strong&gt;.&lt;/p&gt;

&lt;h2&gt;
  
  
  &lt;strong&gt;Langfuse in the Wild&lt;/strong&gt;
&lt;/h2&gt;

&lt;p&gt;Langfuse is growing fast:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;7,000,000 SDK installs/month
&lt;/li&gt;
&lt;li&gt;5,500,000 Docker pulls
&lt;/li&gt;
&lt;li&gt;8,000 monthly active self-hosted instances&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;We’re constantly amazed by the scale and creativity of our community. We hope this move makes Langfuse even more accessible and extensible for all developers building with LLMs.&lt;/p&gt;

&lt;p&gt;📌 &lt;a href="https://langfuse.com/self-hosting" rel="noopener noreferrer"&gt;Check out the self-hosting docs&lt;/a&gt;&lt;br&gt;&lt;br&gt;
🚀 &lt;a href="https://langfuse.com/changelog/2025-05-22-terraform-modules" rel="noopener noreferrer"&gt;Deploy with Terraform&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;Let us know what you're building - and how we can help make Langfuse even better.&lt;/p&gt;

&lt;p&gt;✍️ &lt;em&gt;By Clemens, Marc &amp;amp; Max&lt;/em&gt;  &lt;/p&gt;

</description>
      <category>opensource</category>
      <category>llm</category>
      <category>devtools</category>
      <category>langfuse</category>
    </item>
    <item>
      <title>RAG observability in 2 lines of code with Llama Index &amp; Langfuse</title>
      <dc:creator>clemra</dc:creator>
      <pubDate>Mon, 18 Mar 2024 19:34:03 +0000</pubDate>
      <link>https://dev.to/clemra/rag-observability-in-2-lines-of-code-with-llama-index-langfuse-51pa</link>
      <guid>https://dev.to/clemra/rag-observability-in-2-lines-of-code-with-llama-index-langfuse-51pa</guid>
      <description>&lt;h2&gt;
  
  
  Why you need observability for RAG
&lt;/h2&gt;

&lt;p&gt;There are so many different ways to make RAG work for a use case. What vector store to use? What retrieval strategy to use? LlamaIndex makes it easy to try many of them without having to deal with the complexity of integrations, prompts and memory all at once.&lt;/p&gt;

&lt;p&gt;Initially, we at Langfuse worked on complex RAG/agent applications and quickly realized that there is a new need for observability and experimentation to tweak and iterate on the details. In the end, these details matter to get from something cool to an actually reliable RAG application that is safe for users and customers. Think of this: if there is a user session of interest in your &lt;em&gt;production&lt;/em&gt; RAG application, how can you quickly see whether the retrieved context for that session was actually relevant or the LLM response was on point? &lt;/p&gt;

&lt;h2&gt;
  
  
  What is Langfuse?
&lt;/h2&gt;

&lt;p&gt;Thus, we started working on &lt;a href="https://langfuse.com"&gt;Langfuse.com&lt;/a&gt; (&lt;a href="https://github.com/langfuse/langfuse"&gt;GitHub&lt;/a&gt;) to establish an open source LLM engineering platform with tightly integrated features for tracing, prompt management, and evaluation. In the beginning we just solved our own and our friends’ problems. Today we are at over 1000 projects which rely on Langfuse, and 2.3k stars on GitHub. You can either &lt;a href="https://langfuse.com/docs/deployment/self-host"&gt;self-host&lt;/a&gt; Langfuse or use the &lt;a href="https://cloud.langfuse.com"&gt;cloud instance&lt;/a&gt; maintained by us.&lt;/p&gt;

&lt;p&gt;We are thrilled to announce our new integration with LlamaIndex today. This feature was &lt;a href="https://github.com/orgs/langfuse/discussions/828"&gt;highly requested&lt;/a&gt; by our community and aligns with our project's focus on native integration with major application frameworks. Thank you to everyone who contributed and tested it during the beta phase!&lt;/p&gt;

&lt;p&gt;&lt;a href="https://media.dev.to/cdn-cgi/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fdcq1wmpy47yjcyjvp7qw.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media.dev.to/cdn-cgi/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fdcq1wmpy47yjcyjvp7qw.png" alt="Integrating Llama Index with Langfuse for open source observability and analytics" width="800" height="527"&gt;&lt;/a&gt;&lt;/p&gt;

&lt;h2&gt;
  
  
  The challenge
&lt;/h2&gt;

&lt;p&gt;We love LlamaIndex, since the clean and standardized interface abstracts a lot of complexity away. Let’s take this simple example of a &lt;code&gt;VectorStoreIndex&lt;/code&gt; and a &lt;code&gt;ChatEngine&lt;/code&gt;.&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight python"&gt;&lt;code&gt;&lt;span class="kn"&gt;from&lt;/span&gt; &lt;span class="n"&gt;llama_index.core&lt;/span&gt; &lt;span class="kn"&gt;import&lt;/span&gt; &lt;span class="n"&gt;SimpleDirectoryReader&lt;/span&gt;
&lt;span class="kn"&gt;from&lt;/span&gt; &lt;span class="n"&gt;llama_index.core&lt;/span&gt; &lt;span class="kn"&gt;import&lt;/span&gt; &lt;span class="n"&gt;VectorStoreIndex&lt;/span&gt;

&lt;span class="n"&gt;documents&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nc"&gt;SimpleDirectoryReader&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;./data&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;).&lt;/span&gt;&lt;span class="nf"&gt;load_data&lt;/span&gt;&lt;span class="p"&gt;()&lt;/span&gt;

&lt;span class="n"&gt;index&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="n"&gt;VectorStoreIndex&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;from_documents&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;documents&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;

&lt;span class="n"&gt;chat_engine&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="n"&gt;index&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;as_chat_engine&lt;/span&gt;&lt;span class="p"&gt;()&lt;/span&gt;

&lt;span class="nf"&gt;print&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;chat_engine&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;chat&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;What problems can I solve with RAG?&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;))&lt;/span&gt;
&lt;span class="nf"&gt;print&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;chat_engine&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;chat&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;How do I optimize my RAG application?&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;))&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;In just 3 lines we loaded our local documents, added them to an index and initialized a ChatEngine with memory. Subsequently we had a stateful conversation with the chat_engine.&lt;/p&gt;

&lt;p&gt;This is awesome to get started, but we quickly run into questions like:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;&lt;em&gt;“What context is actually retrieved from the index to answer the questions?”&lt;/em&gt;&lt;/li&gt;
&lt;li&gt;&lt;em&gt;“How is chat memory managed?”&lt;/em&gt;&lt;/li&gt;
&lt;li&gt;&lt;em&gt;“Which steps add the most latency to the overall execution? How to optimize it?”&lt;/em&gt;&lt;/li&gt;
&lt;/ul&gt;

&lt;h2&gt;
  
  
  One-click OSS observability to the rescue
&lt;/h2&gt;

&lt;p&gt;&lt;iframe width="710" height="399" src="https://www.youtube.com/embed/4PZbb9XwG2o"&gt;
&lt;/iframe&gt;
&lt;/p&gt;

&lt;p&gt;We integrated Langfuse to be a &lt;code&gt;one-click&lt;/code&gt; integration with LlamaIndex using the global callback manager.&lt;/p&gt;

&lt;h3&gt;
  
  
  Preparation
&lt;/h3&gt;

&lt;ol&gt;
&lt;li&gt;Install the community package (&lt;code&gt;pip install llama-index-callbacks-langfuse&lt;/code&gt;)&lt;/li&gt;
&lt;li&gt;Copy/paste the environment variables from the Langfuse project settings to your Python project: 'LANGFUSE_SECRET_KEY', 'LANGFUSE_PUBLIC_KEY' and 'LANGFUSE_HOST'&lt;/li&gt;
&lt;/ol&gt;

&lt;p&gt;Now, you only need to set the global &lt;code&gt;langfuse&lt;/code&gt; handler:&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight python"&gt;&lt;code&gt;&lt;span class="kn"&gt;from&lt;/span&gt; &lt;span class="n"&gt;llama_index.core&lt;/span&gt; &lt;span class="kn"&gt;import&lt;/span&gt; &lt;span class="n"&gt;set_global_handler&lt;/span&gt;

&lt;span class="nf"&gt;set_global_handler&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;langfuse&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;And voilà, with just two lines of code you get detailed traces for all aspects of your RAG application in Langfuse. They automatically include latency and usage/cost breakdowns.&lt;/p&gt;

&lt;h2&gt;
  
  
  Group multiple chat threads into a session
&lt;/h2&gt;

&lt;p&gt;Working with lots of teams building GenAI/LLM/RAG applications, we’ve continuously added more features that are useful to debug and improve these applications. One example is &lt;a href="https://langfuse.com/docs/tracing/sessions"&gt;session tracking&lt;/a&gt; for conversational applications to see the traces in context of a full message thread.&lt;/p&gt;

&lt;p&gt;To activate it, just add an id that identifies the session as a trace param before calling the chat_engine.&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight python"&gt;&lt;code&gt;&lt;span class="kn"&gt;from&lt;/span&gt; &lt;span class="n"&gt;llama_index.core&lt;/span&gt; &lt;span class="kn"&gt;import&lt;/span&gt; &lt;span class="n"&gt;global_handler&lt;/span&gt;

&lt;span class="n"&gt;global_handler&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;set_trace_params&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
    &lt;span class="n"&gt;session_id&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;your-session-id&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;
&lt;span class="p"&gt;)&lt;/span&gt;

&lt;span class="n"&gt;chat_engine&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;chat&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;What did he do growing up?&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;
&lt;span class="n"&gt;chat_engine&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;chat&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;What did he do at USC?&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;
&lt;span class="n"&gt;chat_engine&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;chat&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;How old is he?&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;Thereby you can see all these chat invocations grouped into a session view in Langfuse Tracing:&lt;/p&gt;

&lt;p&gt;&lt;a href="https://media.dev.to/cdn-cgi/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fvkchrqqz9ttol6z1jsbv.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media.dev.to/cdn-cgi/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fvkchrqqz9ttol6z1jsbv.png" alt="RAG Observability with Langfuse" width="800" height="547"&gt;&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;Next to sessions, you can also track individual users or add tags and metadata to your Langfuse traces.&lt;/p&gt;

&lt;h2&gt;
  
  
  Trace more complex applications and use other Langfuse features for prompt management and evaluation
&lt;/h2&gt;

&lt;p&gt;This integration makes it easy to get started with Tracing. If your application ends up growing into using custom logic or other frameworks/packages, all Langfuse integrations are fully interoperable.&lt;/p&gt;

&lt;p&gt;We have also built additional features to version control and collaborate on prompts (langfuse &lt;a href="https://langfuse.com/docs/prompts/get-started"&gt;prompt management&lt;/a&gt;), track &lt;a href="https://langfuse.com/docs/experimentation"&gt;experiments&lt;/a&gt;, and &lt;a href="https://langfuse.com/docs/scores/overview"&gt;evaluate&lt;/a&gt; production traces. For RAG specifically, we collaborated with the RAGAS team and it’s easy to run their popular eval suite on traces captured with Langfuse (see &lt;a href="https://langfuse.com/docs/scores/model-based-evals/ragas"&gt;cookbook&lt;/a&gt;).&lt;/p&gt;

&lt;h2&gt;
  
  
  Get started
&lt;/h2&gt;

&lt;p&gt;The easiest way to get started is to follow the &lt;a href="https://docs.llamaindex.ai/en/stable/examples/callbacks/LangfuseCallbackHandler.html"&gt;cookbook&lt;/a&gt; and check out the &lt;a href="https://langfuse.com/docs/integrations/llama-index/get-started"&gt;docs&lt;/a&gt;.&lt;/p&gt;

&lt;h2&gt;
  
  
  Feedback? Ping us
&lt;/h2&gt;

&lt;p&gt;We’d love to hear any feedback. Come join us on our &lt;a href="https://langfuse.com/discord"&gt;community discord&lt;/a&gt; or add your thoughts to this &lt;a href="https://github.com/orgs/langfuse/discussions/828"&gt;GitHub thread&lt;/a&gt;.&lt;/p&gt;

</description>
      <category>observability</category>
      <category>llamaindex</category>
      <category>rag</category>
      <category>opensource</category>
    </item>
    <item>
      <title>LLM Analytics 101 - How to Improve your LLM app</title>
      <dc:creator>clemra</dc:creator>
      <pubDate>Thu, 14 Sep 2023 20:10:17 +0000</pubDate>
      <link>https://dev.to/clemra/llm-analytics-101-how-to-improve-your-llm-app-1ph5</link>
      <guid>https://dev.to/clemra/llm-analytics-101-how-to-improve-your-llm-app-1ph5</guid>
      <description>&lt;p&gt;&lt;em&gt;This guide gives builders on the LLM application layer an  understanding of the &lt;strong&gt;why&lt;/strong&gt;, &lt;strong&gt;what&lt;/strong&gt; and &lt;strong&gt;how&lt;/strong&gt; of tracing &amp;amp; analytics to improve their LLM applications&lt;/em&gt;&lt;/p&gt;

&lt;h2&gt;
  
  
  LLMs Have Changed Software Delivery
&lt;/h2&gt;

&lt;p&gt;Generative AI outputs are not deterministic. That is, they cannot be reliably forecasted. This changes how software is delivered as compared to more 'traditional' software engineering. If it is not clear what an output will look like and what a '&lt;em&gt;good&lt;/em&gt;' output is, it is harder to assure quality and build robust tests before shipping code.&lt;/p&gt;

&lt;p&gt;Learning from production data has taken the place of extensive software design and testing on the LLM application layer. But to learn from production, you have to trace your LLMs and analyze what works and what does not.&lt;/p&gt;

&lt;h2&gt;
  
  
  Tracing LLM apps - What's Different?
&lt;/h2&gt;

&lt;p&gt;Building LLM-based apps means integrating multiple complex elements and interactions to your code. This can mean chains, agents, different base models, tools, embedding retrieval and routing. Traditional logging and analytics tools are not well equipped to ingest, display and analyze these new ways of interacting with LLMs. &lt;br&gt;
The new logging stack needs to think LLM-native from the ground up. That means grouping calls and visualizing them in a way that enables teams to understand and debug them.&lt;/p&gt;

&lt;h2&gt;
  
  
  Let's Dive in: What to Measure?
&lt;/h2&gt;



&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight typescript"&gt;&lt;code&gt;&lt;span class="c1"&gt;// Example generation creation&lt;/span&gt;
&lt;span class="kd"&gt;const&lt;/span&gt; &lt;span class="nx"&gt;generation&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nx"&gt;trace&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;generation&lt;/span&gt;&lt;span class="p"&gt;({&lt;/span&gt;
  &lt;span class="na"&gt;name&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="dl"&gt;"&lt;/span&gt;&lt;span class="s2"&gt;chat-completion&lt;/span&gt;&lt;span class="dl"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
  &lt;span class="na"&gt;model&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="dl"&gt;"&lt;/span&gt;&lt;span class="s2"&gt;gpt-3.5-turbo&lt;/span&gt;&lt;span class="dl"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
  &lt;span class="na"&gt;modelParameters&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="p"&gt;{&lt;/span&gt;
    &lt;span class="na"&gt;temperature&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="mf"&gt;0.9&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="na"&gt;maxTokens&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="mi"&gt;2000&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
  &lt;span class="p"&gt;},&lt;/span&gt;
  &lt;span class="na"&gt;prompt&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="nx"&gt;messages&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
&lt;span class="p"&gt;});&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;The baseline requirement to improve an LLM-based app is to trace its activity. But what does that mean and what do I want to record? From working with our users at the bleeding edge of LLMs, we've seen five metrics emerge to keep track of:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;
&lt;strong&gt;Volume:&lt;/strong&gt; The foundation for all other metrics - track all LLM calls and their content and attach relevant metadata for both prompts and completions.&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Costs:&lt;/strong&gt; Record token counts and pricing to compute the cost of each call. Track GPU seconds and pricing for self-hosted models.&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Latency:&lt;/strong&gt; Measure latency for every call. Use this data to analyze which steps add latency and start improving your users' experience.&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Quality:&lt;/strong&gt; Proactively solicit user feedback, conduct manual evaluations and score outputs using model-based evaluations.&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Errors/Exceptions:&lt;/strong&gt; Monitor for timeouts and HTTP errors, such as rate limits, that are indicative of systemic issues.&lt;/li&gt;
&lt;/ul&gt;

&lt;h2&gt;
  
  
  Implementing Effective Analytics through KPIs
&lt;/h2&gt;

&lt;p&gt;We've seen successful teams implement the following best practice KPIs by slicing the above five metrics (volume, cost, latency, quality, errors) by:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;
&lt;strong&gt;Use case:&lt;/strong&gt; Cluster prompts and completions by use case to understand how your users are interacting with your LLM&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Model and configuration:&lt;/strong&gt; How do different models and model configurations affect quality, latency or errors?&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Chain and step:&lt;/strong&gt; Drill down into chains to understand what drives performance&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;User data:&lt;/strong&gt; Group users by specific characteristics to gain insight into personas and specific constituencies in your product&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Time:&lt;/strong&gt; Inspect your KPIs over time and detect trends&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Version:&lt;/strong&gt; Track prompts, chains and software releases by their version and understand performance changes&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Geography:&lt;/strong&gt; Especially important for latency&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Language:&lt;/strong&gt; Understand how well your app works by user language&lt;/li&gt;
&lt;/ul&gt;

&lt;h2&gt;
  
  
  Step-by-Step: Implementing Tracing &amp;amp; Analytics in LLM Applications
&lt;/h2&gt;

&lt;ol&gt;
&lt;li&gt;
&lt;strong&gt;Define goals:&lt;/strong&gt; What do you want to achieve, and how do your goals align with your users' requirements? Take the above metrics as a starting point to define KPIs unique to your application.&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Incorporate tracking:&lt;/strong&gt; This means backend execution and scores (e.g. capturing user feedback in the frontend).&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Inspect and debug:&lt;/strong&gt; Understand your users by inspecting runtime traces through a visual UI&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Analyze:&lt;/strong&gt; Start by measuring cost by model/user and time, cost by product feature, latency by step of a chain and start scattering quality/latency/cost grouped by experiments or production versions.&lt;/li&gt;
&lt;/ol&gt;

&lt;h2&gt;
  
  
  Give Langfuse a Spin
&lt;/h2&gt;

&lt;p&gt;&lt;a href="https://langfuse.com" rel="noopener noreferrer"&gt;Langfuse&lt;/a&gt; makes tracing and analyzing LLM applications accessible. It is an open-source project under MIT license.&lt;/p&gt;

&lt;p&gt;It offers data integration with async SDKs (JS/TS, Python), via API, and Langchain integrations. It provides a UI for debugging complex traces &amp;amp; includes pre-built dashboards to analyze quality, latency and cost. It allows for recording user feedback and using LLM models to grade and score your outputs. To get going, refer to the &lt;a href="https://langfuse.com/docs/get-started" rel="noopener noreferrer"&gt;quickstart guide&lt;/a&gt; in the docs. &lt;/p&gt;

&lt;p&gt;Visit us on &lt;a href="https://langfuse.com/discord" rel="noopener noreferrer"&gt;Discord&lt;/a&gt; and &lt;a href="https://github.com/langfuse/langfuse/" rel="noopener noreferrer"&gt;GitHub&lt;/a&gt; to engage with our project.&lt;/p&gt;

&lt;p&gt;&lt;a href="https://media.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fb3ksrdnl61f7vzaej4t6.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fb3ksrdnl61f7vzaej4t6.png" alt="A trace in Langfuse"&gt;&lt;/a&gt;&lt;br&gt;
Interested? Sign up to try the demo at &lt;a href="http://langfuse.com" rel="noopener noreferrer"&gt;langfuse.com&lt;/a&gt;. Self-hosting instructions can be found in &lt;a href="https://langfuse.com/docs/deployment/self-host" rel="noopener noreferrer"&gt;our docs&lt;/a&gt;.&lt;/p&gt;

</description>
      <category>analytics</category>
      <category>llm</category>
      <category>observability</category>
      <category>llmops</category>
    </item>
  </channel>
</rss>
