<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/">
  <channel>
    <title>DEV Community: Vaibhav Dubey</title>
    <description>The latest articles on DEV Community by Vaibhav Dubey (@vaibsd).</description>
    <link>https://dev.to/vaibsd</link>
    <image>
      <url>https://media2.dev.to/dynamic/image/width=90,height=90,fit=cover,gravity=auto,format=auto/https:%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Fuser%2Fprofile_image%2F2789963%2F35ed2c37-9dc5-45f5-8b65-43bc92a18163.jpeg</url>
      <title>DEV Community: Vaibhav Dubey</title>
      <link>https://dev.to/vaibsd</link>
    </image>
    <atom:link rel="self" type="application/rss+xml" href="https://dev.to/feed/vaibsd"/>
    <language>en</language>
    <item>
      <title>[Boost]</title>
      <dc:creator>Vaibhav Dubey</dc:creator>
      <pubDate>Mon, 12 May 2025 16:39:15 +0000</pubDate>
      <link>https://dev.to/vaibsd/-4m6l</link>
      <guid>https://dev.to/vaibsd/-4m6l</guid>
      <description>&lt;div class="ltag__link--embedded"&gt;
  &lt;div class="crayons-story "&gt;
  &lt;a href="https://dev.to/vaibsd/plexe-ml-models-from-a-prompt-3ibg" class="crayons-story__hidden-navigation-link"&gt;Plexe - ML models from a prompt&lt;/a&gt;


  &lt;div class="crayons-story__body crayons-story__body-full_post"&gt;
    &lt;div class="crayons-story__top"&gt;
      &lt;div class="crayons-story__meta"&gt;
        &lt;div class="crayons-story__author-pic"&gt;

          &lt;a href="/vaibsd" class="crayons-avatar  crayons-avatar--l  "&gt;
            &lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Fuser%2Fprofile_image%2F2789963%2F35ed2c37-9dc5-45f5-8b65-43bc92a18163.jpeg" alt="vaibsd profile" class="crayons-avatar__image"&gt;
          &lt;/a&gt;
        &lt;/div&gt;
        &lt;div&gt;
          &lt;div&gt;
            &lt;a href="/vaibsd" class="crayons-story__secondary fw-medium m:hidden"&gt;
              Vaibhav Dubey
            &lt;/a&gt;
            &lt;div class="profile-preview-card relative mb-4 s:mb-0 fw-medium hidden m:inline-block"&gt;
              
                Vaibhav Dubey
                
              
              &lt;div id="story-author-preview-content-2475660" class="profile-preview-card__content crayons-dropdown branded-7 p-4 pt-0"&gt;
                &lt;div class="gap-4 grid"&gt;
                  &lt;div class="-mt-4"&gt;
                    &lt;a href="/vaibsd" class="flex"&gt;
                      &lt;span class="crayons-avatar crayons-avatar--xl mr-2 shrink-0"&gt;
                        &lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Fuser%2Fprofile_image%2F2789963%2F35ed2c37-9dc5-45f5-8b65-43bc92a18163.jpeg" class="crayons-avatar__image" alt=""&gt;
                      &lt;/span&gt;
                      &lt;span class="crayons-link crayons-subtitle-2 mt-5"&gt;Vaibhav Dubey&lt;/span&gt;
                    &lt;/a&gt;
                  &lt;/div&gt;
                  &lt;div class="print-hidden"&gt;
                    
                      Follow
                    
                  &lt;/div&gt;
                  &lt;div class="author-preview-metadata-container"&gt;&lt;/div&gt;
                &lt;/div&gt;
              &lt;/div&gt;
            &lt;/div&gt;

          &lt;/div&gt;
          &lt;a href="https://dev.to/vaibsd/plexe-ml-models-from-a-prompt-3ibg" class="crayons-story__tertiary fs-xs"&gt;&lt;time&gt;May 12 '25&lt;/time&gt;&lt;span class="time-ago-indicator-initial-placeholder"&gt;&lt;/span&gt;&lt;/a&gt;
        &lt;/div&gt;
      &lt;/div&gt;

    &lt;/div&gt;

    &lt;div class="crayons-story__indention"&gt;
      &lt;h2 class="crayons-story__title crayons-story__title-full_post"&gt;
        &lt;a href="https://dev.to/vaibsd/plexe-ml-models-from-a-prompt-3ibg" id="article-link-2475660"&gt;
          Plexe - ML models from a prompt
        &lt;/a&gt;
      &lt;/h2&gt;
        &lt;div class="crayons-story__tags"&gt;
            &lt;a class="crayons-tag  crayons-tag--monochrome " href="/t/ai"&gt;&lt;span class="crayons-tag__prefix"&gt;#&lt;/span&gt;ai&lt;/a&gt;
            &lt;a class="crayons-tag  crayons-tag--monochrome " href="/t/opensource"&gt;&lt;span class="crayons-tag__prefix"&gt;#&lt;/span&gt;opensource&lt;/a&gt;
            &lt;a class="crayons-tag  crayons-tag--monochrome " href="/t/machinelearning"&gt;&lt;span class="crayons-tag__prefix"&gt;#&lt;/span&gt;machinelearning&lt;/a&gt;
            &lt;a class="crayons-tag  crayons-tag--monochrome " href="/t/data"&gt;&lt;span class="crayons-tag__prefix"&gt;#&lt;/span&gt;data&lt;/a&gt;
        &lt;/div&gt;
      &lt;div class="crayons-story__bottom"&gt;
        &lt;div class="crayons-story__details"&gt;
            &lt;a href="https://dev.to/vaibsd/plexe-ml-models-from-a-prompt-3ibg#comments" class="crayons-btn crayons-btn--s crayons-btn--ghost crayons-btn--icon-left flex items-center"&gt;
              Comments


              &lt;span class="hidden s:inline"&gt;Add Comment&lt;/span&gt;
            &lt;/a&gt;
        &lt;/div&gt;
        &lt;div class="crayons-story__save"&gt;
          &lt;small class="crayons-story__tertiary fs-xs mr-2"&gt;
            3 min read
          &lt;/small&gt;
            
              &lt;span class="bm-initial"&gt;
                

              &lt;/span&gt;
              &lt;span class="bm-success"&gt;
                

              &lt;/span&gt;
            
        &lt;/div&gt;
      &lt;/div&gt;
    &lt;/div&gt;
  &lt;/div&gt;
&lt;/div&gt;

&lt;/div&gt;


</description>
      <category>ai</category>
      <category>opensource</category>
      <category>machinelearning</category>
      <category>data</category>
    </item>
    <item>
      <title>Plexe - ML models from a prompt</title>
      <dc:creator>Vaibhav Dubey</dc:creator>
      <pubDate>Mon, 12 May 2025 16:34:36 +0000</pubDate>
      <link>https://dev.to/vaibsd/plexe-ml-models-from-a-prompt-3ibg</link>
      <guid>https://dev.to/vaibsd/plexe-ml-models-from-a-prompt-3ibg</guid>
      <description>&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fejsxs9w8qs8oapao3jn1.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fejsxs9w8qs8oapao3jn1.png" alt="Image description" width="800" height="832"&gt;&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;We’re building &lt;a href="https://github.com/plexe-ai/plexe" rel="noopener noreferrer"&gt;Plexe&lt;/a&gt;, an open-source agent that turns natural language task descriptions into trained ML models. Here’s a video walkthrough: &lt;a href="https://www.youtube.com/watch?v=bUwCSglhcXY" rel="noopener noreferrer"&gt;https://www.youtube.com/watch?v=bUwCSglhcXY&lt;/a&gt;.&lt;/p&gt;

&lt;p&gt;There are all kinds of uses for ML models that never get realized because the process of making them is messy and convoluted. You can spend months trying to find the data, clean it, experiment with models and deploy to production, only to find out that your project has been binned for taking so long. There are many tools for “automating” ML, but it still takes teams of ML experts to actually productionize something of value. And we can’t keep throwing LLMs at every ML problem. Why use a generic 10B parameter language model, if a logistic regression trained on your data could do the job better?&lt;/p&gt;

&lt;p&gt;Our light-bulb moment was that we could use LLMs to generate task-specific ML models that would be trained on one’s own data. Thanks to the emergent reasoning ability of LLMs, it is now possible to create an agentic system that might automate most of the ML lifecycle. &lt;/p&gt;

&lt;p&gt;A couple of months ago, we started developing a Python library that would let you define ML models on structured data using a description of the expected behaviour. Our initial implementation arranged potential solutions into a graph, using LLMs to write plans, implement them as code, and run the resulting training script. Using simple search algorithms, the system traversed the solution space to identify and package the best model.&lt;/p&gt;

&lt;p&gt;However, we ran into several limitations, as the algorithm proved brittle under edge cases, and we kept having to put patches for every minor issue in the training process. We decided to rethink the approach, throw everything out, and rebuild the tool using an agentic approach prioritising generality and flexibility. What started as a single ML engineering agent turned into an agentic ML "team", with all experiments tracked and logged using MLFlow.&lt;/p&gt;

&lt;p&gt;Our current implementation uses the smolagents library to define an agent hierarchy. We mapped the functionality of our previous implementation to a set of specialized agents, such as an “ML scientist” that proposes solution plans, and so on. Each agent has specialized tools, instructions, and prompt templates. To facilitate cross-agent communication, we implemented a shared memory that enables objects (datasets, code snippets, etc.) to be passed across agents indirectly by referencing keys in a registry. You can find a detailed write-up on how it works &lt;a href="https://github.com/plexe-ai/plexe/blob/main/docs/architecture/multi-agent-system.md" rel="noopener noreferrer"&gt;here&lt;/a&gt;.&lt;br&gt;
Plexe’s early release is focused on predictive problems over structured data, and can be used to build models such as forecasting player injury risk in high-intensity sports, product recommendations for an ecommerce marketplace, or predicting technical indicators for algorithmic trading. Here are some &lt;a href="https://github.com/plexe-ai/plexe/tree/main/examples" rel="noopener noreferrer"&gt;examples&lt;/a&gt; to get you started!&lt;/p&gt;

&lt;p&gt;To get it working on your data, you can dump any CSV, Parquet, etc., and Plexe uses what it needs from your dataset to figure out what features it should use. In the open-source tool, it only supports adding files right now, but in our platform version, we'll have support for integrating with Postgres, where it pulls all available data based on an SQL query and dumps it into a Parquet file for the agent to build models.&lt;/p&gt;

&lt;p&gt;Next up, we’ll be tackling more of the ML project lifecycle: we’re currently working on adding a “feature engineering agent” that focuses on the complex data transformations that are often required for data to be ready for model training. If you're interested, check Plexe out and let us know your thoughts!&lt;/p&gt;

</description>
      <category>ai</category>
      <category>opensource</category>
      <category>machinelearning</category>
      <category>data</category>
    </item>
    <item>
      <title>I built an open-source library to generate specialised machine learning models from natural language</title>
      <dc:creator>Vaibhav Dubey</dc:creator>
      <pubDate>Thu, 13 Feb 2025 09:22:14 +0000</pubDate>
      <link>https://dev.to/vaibsd/i-built-an-open-source-library-to-generate-specialised-machine-learning-models-from-natural-language-1pfe</link>
      <guid>https://dev.to/vaibsd/i-built-an-open-source-library-to-generate-specialised-machine-learning-models-from-natural-language-1pfe</guid>
      <description>&lt;p&gt;Smolmodels is an open-source library that uses graph search with LLM-based code generation to automatically create lightweight, task-specific ML models from natural language descriptions.&lt;/p&gt;

&lt;p&gt;Here's how it works with a time-series prediction example; let’s say df is a dataframe containing the “air passengers” dataset from statsmodels.&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight plaintext"&gt;&lt;code&gt;    import smolmodels as sm

    model = sm.Model(
        intent="Predict the number of international air passengers (in thousands) in a given month, based on historical time series data.",
        input_schema={"Month": str},
        output_schema={"Passengers": int}
    )

    model.build(dataset=df, provider="openai/gpt-4o")

    prediction = model.predict({"Month": "2019-01"})

    sm.models.save_model(model, "air_passengers")
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;Under the hood, the library:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Parses the intent to identify ML task type and constraints&lt;/li&gt;
&lt;li&gt;Uses graph search to explore potential model architectures&lt;/li&gt;
&lt;li&gt;Automatically optimises the solutions produced&lt;/li&gt;
&lt;li&gt;Generates task-specific training code&lt;/li&gt;
&lt;li&gt;Generates inference code to use the model&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;The project is in alpha stage and completely open source: &lt;a href="https://github.com/plexe-ai/smolmodels" rel="noopener noreferrer"&gt;https://github.com/plexe-ai/smolmodels&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;The library is fully open-source (Apache-2.0), so feel free to use it however you like. Or just tear it apart in the comments if you think this is dumb. We’d love some feedback, and we’re very open to code contributions!&lt;/p&gt;

</description>
      <category>llm</category>
      <category>ai</category>
      <category>machinelearning</category>
      <category>opensource</category>
    </item>
    <item>
      <title>Created my first open-source project</title>
      <dc:creator>Vaibhav Dubey</dc:creator>
      <pubDate>Thu, 30 Jan 2025 09:08:51 +0000</pubDate>
      <link>https://dev.to/vaibsd/created-my-first-open-source-project-5dha</link>
      <guid>https://dev.to/vaibsd/created-my-first-open-source-project-5dha</guid>
      <description>&lt;p&gt;Took about a month to get here but happy to release this open source project I've been working on for the last month: &lt;a href="https://github.com/plexe-ai/smolmodels" rel="noopener noreferrer"&gt;https://github.com/plexe-ai/smolmodels&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;It lets you automatically build ML models by defining what you need in plain words and setting up input/output schemas. Under the hood, it uses graph search and LLMs to explore different models and compare their performance.&lt;/p&gt;

&lt;p&gt;This is an early alpha, actively being developed. Would love feedback or ideas on where to take it next.&lt;/p&gt;

</description>
      <category>ai</category>
      <category>machinelearning</category>
      <category>opensource</category>
      <category>softwareengineering</category>
    </item>
  </channel>
</rss>
