<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/">
  <channel>
    <title>DEV Community: Nebu0528</title>
    <description>The latest articles on DEV Community by Nebu0528 (@nebu0528).</description>
    <link>https://dev.to/nebu0528</link>
    <image>
      <url>https://media2.dev.to/dynamic/image/width=90,height=90,fit=cover,gravity=auto,format=auto/https:%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Fuser%2Fprofile_image%2F3850146%2Fbb38a3d4-a0fd-47f6-921f-6d175cb2013b.png</url>
      <title>DEV Community: Nebu0528</title>
      <link>https://dev.to/nebu0528</link>
    </image>
    <atom:link rel="self" type="application/rss+xml" href="https://dev.to/feed/nebu0528"/>
    <language>en</language>
    <item>
      <title>[Boost]</title>
      <dc:creator>Nebu0528</dc:creator>
      <pubDate>Sat, 04 Apr 2026 23:36:03 +0000</pubDate>
      <link>https://dev.to/nebu0528/-1mfl</link>
      <guid>https://dev.to/nebu0528/-1mfl</guid>
      <description>&lt;div class="ltag__link--embedded"&gt;
  &lt;div class="crayons-story "&gt;
  &lt;a href="https://dev.to/nebu0528/i-built-a-tool-to-distribute-python-tasks-across-local-machines-heres-how-it-performed-ogb" class="crayons-story__hidden-navigation-link"&gt;I Built a Tool to Distribute Python Tasks Across Local Machines. Here's How It Performed&lt;/a&gt;


  &lt;div class="crayons-story__body crayons-story__body-full_post"&gt;
    &lt;div class="crayons-story__top"&gt;
      &lt;div class="crayons-story__meta"&gt;
        &lt;div class="crayons-story__author-pic"&gt;

          &lt;a href="/nebu0528" class="crayons-avatar  crayons-avatar--l  "&gt;
            &lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Fuser%2Fprofile_image%2F3850146%2Fbb38a3d4-a0fd-47f6-921f-6d175cb2013b.png" alt="nebu0528 profile" class="crayons-avatar__image" width="96" height="96"&gt;
          &lt;/a&gt;
        &lt;/div&gt;
        &lt;div&gt;
          &lt;div&gt;
            &lt;a href="/nebu0528" class="crayons-story__secondary fw-medium m:hidden"&gt;
              Nebu0528
            &lt;/a&gt;
            &lt;div class="profile-preview-card relative mb-4 s:mb-0 fw-medium hidden m:inline-block"&gt;
              
                Nebu0528
                
              
              &lt;div id="story-author-preview-content-3427400" class="profile-preview-card__content crayons-dropdown branded-7 p-4 pt-0"&gt;
                &lt;div class="gap-4 grid"&gt;
                  &lt;div class="-mt-4"&gt;
                    &lt;a href="/nebu0528" class="flex"&gt;
                      &lt;span class="crayons-avatar crayons-avatar--xl mr-2 shrink-0"&gt;
                        &lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Fuser%2Fprofile_image%2F3850146%2Fbb38a3d4-a0fd-47f6-921f-6d175cb2013b.png" class="crayons-avatar__image" alt="" width="96" height="96"&gt;
                      &lt;/span&gt;
                      &lt;span class="crayons-link crayons-subtitle-2 mt-5"&gt;Nebu0528&lt;/span&gt;
                    &lt;/a&gt;
                  &lt;/div&gt;
                  &lt;div class="print-hidden"&gt;
                    
                      Follow
                    
                  &lt;/div&gt;
                  &lt;div class="author-preview-metadata-container"&gt;&lt;/div&gt;
                &lt;/div&gt;
              &lt;/div&gt;
            &lt;/div&gt;

          &lt;/div&gt;
          &lt;a href="https://dev.to/nebu0528/i-built-a-tool-to-distribute-python-tasks-across-local-machines-heres-how-it-performed-ogb" class="crayons-story__tertiary fs-xs"&gt;&lt;time&gt;Mar 29&lt;/time&gt;&lt;span class="time-ago-indicator-initial-placeholder"&gt;&lt;/span&gt;&lt;/a&gt;
        &lt;/div&gt;
      &lt;/div&gt;

    &lt;/div&gt;

    &lt;div class="crayons-story__indention"&gt;
      &lt;h2 class="crayons-story__title crayons-story__title-full_post"&gt;
        &lt;a href="https://dev.to/nebu0528/i-built-a-tool-to-distribute-python-tasks-across-local-machines-heres-how-it-performed-ogb" id="article-link-3427400"&gt;
          I Built a Tool to Distribute Python Tasks Across Local Machines. Here's How It Performed
        &lt;/a&gt;
      &lt;/h2&gt;
        &lt;div class="crayons-story__tags"&gt;
            &lt;a class="crayons-tag crayons-tag--filled  " href="/t/showdev"&gt;&lt;span class="crayons-tag__prefix"&gt;#&lt;/span&gt;showdev&lt;/a&gt;
            &lt;a class="crayons-tag  crayons-tag--monochrome " href="/t/distributedsystems"&gt;&lt;span class="crayons-tag__prefix"&gt;#&lt;/span&gt;distributedsystems&lt;/a&gt;
            &lt;a class="crayons-tag  crayons-tag--monochrome " href="/t/cli"&gt;&lt;span class="crayons-tag__prefix"&gt;#&lt;/span&gt;cli&lt;/a&gt;
            &lt;a class="crayons-tag  crayons-tag--monochrome " href="/t/opensource"&gt;&lt;span class="crayons-tag__prefix"&gt;#&lt;/span&gt;opensource&lt;/a&gt;
        &lt;/div&gt;
      &lt;div class="crayons-story__bottom"&gt;
        &lt;div class="crayons-story__details"&gt;
          &lt;a href="https://dev.to/nebu0528/i-built-a-tool-to-distribute-python-tasks-across-local-machines-heres-how-it-performed-ogb" class="crayons-btn crayons-btn--s crayons-btn--ghost crayons-btn--icon-left"&gt;
            &lt;div class="multiple_reactions_aggregate"&gt;
              &lt;span class="multiple_reactions_icons_container"&gt;
                  &lt;span class="crayons_icon_container"&gt;
                    &lt;img src="https://assets.dev.to/assets/sparkle-heart-5f9bee3767e18deb1bb725290cb151c25234768a0e9a2bd39370c382d02920cf.svg" width="24" height="24"&gt;
                  &lt;/span&gt;
              &lt;/span&gt;
              &lt;span class="aggregate_reactions_counter"&gt;1&lt;span class="hidden s:inline"&gt; reaction&lt;/span&gt;&lt;/span&gt;
            &lt;/div&gt;
          &lt;/a&gt;
            &lt;a href="https://dev.to/nebu0528/i-built-a-tool-to-distribute-python-tasks-across-local-machines-heres-how-it-performed-ogb#comments" class="crayons-btn crayons-btn--s crayons-btn--ghost crayons-btn--icon-left flex items-center"&gt;
              Comments


              &lt;span class="hidden s:inline"&gt;Add Comment&lt;/span&gt;
            &lt;/a&gt;
        &lt;/div&gt;
        &lt;div class="crayons-story__save"&gt;
          &lt;small class="crayons-story__tertiary fs-xs mr-2"&gt;
            3 min read
          &lt;/small&gt;
            
              &lt;span class="bm-initial"&gt;
                

              &lt;/span&gt;
              &lt;span class="bm-success"&gt;
                

              &lt;/span&gt;
            
        &lt;/div&gt;
      &lt;/div&gt;
    &lt;/div&gt;
  &lt;/div&gt;
&lt;/div&gt;

&lt;/div&gt;


</description>
    </item>
    <item>
      <title>I Built a Tool to Distribute Python Tasks Across Local Machines. Here's How It Performed</title>
      <dc:creator>Nebu0528</dc:creator>
      <pubDate>Sun, 29 Mar 2026 22:57:07 +0000</pubDate>
      <link>https://dev.to/nebu0528/i-built-a-tool-to-distribute-python-tasks-across-local-machines-heres-how-it-performed-ogb</link>
      <guid>https://dev.to/nebu0528/i-built-a-tool-to-distribute-python-tasks-across-local-machines-heres-how-it-performed-ogb</guid>
      <description>&lt;p&gt;I wanted to answer a simple question: &lt;strong&gt;how hard is it to split a Python workload across multiple machines on the same network?&lt;/strong&gt;&lt;/p&gt;

&lt;p&gt;Not with a cloud cluster, not Kubernetes, just a few laptops on the same WiFi, sharing the work.&lt;/p&gt;

&lt;p&gt;So I built &lt;a href="https://github.com/Nebu0528/distributor" rel="noopener noreferrer"&gt;distributed-compute-locally&lt;/a&gt; to find out. The goal was maximum simplicity — if it takes more than a few lines of code to set up, I've failed. Then I benchmarked it against industry-standard tools to see how it holds up.&lt;/p&gt;

&lt;h2&gt;
  
  
  The API
&lt;/h2&gt;



&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight python"&gt;&lt;code&gt;&lt;span class="kn"&gt;from&lt;/span&gt; &lt;span class="n"&gt;distributed_compute&lt;/span&gt; &lt;span class="kn"&gt;import&lt;/span&gt; &lt;span class="n"&gt;Coordinator&lt;/span&gt;

&lt;span class="n"&gt;coordinator&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nc"&gt;Coordinator&lt;/span&gt;&lt;span class="p"&gt;()&lt;/span&gt;
&lt;span class="n"&gt;coordinator&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;start_server&lt;/span&gt;&lt;span class="p"&gt;()&lt;/span&gt;
&lt;span class="n"&gt;results&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="n"&gt;coordinator&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;map&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;my_func&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;data&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;On any other machine on the network:&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight shell"&gt;&lt;code&gt;pip &lt;span class="nb"&gt;install &lt;/span&gt;distributed-compute-locally
distcompute worker 192.168.1.100
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;That's all you have to do. A coordinator distributes tasks over TCP, workers execute them with &lt;code&gt;cloudpickle&lt;/code&gt;, and results come back in order, same as Python's built-in &lt;code&gt;map()&lt;/code&gt;.&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight plaintext"&gt;&lt;code&gt;┌─────────────┐     TCP/5555     ┌──────────┐
│ Coordinator │◄────────────────►│ Worker 1 │
│ (your PC)   │◄────────────────►│ Worker 2 │
│             │◄────────────────►│ Worker 3 │
└─────────────┘                  └──────────┘
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;h2&gt;
  
  
  Benchmarks
&lt;/h2&gt;

&lt;p&gt;I ran three standard parallel computing benchmarks on an Apple M2 MacBook (8 cores) with 4 workers:&lt;/p&gt;

&lt;div class="table-wrapper-paragraph"&gt;&lt;table&gt;
&lt;thead&gt;
&lt;tr&gt;
&lt;th&gt;Benchmark&lt;/th&gt;
&lt;th&gt;Sequential&lt;/th&gt;
&lt;th&gt;4 Workers&lt;/th&gt;
&lt;th&gt;Speedup&lt;/th&gt;
&lt;/tr&gt;
&lt;/thead&gt;
&lt;tbody&gt;
&lt;tr&gt;
&lt;td&gt;
&lt;strong&gt;NAS EP&lt;/strong&gt; — NASA Embarrassingly Parallel&lt;/td&gt;
&lt;td&gt;5.0s&lt;/td&gt;
&lt;td&gt;1.4s&lt;/td&gt;
&lt;td&gt;&lt;strong&gt;3.57x&lt;/strong&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;
&lt;strong&gt;Mandelbrot Set&lt;/strong&gt; — 2048×2048, 256 iterations&lt;/td&gt;
&lt;td&gt;12.0s&lt;/td&gt;
&lt;td&gt;3.7s&lt;/td&gt;
&lt;td&gt;&lt;strong&gt;3.27x&lt;/strong&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;
&lt;strong&gt;SHA-256 Search&lt;/strong&gt; — brute-force hash prefix&lt;/td&gt;
&lt;td&gt;6.1s&lt;/td&gt;
&lt;td&gt;1.6s&lt;/td&gt;
&lt;td&gt;&lt;strong&gt;3.72x&lt;/strong&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;&lt;strong&gt;Average&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;&lt;/td&gt;
&lt;td&gt;&lt;/td&gt;
&lt;td&gt;&lt;strong&gt;3.52x&lt;/strong&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;
&lt;/table&gt;&lt;/div&gt;

&lt;p&gt;3.52x on 4 workers (theoretical max 4.0x) — &lt;strong&gt;88% parallel efficiency&lt;/strong&gt;.&lt;/p&gt;

&lt;h3&gt;
  
  
  How Does It Compare?
&lt;/h3&gt;

&lt;p&gt;I was curious to see how this stacks up against Dask and Ray, so I ran the exact same workloads with the same setup (same machine, 4 workers, identical task code):&lt;/p&gt;

&lt;div class="table-wrapper-paragraph"&gt;&lt;table&gt;
&lt;thead&gt;
&lt;tr&gt;
&lt;th&gt;Benchmark&lt;/th&gt;
&lt;th&gt;distributed-compute-locally&lt;/th&gt;
&lt;th&gt;Dask&lt;/th&gt;
&lt;th&gt;Ray&lt;/th&gt;
&lt;/tr&gt;
&lt;/thead&gt;
&lt;tbody&gt;
&lt;tr&gt;
&lt;td&gt;&lt;strong&gt;NAS EP&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;&lt;strong&gt;3.57x&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;3.52x&lt;/td&gt;
&lt;td&gt;3.06x&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;&lt;strong&gt;Mandelbrot&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;3.27x&lt;/td&gt;
&lt;td&gt;&lt;strong&gt;3.44x&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;3.52x&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;&lt;strong&gt;SHA-256&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;3.72x&lt;/td&gt;
&lt;td&gt;3.59x&lt;/td&gt;
&lt;td&gt;&lt;strong&gt;3.90x&lt;/strong&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;&lt;strong&gt;Average&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;&lt;strong&gt;3.52x&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;&lt;strong&gt;3.52x&lt;/strong&gt;&lt;/td&gt;
&lt;td&gt;3.49x&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;
&lt;/table&gt;&lt;/div&gt;

&lt;p&gt;All three land within the same range of each other. For embarrassingly parallel workloads, the CPU is the bottleneck — not the framework. The task distribution overhead is negligible across all three.&lt;/p&gt;

&lt;h3&gt;
  
  
  Scaling Curve (N-Body Stress Test)
&lt;/h3&gt;

&lt;p&gt;I also ran a heavier workload — an O(n²) pairwise gravity simulation, 500 particles × 100 timesteps — and scaled from 1 to 8 workers:&lt;/p&gt;

&lt;div class="table-wrapper-paragraph"&gt;&lt;table&gt;
&lt;thead&gt;
&lt;tr&gt;
&lt;th&gt;Workers&lt;/th&gt;
&lt;th&gt;Time&lt;/th&gt;
&lt;th&gt;Speedup&lt;/th&gt;
&lt;/tr&gt;
&lt;/thead&gt;
&lt;tbody&gt;
&lt;tr&gt;
&lt;td&gt;1&lt;/td&gt;
&lt;td&gt;179.1s&lt;/td&gt;
&lt;td&gt;1.00x&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;2&lt;/td&gt;
&lt;td&gt;145.7s&lt;/td&gt;
&lt;td&gt;1.23x&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;4&lt;/td&gt;
&lt;td&gt;102.7s&lt;/td&gt;
&lt;td&gt;1.74x&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;6&lt;/td&gt;
&lt;td&gt;81.6s&lt;/td&gt;
&lt;td&gt;2.20x&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;8&lt;/td&gt;
&lt;td&gt;78.8s&lt;/td&gt;
&lt;td&gt;2.27x&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;
&lt;/table&gt;&lt;/div&gt;

&lt;p&gt;Diminishing returns after 6 workers — that's the M2's asymmetric cores (4 performance + 4 efficiency). The slower efficiency cores become the bottleneck on heavy tasks. On machines with identical cores or across multiple machines, scaling would be more linear.&lt;/p&gt;

&lt;h2&gt;
  
  
  Findings and Learning
&lt;/h2&gt;

&lt;p&gt;Building this tool reinforced something I suspected: &lt;strong&gt;for simple parallel workloads, the framework doesn't matter that much.&lt;/strong&gt; Dask, Ray, and a minimal TCP-based tool all deliver roughly the same speedup. The difference is what they offer.&lt;/p&gt;

&lt;p&gt;Dask and Ray give you a lot of features such as task graphs, dashboards, DataFrame integration, cloud deployment, and a massive ecosystem. They are the right choice for complex pipelines and production infrastructure.&lt;/p&gt;

&lt;p&gt;This tool gives you none of that on purpose. It's for the cases where you just want to &lt;code&gt;map()&lt;/code&gt; a function across a few machines.&lt;/p&gt;

&lt;div class="table-wrapper-paragraph"&gt;&lt;table&gt;
&lt;thead&gt;
&lt;tr&gt;
&lt;th&gt;&lt;/th&gt;
&lt;th&gt;distributed-compute-locally&lt;/th&gt;
&lt;th&gt;Dask / Ray&lt;/th&gt;
&lt;/tr&gt;
&lt;/thead&gt;
&lt;tbody&gt;
&lt;tr&gt;
&lt;td&gt;Multi-machine setup&lt;/td&gt;
&lt;td&gt;&lt;code&gt;distcompute worker &amp;lt;ip&amp;gt;&lt;/code&gt;&lt;/td&gt;
&lt;td&gt;Scheduler + worker CLI + networking&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;API surface&lt;/td&gt;
&lt;td&gt;&lt;code&gt;coordinator.map()&lt;/code&gt;&lt;/td&gt;
&lt;td&gt;Client, delayed, futures, DataFrame, ...&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Dashboard&lt;/td&gt;
&lt;td&gt;No&lt;/td&gt;
&lt;td&gt;Yes&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Task graphs&lt;/td&gt;
&lt;td&gt;No&lt;/td&gt;
&lt;td&gt;Yes&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Best for&lt;/td&gt;
&lt;td&gt;Quick &lt;code&gt;map()&lt;/code&gt; across LAN&lt;/td&gt;
&lt;td&gt;Complex pipelines, cloud clusters&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;
&lt;/table&gt;&lt;/div&gt;

&lt;h2&gt;
  
  
  Other Features
&lt;/h2&gt;

&lt;ul&gt;
&lt;li&gt;
&lt;strong&gt;Task retry&lt;/strong&gt; — &lt;code&gt;coordinator.map(func, data, max_retries=3)&lt;/code&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Password auth&lt;/strong&gt; — &lt;code&gt;distcompute coordinator --password secret&lt;/code&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Interactive CLI&lt;/strong&gt; — REPL with status monitoring and task submission&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Large payload handling&lt;/strong&gt; — automatic chunking and compression&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;cloudpickle&lt;/strong&gt; — send lambdas, closures, and local functions&lt;/li&gt;
&lt;/ul&gt;

&lt;h2&gt;
  
  
  Try It
&lt;/h2&gt;



&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight shell"&gt;&lt;code&gt;pip &lt;span class="nb"&gt;install &lt;/span&gt;distributed-compute-locally
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;Run the benchmarks yourself:&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight shell"&gt;&lt;code&gt;git clone https://github.com/Nebu0528/distributor.git
&lt;span class="nb"&gt;cd &lt;/span&gt;distributor
python3 benchmark/benchmark.py 4
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;GitHub: &lt;a href="https://github.com/Nebu0528/distributor" rel="noopener noreferrer"&gt;github.com/Nebu0528/distributor&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;If you find it useful or have feedback, I'd love to hear it. Open an issue or drop a star.&lt;/p&gt;

</description>
      <category>distributedsystems</category>
      <category>cli</category>
      <category>opensource</category>
      <category>showdev</category>
    </item>
  </channel>
</rss>
