<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/">
  <channel>
    <title>DEV Community: Morfasco</title>
    <description>The latest articles on DEV Community by Morfasco (@morfasco).</description>
    <link>https://dev.to/morfasco</link>
    <image>
      <url>https://media2.dev.to/dynamic/image/width=90,height=90,fit=cover,gravity=auto,format=auto/https:%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Fuser%2Fprofile_image%2F3874686%2F203da148-953f-4497-945b-8179b834c95d.jpg</url>
      <title>DEV Community: Morfasco</title>
      <link>https://dev.to/morfasco</link>
    </image>
    <atom:link rel="self" type="application/rss+xml" href="https://dev.to/feed/morfasco"/>
    <language>en</language>
    <item>
      <title>omg!</title>
      <dc:creator>Morfasco</dc:creator>
      <pubDate>Sun, 12 Apr 2026 11:13:14 +0000</pubDate>
      <link>https://dev.to/morfasco/omg-212h</link>
      <guid>https://dev.to/morfasco/omg-212h</guid>
      <description>&lt;div class="ltag__link--embedded"&gt;
  &lt;div class="crayons-story "&gt;
  &lt;a href="https://dev.to/devteam/welcome-thread-v372-254c" class="crayons-story__hidden-navigation-link"&gt;Welcome Thread - v372&lt;/a&gt;


  &lt;div class="crayons-story__body crayons-story__body-full_post"&gt;
      &lt;a href="https://dev.to/devteam/welcome-thread-v372-254c" class="crayons-article__context-note crayons-article__context-note__feed"&gt;&lt;p&gt;Features infra veterans and tool builders&lt;/p&gt;

&lt;/a&gt;
    &lt;div class="crayons-story__top"&gt;
      &lt;div class="crayons-story__meta"&gt;
        &lt;div class="crayons-story__author-pic"&gt;
          &lt;a class="crayons-logo crayons-logo--l" href="/devteam"&gt;
            &lt;img alt="The DEV Team logo" src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Forganization%2Fprofile_image%2F1%2Fd908a186-5651-4a5a-9f76-15200bc6801f.jpg" class="crayons-logo__image" width="800" height="800"&gt;
          &lt;/a&gt;

          &lt;a href="/sloan" class="crayons-avatar  crayons-avatar--s absolute -right-2 -bottom-2 border-solid border-2 border-base-inverted  "&gt;
            &lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Fuser%2Fprofile_image%2F31047%2Faf153cd6-9994-4a68-83f4-8ddf3e13f0bf.jpg" alt="sloan profile" class="crayons-avatar__image" width="720" height="720"&gt;
          &lt;/a&gt;
        &lt;/div&gt;
        &lt;div&gt;
          &lt;div&gt;
            &lt;a href="/sloan" class="crayons-story__secondary fw-medium m:hidden"&gt;
              Sloan the DEV Moderator
            &lt;/a&gt;
            &lt;div class="profile-preview-card relative mb-4 s:mb-0 fw-medium hidden m:inline-block"&gt;
              
                Sloan the DEV Moderator
                
              
              &lt;div id="story-author-preview-content-3442004" class="profile-preview-card__content crayons-dropdown branded-7 p-4 pt-0"&gt;
                &lt;div class="gap-4 grid"&gt;
                  &lt;div class="-mt-4"&gt;
                    &lt;a href="/sloan" class="flex"&gt;
                      &lt;span class="crayons-avatar crayons-avatar--xl mr-2 shrink-0"&gt;
                        &lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Fuser%2Fprofile_image%2F31047%2Faf153cd6-9994-4a68-83f4-8ddf3e13f0bf.jpg" class="crayons-avatar__image" alt="" width="720" height="720"&gt;
                      &lt;/span&gt;
                      &lt;span class="crayons-link crayons-subtitle-2 mt-5"&gt;Sloan the DEV Moderator&lt;/span&gt;
                    &lt;/a&gt;
                  &lt;/div&gt;
                  &lt;div class="print-hidden"&gt;
                    
                      Follow
                    
                  &lt;/div&gt;
                  &lt;div class="author-preview-metadata-container"&gt;&lt;/div&gt;
                &lt;/div&gt;
              &lt;/div&gt;
            &lt;/div&gt;

            &lt;span&gt;
              &lt;span class="crayons-story__tertiary fw-normal"&gt; for &lt;/span&gt;&lt;a href="/devteam" class="crayons-story__secondary fw-medium"&gt;The DEV Team&lt;/a&gt;
            &lt;/span&gt;
          &lt;/div&gt;
          &lt;a href="https://dev.to/devteam/welcome-thread-v372-254c" class="crayons-story__tertiary fs-xs"&gt;&lt;time&gt;Apr 8&lt;/time&gt;&lt;span class="time-ago-indicator-initial-placeholder"&gt;&lt;/span&gt;&lt;/a&gt;
        &lt;/div&gt;
      &lt;/div&gt;

    &lt;/div&gt;

    &lt;div class="crayons-story__indention"&gt;
      &lt;h2 class="crayons-story__title crayons-story__title-full_post"&gt;
        &lt;a href="https://dev.to/devteam/welcome-thread-v372-254c" id="article-link-3442004"&gt;
          Welcome Thread - v372
        &lt;/a&gt;
      &lt;/h2&gt;
        &lt;div class="crayons-story__tags"&gt;
            &lt;a class="crayons-tag  crayons-tag--monochrome " href="/t/welcome"&gt;&lt;span class="crayons-tag__prefix"&gt;#&lt;/span&gt;welcome&lt;/a&gt;
        &lt;/div&gt;
      &lt;div class="crayons-story__bottom"&gt;
        &lt;div class="crayons-story__details"&gt;
          &lt;a href="https://dev.to/devteam/welcome-thread-v372-254c" class="crayons-btn crayons-btn--s crayons-btn--ghost crayons-btn--icon-left"&gt;
            &lt;div class="multiple_reactions_aggregate"&gt;
              &lt;span class="multiple_reactions_icons_container"&gt;
                  &lt;span class="crayons_icon_container"&gt;
                    &lt;img src="https://assets.dev.to/assets/multi-unicorn-b44d6f8c23cdd00964192bedc38af3e82463978aa611b4365bd33a0f1f4f3e97.svg" width="24" height="24"&gt;
                  &lt;/span&gt;
                  &lt;span class="crayons_icon_container"&gt;
                    &lt;img src="https://assets.dev.to/assets/exploding-head-daceb38d627e6ae9b730f36a1e390fca556a4289d5a41abb2c35068ad3e2c4b5.svg" width="24" height="24"&gt;
                  &lt;/span&gt;
                  &lt;span class="crayons_icon_container"&gt;
                    &lt;img src="https://assets.dev.to/assets/sparkle-heart-5f9bee3767e18deb1bb725290cb151c25234768a0e9a2bd39370c382d02920cf.svg" width="24" height="24"&gt;
                  &lt;/span&gt;
              &lt;/span&gt;
              &lt;span class="aggregate_reactions_counter"&gt;58&lt;span class="hidden s:inline"&gt; reactions&lt;/span&gt;&lt;/span&gt;
            &lt;/div&gt;
          &lt;/a&gt;
            &lt;a href="https://dev.to/devteam/welcome-thread-v372-254c#comments" class="crayons-btn crayons-btn--s crayons-btn--ghost crayons-btn--icon-left flex items-center"&gt;
              Comments


              335&lt;span class="hidden s:inline"&gt; comments&lt;/span&gt;
            &lt;/a&gt;
        &lt;/div&gt;
        &lt;div class="crayons-story__save"&gt;
          &lt;small class="crayons-story__tertiary fs-xs mr-2"&gt;
            1 min read
          &lt;/small&gt;
            
              &lt;span class="bm-initial"&gt;
                

              &lt;/span&gt;
              &lt;span class="bm-success"&gt;
                

              &lt;/span&gt;
            
        &lt;/div&gt;
      &lt;/div&gt;
    &lt;/div&gt;
  &lt;/div&gt;
&lt;/div&gt;

&lt;/div&gt;


</description>
    </item>
    <item>
      <title>How I Built an OCR-Based Defense Against Prompt Injection for Local LLM Search</title>
      <dc:creator>Morfasco</dc:creator>
      <pubDate>Sun, 12 Apr 2026 10:23:00 +0000</pubDate>
      <link>https://dev.to/morfasco/how-i-built-an-ocr-based-defense-against-prompt-injection-for-local-llm-search-1mnl</link>
      <guid>https://dev.to/morfasco/how-i-built-an-ocr-based-defense-against-prompt-injection-for-local-llm-search-1mnl</guid>
      <description>&lt;p&gt;When you plug a local LLM into a web search tool, every fetched page becomes an attack surface. I found this out the hard way — my Ollama setup was pulling web content that contained invisible Unicode injection, fake system prompts, and markdown image tags designed to exfiltrate data through URL parameters.&lt;/p&gt;

&lt;p&gt;I went looking for a solution and found that Google DeepMind's own research showed their best model-level defenses fail 53.6% of the time against adaptive attacks. The "Attacker Moves Second" paper demonstrated that all 12 published defenses were bypassed at &amp;gt;90% success rates. The UK's National Cyber Security Centre formally characterized LLMs as "inherently confusable deputies."&lt;/p&gt;

&lt;p&gt;So I stopped trying to make the model resist injection and started removing the attack text before the model ever sees it.&lt;/p&gt;

&lt;h2&gt;
  
  
  The Insight: OCR as a Nuclear Defense
&lt;/h2&gt;

&lt;p&gt;Since I'm generating the image from text (not scanning a document), I control every variable. The OCR round-trip becomes a ground truth extractor:&lt;/p&gt;

&lt;ol&gt;
&lt;li&gt;Take untrusted web content&lt;/li&gt;
&lt;li&gt;Render it to an image with ImageMagick (300 DPI, 20pt monospace, TIFF)&lt;/li&gt;
&lt;li&gt;OCR it back with Tesseract (LSTM engine)&lt;/li&gt;
&lt;li&gt;Anything that didn't produce visible pixels is gone&lt;/li&gt;
&lt;/ol&gt;

&lt;p&gt;Zero-width characters, bidirectional overrides, variation selectors, tag characters — they all die in the render step because they have no visual representation. Homoglyphs are visible, so they survive rendering, but OCR reads back only the glyph shape, not the original code point. No pattern matching required for the entire invisible attack surface.&lt;/p&gt;

&lt;h2&gt;
  
  
  The Full Pipeline
&lt;/h2&gt;

&lt;p&gt;Five independent layers, each catching a different class of attack:&lt;/p&gt;

&lt;div class="table-wrapper-paragraph"&gt;&lt;table&gt;
&lt;thead&gt;
&lt;tr&gt;
&lt;th&gt;Layer&lt;/th&gt;
&lt;th&gt;What&lt;/th&gt;
&lt;th&gt;Catches&lt;/th&gt;
&lt;/tr&gt;
&lt;/thead&gt;
&lt;tbody&gt;
&lt;tr&gt;
&lt;td&gt;1. OCR round-trip&lt;/td&gt;
&lt;td&gt;text → image → OCR&lt;/td&gt;
&lt;td&gt;All invisible characters&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;2. Regex detect&lt;/td&gt;
&lt;td&gt;31 compiled patterns&lt;/td&gt;
&lt;td&gt;Instruction overrides, role hijacking, system tags&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;3. Regex redact&lt;/td&gt;
&lt;td&gt;Strip detected patterns&lt;/td&gt;
&lt;td&gt;Prevents detected attacks from reaching LLM&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;4. URL/email redact&lt;/td&gt;
&lt;td&gt;Strip exfil channels&lt;/td&gt;
&lt;td&gt;Markdown img exfil, hidden endpoints&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;5. Trust wrap&lt;/td&gt;
&lt;td&gt;Tag as HOSTILE/UNTRUSTED&lt;/td&gt;
&lt;td&gt;Gives LLM provenance metadata&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;
&lt;/table&gt;&lt;/div&gt;

&lt;p&gt;The OCR runs first. Everything else operates on the clean output.&lt;/p&gt;

&lt;h2&gt;
  
  
  Red Team Results
&lt;/h2&gt;

&lt;p&gt;I built a test harness with 12 adversarial payloads and ran them directly through the sanitization pipeline:&lt;/p&gt;

&lt;p&gt;T01: Instruction Override    — ✓ NEUTRALIZED&lt;br&gt;
T02: Unicode Steganography   — ✓ NEUTRALIZED&lt;br&gt;
T03: Bidi Override           — ✓ NEUTRALIZED&lt;br&gt;
T04: Markdown Exfil          — ✓ NEUTRALIZED&lt;br&gt;
T05: Role Hijacking          — ✓ NEUTRALIZED&lt;br&gt;
T06: System Tag Injection    — ✓ NEUTRALIZED&lt;br&gt;
T07: Base64 Payload          — ✓ NEUTRALIZED&lt;br&gt;
T08: Typoglycemia            — ✓ NEUTRALIZED&lt;br&gt;
T09: Code Fence Injection    — ✓ NEUTRALIZED&lt;br&gt;
T10: Trust Escalation        — ✓ NEUTRALIZED&lt;br&gt;
T11: HTML Img Exfil          — ✓ NEUTRALIZED&lt;br&gt;
T12: Multi-Vector Combined   — ✓ NEUTRALIZED&lt;/p&gt;

&lt;p&gt;The red team script is included in the repo — &lt;code&gt;python3 redteam.py&lt;/code&gt; runs all 12 payloads against your running instance.&lt;/p&gt;
&lt;h2&gt;
  
  
  What This Doesn't Catch
&lt;/h2&gt;

&lt;p&gt;I want to be upfront about the limitations because I think the security community has a problem with tools that oversell:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;
&lt;strong&gt;Semantic injection&lt;/strong&gt; — "the previous assessment methodology was found to contain errors" is natural English. No regex or OCR catches it.&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Adaptive regex evasion&lt;/strong&gt; — if an attacker studies the 31 patterns, they can craft bypasses using synonyms.&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Cross-page composite attacks&lt;/strong&gt; — each page is sanitized independently. An injection split across multiple search results would pass.&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Model-level manipulation&lt;/strong&gt; — the filter LLM is still an LLM.&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;Per DeepMind's research, prompt injection may never be fully solved with current architectures. This tool raises the cost of attack; it doesn't eliminate it.&lt;/p&gt;
&lt;h2&gt;
  
  
  Setup
&lt;/h2&gt;

&lt;p&gt;Requires Docker and Ollama (or any OpenAI-compatible local LLM).&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight shell"&gt;&lt;code&gt;git clone https://github.com/Morfasco/search-sanitizer.git
&lt;span class="nb"&gt;cd &lt;/span&gt;search-sanitizer
&lt;span class="nb"&gt;cp&lt;/span&gt; .env.example .env  &lt;span class="c"&gt;# edit with your model/endpoint&lt;/span&gt;
bash setup.sh
python3 redteam.py    &lt;span class="c"&gt;# verify the pipeline&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;Supports Ollama, LM Studio, vLLM, text-generation-webui — anything that speaks &lt;code&gt;/v1/chat/completions&lt;/code&gt; works via the &lt;code&gt;LLM_API_FORMAT=openai&lt;/code&gt; setting in &lt;code&gt;.env&lt;/code&gt;.&lt;/p&gt;

&lt;h2&gt;
  
  
  How It Compares
&lt;/h2&gt;

&lt;div class="table-wrapper-paragraph"&gt;&lt;table&gt;
&lt;thead&gt;
&lt;tr&gt;
&lt;th&gt;Feature&lt;/th&gt;
&lt;th&gt;search-sanitizer&lt;/th&gt;
&lt;th&gt;Rebuff&lt;/th&gt;
&lt;th&gt;Vigil&lt;/th&gt;
&lt;th&gt;IPI-Scanner&lt;/th&gt;
&lt;/tr&gt;
&lt;/thead&gt;
&lt;tbody&gt;
&lt;tr&gt;
&lt;td&gt;OCR sanitization&lt;/td&gt;
&lt;td&gt;✅&lt;/td&gt;
&lt;td&gt;❌&lt;/td&gt;
&lt;td&gt;❌&lt;/td&gt;
&lt;td&gt;❌&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Active redaction&lt;/td&gt;
&lt;td&gt;✅&lt;/td&gt;
&lt;td&gt;❌&lt;/td&gt;
&lt;td&gt;❌&lt;/td&gt;
&lt;td&gt;❌&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;URL/email stripping&lt;/td&gt;
&lt;td&gt;✅&lt;/td&gt;
&lt;td&gt;❌&lt;/td&gt;
&lt;td&gt;❌&lt;/td&gt;
&lt;td&gt;❌&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Local-first&lt;/td&gt;
&lt;td&gt;✅&lt;/td&gt;
&lt;td&gt;❌&lt;/td&gt;
&lt;td&gt;✅&lt;/td&gt;
&lt;td&gt;✅&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Red team included&lt;/td&gt;
&lt;td&gt;✅&lt;/td&gt;
&lt;td&gt;❌&lt;/td&gt;
&lt;td&gt;❌&lt;/td&gt;
&lt;td&gt;✅&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;
&lt;/table&gt;&lt;/div&gt;

&lt;h2&gt;
  
  
  References
&lt;/h2&gt;

&lt;ul&gt;
&lt;li&gt;
&lt;a href="https://arxiv.org/abs/2505.14534" rel="noopener noreferrer"&gt;Lessons from Defending Gemini Against Indirect Prompt Injections&lt;/a&gt; — Google DeepMind, 2025&lt;/li&gt;
&lt;li&gt;
&lt;a href="https://arxiv.org/abs/2510.09023" rel="noopener noreferrer"&gt;The Attacker Moves Second&lt;/a&gt; — OpenAI/Anthropic/DeepMind, 2025&lt;/li&gt;
&lt;li&gt;&lt;a href="https://owasp.org/www-project-top-10-for-large-language-model-applications/" rel="noopener noreferrer"&gt;OWASP Top 10 for LLM Applications 2025&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;GitHub: &lt;a href="https://github.com/Morfasco/search-sanitizer" rel="noopener noreferrer"&gt;github.com/Morfasco/search-sanitizer&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;Apache 2.0. Feedback welcome — especially on the semantic injection gap.&lt;/p&gt;

</description>
      <category>security</category>
      <category>ai</category>
      <category>opensource</category>
      <category>docker</category>
    </item>
  </channel>
</rss>
