DEV Community

agenthustler
agenthustler

Posted on

Scraping Digital Nomad Visa Requirements: 50+ Countries Tracked

The Digital Nomad Visa Explosion

Over 50 countries now offer digital nomad visas, each with different requirements for income, insurance, and duration. Keeping track manually is impossible — the rules change constantly. Let's build a scraper that monitors all of them.

What We Track

For each country:

  • Minimum income requirement
  • Visa duration and renewal options
  • Tax implications
  • Health insurance requirements
  • Application fees
  • Processing time

Setup

pip install requests beautifulsoup4 pandas
Enter fullscreen mode Exit fullscreen mode

Government immigration sites are notoriously difficult to scrape. ScraperAPI handles geo-restrictions and JavaScript rendering.

The Country Scraper

import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
import json

# ScraperAPI credential — replace with your own key before running.
SCRAPER_API_KEY = "YOUR_KEY"

# Government / official visa-information pages, keyed by country name.
# NOTE(review): these URLs are illustrative and unverified — confirm each
# one still resolves before relying on the scraped output.
COUNTRY_SOURCES = {
    "Portugal": "https://vistos.mne.gov.pt/en/national-visas/digital-nomad",
    "Spain": "https://www.exteriores.gob.es/digital-nomad-visa",
    "Croatia": "https://mup.gov.hr/en/digital-nomad",
    "Estonia": "https://e-resident.gov.ee/nomadvisa",
    "Barbados": "https://www.barbadoswelcomestamp.bb",
    "Thailand": "https://ltr.boi.go.th",
    "Indonesia": "https://www.imigrasi.go.id/digital-nomad",
    "Greece": "https://digitalnomadforgr.gov.gr"
}

def scrape_visa_page(country, url):
    """Fetch one country's visa page through ScraperAPI and extract key fields.

    Returns a dict with the country name, source URL, the extracted income /
    duration / fee phrases (or "Not found"), and the first 2000 characters of
    the page text for debugging.
    """
    # Pass the target URL via `params` so requests percent-encodes it; the
    # original f-string interpolation breaks for URLs containing '&' or '?'.
    response = requests.get(
        "http://api.scraperapi.com",
        params={"api_key": SCRAPER_API_KEY, "url": url, "render": "true"},
        timeout=90,  # rendered pages are slow, but never hang indefinitely
    )
    # Fail loudly on HTTP errors rather than parsing an error page as data.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")
    # Flatten the whole page to plain text; the regex extractors work on text.
    text = soup.get_text(separator=" ", strip=True)

    return {
        "country": country,
        "url": url,
        "min_income": extract_income(text),
        "duration": extract_duration(text),
        "fee": extract_fee(text),
        "raw_text": text[:2000]  # truncated sample for manual inspection
    }

def extract_income(text):
    """Return the first income-requirement phrase found in *text*.

    Tries a series of currency/income regexes in priority order and returns
    the full matched phrase, or "Not found" when nothing matches.
    """
    income_patterns = (
        r"\$([\d,]+)\s*(?:per month|monthly|/month)",
        r"EUR?\s*([\d,]+)\s*(?:per month|monthly)",
        r"minimum.*?income.*?([\d,]+)",
    )
    candidates = (re.search(p, text, re.IGNORECASE) for p in income_patterns)
    hit = next((m for m in candidates if m), None)
    return hit.group(0) if hit else "Not found"

def extract_duration(text):
    """Return the first visa-duration phrase found in *text*.

    Matches forms like "12 months visa" or "valid for 2 years"; returns the
    full matched phrase, or "Not found" when nothing matches.
    """
    duration_patterns = (
        r"(\d+)\s*(?:months?|years?)\s*(?:visa|stay|duration)",
        r"valid\s*(?:for)?\s*(\d+)\s*(?:months?|years?)",
    )
    candidates = (re.search(p, text, re.IGNORECASE) for p in duration_patterns)
    hit = next((m for m in candidates if m), None)
    return hit.group(0) if hit else "Not found"

def extract_fee(text):
    """Return the first application-fee phrase found in *text*.

    Looks for a fee/cost/price keyword near a dollar or euro amount and
    returns the full matched phrase, or "Not found" when nothing matches.
    """
    fee_patterns = (
        r"(?:fee|cost|price).*?\$([\d,]+)",
        r"EUR?\s*([\d,]+).*?(?:fee|cost)",
    )
    candidates = (re.search(p, text, re.IGNORECASE) for p in fee_patterns)
    hit = next((m for m in candidates if m), None)
    return hit.group(0) if hit else "Not found"
Enter fullscreen mode Exit fullscreen mode

Batch Scraping All Countries

import time

def scrape_all_countries():
    """Scrape every entry in COUNTRY_SOURCES into a pandas DataFrame.

    Failures for individual countries are recorded as rows containing only
    the country name and the error message, so one broken site never aborts
    the whole run.
    """
    rows = []
    for name, source_url in COUNTRY_SOURCES.items():
        print(f"Scraping {name}...")
        try:
            rows.append(scrape_visa_page(name, source_url))
        except Exception as exc:
            print(f"Error scraping {name}: {exc}")
            rows.append({"country": name, "error": str(exc)})
        else:
            time.sleep(2)  # Be respectful
    return pd.DataFrame(rows)
Enter fullscreen mode Exit fullscreen mode

Change Detection

import hashlib
import os

def detect_requirement_changes(current_df, history_file="visa_history.json"):
    """Diff freshly scraped data against the stored snapshot on disk.

    Each country's row is hashed; when the hash differs from the last run,
    a change record (old/new income and duration) is appended to the result.
    The snapshot file is rewritten with the current data on every call.
    Returns the list of change records (empty on the first run).
    """
    previous = {}
    if os.path.exists(history_file):
        with open(history_file) as fh:
            previous = json.load(fh)

    diffs = []
    for _, record in current_df.iterrows():
        name = record["country"]
        snapshot = record.to_dict()
        # MD5 is fine here: the digest is only a change fingerprint,
        # not a security boundary.
        digest = hashlib.md5(str(snapshot).encode()).hexdigest()

        prior = previous.get(name)
        if prior is not None and prior["hash"] != digest:
            diffs.append({
                "country": name,
                "old_income": prior.get("min_income"),
                "new_income": record.get("min_income"),
                "old_duration": prior.get("duration"),
                "new_duration": record.get("duration"),
            })

        previous[name] = {"hash": digest, **snapshot}

    with open(history_file, "w") as fh:
        # default=str stringifies anything json can't serialize (e.g. NaN-free
        # pandas scalars) so the snapshot write never raises.
        json.dump(previous, fh, indent=2, default=str)

    return diffs
Enter fullscreen mode Exit fullscreen mode

Comparison Dashboard

def generate_comparison(df):
    """Print a plain-text comparison of every scraped country.

    Missing fields (e.g. on error rows) are shown as "N/A".
    """
    print("\nDigital Nomad Visa Comparison")
    print("=" * 60)
    for _, row in df.iterrows():
        # Keys must be string literals — the original bare names
        # (row[country], row.get(min_income, N/A)) raised NameError.
        print(f"\n{row['country']}:")
        print(f"  Income: {row.get('min_income', 'N/A')}")
        print(f"  Duration: {row.get('duration', 'N/A')}")
        print(f"  Fee: {row.get('fee', 'N/A')}")

if __name__ == "__main__":
    # Guarded so importing this module doesn't trigger a full scrape.
    df = scrape_all_countries()
    generate_comparison(df)
    changes = detect_requirement_changes(df)
    if changes:
        print(f"\nDetected {len(changes)} requirement changes!")
        for c in changes:
            # Keys must be string literals — the original bare names
            # (c[country], c[old_income]) raised NameError.
            print(f"  {c['country']}: income {c['old_income']} -> {c['new_income']}")
Enter fullscreen mode Exit fullscreen mode

Proxy Strategy

Immigration sites often geo-restrict content. ThorData residential proxies let you access country-specific pages. Monitor success rates with ScrapeOps.

Conclusion

Tracking 50+ digital nomad visa programs manually is impossible. With Python and ScraperAPI, you can build an automated tracker that alerts you when requirements change.

Top comments (0)