DEV Community

agenthustler
agenthustler

Posted on

Scraping Digital Nomad Visa Requirements: 50+ Countries Tracked

The Digital Nomad Visa Explosion

Over 50 countries now offer digital nomad visas, each with different requirements for income, insurance, and duration. Keeping track manually is impossible — the rules change constantly. Let's build a scraper that monitors all of them.

What We Track

For each country:

  • Minimum income requirement
  • Visa duration and renewal options
  • Tax implications
  • Health insurance requirements
  • Application fees
  • Processing time

Setup

pip install requests beautifulsoup4 pandas
Enter fullscreen mode Exit fullscreen mode

Government immigration sites are notoriously difficult to scrape. ScraperAPI handles geo-restrictions and JavaScript rendering.

The Country Scraper

import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
import json

# ScraperAPI credential — replace with your own key before running.
SCRAPER_API_KEY = "YOUR_KEY"

# Government / official visa-information pages, keyed by country name.
# NOTE(review): these URLs are illustrative and unverified — confirm each
# one still resolves before relying on the scraped output.
COUNTRY_SOURCES = {
    "Portugal": "https://vistos.mne.gov.pt/en/national-visas/digital-nomad",
    "Spain": "https://www.exteriores.gob.es/digital-nomad-visa",
    "Croatia": "https://mup.gov.hr/en/digital-nomad",
    "Estonia": "https://e-resident.gov.ee/nomadvisa",
    "Barbados": "https://www.barbadoswelcomestamp.bb",
    "Thailand": "https://ltr.boi.go.th",
    "Indonesia": "https://www.imigrasi.go.id/digital-nomad",
    "Greece": "https://digitalnomadforgr.gov.gr"
}

def scrape_visa_page(country, url):
    """Fetch one country's visa page through ScraperAPI and extract key fields.

    Returns a dict with the country name, source URL, the extracted income /
    duration / fee phrases (or "Not found"), and the first 2000 characters of
    the page text for debugging.
    """
    # Pass the target URL via `params` so requests percent-encodes it; the
    # original f-string interpolation breaks for URLs containing '&' or '?'.
    response = requests.get(
        "http://api.scraperapi.com",
        params={"api_key": SCRAPER_API_KEY, "url": url, "render": "true"},
        timeout=90,  # rendered pages are slow, but never hang indefinitely
    )
    # Fail loudly on HTTP errors rather than parsing an error page as data.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")
    # Flatten the whole page to plain text; the regex extractors work on text.
    text = soup.get_text(separator=" ", strip=True)

    return {
        "country": country,
        "url": url,
        "min_income": extract_income(text),
        "duration": extract_duration(text),
        "fee": extract_fee(text),
        "raw_text": text[:2000]  # truncated sample for manual inspection
    }

def extract_income(text):
    """Return the first income-requirement phrase found in *text*.

    Tries a series of currency/income regexes in priority order and returns
    the full matched phrase, or "Not found" when nothing matches.
    """
    income_patterns = (
        r"\$([\d,]+)\s*(?:per month|monthly|/month)",
        r"EUR?\s*([\d,]+)\s*(?:per month|monthly)",
        r"minimum.*?income.*?([\d,]+)",
    )
    candidates = (re.search(p, text, re.IGNORECASE) for p in income_patterns)
    hit = next((m for m in candidates if m), None)
    return hit.group(0) if hit else "Not found"

def extract_duration(text):
    """Return the first visa-duration phrase found in *text*.

    Matches forms like "12 months visa" or "valid for 2 years"; returns the
    full matched phrase, or "Not found" when nothing matches.
    """
    duration_patterns = (
        r"(\d+)\s*(?:months?|years?)\s*(?:visa|stay|duration)",
        r"valid\s*(?:for)?\s*(\d+)\s*(?:months?|years?)",
    )
    candidates = (re.search(p, text, re.IGNORECASE) for p in duration_patterns)
    hit = next((m for m in candidates if m), None)
    return hit.group(0) if hit else "Not found"

def extract_fee(text):
    """Return the first application-fee phrase found in *text*.

    Looks for a fee/cost/price keyword near a dollar or euro amount and
    returns the full matched phrase, or "Not found" when nothing matches.
    """
    fee_patterns = (
        r"(?:fee|cost|price).*?\$([\d,]+)",
        r"EUR?\s*([\d,]+).*?(?:fee|cost)",
    )
    candidates = (re.search(p, text, re.IGNORECASE) for p in fee_patterns)
    hit = next((m for m in candidates if m), None)
    return hit.group(0) if hit else "Not found"
Enter fullscreen mode Exit fullscreen mode

Batch Scraping All Countries

import time

def scrape_all_countries():
    """Scrape every entry in COUNTRY_SOURCES into a pandas DataFrame.

    Failures for individual countries are recorded as rows containing only
    the country name and the error message, so one broken site never aborts
    the whole run.
    """
    rows = []
    for name, source_url in COUNTRY_SOURCES.items():
        print(f"Scraping {name}...")
        try:
            rows.append(scrape_visa_page(name, source_url))
        except Exception as exc:
            print(f"Error scraping {name}: {exc}")
            rows.append({"country": name, "error": str(exc)})
        else:
            time.sleep(2)  # Be respectful
    return pd.DataFrame(rows)
Enter fullscreen mode Exit fullscreen mode

Change Detection

import hashlib
import os

def detect_requirement_changes(current_df, history_file="visa_history.json"):
    """Diff freshly scraped data against the stored snapshot on disk.

    Each country's row is hashed; when the hash differs from the last run,
    a change record (old/new income and duration) is appended to the result.
    The snapshot file is rewritten with the current data on every call.
    Returns the list of change records (empty on the first run).
    """
    previous = {}
    if os.path.exists(history_file):
        with open(history_file) as fh:
            previous = json.load(fh)

    diffs = []
    for _, record in current_df.iterrows():
        name = record["country"]
        snapshot = record.to_dict()
        # MD5 is fine here: the digest is only a change fingerprint,
        # not a security boundary.
        digest = hashlib.md5(str(snapshot).encode()).hexdigest()

        prior = previous.get(name)
        if prior is not None and prior["hash"] != digest:
            diffs.append({
                "country": name,
                "old_income": prior.get("min_income"),
                "new_income": record.get("min_income"),
                "old_duration": prior.get("duration"),
                "new_duration": record.get("duration"),
            })

        previous[name] = {"hash": digest, **snapshot}

    with open(history_file, "w") as fh:
        # default=str stringifies anything json can't serialize (e.g. NaN-free
        # pandas scalars) so the snapshot write never raises.
        json.dump(previous, fh, indent=2, default=str)

    return diffs
Enter fullscreen mode Exit fullscreen mode

Comparison Dashboard

def generate_comparison(df):
    """Print a plain-text comparison of every scraped country.

    Missing fields (e.g. on error rows) are shown as "N/A".
    """
    print("\nDigital Nomad Visa Comparison")
    print("=" * 60)
    for _, row in df.iterrows():
        # Keys must be string literals — the original bare names
        # (row[country], row.get(min_income, N/A)) raised NameError.
        print(f"\n{row['country']}:")
        print(f"  Income: {row.get('min_income', 'N/A')}")
        print(f"  Duration: {row.get('duration', 'N/A')}")
        print(f"  Fee: {row.get('fee', 'N/A')}")

if __name__ == "__main__":
    # Guarded so importing this module doesn't trigger a full scrape.
    df = scrape_all_countries()
    generate_comparison(df)
    changes = detect_requirement_changes(df)
    if changes:
        print(f"\nDetected {len(changes)} requirement changes!")
        for c in changes:
            # Keys must be string literals — the original bare names
            # (c[country], c[old_income]) raised NameError.
            print(f"  {c['country']}: income {c['old_income']} -> {c['new_income']}")
Enter fullscreen mode Exit fullscreen mode

Proxy Strategy

Immigration sites often geo-restrict content. ThorData residential proxies let you access country-specific pages. Monitor success rates with ScrapeOps.

Conclusion

Tracking 50+ digital nomad visa programs manually is impossible. With Python and ScraperAPI, you can build an automated tracker that alerts you when requirements change.

Top comments (0)