Ozor

Posted on Mar 5

How to Build a Phishing URL Scanner in JavaScript (Free APIs)

#javascript #security #webdev #tutorial

Phishing attacks cost businesses $4.76 billion per year. But you can build a basic URL scanner in under 100 lines of code using free APIs.

Here's what we'll build: a tool that takes any URL and returns a risk assessment based on DNS records, IP geolocation, domain-name heuristics (suspicious TLDs, deep subdomain chains, brand look-alikes), and a visual screenshot.

The Architecture

Our scanner performs 4 checks on every URL:

DNS Resolution — Does the domain resolve? What IPs does it point to?
IP Geolocation — Where are the servers located? (Suspicious hosting = red flag)
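Domain Heuristics — Does the hostname use a suspicious TLD, a deep chain of subdomains, or a brand name it doesn't own? (Page-content analysis for login forms, credential fields, and brand impersonation is added later under "Extending the Scanner".)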
Visual Screenshot — Capture what the page looks like for evidence

Each check uses a free API — no API keys needed to start.

Setup

# Create an empty project directory and generate a default package.json
mkdir url-scanner && cd url-scanner
npm init -y

No dependencies needed — we're using fetch (built into Node.js 18+).

The Scanner

// scanner.js
const API_BASE = 'https://frostbyte-api.vercel.app';

/**
 * GET `endpoint` and parse the body as JSON, rejecting on a non-2xx status.
 *
 * Without this check an API error payload (rate limit, outage) would be read
 * as real data — e.g. a missing `A` field would be scored as a dead domain.
 *
 * @param {string} endpoint - Absolute URL to request.
 * @param {string} label - Name of the lookup, used in the error message.
 * @returns {Promise<any>} The parsed JSON body.
 * @throws {Error} When the response status is not 2xx.
 */
async function fetchJson(endpoint, label) {
  const res = await fetch(endpoint);
  if (!res.ok) {
    throw new Error(`${label} failed: HTTP ${res.status}`);
  }
  return res.json();
}

/**
 * Tell whether `hostname` is `<brand>.com` itself or one of its subdomains.
 *
 * A bare `endsWith('paypal.com')` would also accept `evil-paypal.com`, so the
 * match must be exact or preceded by a label separator.
 *
 * @param {string} hostname - Lowercase hostname.
 * @param {string} brand - Lowercase brand name, e.g. `paypal`.
 * @returns {boolean}
 */
function isBrandDomain(hostname, brand) {
  const official = `${brand}.com`;
  return hostname === official || hostname.endsWith(`.${official}`);
}

/**
 * Run the heuristic phishing checks against a URL and return a risk report.
 *
 * Performs a DNS lookup and (when an A record exists) an IP geolocation
 * lookup through the API at `API_BASE`, applies hostname heuristics locally,
 * and builds — but does not request — a screenshot URL.
 *
 * Known limitation: only `<brand>.com` counts as a brand's own domain, so
 * legitimate regional domains (for example a `.co.uk` site) are flagged.
 *
 * @param {string} targetUrl - Absolute http(s) URL to assess.
 * @returns {Promise<object>} Report with `domain`, `url`, `timestamp`,
 *   `risks` (string[]), `score` (0-100), `verdict`, `screenshotUrl`, plus
 *   `dns` and `geo` when the domain resolves.
 * @throws {TypeError} When `targetUrl` cannot be parsed as a URL.
 * @throws {Error} When the URL is not http(s) or an API lookup fails.
 */
async function scanUrl(targetUrl) {
  const url = new URL(targetUrl);
  if (url.protocol !== 'http:' && url.protocol !== 'https:') {
    throw new Error(`Only http(s) URLs can be scanned (got "${url.protocol}")`);
  }

  // `URL` already lowercases http(s) hostnames. Drop a trailing root dot
  // ("example.xyz.") so it cannot dodge the suffix-based checks below.
  const domain = url.hostname.replace(/\.$/, '');
  const labels = domain.split('.');
  const [firstLabel] = labels;

  console.log(`\n🔍 Scanning: ${targetUrl}\n`);

  const results = {
    domain,
    url: targetUrl,
    timestamp: new Date().toISOString(),
    risks: [],
    score: 0 // 0 = safe, 100 = definitely phishing
  };

  // Check 1: DNS Resolution
  console.log('📡 Checking DNS...');
  const dns = await fetchJson(`${API_BASE}/api/dns/${domain}`, 'DNS lookup');

  if (!dns.A || dns.A.length === 0) {
    results.risks.push('Domain does not resolve (dead or parked)');
    results.score += 30;
  } else {
    results.dns = {
      ip: dns.A[0],
      mx: dns.MX || [],
      nameservers: dns.NS || []
    };

    // Heuristic: a business site normally has mail infrastructure.
    if (!dns.MX || dns.MX.length === 0) {
      results.risks.push('No MX records (no email = suspicious for a business site)');
      results.score += 10;
    }
  }

  // Check 2: IP Geolocation (only possible when an A record was found)
  if (results.dns?.ip) {
    console.log('🌍 Checking IP location...');
    const geo = await fetchJson(`${API_BASE}/api/geo/${results.dns.ip}`, 'IP geolocation lookup');

    results.geo = {
      country: geo.country,
      city: geo.city,
      isp: geo.isp,
      org: geo.org
    };

    // Country codes this scanner treats as high-risk hosting jurisdictions
    const suspiciousCountries = ['RU', 'CN', 'KP', 'IR'];
    if (suspiciousCountries.includes(geo.country_code)) {
      results.risks.push(`Hosted in ${geo.country} (high-risk jurisdiction)`);
      results.score += 20;
    }

    // Heuristic: legitimate sites are rarely served from consumer connections.
    if (geo.isp && /residential|mobile|wireless/i.test(geo.isp)) {
      results.risks.push('Hosted on residential/mobile connection');
      results.score += 15;
    }
  }

  // Check 3: Domain heuristics
  console.log('🔤 Analyzing domain...');

  // Suspicious TLDs commonly used in phishing
  const suspiciousTLDs = ['.tk', '.ml', '.ga', '.cf', '.gq', '.xyz', '.top', '.buzz', '.click'];
  if (suspiciousTLDs.some((tld) => domain.endsWith(tld))) {
    results.risks.push(`Suspicious TLD: ${labels.at(-1)}`);
    results.score += 15;
  }

  // Excessive subdomains (login.secure.account.example.com).
  // Assumes a two-label registrable domain, so multi-part suffixes such as
  // "co.uk" count as one extra subdomain.
  const subdomainCount = labels.length - 2;
  if (subdomainCount >= 3) {
    results.risks.push(`Excessive subdomains (${subdomainCount}) — common phishing tactic`);
    results.score += 20;
  }

  // Brand impersonation: the brand appears in the hostname, but the hostname
  // is neither <brand>.com nor one of its subdomains.
  const brands = ['paypal', 'apple', 'google', 'microsoft', 'amazon', 'netflix', 'facebook', 'instagram'];
  for (const brand of brands) {
    if (domain.includes(brand) && !isBrandDomain(domain, brand)) {
      results.risks.push(`Possible ${brand} impersonation in domain`);
      results.score += 25;
    }
  }

  // Typosquatting signal: a long first label that mixes digits and letters
  const mixesDigitsAndLetters = /[0-9]/.test(firstLabel) && /[a-z]/i.test(firstLabel);
  if (mixesDigitsAndLetters && firstLabel.length > 10) {
    results.risks.push('Long domain with mixed alphanumeric characters');
    results.score += 10;
  }

  // Check 4: Screenshot — only the URL is built here; nothing is fetched.
  console.log('📸 Capturing screenshot...');
  const screenshotUrl = `${API_BASE}/api/screenshot?url=${encodeURIComponent(targetUrl)}&width=1280&height=800`;
  results.screenshotUrl = screenshotUrl;

  // Calculate final verdict (score is capped at 100)
  results.score = Math.min(results.score, 100);
  if (results.score >= 60) {
    results.verdict = '🔴 HIGH RISK — Likely phishing';
  } else if (results.score >= 30) {
    results.verdict = '🟡 MEDIUM RISK — Suspicious, investigate further';
  } else {
    results.verdict = '🟢 LOW RISK — Appears legitimate';
  }

  return results;
}

// Run scanner: read the target URL from the first CLI argument.
const target = process.argv[2];
if (!target) {
  console.log('Usage: node scanner.js <url>');
  console.log('Example: node scanner.js https://example.com');
  process.exit(1);
}

/**
 * Print a human-readable report for one scan result to stdout.
 *
 * @param {object} results - Report object returned by `scanUrl`.
 */
function printReport(results) {
  const { domain, verdict, score, geo, risks, screenshotUrl } = results;
  const divider = '='.repeat(50);

  console.log('\n' + divider);
  console.log('SCAN RESULTS');
  console.log(divider);
  console.log(`Domain:  ${domain}`);
  console.log(`Verdict: ${verdict}`);
  console.log(`Score:   ${score}/100`);

  // `geo` is only present when the domain resolved to an IP.
  if (geo) {
    console.log(`Server:  ${geo.city}, ${geo.country} (${geo.isp})`);
  }

  if (risks.length > 0) {
    console.log('\nRisk Factors:');
    risks.forEach((risk) => console.log(`  ⚠️  ${risk}`));
  } else {
    console.log('\nNo risk factors detected.');
  }

  console.log(`\nScreenshot: ${screenshotUrl}`);
}

scanUrl(target)
  .then(printReport)
  .catch((err) => {
    console.error('Scan failed:', err.message);
    // Exit non-zero so shells and CI can tell the scan did not complete.
    process.exitCode = 1;
  });

Test It

# Scan a legitimate site
node scanner.js https://github.com

# Output:
#
# 🔍 Scanning: https://github.com
#
# 📡 Checking DNS...
# 🌍 Checking IP location...
# 🔤 Analyzing domain...
# 📸 Capturing screenshot...
#
# ==================================================
# SCAN RESULTS
# ==================================================
# Domain:  github.com
# Verdict: 🟢 LOW RISK — Appears legitimate
# Score:   0/100
# Server:  San Francisco, United States (GitHub, Inc.)
#
# No risk factors detected.
#
# Screenshot: https://frostbyte-api.vercel.app/api/screenshot?url=https%3A%2F%2Fgithub.com&width=1280&height=800

How It Works

The scanner stacks multiple signals. No single check is conclusive, but combined they create a reliable risk score:

Check	What it catches	Score impact
DNS resolution	Dead/parked domains	+30
No MX records	Sites with no email infrastructure	+10
IP geolocation	Bulletproof hosting	+20
Residential IP	Servers on home connections	+15
Suspicious TLD	.tk, .xyz, .buzz, etc.	+15
Deep subdomains	login.secure.verify.example.com	+20
Brand in domain	paypal-login.xyz	+25
Long mixed alphanumeric name	First label over 10 characters mixing letters and digits	+10

Extending the Scanner

Here are a few ways to make this production-ready:

Add page content analysis

// Scrape the page text and look for credential-harvesting phrases.
// This snippet uses `targetUrl` and `results`, so it belongs inside scanUrl,
// before the final verdict is calculated (the score is capped at 100 there).
const scrapeRes = await fetch(
  `${API_BASE}/api/scrape?url=${encodeURIComponent(targetUrl)}&format=text`
);
// An error response body is not the target page; analysing it could
// produce false keyword hits (e.g. "suspended" in an error message).
if (!scrapeRes.ok) {
  throw new Error(`Page scrape failed: HTTP ${scrapeRes.status}`);
}
const pageText = await scrapeRes.text();

// Lowercase once up front instead of on every keyword comparison.
const pageTextLower = pageText.toLowerCase();

// Check for credential harvesting signals (each hit adds 15 points)
const phishingKeywords = ['verify your account', 'confirm your identity',
  'update your payment', 'suspended', 'unusual activity'];

for (const keyword of phishingKeywords) {
  if (pageTextLower.includes(keyword)) {
    results.risks.push(`Page contains suspicious text: "${keyword}"`);
    results.score += 15;
  }
}

Batch scanning

const urls = [
  'https://totally-legit-paypal.xyz',
  'https://amazon-verify-account.tk',
  'https://github.com'
];

/**
 * Scan every URL concurrently and print one line per completed scan,
 * highest score first.
 *
 * Uses `Promise.allSettled` so one failed scan (bad URL, API error) is
 * reported on stderr without discarding the other results. Wrapped in an
 * async function because top-level `await` is not available in CommonJS.
 *
 * @param {string[]} targets - URLs to scan.
 * @returns {Promise<object[]>} Reports of the scans that completed, sorted
 *   by descending score.
 */
async function scanBatch(targets) {
  const outcomes = await Promise.allSettled(targets.map((target) => scanUrl(target)));

  const completed = [];
  outcomes.forEach((outcome, index) => {
    if (outcome.status === 'fulfilled') {
      completed.push(outcome.value);
    } else {
      const reason = outcome.reason?.message ?? outcome.reason;
      console.error(`Scan failed for ${targets[index]}: ${reason}`);
    }
  });

  completed.sort((a, b) => b.score - a.score);
  for (const r of completed) {
    console.log(`${r.verdict} | ${r.domain} (score: ${r.score})`);
  }
  return completed;
}

scanBatch(urls).catch((err) => {
  console.error('Batch scan failed:', err.message);
  process.exitCode = 1;
});

Save results as JSON

const fs = require('fs');

/**
 * Scan `targetUrl` and write the full report, pretty-printed, to
 * `scan-<epoch milliseconds>.json` in the current working directory.
 *
 * Wrapped in an async function because this snippet uses `require`, i.e. it
 * runs as CommonJS, where top-level `await` is a syntax error.
 *
 * @param {string} targetUrl - URL to scan.
 * @returns {Promise<string>} Name of the file that was written.
 */
async function saveScan(targetUrl) {
  const results = await scanUrl(targetUrl);
  const fileName = `scan-${Date.now()}.json`;
  fs.writeFileSync(fileName, JSON.stringify(results, null, 2));
  return fileName;
}

saveScan('https://suspicious-site.xyz').catch((err) => {
  console.error('Scan failed:', err.message);
  process.exitCode = 1;
});

The APIs Used

All four APIs are free and require no signup:

DNS Lookup — /api/dns/{domain} — Returns A, AAAA, MX, NS, TXT records
IP Geolocation — /api/geo/{ip} — Returns country, city, ISP, coordinates
Web Scraper — /api/scrape?url= — Returns page content as text/HTML
Screenshot — /api/screenshot?url= — Returns PNG screenshot

Each API call costs 1 credit. New users get 200 free credits — enough for 50 full scans.

Get your free API key: Frostbyte API Gateway

What This Won't Catch

This is a heuristic scanner, not a replacement for threat intelligence feeds. It won't catch:

Compromised legitimate domains
Zero-day phishing campaigns
Sites behind Cloudflare/CDN (IP geo will show CDN, not origin)
URL shorteners (you'd need to resolve the redirect first)

For production use, combine this with blocklists (PhishTank, Google Safe Browsing) and SSL certificate analysis.

Built with Frostbyte API — 40+ developer APIs, 200 free credits, no signup required.

DEV Community