DEV Community

agenthustler
agenthustler

Posted on

How to Build a Brand Mention Tracker with Web Scraping

Knowing when and where your brand gets mentioned online is crucial for reputation management, competitive analysis, and PR. Commercial tools like Mention or Brand24 charge hundreds of dollars per month. Let's build our own brand mention tracker with Python.

Why Track Brand Mentions?

  • Reputation management — respond to negative mentions before they spiral
  • PR monitoring — know when media covers your brand
  • Competitive intelligence — track competitor mentions too
  • Customer feedback — find unsolicited reviews and opinions
  • Link building — find unlinked mentions for SEO outreach

Architecture Overview

Our tracker will:

  1. Monitor Google search results for brand mentions
  2. Scrape news sites and forums
  3. Check social media platforms
  4. Analyze sentiment of mentions
  5. Send alerts for new or negative mentions

Setting Up

pip install requests beautifulsoup4 pandas textblob schedule
Enter fullscreen mode Exit fullscreen mode

Core Brand Mention Scraper

import requests
from bs4 import BeautifulSoup
from dataclasses import dataclass
from datetime import datetime
from typing import Optional
import hashlib

@dataclass
class BrandMention:
    """One discovered mention of a tracked brand on some source.

    `mention_hash` is a deduplication key derived from url+title; it is
    always recomputed in __post_init__, so the default is just a placeholder.
    """
    brand: str        # brand name this mention matched
    source: str       # human-readable origin, e.g. "Hacker News" or "r/python"
    title: str
    url: str
    snippet: str      # short excerpt of the surrounding text
    sentiment: float  # polarity score, filled in later by analysis
    found_at: str     # ISO-8601 timestamp of discovery
    mention_hash: str = ""

    def __post_init__(self):
        # md5 is used purely as a dedupe fingerprint, not for security.
        fingerprint = f"{self.url}{self.title}".encode()
        self.mention_hash = hashlib.md5(fingerprint).hexdigest()

class BrandTracker:
    """Collects brand mentions from multiple sources and dedupes them across scans."""

    def __init__(self, brand_name, api_key):
        self.brand = brand_name
        self.api_key = api_key      # ScraperAPI key used by fetch()
        self.mentions = []          # all unique mentions accumulated so far
        self.seen_hashes = set()    # mention_hash values already recorded

    def fetch(self, url):
        """Fetch `url` through the ScraperAPI proxy and return the raw Response.

        Bug fix: the target URL must be percent-encoded before being embedded
        as a query parameter — otherwise any `&` or `?` inside `url` is parsed
        as a parameter of the proxy request and the target's query string is
        silently dropped.
        """
        from urllib.parse import quote_plus  # local import keeps the fix self-contained
        proxy_url = f"http://api.scraperapi.com?api_key={self.api_key}&url={quote_plus(url)}"
        response = requests.get(proxy_url, timeout=30)
        return response
Enter fullscreen mode Exit fullscreen mode

Google News Monitoring

from urllib.parse import quote_plus

def search_google_news(self, days_back=7):
    """Scrape Google News search results that mention the brand.

    days_back: restricts results via Google's `when:Nd` operator.
    Returns a list of BrandMention with sentiment left at 0.0 (scored later).
    """
    query = quote_plus(f'"{self.brand}" when:{days_back}d')
    response = self.fetch(f"https://www.google.com/search?q={query}&tbm=nws")
    soup = BeautifulSoup(response.text, "html.parser")

    # Google periodically renames its result container classes; try the
    # newer class first and fall back to the older one.
    containers = soup.find_all("div", class_="SoaBEf") or soup.find_all("div", class_="g")

    found = []
    for item in containers:
        title_node = item.find("div", class_="MBeuO") or item.find("h3")
        if not title_node:
            continue  # not a usable result card
        link_node = item.find("a")
        snippet_node = item.find("div", class_="GI74Re") or item.find("span", class_="aCOpRe")
        source_node = item.find("div", class_="CEMjEf") or item.find("span", class_="UPmit")
        found.append(BrandMention(
            brand=self.brand,
            source=source_node.get_text(strip=True) if source_node else "Google News",
            title=title_node.get_text(strip=True),
            url=link_node["href"] if link_node else "",
            snippet=snippet_node.get_text(strip=True) if snippet_node else "",
            sentiment=0.0,
            found_at=datetime.now().isoformat(),
        ))
    return found

BrandTracker.search_google_news = search_google_news
Enter fullscreen mode Exit fullscreen mode

Reddit Monitoring

def search_reddit(self, subreddits=None, limit=25):
    """Search Reddit's public JSON endpoint for recent posts mentioning the brand.

    subreddits: optional iterable of subreddit names to keep (case-insensitive);
                None keeps every subreddit.
    limit: maximum number of posts requested from the API.
    Returns a list of BrandMention with sentiment left at 0.0.
    """
    query = quote_plus(self.brand)
    url = f"https://www.reddit.com/search.json?q={query}&sort=new&limit={limit}"

    # Reddit rejects requests without a descriptive User-Agent.
    headers = {"User-Agent": "BrandTracker/1.0"}
    response = requests.get(url, headers=headers, timeout=15)
    data = response.json()

    # Normalize the filter once instead of rebuilding the list per post.
    allowed = {s.lower() for s in subreddits} if subreddits else None

    mentions = []
    for post in data.get("data", {}).get("children", []):
        post_data = post["data"]
        if allowed is not None and post_data["subreddit"].lower() not in allowed:
            continue

        mentions.append(BrandMention(
            brand=self.brand,
            # Bug fix: `subreddit` and `permalink` were bare names (NameError
            # at runtime); they are string keys of the post payload.
            source=f"r/{post_data['subreddit']}",
            title=post_data["title"],
            url=f"https://reddit.com{post_data['permalink']}",
            snippet=post_data.get("selftext", "")[:300],
            sentiment=0.0,
            found_at=datetime.now().isoformat()
        ))
    return mentions

BrandTracker.search_reddit = search_reddit
Enter fullscreen mode Exit fullscreen mode

Hacker News Monitoring

def search_hackernews(self):
    """Search Hacker News stories (via the Algolia API) mentioning the brand.

    Returns a list of BrandMention, newest first, sentiment left at 0.0.
    """
    url = f"https://hn.algolia.com/api/v1/search_by_date?query={quote_plus(self.brand)}&tags=story"
    response = requests.get(url, timeout=15)
    data = response.json()

    mentions = []
    for hit in data.get("hits", []):
        mentions.append(BrandMention(
            brand=self.brand,
            source="Hacker News",
            title=hit.get("title", ""),
            # Bug fix: `objectID` was a bare name (NameError); it is a string
            # key of the Algolia hit. Also use `or` instead of .get's default:
            # text-only posts (Ask HN) carry "url": null, which .get would
            # return as-is — fall back to the HN item page in that case too.
            url=hit.get("url") or f"https://news.ycombinator.com/item?id={hit['objectID']}",
            snippet=hit.get("story_text", "")[:300] if hit.get("story_text") else "",
            sentiment=0.0,
            found_at=hit.get("created_at", datetime.now().isoformat())
        ))
    return mentions

BrandTracker.search_hackernews = search_hackernews
Enter fullscreen mode Exit fullscreen mode

Sentiment Analysis

from textblob import TextBlob

def analyze_mentions(self):
    """Score every collected mention with TextBlob polarity and summarize.

    Mutates each mention's `sentiment` in place (polarity in [-1, 1], rounded
    to 3 decimal places), then buckets mentions at a +/-0.1 neutrality band.
    Returns a dict with total/positive/negative/neutral counts and the mean
    sentiment (0 when no mentions have been collected).
    """
    for mention in self.mentions:
        combined = f"{mention.title} {mention.snippet}"
        mention.sentiment = round(TextBlob(combined).sentiment.polarity, 3)

    buckets = {"positive": 0, "negative": 0, "neutral": 0}
    for m in self.mentions:
        if m.sentiment > 0.1:
            buckets["positive"] += 1
        elif m.sentiment < -0.1:
            buckets["negative"] += 1
        else:
            buckets["neutral"] += 1

    total = len(self.mentions)
    return {
        "total": total,
        "positive": buckets["positive"],
        "negative": buckets["negative"],
        "neutral": buckets["neutral"],
        "avg_sentiment": sum(m.sentiment for m in self.mentions) / total if total else 0,
    }

BrandTracker.analyze_mentions = analyze_mentions
Enter fullscreen mode Exit fullscreen mode

Full Monitoring Pipeline

import time

def run_full_scan(self):
    print(f"Scanning for {self.brand} mentions...")

    news = self.search_google_news()
    print(f"  Google News: {len(news)} mentions")
    time.sleep(2)

    reddit = self.search_reddit()
    print(f"  Reddit: {len(reddit)} mentions")
    time.sleep(1)

    hn = self.search_hackernews()
    print(f"  Hacker News: {len(hn)} mentions")

    all_mentions = news + reddit + hn
    new_mentions = []
    for mention in all_mentions:
        if mention.mention_hash not in self.seen_hashes:
            self.seen_hashes.add(mention.mention_hash)
            new_mentions.append(mention)

    self.mentions.extend(new_mentions)
    print(f"  New mentions: {len(new_mentions)}")

    analysis = self.analyze_mentions()
    print(f"  Sentiment: +{analysis[positive]} / -{analysis[negative]} / ~{analysis[neutral]}")

    return new_mentions

BrandTracker.run_full_scan = run_full_scan

# Example usage: requires a real ScraperAPI key and performs live network
# requests against Google, Reddit, and Hacker News.
tracker = BrandTracker("ScraperAPI", api_key="YOUR_KEY")
new = tracker.run_full_scan()
Enter fullscreen mode Exit fullscreen mode

Alerts for Negative Mentions

import smtplib
from email.mime.text import MIMEText

def send_alerts(mentions, threshold=-0.2, email="you@example.com"):
    """Report mentions whose sentiment falls below `threshold`.

    mentions: objects with .sentiment, .source, .title, .url attributes.
    threshold: sentiment cutoff; strictly-below values trigger an alert.
    email: intended recipient. NOTE(review): smtplib/MIMEText are imported at
        module level but no SMTP send is wired up yet, so `email` is currently
        unused — confirm and add SMTP configuration before relying on it.
    Returns the alert body text (None when nothing crossed the threshold) so
    a caller can hand it to an emailer.
    """
    negative = [m for m in mentions if m.sentiment < threshold]
    if not negative:
        return None

    body = f"Found {len(negative)} negative mentions:\n\n"
    for m in negative:
        body += f"- [{m.source}] {m.title}\n  Sentiment: {m.sentiment}\n  {m.url}\n\n"

    print(f"ALERT: {len(negative)} negative mentions detected!")
    for m in negative:
        print(f"  {m.source}: {m.title} (sentiment: {m.sentiment})")
    # Bug fix: `body` was built but never used (dead code); return it.
    return body
Enter fullscreen mode Exit fullscreen mode

Automated Scheduling

import schedule

def daily_brand_monitor():
    """Scan a fixed list of brands and alert on negative mentions.

    Each brand gets a fresh BrandTracker, so dedupe state does not persist
    across invocations of this function. Replace "YOUR_KEY" with a real
    ScraperAPI key before scheduling.
    """
    brands = ["YourBrand", "CompetitorA", "CompetitorB"]
    for brand in brands:
        tracker = BrandTracker(brand, api_key="YOUR_KEY")
        new = tracker.run_full_scan()
        send_alerts(new)
        time.sleep(10)  # space out brands to limit request rate

# Run the monitor every 4 hours. NOTE: this loop blocks the process
# indefinitely — run it as a dedicated service/daemon.
schedule.every(4).hours.do(daily_brand_monitor)

while True:
    schedule.run_pending()
    time.sleep(60)  # poll the scheduler once a minute
Enter fullscreen mode Exit fullscreen mode

Exporting Data

import pandas as pd

def export_mentions(mentions, filename="brand_mentions.csv"):
    """Write mentions to a CSV file and print a per-source summary.

    mentions: objects whose attributes become CSV columns (via vars()).
    filename: output path for the CSV.
    Returns the full DataFrame of mentions.
    """
    df = pd.DataFrame([vars(m) for m in mentions])
    df.to_csv(filename, index=False)

    # Per-source mention counts and average sentiment, busiest sources first.
    per_source = (
        df.groupby("source")
        .agg(count=("title", "count"), avg_sentiment=("sentiment", "mean"))
        .sort_values("count", ascending=False)
    )
    print(per_source)
    return df

export_mentions(tracker.mentions)
Enter fullscreen mode Exit fullscreen mode

Scaling Your Tracker

When monitoring multiple brands across many sources:

  1. ScraperAPI handles Google News scraping with automatic CAPTCHA solving and IP rotation
  2. ThorData provides geo-targeted residential proxies for location-specific brand monitoring
  3. ScrapeOps monitors your scraper fleet to ensure you never miss mentions due to blocked requests

Conclusion

A custom brand mention tracker gives you the same capabilities as expensive SaaS tools at a fraction of the cost. The combination of Google News, Reddit, and Hacker News covers the most important sources for tech brands. Add more sources as needed — Twitter/X, product review sites, or industry forums. The modular architecture makes it easy to extend.

Happy scraping!

Top comments (0)