Knowing when and where your brand gets mentioned online is crucial for reputation management, competitive analysis, and PR. Commercial tools like Mention or Brand24 charge hundreds per month. Let us build our own brand mention tracker with Python.
Why Track Brand Mentions?
- Reputation management — respond to negative mentions before they spiral
- PR monitoring — know when media covers your brand
- Competitive intelligence — track competitor mentions too
- Customer feedback — find unsolicited reviews and opinions
- Link building — find unlinked mentions for SEO outreach
Architecture Overview
Our tracker will:
- Monitor Google search results for brand mentions
- Scrape news sites and forums
- Check social media platforms
- Analyze sentiment of mentions
- Send alerts for new or negative mentions
Setting Up
pip install requests beautifulsoup4 pandas textblob schedule
Core Brand Mention Scraper
import requests
from bs4 import BeautifulSoup
from dataclasses import dataclass
from datetime import datetime
from typing import Optional
import hashlib
@dataclass
class BrandMention:
    """A single online mention of a tracked brand, from any source."""
    brand: str          # the brand name that matched
    source: str         # human-readable origin, e.g. "Hacker News" or "r/python"
    title: str
    url: str
    snippet: str        # short excerpt of the surrounding text
    sentiment: float    # polarity score, filled in later by analysis
    found_at: str       # ISO-8601 timestamp of discovery
    mention_hash: str = ""

    def __post_init__(self):
        # Deduplication key: stable digest of URL + title, so the same
        # story seen in two scans is counted once.
        raw = f"{self.url}{self.title}".encode()
        self.mention_hash = hashlib.md5(raw).hexdigest()
class BrandTracker:
    """Collects, dedupes, and analyzes brand mentions from several sources."""

    def __init__(self, brand_name, api_key):
        self.brand = brand_name
        self.api_key = api_key
        self.mentions = []        # every unique mention seen so far
        self.seen_hashes = set()  # BrandMention.mention_hash dedup keys

    def fetch(self, url):
        """Fetch *url* through the ScraperAPI proxy and return the raw response.

        BUG FIX: the target URL must be percent-encoded before being embedded
        in the proxy URL — an unencoded `&` or `?` in *url* (e.g. Google's
        `?q=...&tbm=nws`) would otherwise be parsed as parameters of the
        proxy endpoint itself, truncating the target URL.
        """
        from urllib.parse import quote  # local import; module imports urllib later
        proxy_url = (
            f"http://api.scraperapi.com?api_key={self.api_key}"
            f"&url={quote(url, safe='')}"
        )
        response = requests.get(proxy_url, timeout=30)
        return response
Google News Monitoring
from urllib.parse import quote_plus
def search_google_news(self, days_back=7):
    """Scrape Google News results that mention the brand.

    Args:
        days_back: restrict results to the last N days (Google `when:` filter).

    Returns:
        list[BrandMention] with sentiment left at 0.0 (scored later).
    """
    query = quote_plus(f"\"{self.brand}\" when:{days_back}d")
    response = self.fetch(f"https://www.google.com/search?q={query}&tbm=nws")
    soup = BeautifulSoup(response.text, "html.parser")

    # Google ships more than one result layout; try the newer class names
    # first and fall back to the older ones.
    cards = soup.find_all("div", class_="SoaBEf") or soup.find_all("div", class_="g")
    found = []
    for card in cards:
        title_node = card.find("div", class_="MBeuO") or card.find("h3")
        if not title_node:
            continue  # not a real result card
        link_node = card.find("a")
        snippet_node = card.find("div", class_="GI74Re") or card.find("span", class_="aCOpRe")
        source_node = card.find("div", class_="CEMjEf") or card.find("span", class_="UPmit")
        found.append(BrandMention(
            brand=self.brand,
            source=source_node.get_text(strip=True) if source_node else "Google News",
            title=title_node.get_text(strip=True),
            url=link_node["href"] if link_node else "",
            snippet=snippet_node.get_text(strip=True) if snippet_node else "",
            sentiment=0.0,
            found_at=datetime.now().isoformat(),
        ))
    return found

# Attach as a method (the article builds BrandTracker incrementally).
BrandTracker.search_google_news = search_google_news
Reddit Monitoring
def search_reddit(self, subreddits=None, limit=25):
    """Search Reddit's public JSON API for recent posts mentioning the brand.

    Args:
        subreddits: optional list of subreddit names to keep (case-insensitive);
            None (or empty) keeps every subreddit.
        limit: maximum number of posts to request.

    Returns:
        list[BrandMention] with sentiment left at 0.0 (scored later).
    """
    query = quote_plus(self.brand)
    url = f"https://www.reddit.com/search.json?q={query}&sort=new&limit={limit}"
    # Reddit rejects requests without a descriptive User-Agent.
    headers = {"User-Agent": "BrandTracker/1.0"}
    response = requests.get(url, headers=headers, timeout=15)
    data = response.json()
    mentions = []
    # Normalize the filter once instead of lowercasing the list per post.
    allowed = {s.lower() for s in subreddits} if subreddits else None
    for post in data.get("data", {}).get("children", []):
        post_data = post["data"]
        if allowed is not None and post_data["subreddit"].lower() not in allowed:
            continue
        mentions.append(BrandMention(
            brand=self.brand,
            # BUG FIX: `subreddit` and `permalink` were bare names (NameError
            # at runtime); they must be string keys into the post payload.
            source=f"r/{post_data['subreddit']}",
            title=post_data["title"],
            url=f"https://reddit.com{post_data['permalink']}",
            snippet=post_data.get("selftext", "")[:300],
            sentiment=0.0,
            found_at=datetime.now().isoformat()
        ))
    return mentions

BrandTracker.search_reddit = search_reddit
Hacker News Monitoring
def search_hackernews(self):
    """Search Hacker News (via the Algolia API) for stories mentioning the brand.

    Returns:
        list[BrandMention]; `found_at` uses the story's own creation
        timestamp when the API provides one.
    """
    url = f"https://hn.algolia.com/api/v1/search_by_date?query={quote_plus(self.brand)}&tags=story"
    response = requests.get(url, timeout=15)
    data = response.json()
    mentions = []
    for hit in data.get("hits", []):
        mentions.append(BrandMention(
            brand=self.brand,
            source="Hacker News",
            title=hit.get("title", ""),
            # BUG FIX: `objectID` was a bare name (NameError); it is a string
            # key. Text/Ask-HN posts have no external URL, so fall back to
            # the HN item page.
            url=hit.get("url", f"https://news.ycombinator.com/item?id={hit['objectID']}"),
            snippet=hit.get("story_text", "")[:300] if hit.get("story_text") else "",
            sentiment=0.0,
            found_at=hit.get("created_at", datetime.now().isoformat())
        ))
    return mentions

BrandTracker.search_hackernews = search_hackernews
Sentiment Analysis
from textblob import TextBlob
def analyze_mentions(self):
    """Score every stored mention's sentiment and return a summary.

    Mutates each mention's `sentiment` in place (TextBlob polarity of the
    title plus snippet, rounded to 3 places), then buckets mentions at the
    +/-0.1 polarity thresholds.

    Returns:
        dict with total/positive/negative/neutral counts and avg_sentiment.
    """
    for m in self.mentions:
        m.sentiment = round(TextBlob(f"{m.title} {m.snippet}").sentiment.polarity, 3)

    scores = [m.sentiment for m in self.mentions]
    pos = sum(1 for s in scores if s > 0.1)
    neg = sum(1 for s in scores if s < -0.1)
    return {
        "total": len(scores),
        "positive": pos,
        "negative": neg,
        # Everything within [-0.1, 0.1] counts as neutral.
        "neutral": len(scores) - pos - neg,
        "avg_sentiment": sum(scores) / len(scores) if scores else 0,
    }
BrandTracker.analyze_mentions = analyze_mentions
Full Monitoring Pipeline
import time
def run_full_scan(self):
print(f"Scanning for {self.brand} mentions...")
news = self.search_google_news()
print(f" Google News: {len(news)} mentions")
time.sleep(2)
reddit = self.search_reddit()
print(f" Reddit: {len(reddit)} mentions")
time.sleep(1)
hn = self.search_hackernews()
print(f" Hacker News: {len(hn)} mentions")
all_mentions = news + reddit + hn
new_mentions = []
for mention in all_mentions:
if mention.mention_hash not in self.seen_hashes:
self.seen_hashes.add(mention.mention_hash)
new_mentions.append(mention)
self.mentions.extend(new_mentions)
print(f" New mentions: {len(new_mentions)}")
analysis = self.analyze_mentions()
print(f" Sentiment: +{analysis[positive]} / -{analysis[negative]} / ~{analysis[neutral]}")
return new_mentions
BrandTracker.run_full_scan = run_full_scan
# Demo: a one-off scan. Replace YOUR_KEY with a real ScraperAPI key.
tracker = BrandTracker("ScraperAPI", api_key="YOUR_KEY")
new = tracker.run_full_scan()
Alerts for Negative Mentions
import smtplib
from email.mime.text import MIMEText
def send_alerts(mentions, threshold=-0.2, email="you@example.com"):
    """Report mentions whose sentiment falls strictly below *threshold*.

    BUG FIX: the original composed the email `body` and then silently
    dropped it (nothing was ever handed to smtplib). The body is now
    returned so the caller can deliver it; actual SMTP delivery is left to
    the caller because it needs server credentials.

    Args:
        mentions: iterable of BrandMention-like objects (needs .sentiment,
            .source, .title, .url).
        threshold: sentiment cutoff; values below it count as negative.
        email: intended recipient — recorded for the caller, unused here.

    Returns:
        The composed alert body string, or None when nothing is negative
        (preserving the original's early-return behavior).
    """
    negative = [m for m in mentions if m.sentiment < threshold]
    if not negative:
        return None
    body = f"Found {len(negative)} negative mentions:\n\n"
    for m in negative:
        body += f"- [{m.source}] {m.title}\n Sentiment: {m.sentiment}\n {m.url}\n\n"
    print(f"ALERT: {len(negative)} negative mentions detected!")
    for m in negative:
        print(f" {m.source}: {m.title} (sentiment: {m.sentiment})")
    return body
Automated Scheduling
import schedule
def daily_brand_monitor():
    """One monitoring pass: scan each tracked brand and alert on negatives."""
    brands = ["YourBrand", "CompetitorA", "CompetitorB"]
    for name in brands:
        brand_tracker = BrandTracker(name, api_key="YOUR_KEY")
        fresh = brand_tracker.run_full_scan()
        send_alerts(fresh)
        time.sleep(10)  # space out brands to stay under rate limits

# Re-scan every four hours, forever (blocks the process).
schedule.every(4).hours.do(daily_brand_monitor)
while True:
    schedule.run_pending()
    time.sleep(60)
Exporting Data
import pandas as pd
def export_mentions(mentions, filename="brand_mentions.csv"):
data = [vars(m) for m in mentions]
df = pd.DataFrame(data)
df.to_csv(filename, index=False)
summary = df.groupby("source").agg(
count=("title", "count"),
avg_sentiment=("sentiment", "mean")
).sort_values("count", ascending=False)
print(summary)
return df
export_mentions(tracker.mentions)
Scaling Your Tracker
When monitoring multiple brands across many sources:
- ScraperAPI handles Google News scraping with automatic CAPTCHA solving and IP rotation
- ThorData provides geo-targeted residential proxies for location-specific brand monitoring
- ScrapeOps monitors your scraper fleet to ensure you never miss mentions due to blocked requests
Conclusion
A custom brand mention tracker gives you the same capabilities as expensive SaaS tools at a fraction of the cost. The combination of Google News, Reddit, and Hacker News covers the most important sources for tech brands. Add more sources as needed — Twitter/X, product review sites, or industry forums. The modular architecture makes it easy to extend.
Happy scraping!
Top comments (0)