Python Web Scraping for Small Business: 5 Practical Use Cases

#python #webscraping #tutorial #business

Python Web Scraping for Small Business: 5 Practical Use Cases

Web scraping sounds technical, but for small businesses it's one of the most practical Python skills you can develop. Here are 5 real use cases with working code.

1. Price Monitoring

Track competitor prices automatically:

import requests
from bs4 import BeautifulSoup
import csv
from datetime import datetime

def scrape_price(url, price_selector):
    """Download a page and return the text of the first element matching a CSS selector.

    Args:
        url: Address of the product page to fetch.
        price_selector: CSS selector that locates the price element.

    Returns:
        The matched element's text with surrounding whitespace stripped,
        or None when the selector matches nothing on the page.
    """
    # Send a 'Mozilla/5.0' User-Agent and give up after 10 seconds.
    response = requests.get(
        url,
        headers={'User-Agent': 'Mozilla/5.0'},
        timeout=10,
    )
    page = BeautifulSoup(response.text, 'html.parser')
    element = page.select_one(price_selector)
    if not element:
        return None
    return element.text.strip()

# Track daily prices
# Each entry names a product, the page to fetch, and the CSS selector of its price.
products = [
    {'name': 'Widget A', 'url': 'https://example.com/widget-a', 'selector': '.price'},
    {'name': 'Widget B', 'url': 'https://example.com/widget-b', 'selector': '#product-price'},
]

# Append mode keeps rows from earlier runs, so the file accumulates a history.
# UTF-8 is requested explicitly so currency symbols in scraped prices are
# written the same way everywhere instead of depending on the locale codec.
with open('prices.csv', 'a', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    date = datetime.now().strftime('%Y-%m-%d')
    for product in products:
        try:
            price = scrape_price(product['url'], product['selector'])
        except requests.RequestException as exc:
            # One unreachable page must not abort the remaining products;
            # record an empty price, as when the selector matches nothing.
            print(f"{product['name']}: request failed ({exc})")
            price = None
        writer.writerow([date, product['name'], price])
        print(f"{product['name']}: {price}")

2. Lead Generation from Job Boards

If you're a B2B service provider, companies posting certain job roles are your warm leads:

import requests
from bs4 import BeautifulSoup

def find_companies_hiring(keyword, location='remote'):
    """Return the set of company names whose job postings match a search.

    NOTE(review): GitHub Jobs was retired in 2021, so this endpoint has
    probably stopped responding. Confirm, and point ``url`` at a live job
    board API that returns a JSON list of objects with a 'company' key.

    Args:
        keyword: Text to search for in job descriptions.
        location: Location filter sent with the query.

    Returns:
        A set of the distinct 'company' values found in the response.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
    """
    url = 'https://jobs.github.com/positions.json'
    # Passing the query via ``params`` lets requests URL-encode each value, so
    # a keyword containing '&', '#' or '=' cannot corrupt the query string.
    params = {'description': keyword, 'location': location}
    # Bound the wait so a stalled connection cannot hang the caller.
    r = requests.get(url, params=params, timeout=10)
    # Fail with a clear HTTP error rather than a confusing JSON decode error.
    r.raise_for_status()
    return {job['company'] for job in r.json()}

# Find companies needing your services, then report the total and up to five names.
hiring_python = find_companies_hiring('python automation')
company_count = len(hiring_python)
sample_companies = list(hiring_python)[:5]
print(f'Found {company_count} companies hiring')
print('Sample:', sample_companies)

3. Review Monitoring

Track what customers say about your business:

import requests
from bs4 import BeautifulSoup
import smtplib

def get_google_review_count(place_url):
    """Return the first parenthesised number found in a page, as an int.

    The page is fetched with a desktop-browser User-Agent and its raw HTML is
    searched for a pattern such as '(243)' or '(1,243)'.
    NOTE(review): this takes the first such number anywhere in the markup and
    presumes it is the review count. Verify against the real page.

    Args:
        place_url: Address of the page to download.

    Returns:
        The matched number, or 0 when the page contains no such pattern.
    """
    import re

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
    }
    # Bound the wait so a stalled connection cannot hang the caller.
    r = requests.get(place_url, headers=headers, timeout=10)
    # Find review count pattern like '(243)'. Thousands separators are
    # accepted so that '(1,243)' is read as 1243 instead of being skipped.
    match = re.search(r'\((\d{1,3}(?:,\d{3})+|\d+)\)', r.text)
    return int(match.group(1).replace(',', '')) if match else 0

4. Social Media Monitoring

Track mentions without paying for expensive tools:

import requests
import json

def search_reddit_mentions(brand_name, subreddit='all'):
    """Search Reddit for recent posts mentioning a brand.

    Args:
        brand_name: Search query text.
        subreddit: Subreddit to search in; the default 'all' searches
            across Reddit.

    Returns:
        A list of up to 25 dicts with 'title', 'url' and 'score' keys,
        newest first as returned by the API.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
    """
    url = f'https://www.reddit.com/r/{subreddit}/search.json'
    params = {
        'q': brand_name,
        'sort': 'new',
        'limit': 25,
        # Without restrict_sr the search ignores the subreddit in the URL and
        # covers all of Reddit, so enable it whenever one was requested.
        'restrict_sr': 'true' if subreddit != 'all' else 'false',
    }
    headers = {'User-Agent': 'Python/3.12 BrandMonitor/1.0'}
    # Bound the wait so a stalled connection cannot hang the caller.
    r = requests.get(url, params=params, headers=headers, timeout=10)
    # Surface rate limiting and other HTTP errors before indexing the JSON.
    r.raise_for_status()
    posts = r.json()['data']['children']
    return [{
        'title': p['data']['title'],
        'url': p['data']['url'],
        'score': p['data']['score']
    } for p in posts]

# Print one "<score> | <title>" line per mention, with the title cut to 60 characters.
mentions = search_reddit_mentions('your-brand-name')
for mention in mentions:
    score = mention['score']
    headline = mention['title'][:60]
    print(f"{score:4d} | {headline}")

5. Content Aggregation

Build a daily industry newsletter automatically:

import feedparser
from datetime import datetime, timedelta

# Feed URLs polled for the daily newsletter; passed to get_todays_posts().
FEEDS = [
    'https://hnrss.org/frontpage',
    'https://feeds.feedburner.com/TechCrunch',
    'https://www.indiehackers.com/feed.rss',
]

def get_todays_posts(feeds):
    """Collect entries published today (local time) from a list of feeds.

    Only the first 20 entries of each feed are examined. Entries without a
    usable publication date are skipped.

    Args:
        feeds: Iterable of feed URLs to parse with feedparser.

    Returns:
        A list of dicts with 'title', 'link' and 'source' keys, in the order
        the feeds and their entries were read.
    """
    import calendar

    today = datetime.now().date()
    posts = []
    for feed_url in feeds:
        feed = feedparser.parse(feed_url)
        # Fall back to the URL when the feed carries no title of its own.
        source = feed.feed.get('title', feed_url)
        for entry in feed.entries[:20]:
            # The date may be absent, or None when it could not be parsed.
            parsed = getattr(entry, 'published_parsed', None)
            if not parsed:
                continue
            # feedparser reports parsed dates in UTC; convert to the local
            # calendar date so the comparison with ``today`` is like-for-like.
            published = datetime.fromtimestamp(calendar.timegm(parsed)).date()
            if published == today:
                posts.append({
                    'title': entry.title,
                    'link': entry.link,
                    'source': source,
                })
    return posts

# Print the digest ordered by source: a "[source] title" line, then the indented link.
posts = get_todays_posts(FEEDS)
posts_by_source = sorted(posts, key=lambda post: post['source'])
for post in posts_by_source:
    print(f"[{post['source']}] {post['title']}")
    print(f"  {post['link']}")

Getting Started

If you want ready-made scripts for any of these use cases, I've packaged several into a kit:

Freelance Business Starter Kit — includes lead tracking templates and business automation scripts, $9.

Or, if you need custom automation built for your specific use case, I offer done-for-you HN lead reports for $75: a custom list of 20 hiring companies matching your skills, delivered within 24 hours.

What business scraping use case would be most valuable for you? Drop a comment.