Bandcamp is the leading platform for independent musicians to sell their music directly to fans. With millions of artists and albums, Bandcamp data is valuable for music market research, pricing analysis, and discovering emerging artists.
Here's how to scrape Bandcamp data with Python.
Use Cases
- Music market research: Analyze pricing trends across genres
- Artist discovery: Find emerging artists by sales data and reviews
- Pricing strategy: Compare how artists price their work
- Genre analysis: Map the indie music landscape
- Label intelligence: Track independent label catalogs
Scraping Album Pages
import json
import re
import statistics
import time
from urllib.parse import quote, urljoin

import requests
from bs4 import BeautifulSoup
# Browser-like User-Agent passed to every requests.get() call in this script
# (scrape_album, scrape_artist and scrape_tag_page).
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36"
    ),
}
def _parse_json_ld(soup):
    """Return the first JSON-LD object embedded in *soup* as a dict.

    Returns an empty dict when the page has no JSON-LD script, the script
    is empty, or its content is not valid JSON.
    """
    ld_json = soup.find("script", {"type": "application/ld+json"})
    if ld_json is None:
        return {}
    try:
        # ``.string`` can be None (e.g. for an empty tag); treat that as
        # "no data" instead of letting json.loads raise TypeError.
        data = json.loads(ld_json.string or "")
    except ValueError:
        return {}
    if isinstance(data, list):
        # JSON-LD may be an array of objects; use the first object found.
        data = next((item for item in data if isinstance(item, dict)), {})
    return data if isinstance(data, dict) else {}


def scrape_album(album_url, timeout=10):
    """Extract album details from a Bandcamp album page.

    Args:
        album_url: URL of the album page to fetch.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A dict with the keys ``title``, ``artist``, ``url``, ``price``,
        ``release_date``, ``tracks`` (a list of ``{"title", "duration"}``
        dicts), ``num_tracks``, ``tags``, ``image_url`` and ``description``.
        Values that cannot be found are empty strings/lists, except
        ``price``, which falls back to ``"Name Your Price"``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection errors or timeouts.
    """
    response = requests.get(album_url, headers=HEADERS, timeout=timeout)
    # Fail loudly on error pages instead of returning a dict of blanks.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    # Bandcamp embeds structured data in JSON-LD
    data = _parse_json_ld(soup)

    # Extract from page elements
    title = soup.select_one("h2.trackTitle")
    artist = soup.select_one("span[itemprop='byArtist'] a")
    price_el = soup.select_one(".buyItem .base-text-color")

    # Fall back to the JSON-LD artist when the HTML selector finds nothing,
    # mirroring the existing fallback used for the title.
    by_artist = data.get("byArtist")
    ld_artist = by_artist.get("name", "") if isinstance(by_artist, dict) else ""

    # Get track listing
    tracks = []
    for row in soup.select("table.track_list tr.track_row_view"):
        track_title = row.select_one(".track-title")
        track_duration = row.select_one(".time")
        tracks.append({
            "title": track_title.text.strip() if track_title else "",
            "duration": track_duration.text.strip() if track_duration else "",
        })

    # Get tags
    tags = [tag.text.strip() for tag in soup.select(".tralbumData .tag")]

    return {
        "title": title.text.strip() if title else data.get("name", ""),
        "artist": artist.text.strip() if artist else ld_artist,
        "url": album_url,
        "price": price_el.text.strip() if price_el else "Name Your Price",
        "release_date": data.get("datePublished", ""),
        "tracks": tracks,
        "num_tracks": len(tracks),
        "tags": tags,
        "image_url": data.get("image", ""),
        "description": data.get("description", ""),
    }
Scraping Artist Pages
def scrape_artist(artist_url, timeout=10):
    """Extract artist info and discography from a Bandcamp artist page.

    Args:
        artist_url: URL of the artist page to fetch.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A dict with the keys ``name``, ``location``, ``bio``,
        ``discography`` (a list of ``{"title", "url"}`` dicts) and
        ``album_count``. Values that cannot be found are empty strings.
        Discography URLs are absolute.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection errors or timeouts.
    """
    response = requests.get(artist_url, headers=HEADERS, timeout=timeout)
    # Fail loudly on error pages instead of returning a dict of blanks.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    # Artist name
    name = soup.select_one("#band-name-location .title")
    location = soup.select_one("#band-name-location .location")

    # Bio
    bio = soup.select_one(".signed-out-artists-bio-text")

    # Discography
    albums = []
    for item in soup.select(".music-grid-item"):
        link = item.select_one("a")
        title = item.select_one(".title")
        href = link.get("href") if link else None
        albums.append({
            "title": title.text.strip() if title else "",
            # An href may be site-relative (e.g. "/album/x"); resolve it
            # against the final page URL so it can be fetched directly.
            # Absolute hrefs pass through urljoin unchanged.
            "url": urljoin(response.url, href) if href else "",
        })

    return {
        "name": name.text.strip() if name else "",
        "location": location.text.strip() if location else "",
        "bio": bio.text.strip() if bio else "",
        "discography": albums,
        "album_count": len(albums),
    }
Exploring Genre Tags
def scrape_tag_page(tag, page=1, timeout=10):
    """Get albums from one page of a Bandcamp tag/genre listing.

    No HTTP status check is performed: an error page is parsed like any
    other response, so that scrape_genre() can keep using an empty result
    as its signal to stop paging.

    Args:
        tag: Tag/genre name; it is percent-encoded before being placed in
            the URL path.
        page: 1-based page number sent as the ``page`` query parameter.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A list of ``{"title", "artist", "url", "tag"}`` dicts, one per
        ``.item_list .item`` element found (possibly empty). ``tag`` is the
        value passed in, unencoded.

    Raises:
        requests.RequestException: On connection errors or timeouts.
    """
    # Encode every reserved character (including "/", "?" and "#") so an
    # unusual tag cannot change the path or query of the request URL.
    encoded_tag = quote(str(tag), safe="")
    url = f"https://bandcamp.com/tag/{encoded_tag}?page={page}"
    response = requests.get(url, headers=HEADERS, timeout=timeout)
    soup = BeautifulSoup(response.text, "html.parser")

    albums = []
    for item in soup.select(".item_list .item"):
        title = item.select_one(".itemtext")
        artist = item.select_one(".itemsubtext")
        link = item.select_one("a")
        # An anchor without an href yields "" rather than a KeyError,
        # matching how scrape_artist() treats discography links.
        href = link.get("href") if link else None
        albums.append({
            "title": title.text.strip() if title else "",
            "artist": artist.text.strip() if artist else "",
            "url": href or "",
            "tag": tag,
        })
    return albums
def scrape_genre(tag, max_pages=5, delay=2):
    """Scrape multiple pages of a genre tag.

    Pages are requested in order starting at 1. Scraping stops at the
    first page that yields no albums or after ``max_pages`` pages,
    whichever comes first.

    Args:
        tag: Tag/genre name, passed through to scrape_tag_page().
        max_pages: Maximum number of pages to request.
        delay: Seconds to sleep after each non-empty page, to rate-limit
            requests. Defaults to the previously hard-coded 2 seconds.

    Returns:
        A single list with the album dicts from every page scraped.
    """
    all_albums = []
    for page in range(1, max_pages + 1):
        albums = scrape_tag_page(tag, page)
        if not albums:
            # Stop at the first page without results.
            break
        all_albums.extend(albums)
        print(f"Tag '{tag}' page {page}: {len(albums)} albums")
        time.sleep(delay)
    return all_albums
Price Analysis
# Currency symbol followed by an amount. The integer part is either a
# comma-grouped number ("1,000") or a plain run of digits; the optional
# second group is the decimal part including the dot.
_PRICE_RE = re.compile(r"[\$€£](\d{1,3}(?:,\d{3})+|\d+)(\.\d*)?")


def analyze_pricing(albums_with_details):
    """Analyze pricing patterns across albums.

    Each album's ``price`` string is classified as "name your price",
    "free", or a numeric price introduced by ``$``, ``€`` or ``£``. Albums
    whose price is missing, ``None`` or unparseable count towards
    ``total_albums`` only. Amounts are compared as plain numbers; no
    currency conversion is done.

    Args:
        albums_with_details: Iterable-with-length of album dicts, e.g. the
            dicts returned by scrape_album().

    Returns:
        A dict with ``total_albums``, ``paid_albums``, ``name_your_price``,
        ``free_albums``, ``avg_price``, ``min_price``, ``max_price`` and
        ``median_price``. The price statistics are 0 when no numeric price
        was found; ``avg_price`` and ``median_price`` are rounded to two
        decimals.
    """
    prices = []
    name_your_price = 0
    free = 0

    for album in albums_with_details:
        # A missing or None price is treated as an empty string.
        price_str = album.get("price") or ""
        lowered = price_str.lower()
        if "name your price" in lowered:
            name_your_price += 1
        elif "free" in lowered:
            free += 1
        else:
            # Extract numeric price
            match = _PRICE_RE.search(price_str)
            if match:
                whole = match.group(1).replace(",", "")
                prices.append(float(whole + (match.group(2) or "")))

    return {
        "total_albums": len(albums_with_details),
        "paid_albums": len(prices),
        "name_your_price": name_your_price,
        "free_albums": free,
        "avg_price": round(sum(prices) / len(prices), 2) if prices else 0,
        "min_price": min(prices) if prices else 0,
        "max_price": max(prices) if prices else 0,
        # statistics.median averages the two middle values for an even
        # count; indexing sorted(prices)[n // 2] returned the upper one.
        "median_price": round(statistics.median(prices), 2) if prices else 0,
    }
Production Bandcamp Scraping
For large-scale Bandcamp data extraction, the Bandcamp Scraper on Apify handles complex pagination, dynamic content loading, and data normalization automatically, which makes it well suited to building comprehensive music databases.
When scraping at scale, use ThorData proxies to distribute requests across residential IPs and avoid rate limits.
Saving Data
import csv
def save_albums_csv(albums, filename="bandcamp_albums.csv"):
    """Write album dicts to a CSV file, one row per album.

    The ``tracks`` key is dropped and ``tags`` is joined into a single
    comma-separated string so every value fits in one cell. Columns are
    the union of the keys of all albums, in order of first appearance; a
    key missing from an album is written as an empty cell.

    Args:
        albums: List of album dicts, e.g. as returned by scrape_album().
            Nothing is written when it is empty.
        filename: Path of the CSV file to create or overwrite.
    """
    if not albums:
        return

    # Flatten tracks list for CSV
    flat_albums = []
    fieldnames = {}  # dict used as an insertion-ordered set of column names
    for album in albums:
        flat = {k: v for k, v in album.items() if k != "tracks"}
        # ``or []`` also covers an explicit ``"tags": None``.
        flat["tags"] = ", ".join(album.get("tags") or [])
        flat_albums.append(flat)
        # Collect columns from every album, not just the first, so a later
        # album with an extra key does not make DictWriter raise ValueError.
        fieldnames.update(dict.fromkeys(flat))

    with open(filename, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=list(fieldnames), restval="")
        writer.writeheader()
        writer.writerows(flat_albums)
    print(f"Saved {len(flat_albums)} albums to {filename}")
Best Practices
- Use JSON-LD data: Bandcamp embeds structured data — parse it before falling back to HTML selectors
- Rate limit: 2-3 seconds between requests
- Use ThorData for residential proxies when scraping at volume
- Respect artist content: Scrape metadata, not actual audio files
- Cache results: Album data rarely changes after release
Conclusion
Bandcamp is a treasure trove of indie music data. From pricing trends to genre analysis, the data powers valuable market insights. Use the techniques above for small projects, or the Bandcamp Scraper on Apify for production workloads.
Happy data mining!
Top comments (0)