Scraping Weather Data: NOAA, Weather.com, and Open APIs
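Weather data powers applications in agriculture, logistics, insurance, and event planning. This guide shows how to collect it with Python from three kinds of sources: a free government API (NOAA), a commercial API with a free tier (OpenWeatherMap), and a scraped website (Weather.com).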
Data Sources Overview
| Source | Type | Cost | Best For |
|---|---|---|---|
| NOAA Climate Data | API | Free | Historical weather data |
| OpenWeatherMap | API | Free tier | Current and forecast |
| Weather.com | Scraping | Free | Detailed forecasts |
| Visual Crossing | API | Free tier | Historical + forecast |
Setup
pip install requests beautifulsoup4 pandas matplotlib
NOAA Climate Data (Free Official API)
import requests
import pandas as pd
from datetime import datetime, timedelta
class NOAAScraper:
    """Small client for NOAA's Climate Data Online (CDO) v2 web API.

    Every request carries the access token in a ``token`` HTTP header.
    HTTP error responses raise ``requests.HTTPError`` instead of being
    reported as an empty result list.
    """

    # Seconds to wait for a response; without a timeout ``requests`` can
    # block indefinitely on a stalled connection.
    REQUEST_TIMEOUT = 30
    # Row cap sent as ``limit`` on /data requests.
    MAX_RESULTS = 1000
    # Days requested per call by get_temperature_history(). Three datatypes
    # times 300 days is 900 rows, which stays under MAX_RESULTS so no window
    # is silently truncated.
    HISTORY_WINDOW_DAYS = 300

    def __init__(self, token):
        """Remember the API base URL and build the token header.

        Args:
            token: CDO API access token.
        """
        self.base_url = "https://www.ncdc.noaa.gov/cdo-web/api/v2"
        self.headers = {"token": token}

    def _get_results(self, endpoint, params):
        """GET ``endpoint`` and return the ``results`` list of the JSON body.

        Returns an empty list when the body has no ``results`` key.

        Raises:
            requests.HTTPError: if the server answers with a 4xx/5xx status.
        """
        response = requests.get(
            f"{self.base_url}/{endpoint}",
            headers=self.headers,
            params=params,
            timeout=self.REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        return response.json().get("results", [])

    @staticmethod
    def _normalize_datatypes(datatypes):
        """Turn ``"TMAX,TMIN"`` or an iterable of ids into a clean list.

        A list makes ``requests`` emit one ``datatypeid`` parameter per id,
        which is the form the CDO documentation describes for multiple ids.
        """
        if isinstance(datatypes, str):
            datatypes = datatypes.split(",")
        return [code.strip() for code in datatypes if code.strip()]

    def get_stations(self, location_id, dataset="GHCND"):
        """Return up to 25 station records for a location.

        Args:
            location_id: CDO location id, e.g. ``"FIPS:36"``.
            dataset: Dataset id used to filter stations.

        Returns:
            list of station dicts (empty if the response has no results).
        """
        return self._get_results(
            "stations",
            {
                "datasetid": dataset,
                "locationid": location_id,
                "limit": 25,
            },
        )

    def get_daily_data(self, station_id, start_date, end_date, datatypes=None):
        """Return GHCND records for one station and date range.

        At most ``MAX_RESULTS`` rows are returned by a single call.

        Args:
            station_id: CDO station id.
            start_date: First day, formatted ``YYYY-MM-DD``.
            end_date: Last day, formatted ``YYYY-MM-DD``.
            datatypes: Optional datatype filter, either a comma-separated
                string (``"TMAX,TMIN"``) or an iterable of ids.

        Returns:
            list of record dicts (empty if the response has no results).
        """
        params = {
            "datasetid": "GHCND",
            "stationid": station_id,
            "startdate": start_date,
            "enddate": end_date,
            "units": "standard",
            "limit": self.MAX_RESULTS,
        }
        if datatypes:
            params["datatypeid"] = self._normalize_datatypes(datatypes)
        return self._get_results("data", params)

    def get_temperature_history(self, station_id, days=365):
        """Return TMAX/TMIN/TAVG for the last ``days`` days as a DataFrame.

        The range is fetched in consecutive, non-overlapping windows of
        ``HISTORY_WINDOW_DAYS`` days so that a single request never exceeds
        the per-request row cap (one request for a full year would ask for
        more than ``MAX_RESULTS`` rows and lose the newest ones).

        Args:
            station_id: CDO station id.
            days: How many days back from now to fetch.

        Returns:
            DataFrame indexed by date with one column per datatype, or an
            empty DataFrame when no records were returned.
        """
        end = datetime.now()
        start = end - timedelta(days=days)
        window_span = timedelta(days=self.HISTORY_WINDOW_DAYS - 1)
        one_day = timedelta(days=1)

        records = []
        window_start = start
        while window_start <= end:
            window_end = min(window_start + window_span, end)
            records.extend(
                self.get_daily_data(
                    station_id,
                    window_start.strftime("%Y-%m-%d"),
                    window_end.strftime("%Y-%m-%d"),
                    datatypes="TMAX,TMIN,TAVG",
                )
            )
            # Both bounds are inclusive, so the next window starts a day later.
            window_start = window_end + one_day

        df = pd.DataFrame(records)
        if not df.empty:
            df["date"] = pd.to_datetime(df["date"])
            df = df.pivot(index="date", columns="datatype", values="value")
        return df
# Example: list the first three stations returned for New York state.
# A free access token is available at ncdc.noaa.gov.
noaa = NOAAScraper("your_token_here")
stations = noaa.get_stations("FIPS:36")  # FIPS:36 = New York state
for station in stations[:3]:
    print("{name} - {id}".format(**station))
OpenWeatherMap API
class OpenWeatherScraper:
    """Client for the OpenWeatherMap 2.5 ``weather`` and ``forecast`` endpoints.

    All requests ask for imperial units. HTTP error responses raise
    ``requests.HTTPError`` rather than surfacing later as a ``KeyError``
    on the error payload.
    """

    # Seconds to wait for a response; without a timeout ``requests`` can
    # block indefinitely on a stalled connection.
    REQUEST_TIMEOUT = 30
    # The forecast endpoint is documented as returning 3-hour steps,
    # i.e. eight entries per day.
    STEPS_PER_DAY = 8

    def __init__(self, api_key):
        """Store the API key and base URL.

        Args:
            api_key: OpenWeatherMap API key, sent as ``appid``.
        """
        self.api_key = api_key
        self.base_url = "https://api.openweathermap.org/data/2.5"

    def _get_json(self, endpoint, params):
        """GET ``endpoint`` with the shared auth/unit parameters.

        Raises:
            requests.HTTPError: if the server answers with a 4xx/5xx status.
        """
        query = {"appid": self.api_key, "units": "imperial"}
        query.update(params)
        response = requests.get(
            f"{self.base_url}/{endpoint}",
            params=query,
            timeout=self.REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        return response.json()

    @staticmethod
    def _parse_current(data):
        """Flatten a ``weather`` response into a small summary dict."""
        main = data["main"]
        return {
            "city": data["name"],
            "temp": main["temp"],
            "feels_like": main["feels_like"],
            "humidity": main["humidity"],
            "description": data["weather"][0]["description"],
            "wind_speed": data["wind"]["speed"],
        }

    @staticmethod
    def _parse_forecast(data):
        """Convert a ``forecast`` response into a DataFrame, one row per step.

        ``rain_prob`` is the ``pop`` field scaled to a percentage; a missing
        ``pop`` counts as 0. A response without a ``list`` key yields an
        empty DataFrame.
        """
        rows = [
            {
                "datetime": item["dt_txt"],
                "temp": item["main"]["temp"],
                "humidity": item["main"]["humidity"],
                "description": item["weather"][0]["description"],
                "rain_prob": item.get("pop", 0) * 100,
            }
            for item in data.get("list", [])
        ]
        return pd.DataFrame(rows)

    def get_current(self, city):
        """Return current conditions for ``city`` as a dict.

        Keys: ``city``, ``temp``, ``feels_like``, ``humidity``,
        ``description``, ``wind_speed``.
        """
        return self._parse_current(self._get_json("weather", {"q": city}))

    def get_forecast(self, city, days=5):
        """Return the forecast for ``city`` as a DataFrame.

        Args:
            city: City name passed as the ``q`` query parameter.
            days: Number of days to request. Sent to the API as
                ``cnt = days * STEPS_PER_DAY``; the default of 5 asks for
                the full 40-step forecast.

        Returns:
            DataFrame with columns ``datetime``, ``temp``, ``humidity``,
            ``description`` and ``rain_prob`` (empty if no steps came back).
        """
        data = self._get_json(
            "forecast",
            {"q": city, "cnt": days * self.STEPS_PER_DAY},
        )
        return self._parse_forecast(data)
# Example: print the current temperature and conditions for New York.
weather = OpenWeatherScraper("your_api_key")
current = weather.get_current("New York")
print("NYC: {temp}F, {description}".format(**current))
Scraping Weather.com
class WeatherComScraper:
    """Scrape the Weather.com ten-day forecast page.

    Pages are fetched directly with a browser-like User-Agent, or through
    ScraperAPI (with rendering enabled) when a proxy key is supplied.
    """

    # Seconds to wait for a response; without a timeout ``requests`` can
    # block indefinitely on a stalled connection.
    REQUEST_TIMEOUT = 30
    # HTTPS keeps the API key out of plaintext traffic.
    PROXY_ENDPOINT = "https://api.scraperapi.com/"
    USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"

    def __init__(self, proxy_key=None):
        """Create the HTTP session.

        Args:
            proxy_key: Optional ScraperAPI key; when set, every fetch is
                routed through ScraperAPI.
        """
        self.session = requests.Session()
        self.proxy_key = proxy_key

    def fetch(self, url):
        """GET ``url`` and return the response object.

        With a proxy key the target URL is passed as a query parameter so
        that ``requests`` percent-encodes it; pasting it into the query
        string verbatim would let any ``&`` or ``?`` in the target URL be
        misread as ScraperAPI parameters.
        """
        if self.proxy_key:
            return self.session.get(
                self.PROXY_ENDPOINT,
                params={
                    "api_key": self.proxy_key,
                    "url": url,
                    "render": "true",
                },
                timeout=self.REQUEST_TIMEOUT,
            )
        return self.session.get(
            url,
            headers={"User-Agent": self.USER_AGENT},
            timeout=self.REQUEST_TIMEOUT,
        )

    @staticmethod
    def _text(node, default=""):
        """Return the stripped text of ``node``, or ``default`` if it is missing."""
        return node.text.strip() if node else default

    def get_10day_forecast(self, location_code):
        """Return the daily forecasts found on the ten-day page.

        Args:
            location_code: Weather.com location code, e.g. ``"10001:4:US"``.

        Returns:
            list of dicts with keys ``day``, ``high``, ``low``,
            ``description`` and ``precipitation``. Fields whose element is
            not found are ``""`` (``"0%"`` for precipitation). The list is
            empty when the page contains no matching forecast elements.
        """
        # Imported here because the file never imports BeautifulSoup at
        # module level; beautifulsoup4 is listed in the setup step.
        from bs4 import BeautifulSoup

        url = f"https://weather.com/weather/tenday/l/{location_code}"
        response = self.fetch(url)
        soup = BeautifulSoup(response.text, "html.parser")

        forecasts = []
        for day in soup.select("[data-testid=DailyForecast] details"):
            forecasts.append({
                "day": self._text(day.select_one("[data-testid=daypartName]")),
                "high": self._text(
                    day.select_one("[data-testid=TemperatureValue]:first-child")
                ),
                "low": self._text(day.select_one("[data-testid=lowTempValue]")),
                "description": self._text(day.select_one("[data-testid=wxPhrase]")),
                "precipitation": self._text(
                    day.select_one("[data-testid=Precipitation]"), "0%"
                ),
            })
        return forecasts
# Example: print each scraped day for location code 10001:4:US.
wc = WeatherComScraper()
forecast = wc.get_10day_forecast("10001:4:US")
for entry in forecast:
    print("{day}: {high}/{low} - {description}".format(**entry))
Multi-Source Weather Dashboard
import matplotlib.pyplot as plt
def compare_forecasts(city, location_code, owm_key):
    """Print OpenWeatherMap and Weather.com forecasts for one place.

    Args:
        city: City name used for the OpenWeatherMap lookup and the heading.
        location_code: Weather.com location code for the same place.
        owm_key: OpenWeatherMap API key.

    Returns:
        None. The comparison is written to standard output.
    """
    owm = OpenWeatherScraper(owm_key)
    wc = WeatherComScraper()
    owm_forecast = owm.get_forecast(city)
    wc_forecast = wc.get_10day_forecast(location_code)

    print(f"Weather Comparison for {city}")
    print("=" * 50)

    print("\nOpenWeatherMap 5-Day:")
    if owm_forecast.empty:
        # An empty DataFrame has no columns, so selecting them would raise
        # KeyError; report the missing data instead.
        print("  (no forecast data returned)")
    else:
        print(owm_forecast[["datetime", "temp", "description"]].head(10))

    print("\nWeather.com 10-Day:")
    if not wc_forecast:
        print("  (no forecast data returned)")
    # Only the first five scraped days are shown.
    for day in wc_forecast[:5]:
        print(f" {day['day']}: {day['high']}/{day['low']}")
# Example: compare both sources for Chicago.
compare_forecasts(
    city="Chicago",
    location_code="60601:4:US",
    owm_key="your_key",
)
Scaling Weather Data Collection
- ScraperAPI — JS rendering for Weather.com dynamic pages
- ThorData — geo-targeted proxies for location-specific forecasts
- ScrapeOps — monitor collection jobs across multiple weather sources
Conclusion
Combine API sources (NOAA, OpenWeatherMap) with web scraping (Weather.com) for comprehensive weather coverage. Use the combined data to power agricultural planning, logistics optimization, or event scheduling tools.
Follow for more data scraping tutorials!
Top comments (0)