Scraping Weather Data: NOAA, Weather.com, and Open APIs

#python #tutorial #webdev #programming

Scraping Weather Data: NOAA, Weather.com, and Open APIs

Weather data powers agriculture, logistics, insurance, and event planning applications. This guide covers how to collect weather data from multiple sources using Python.

Data Sources Overview

Source	Type	Cost	Best For
NOAA Climate Data	API	Free	Historical weather data
OpenWeatherMap	API	Free tier	Current conditions and forecasts
Weather.com	Scraping	Free	Detailed forecasts
Visual Crossing	API	Free tier	Historical + forecast

Setup

pip install requests beautifulsoup4 pandas matplotlib

NOAA Climate Data (Free Official API)

import requests
import pandas as pd
from datetime import datetime, timedelta

class NOAAScraper:
    """Small client for the NOAA Climate Data Online (CDO) v2 web API.

    Every request is sent to ``base_url`` with the access token in a
    ``token`` header. HTTP error responses raise
    ``requests.HTTPError`` instead of being silently turned into an empty
    result list, so an invalid token or a rate limit is visible to the caller.

    Args:
        token: CDO API access token.
        timeout: Seconds to wait for each HTTP request before giving up.
    """

    def __init__(self, token, timeout=30):
        self.base_url = "https://www.ncdc.noaa.gov/cdo-web/api/v2"
        self.headers = {"token": token}
        # requests waits indefinitely unless a timeout is given.
        self.timeout = timeout

    @staticmethod
    def _normalize_datatypes(datatypes):
        """Return *datatypes* as a list of non-empty, stripped type ids.

        Accepts either a comma-separated string (``"TMAX,TMIN"``) or an
        iterable of ids.
        """
        if isinstance(datatypes, str):
            datatypes = datatypes.split(",")
        return [item.strip() for item in datatypes if item and item.strip()]

    def _get_results(self, endpoint, params):
        """GET ``endpoint`` and return the ``results`` list of the JSON body.

        Returns an empty list when the body has no ``results`` key.

        Raises:
            requests.HTTPError: If the server answers with a 4xx/5xx status.
        """
        response = requests.get(
            f"{self.base_url}/{endpoint}",
            headers=self.headers,
            params=params,
            timeout=self.timeout,
        )
        response.raise_for_status()
        return response.json().get("results", [])

    def get_stations(self, location_id, dataset="GHCND"):
        """Return up to 25 station records for a location and dataset.

        Args:
            location_id: Location identifier, e.g. ``"FIPS:36"``.
            dataset: Dataset identifier; defaults to ``"GHCND"``.

        Returns:
            A list of station dicts (empty if the response has no results).
        """
        return self._get_results(
            "stations",
            {
                "datasetid": dataset,
                "locationid": location_id,
                "limit": 25,
            },
        )

    def get_daily_data(self, station_id, start_date, end_date, datatypes=None):
        """Return GHCND records for one station between two dates.

        At most 1000 records are requested and no paging is performed, so
        longer ranges are truncated to the first 1000 records returned.

        Args:
            station_id: Station identifier.
            start_date: Start date string, as sent in ``startdate``.
            end_date: End date string, as sent in ``enddate``.
            datatypes: Optional data type ids, either a comma-separated
                string or an iterable of strings.

        Returns:
            A list of record dicts (empty if the response has no results).
        """
        params = {
            "datasetid": "GHCND",
            "stationid": station_id,
            "startdate": start_date,
            "enddate": end_date,
            "units": "standard",
            "limit": 1000,
        }
        if datatypes:
            type_ids = self._normalize_datatypes(datatypes)
            if type_ids:
                # A list makes requests emit one ``datatypeid`` parameter per
                # id, which is the multi-value form the API documents; a
                # single comma-joined value is not.
                params["datatypeid"] = type_ids

        return self._get_results("data", params)

    def get_temperature_history(self, station_id, days=365):
        """Return TMAX/TMIN/TAVG for the last ``days`` days as a DataFrame.

        Args:
            station_id: Station identifier.
            days: Number of days before now to start from.

        Returns:
            A DataFrame indexed by date with one column per data type, or an
            empty DataFrame when no records come back.
        """
        end = datetime.now()
        start = end - timedelta(days=days)

        # NOTE(review): 365 days x 3 data types can exceed the 1000-record
        # limit used by get_daily_data, so the default range may come back
        # truncated - confirm and add paging if full coverage is required.
        data = self.get_daily_data(
            station_id,
            start.strftime("%Y-%m-%d"),
            end.strftime("%Y-%m-%d"),
            datatypes="TMAX,TMIN,TAVG",
        )

        df = pd.DataFrame(data)
        if not df.empty:
            df["date"] = pd.to_datetime(df["date"])
            # Reshape from one row per (date, datatype) to one row per date.
            df = df.pivot(index="date", columns="datatype", values="value")
        return df

# Usage (get free token at ncdc.noaa.gov)
# "your_token_here" is a placeholder; substitute a real token before running.
noaa = NOAAScraper("your_token_here")
stations = noaa.get_stations("FIPS:36")  # New York state
# Print the name and id of the first three stations returned.
for s in stations[:3]:
    print(f"{s['name']} - {s['id']}")

OpenWeatherMap API

class OpenWeatherScraper:
    """Small client for the OpenWeatherMap ``data/2.5`` endpoints.

    All requests ask for imperial units. HTTP error responses raise
    ``requests.HTTPError`` rather than surfacing later as a ``KeyError`` on a
    missing field of the error payload.

    Args:
        api_key: OpenWeatherMap API key, sent as ``appid``.
        timeout: Seconds to wait for each HTTP request before giving up.
    """

    # The forecast endpoint is the 5-day / 3-hour product: 8 entries per day,
    # 40 entries at most.
    _STEPS_PER_DAY = 8
    _MAX_STEPS = 40

    def __init__(self, api_key, timeout=30):
        self.api_key = api_key
        self.base_url = "https://api.openweathermap.org/data/2.5"
        # requests waits indefinitely unless a timeout is given.
        self.timeout = timeout

    def _get_json(self, endpoint, extra_params=None):
        """GET ``endpoint`` with the API key and return the decoded JSON.

        Raises:
            requests.HTTPError: If the server answers with a 4xx/5xx status.
        """
        params = {"appid": self.api_key, "units": "imperial"}
        if extra_params:
            params.update(extra_params)
        response = requests.get(
            f"{self.base_url}/{endpoint}",
            params=params,
            timeout=self.timeout,
        )
        response.raise_for_status()
        return response.json()

    @staticmethod
    def _forecast_count(days):
        """Translate a number of days into a number of forecast entries.

        Raises:
            ValueError: If ``days`` covers less than one forecast entry.
        """
        count = int(days * OpenWeatherScraper._STEPS_PER_DAY)
        if count < 1:
            raise ValueError(f"days must cover at least one forecast step, got {days!r}")
        return min(count, OpenWeatherScraper._MAX_STEPS)

    @staticmethod
    def _parse_current(data):
        """Pick the summary fields out of a current-weather payload."""
        main = data["main"]
        return {
            "city": data["name"],
            "temp": main["temp"],
            "feels_like": main["feels_like"],
            "humidity": main["humidity"],
            "description": data["weather"][0]["description"],
            "wind_speed": data["wind"]["speed"],
        }

    @staticmethod
    def _parse_forecast(data):
        """Build a DataFrame with one row per entry of a forecast payload.

        A payload without a ``list`` key yields an empty DataFrame.
        """
        rows = [
            {
                "datetime": item["dt_txt"],
                "temp": item["main"]["temp"],
                "humidity": item["main"]["humidity"],
                "description": item["weather"][0]["description"],
                # "pop" is a 0-1 fraction; report it as a percentage.
                "rain_prob": item.get("pop", 0) * 100,
            }
            for item in data.get("list", [])
        ]
        return pd.DataFrame(rows)

    def get_current(self, city):
        """Return current conditions for ``city`` as a flat dict.

        Returns:
            A dict with keys ``city``, ``temp``, ``feels_like``, ``humidity``,
            ``description`` and ``wind_speed``.

        Raises:
            requests.HTTPError: If the API rejects the request.
        """
        return self._parse_current(self._get_json("weather", {"q": city}))

    def get_forecast(self, city, days=5):
        """Return the forecast for ``city`` as a DataFrame.

        Args:
            city: City name, sent as the ``q`` query parameter.
            days: Number of days of forecast to request. The default of 5
                requests the full 40 entries; smaller values request fewer.

        Returns:
            A DataFrame with columns ``datetime``, ``temp``, ``humidity``,
            ``description`` and ``rain_prob`` (empty if nothing is returned).

        Raises:
            ValueError: If ``days`` covers less than one forecast entry.
            requests.HTTPError: If the API rejects the request.
        """
        payload = self._get_json(
            "forecast",
            {"q": city, "cnt": self._forecast_count(days)},
        )
        return self._parse_forecast(payload)

# Usage: "your_api_key" is a placeholder; substitute a real key before running.
weather = OpenWeatherScraper("your_api_key")
current = weather.get_current("New York")
# Temperature is labelled F because the client requests imperial units.
print(f"NYC: {current['temp']}F, {current['description']}")

Scraping Weather.com

class WeatherComScraper:
    """Scrape the Weather.com ten-day forecast page.

    Pages are fetched either directly or, when ``proxy_key`` is given,
    through the ScraperAPI endpoint with rendering enabled.

    Args:
        proxy_key: Optional ScraperAPI key. When falsy, pages are fetched
            directly with a browser-like User-Agent header.
    """

    def __init__(self, proxy_key=None):
        self.session = requests.Session()
        self.proxy_key = proxy_key

    def _proxy_params(self, url):
        """Return the query parameters for a proxied fetch of ``url``."""
        return {"api_key": self.proxy_key, "url": url, "render": "true"}

    @staticmethod
    def _text_or(node, default=""):
        """Return the stripped text of ``node``, or ``default`` if it is None."""
        return node.text.strip() if node else default

    def fetch(self, url):
        """Fetch ``url`` and return the response object.

        The HTTP status is not checked here; callers decide how to handle
        error responses.
        """
        if self.proxy_key:
            # Passing the target through ``params`` URL-encodes it, so a
            # target containing "&" or "?" is not split into extra proxy
            # parameters. HTTPS keeps the API key out of cleartext.
            return self.session.get(
                "https://api.scraperapi.com/",
                params=self._proxy_params(url),
                timeout=30,
            )
        return self.session.get(
            url,
            headers={
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
            },
            # Without a timeout requests can block indefinitely.
            timeout=30,
        )

    def get_10day_forecast(self, location_code):
        """Return the daily forecasts parsed from the ten-day page.

        Args:
            location_code: Location code placed in the page URL,
                e.g. ``"10001:4:US"``.

        Returns:
            A list of dicts with keys ``day``, ``high``, ``low``,
            ``description`` and ``precipitation``. Fields missing from the
            page are empty strings (``"0%"`` for precipitation). The list is
            empty when no forecast elements match.

        Raises:
            requests.HTTPError: If the page request fails with a 4xx/5xx
                status, instead of parsing the error page into an empty list.
        """
        # Imported here because this file uses BeautifulSoup without
        # importing it at module level.
        from bs4 import BeautifulSoup

        url = f"https://weather.com/weather/tenday/l/{location_code}"
        response = self.fetch(url)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")

        forecasts = []
        for day in soup.select("[data-testid=DailyForecast] details"):
            day_name = day.select_one("[data-testid=daypartName]")
            temp_high = day.select_one("[data-testid=TemperatureValue]:first-child")
            temp_low = day.select_one("[data-testid=lowTempValue]")
            desc = day.select_one("[data-testid=wxPhrase]")
            precip = day.select_one("[data-testid=Precipitation]")

            forecasts.append({
                "day": self._text_or(day_name),
                "high": self._text_or(temp_high),
                "low": self._text_or(temp_low),
                "description": self._text_or(desc),
                "precipitation": self._text_or(precip, "0%"),
            })

        return forecasts

# Usage: fetch the ten-day page directly (no proxy key is passed) and print
# one line per parsed day.
wc = WeatherComScraper()
forecast = wc.get_10day_forecast("10001:4:US")
for day in forecast:
    print(f"{day['day']}: {day['high']}/{day['low']} - {day['description']}")

Multi-Source Weather Dashboard

import matplotlib.pyplot as plt

def compare_forecasts(city, location_code, owm_key):
    """Print OpenWeatherMap and Weather.com forecasts for one place.

    Args:
        city: City name passed to ``OpenWeatherScraper.get_forecast``.
        location_code: Location code passed to
            ``WeatherComScraper.get_10day_forecast``.
        owm_key: OpenWeatherMap API key.

    Returns:
        None. The comparison is written to standard output: the first ten
        OpenWeatherMap rows and the first five Weather.com days.
    """
    owm = OpenWeatherScraper(owm_key)
    wc = WeatherComScraper()

    owm_forecast = owm.get_forecast(city)
    wc_forecast = wc.get_10day_forecast(location_code)

    print(f"Weather Comparison for {city}")
    print("=" * 50)

    print("\nOpenWeatherMap 5-Day:")
    if owm_forecast.empty:
        # A DataFrame built from an empty list has no columns, so selecting
        # columns below would raise KeyError.
        print("  (no forecast data returned)")
    else:
        print(owm_forecast[["datetime", "temp", "description"]].head(10))

    print("\nWeather.com 10-Day:")
    if not wc_forecast:
        print("  (no forecast data returned)")
    for day in wc_forecast[:5]:
        print(f"  {day['day']}: {day['high']}/{day['low']}")

# Example run for Chicago; "your_key" is a placeholder for an OpenWeatherMap key.
compare_forecasts("Chicago", "60601:4:US", "your_key")

Scaling Weather Data Collection

ScraperAPI — JS rendering for Weather.com dynamic pages
ThorData — geo-targeted proxies for location-specific forecasts
ScrapeOps — monitor collection jobs across multiple weather sources

Conclusion

Combine official APIs (NOAA, OpenWeatherMap) with web scraping (Weather.com) for comprehensive weather data. Use the combined data to power agricultural planning, logistics optimization, or event scheduling tools.

Follow for more data scraping tutorials!

DEV Community

Scraping Weather Data: NOAA, Weather.com, and Open APIs

Scraping Weather Data: NOAA, Weather.com, and Open APIs

Data Sources Overview

Setup

NOAA Climate Data (Free Official API)

OpenWeatherMap API

Scraping Weather.com

Multi-Source Weather Dashboard

Scaling Weather Data Collection

Conclusion

Top comments (0)