DEV Community

agenthustler
agenthustler

Posted on

Scraping Weather Data: NOAA, Weather.com, and Open APIs

Scraping Weather Data: NOAA, Weather.com, and Open APIs

Weather data powers agriculture, logistics, insurance, and event planning applications. This guide covers how to collect weather data from multiple sources using Python.

Data Sources Overview

| Source | Type | Cost | Best For |
|---|---|---|---|
| NOAA Climate Data | API | Free | Historical weather data |
| OpenWeatherMap | API | Free tier | Current and forecast |
| Weather.com | Scraping | Free | Detailed forecasts |
| Visual Crossing | API | Free tier | Historical + forecast |

Setup

pip install requests beautifulsoup4 pandas matplotlib
Enter fullscreen mode Exit fullscreen mode

NOAA Climate Data (Free Official API)

import requests
import pandas as pd
from datetime import datetime, timedelta

class NOAAScraper:
    """Client for the NOAA Climate Data Online (CDO) v2 REST API.

    Requires a free access token (https://www.ncdc.noaa.gov/cdo-web/token),
    which the API expects in a ``token`` request header.
    """

    def __init__(self, token, timeout=30):
        self.base_url = "https://www.ncdc.noaa.gov/cdo-web/api/v2"
        self.headers = {"token": token}
        # Per-request timeout in seconds; without it a stalled connection
        # blocks forever.
        self.timeout = timeout

    def _get(self, endpoint, params):
        """GET one CDO endpoint and return its 'results' list (or [])."""
        response = requests.get(
            f"{self.base_url}/{endpoint}",
            headers=self.headers,
            params=params,
            timeout=self.timeout,
        )
        # Fail loudly on auth errors / rate limits instead of silently
        # returning [] from an error payload.
        response.raise_for_status()
        return response.json().get("results", [])

    def get_stations(self, location_id, dataset="GHCND", limit=25):
        """List stations for a location id such as 'FIPS:36' (NY state)."""
        return self._get("stations", {
            "datasetid": dataset,
            "locationid": location_id,
            "limit": limit,
        })

    def get_daily_data(self, station_id, start_date, end_date, datatypes=None):
        """Fetch daily GHCND observations for one station.

        `datatypes` may be a list/tuple or a comma-separated string such as
        "TMAX,TMIN". The CDO API expects one repeated `datatypeid` query
        parameter per type, which `requests` produces from a list value —
        so a comma string is split rather than sent verbatim.
        """
        params = {
            "datasetid": "GHCND",
            "stationid": station_id,
            "startdate": start_date,
            "enddate": end_date,
            "units": "standard",
            "limit": 1000,  # CDO maximum page size
        }
        if datatypes:
            if isinstance(datatypes, str):
                datatypes = datatypes.split(",")
            params["datatypeid"] = datatypes
        return self._get("data", params)

    def get_temperature_history(self, station_id, days=365):
        """Return a DataFrame of TMAX/TMIN/TAVG over the trailing window.

        Empty results yield an empty DataFrame (no pivot attempted).
        """
        end = datetime.now()
        start = end - timedelta(days=days)

        data = self.get_daily_data(
            station_id,
            start.strftime("%Y-%m-%d"),
            end.strftime("%Y-%m-%d"),
            datatypes="TMAX,TMIN,TAVG",
        )

        df = pd.DataFrame(data)
        if not df.empty:
            df["date"] = pd.to_datetime(df["date"])
            # One column per datatype (TMAX/TMIN/TAVG), indexed by date.
            df = df.pivot(index="date", columns="datatype", values="value")
        return df

# Usage (get a free token at https://www.ncdc.noaa.gov/cdo-web/token)
if __name__ == "__main__":
    # Guarded so importing this module does not fire live network
    # requests with the placeholder token.
    noaa = NOAAScraper("your_token_here")
    stations = noaa.get_stations("FIPS:36")  # New York state
    for s in stations[:3]:
        print(f"{s['name']} - {s['id']}")
Enter fullscreen mode Exit fullscreen mode

OpenWeatherMap API

class OpenWeatherScraper:
    """Minimal OpenWeatherMap v2.5 client (current conditions + forecast)."""

    def __init__(self, api_key, timeout=30):
        self.api_key = api_key
        self.base_url = "https://api.openweathermap.org/data/2.5"
        # Per-request timeout in seconds so a stalled connection cannot hang.
        self.timeout = timeout

    def _fetch(self, endpoint, city):
        """GET one endpoint for `city` and return the decoded JSON payload."""
        response = requests.get(
            f"{self.base_url}/{endpoint}",
            params={"q": city, "appid": self.api_key, "units": "imperial"},
            timeout=self.timeout,
        )
        # Surface bad API keys / unknown cities as HTTP errors instead of
        # failing later with an opaque KeyError on the error payload.
        response.raise_for_status()
        return response.json()

    def get_current(self, city):
        """Return a flat dict of current conditions for `city` (imperial)."""
        data = self._fetch("weather", city)
        return {
            "city": data["name"],
            "temp": data["main"]["temp"],
            "feels_like": data["main"]["feels_like"],
            "humidity": data["main"]["humidity"],
            "description": data["weather"][0]["description"],
            "wind_speed": data["wind"]["speed"],
        }

    def get_forecast(self, city, days=5):
        """Return the 3-hourly forecast for `city` as a DataFrame.

        Note: the free /forecast endpoint always returns ~5 days of data;
        `days` is kept for interface compatibility but is not sent to the
        API.
        """
        data = self._fetch("forecast", city)

        forecasts = [
            {
                "datetime": item["dt_txt"],
                "temp": item["main"]["temp"],
                "humidity": item["main"]["humidity"],
                "description": item["weather"][0]["description"],
                # "pop" is probability of precipitation in [0, 1].
                "rain_prob": item.get("pop", 0) * 100,
            }
            for item in data.get("list", [])
        ]
        return pd.DataFrame(forecasts)

if __name__ == "__main__":
    # Guarded so importing this module does not fire a live request with
    # the placeholder API key.
    weather = OpenWeatherScraper("your_api_key")
    current = weather.get_current("New York")
    print(f"NYC: {current['temp']}F, {current['description']}")
Enter fullscreen mode Exit fullscreen mode

Scraping Weather.com

class WeatherComScraper:
    """Scrapes the weather.com 10-day forecast page.

    When `proxy_key` is given, pages are fetched through ScraperAPI with
    JavaScript rendering enabled; otherwise a direct request is made with
    a browser-like User-Agent.
    """

    def __init__(self, proxy_key=None):
        self.session = requests.Session()
        self.proxy_key = proxy_key

    def fetch(self, url):
        """GET `url`, optionally via the ScraperAPI rendering proxy."""
        if self.proxy_key:
            # Percent-encode the target URL so its own query string is not
            # misparsed as extra ScraperAPI parameters.
            from urllib.parse import quote
            api_url = (
                "http://api.scraperapi.com"
                f"?api_key={self.proxy_key}&url={quote(url, safe='')}&render=true"
            )
            return self.session.get(api_url, timeout=30)
        return self.session.get(
            url,
            headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"},
            timeout=30,
        )

    def get_10day_forecast(self, location_code):
        """Parse the 10-day forecast for a weather.com location code.

        Returns a list of dicts with day/high/low/description/precipitation;
        elements missing from the page default to "" (or "0%" for precip).
        """
        # Imported here (bs4 was never imported at module level in this
        # article) and only needed on this scraping path.
        from bs4 import BeautifulSoup

        url = f"https://weather.com/weather/tenday/l/{location_code}"
        response = self.fetch(url)
        soup = BeautifulSoup(response.text, "html.parser")

        forecasts = []
        # NOTE(review): these data-testid selectors track weather.com's
        # current markup and will break whenever the page is redesigned.
        for day in soup.select("[data-testid=DailyForecast] details"):
            day_name = day.select_one("[data-testid=daypartName]")
            temp_high = day.select_one("[data-testid=TemperatureValue]:first-child")
            temp_low = day.select_one("[data-testid=lowTempValue]")
            desc = day.select_one("[data-testid=wxPhrase]")
            precip = day.select_one("[data-testid=Precipitation]")

            forecasts.append({
                "day": day_name.text.strip() if day_name else "",
                "high": temp_high.text.strip() if temp_high else "",
                "low": temp_low.text.strip() if temp_low else "",
                "description": desc.text.strip() if desc else "",
                "precipitation": precip.text.strip() if precip else "0%",
            })

        return forecasts

# Usage
if __name__ == "__main__":
    # Guarded so importing this module does not trigger a live scrape.
    wc = WeatherComScraper()
    forecast = wc.get_10day_forecast("10001:4:US")
    for day in forecast:
        print(f"{day['day']}: {day['high']}/{day['low']} - {day['description']}")
Enter fullscreen mode Exit fullscreen mode

Multi-Source Weather Dashboard

import matplotlib.pyplot as plt

def compare_forecasts(city, location_code, owm_key):
    """Print a side-by-side view of OpenWeatherMap vs Weather.com forecasts."""
    owm_df = OpenWeatherScraper(owm_key).get_forecast(city)
    wc_days = WeatherComScraper().get_10day_forecast(location_code)

    print(f"Weather Comparison for {city}")
    print("=" * 50)

    print("\nOpenWeatherMap 5-Day:")
    print(owm_df[["datetime", "temp", "description"]].head(10))

    print("\nWeather.com 10-Day:")
    for entry in wc_days[:5]:
        print(f"  {entry['day']}: {entry['high']}/{entry['low']}")

if __name__ == "__main__":
    # Guarded so importing the module does not trigger live requests with
    # the placeholder key.
    compare_forecasts("Chicago", "60601:4:US", "your_key")
Enter fullscreen mode Exit fullscreen mode

Scaling Weather Data Collection

  • ScraperAPI — JS rendering for Weather.com dynamic pages
  • ThorData — geo-targeted proxies for location-specific forecasts
  • ScrapeOps — monitor collection jobs across multiple weather sources

Conclusion

Combine official APIs (NOAA, OpenWeatherMap) with web scraping (Weather.com) for comprehensive weather data. Use it to power agricultural planning, logistics optimization, or event scheduling tools.


Follow for more data scraping tutorials!

Top comments (0)