Satellite imagery and Earth observation data are freely available from NASA and ESA. These datasets power everything from climate research to precision agriculture. Let's build Python tools to access and process this data.
Available Satellite Data Sources
- Sentinel Hub (ESA): Sentinel-1 (radar), Sentinel-2 (optical), Sentinel-3 (ocean/land)
- NASA Earthdata: Landsat, MODIS, VIIRS, GPM (precipitation)
- NOAA: Weather satellites, ocean data
- Planet (commercial): Daily global imagery
Setting Up
pip install requests rasterio numpy matplotlib sentinelhub
Accessing NASA Earthdata
NASA's CMR (Common Metadata Repository) API lets you search their entire catalog:
import requests
import json
def search_earthdata(keyword, start_date, end_date, bbox=None):
"""Search NASA Earthdata for datasets."""
url = "https://cmr.earthdata.nasa.gov/search/granules.json"
params = {
"keyword": keyword,
"temporal[]": f"{start_date},{end_date}",
"page_size": 50,
"sort_key": "-start_date"
}
if bbox: # [west, south, east, north]
params["bounding_box[]"] = ",".join(map(str, bbox))
resp = requests.get(url, params=params, timeout=30)
data = resp.json()
results = []
for entry in data.get("feed", {}).get("entry", []):
results.append({
"title": entry.get("title", ""),
"dataset": entry.get("dataset_id", ""),
"time_start": entry.get("time_start", ""),
"time_end": entry.get("time_end", ""),
"links": [
l["href"] for l in entry.get("links", [])
if l.get("rel") == "http://esipfed.org/ns/fedsearch/1.1/data#"
]
})
return results
# Search for Landsat imagery over California
results = search_earthdata(
"Landsat",
"2025-01-01", "2025-03-01",
bbox=[-124.48, 32.53, -114.13, 42.01]
)
print(f"Found {len(results)} granules")
Downloading Sentinel-2 Data
from sentinelhub import (
SHConfig, BBox, CRS, SentinelHubRequest,
DataCollection, MimeType, bbox_to_dimensions
)
def download_sentinel2(bbox_coords, time_interval, resolution=10):
"""Download Sentinel-2 imagery for an area."""
config = SHConfig()
config.sh_client_id = "YOUR_CLIENT_ID"
config.sh_client_secret = "YOUR_CLIENT_SECRET"
bbox = BBox(bbox=bbox_coords, crs=CRS.WGS84)
size = bbox_to_dimensions(bbox, resolution=resolution)
evalscript = """
//VERSION=3
function setup() {
return {
input: [{ bands: ["B04", "B03", "B02", "B08"] }],
output: { bands: 4 }
};
}
function evaluatePixel(sample) {
return [sample.B04, sample.B03, sample.B02, sample.B08];
}
"""
request = SentinelHubRequest(
evalscript=evalscript,
input_data=[
SentinelHubRequest.input_data(
data_collection=DataCollection.SENTINEL2_L2A,
time_interval=time_interval
)
],
responses=[
SentinelHubRequest.output_response("default", MimeType.TIFF)
],
bbox=bbox,
size=size,
config=config
)
data = request.get_data()
return data[0] # numpy array
# Download imagery for a farm area
image = download_sentinel2(
bbox_coords=[-122.5, 37.7, -122.3, 37.9],
time_interval=("2025-01-01", "2025-01-31"),
resolution=10
)
print(f"Image shape: {image.shape}")
Computing Vegetation Index (NDVI)
import numpy as np
import matplotlib.pyplot as plt
def compute_ndvi(sentinel_image):
"""Calculate NDVI from Sentinel-2 bands."""
red = sentinel_image[:, :, 0].astype(float)
nir = sentinel_image[:, :, 3].astype(float)
ndvi = np.where(
(nir + red) == 0, 0,
(nir - red) / (nir + red)
)
return ndvi
def plot_ndvi(ndvi, title="NDVI Map"):
"""Visualize NDVI with color mapping."""
fig, ax = plt.subplots(figsize=(10, 8))
im = ax.imshow(ndvi, cmap="RdYlGn", vmin=-1, vmax=1)
plt.colorbar(im, ax=ax, label="NDVI")
ax.set_title(title)
plt.savefig("ndvi_map.png", dpi=150, bbox_inches="tight")
plt.show()
ndvi = compute_ndvi(image)
plot_ndvi(ndvi, "NDVI - San Francisco Bay Area")
Time Series Analysis
Track vegetation health over time:
def ndvi_time_series(bbox_coords, start_date, end_date, interval_days=16):
"""Generate NDVI time series for an area."""
from datetime import datetime, timedelta
start = datetime.strptime(start_date, "%Y-%m-%d")
end = datetime.strptime(end_date, "%Y-%m-%d")
current = start
series = []
while current < end:
next_date = current + timedelta(days=interval_days)
time_range = (
current.strftime("%Y-%m-%d"),
next_date.strftime("%Y-%m-%d")
)
try:
image = download_sentinel2(bbox_coords, time_range)
ndvi = compute_ndvi(image)
series.append({
"date": current.strftime("%Y-%m-%d"),
"mean_ndvi": float(np.nanmean(ndvi)),
"max_ndvi": float(np.nanmax(ndvi)),
"min_ndvi": float(np.nanmin(ndvi))
})
except Exception as e:
print(f"No data for {current.strftime('%Y-%m-%d')}: {e}")
current = next_date
return pd.DataFrame(series)
Scraping Supplementary Data
Combine satellite data with scraped ground-truth data for better analysis. Use ScraperAPI to collect weather station data, crop reports, and market prices. ThorData helps when scraping agricultural data from international sources, and ScrapeOps monitors your data pipeline.
Practical Applications
def crop_health_report(farm_bbox, date_range):
"""Generate a crop health report for a farm."""
image = download_sentinel2(farm_bbox, date_range)
ndvi = compute_ndvi(image)
total_pixels = ndvi.size
healthy = np.sum(ndvi > 0.6) / total_pixels * 100
stressed = np.sum((ndvi > 0.2) & (ndvi <= 0.6)) / total_pixels * 100
bare = np.sum(ndvi <= 0.2) / total_pixels * 100
report = {
"healthy_vegetation": f"{healthy:.1f}%",
"stressed_vegetation": f"{stressed:.1f}%",
"bare_soil": f"{bare:.1f}%",
"mean_ndvi": f"{np.nanmean(ndvi):.3f}",
"assessment": "Good" if healthy > 70 else "Needs attention"
}
return report
Conclusion
Satellite data scraping opens up powerful possibilities in agriculture, environmental monitoring, urban planning, and climate research. NASA and ESA provide free data. The challenge is processing it efficiently. Start with Sentinel-2 optical data (easiest to work with), master NDVI analysis, then expand to radar data and multi-temporal analysis.
The combination of satellite imagery and ground-truth web scraping creates datasets that would cost millions to collect manually.
Top comments (0)