Building a REST API Client Library in Python for Video Data

#python #api #sdk #tutorial

Why Build a Client Library

When your video platform talks to the YouTube Data API from multiple scripts -- the cron fetcher, the metadata enricher, the broken-link checker -- you end up duplicating HTTP logic everywhere. At DailyWatch, we consolidated all YouTube API interactions into a single Python SDK with retry logic, quota tracking, and caching built in.

The Core Client

import time
import hashlib
import json
import requests
from typing import Optional
from dataclasses import dataclass, field
from functools import lru_cache

@dataclass
class APIConfig:
    """Settings consumed by YouTubeClient.

    The API key is excluded from the generated ``repr`` so that printing or
    logging a config object does not leak the credential.
    """

    api_key: str = field(repr=False)  # sent as the ``key`` query parameter
    base_url: str = "https://www.googleapis.com/youtube/v3"
    max_retries: int = 3  # number of attempts made per request
    base_delay: float = 1.0  # seconds slept before the first retry; doubles each retry
    timeout: int = 15  # passed as ``timeout`` to the HTTP GET call
    daily_quota: int = 10000  # quota units the client allows itself to spend
    cache_ttl: int = 3600  # 1 hour

class QuotaExhaustedError(Exception):
    """Signals that the API quota is, or would be, used up."""

class YouTubeClient:
    """Reusable YouTube Data API client with retry, quota tracking, and caching.

    Successful responses are cached in memory per (endpoint, params) pair for
    ``config.cache_ttl`` seconds, and each successful request is charged
    against ``config.daily_quota``.
    """

    def __init__(self, config: "APIConfig"):
        """Create a client that uses *config* for every request."""
        self.config = config
        self.session = requests.Session()
        self.session.headers.update({"Accept": "application/json"})
        self._quota_used = 0  # quota units spent by this instance so far
        # cache key -> (time stored, decoded response body)
        self._cache: dict[str, tuple[float, dict]] = {}

    def _cache_key(self, endpoint: str, params: dict) -> str:
        """Return a hash identifying *endpoint* together with *params*."""
        # sort_keys makes the key independent of dict insertion order.
        raw = f"{endpoint}:{json.dumps(params, sort_keys=True)}"
        return hashlib.md5(raw.encode()).hexdigest()

    def _get_cached(self, key: str) -> Optional[dict]:
        """Return the cached response for *key*, or None if absent or expired."""
        entry = self._cache.get(key)
        if entry is None:
            return None
        stored_at, data = entry
        if time.time() - stored_at < self.config.cache_ttl:
            return data
        del self._cache[key]  # expired: evict so it is not checked again
        return None

    @staticmethod
    def _error_reason(response) -> str:
        """Return the first error ``reason`` in an error body, or "" if unavailable."""
        try:
            return response.json()["error"]["errors"][0].get("reason", "")
        except (ValueError, LookupError, TypeError, AttributeError):
            # Non-JSON or unexpectedly shaped body: treat as "no reason given"
            # so the caller falls through to the generic HTTP error handling.
            return ""

    def _request(self, endpoint: str, params: dict, quota_cost: int = 1) -> dict:
        """Make an API request with retry, backoff, caching, and quota tracking.

        Args:
            endpoint: Path appended to ``config.base_url`` (e.g. ``"videos"``).
            params: Query parameters. The API key is added to a copy, so the
                caller's dict is never modified.
            quota_cost: Quota units charged when the request succeeds.

        Returns:
            The decoded JSON body, freshly fetched or served from the cache.

        Raises:
            QuotaExhaustedError: If an uncached request would exceed
                ``config.daily_quota``, or the API answers 403 with reason
                ``quotaExceeded``.
            requests.HTTPError: For error statuses that are not retried.
            requests.ConnectionError, requests.Timeout: If all attempts are
                used up and at least one failed with a network error (the
                most recent such error is raised).
            RuntimeError: If all attempts are used up without any network
                error having been recorded.
        """
        # Look in the cache first: a hit costs no quota, so it must not be
        # rejected just because the quota is already spent.
        cache_key = self._cache_key(endpoint, params)
        cached = self._get_cached(cache_key)
        if cached is not None:
            return cached

        if self._quota_used + quota_cost > self.config.daily_quota:
            raise QuotaExhaustedError(
                f"Would exceed daily quota ({self._quota_used}/{self.config.daily_quota})"
            )

        url = f"{self.config.base_url}/{endpoint}"
        # Copy instead of mutating: callers may reuse their dict, and a
        # mutated dict would also hash to a different cache key next time.
        query = {**params, "key": self.config.api_key}
        last_exception = None
        final_attempt = self.config.max_retries - 1

        for attempt in range(self.config.max_retries):
            delay = self.config.base_delay * (2 ** attempt)
            try:
                response = self.session.get(url, params=query, timeout=self.config.timeout)
            except (requests.ConnectionError, requests.Timeout) as exc:
                last_exception = exc
                if attempt < final_attempt:  # no point sleeping before giving up
                    time.sleep(delay)
                continue

            if response.status_code == 200:
                data = response.json()
                self._quota_used += quota_cost
                self._cache[cache_key] = (time.time(), data)
                return data

            if response.status_code == 403 and self._error_reason(response) == "quotaExceeded":
                raise QuotaExhaustedError("YouTube API quota exceeded")

            if response.status_code in (429, 500, 503):
                if attempt < final_attempt:
                    time.sleep(delay)
                continue

            response.raise_for_status()

        raise last_exception or RuntimeError(f"Failed after {self.config.max_retries} retries")

    @property
    def quota_remaining(self) -> int:
        """Quota units still available according to this instance's own count."""
        return self.config.daily_quota - self._quota_used

High-Level Methods

Build domain-specific methods on top of the core:

    def get_trending(self, region: str = "US", max_results: int = 25, category_id: str = "") -> list[dict]:
        """Return the "mostPopular" chart items for *region*.

        ``max_results`` is capped at 50; a non-empty ``category_id`` is sent
        as the ``videoCategoryId`` filter.
        """
        query = {
            "part": "snippet,statistics,contentDetails",
            "chart": "mostPopular",
            "regionCode": region,
            "maxResults": min(max_results, 50),
            # Only include the category filter when one was requested.
            **({"videoCategoryId": category_id} if category_id else {}),
        }
        return self._request("videos", query).get("items", [])

    def get_video_details(self, video_ids: list[str]) -> list[dict]:
        """Fetch details for *video_ids*, issuing one request per 50 IDs."""
        # Lazily slice the ID list into consecutive groups of at most 50.
        chunks = (video_ids[start:start + 50] for start in range(0, len(video_ids), 50))
        collected = []
        for chunk in chunks:
            response = self._request(
                "videos",
                {
                    "part": "snippet,statistics,contentDetails",
                    "id": ",".join(chunk),
                },
            )
            collected += response.get("items", [])
        return collected

    def search_videos(self, query: str, region: str = "US", max_results: int = 10) -> list[dict]:
        """Run a video search; each call is charged 100 quota units."""
        result_limit = min(max_results, 50)
        search_params = dict(
            part="snippet",
            q=query,
            type="video",
            regionCode=region,
            maxResults=result_limit,
        )
        response = self._request("search", search_params, quota_cost=100)
        return response.get("items", [])

    def get_categories(self, region: str = "US") -> dict[str, str]:
        """Return a mapping of video category ID to category title for *region*."""
        payload = self._request("videoCategories", {"part": "snippet", "regionCode": region})
        names = {}
        for entry in payload.get("items", []):
            category_id = entry["id"]
            names[category_id] = entry["snippet"]["title"]
        return names

Usage in Practice

# Build the configuration and one shared client up front
config = APIConfig(api_key="AIzaSy...")
client = YouTubeClient(config)

# Pull the trending chart for each region in turn
regions = ["US", "GB", "DE", "FR", "IN", "BR", "AU", "CA"]
for region in regions:
    videos = client.get_trending(region=region, max_results=25)
    print(f"[{region}] {len(videos)} trending videos (quota left: {client.quota_remaining})")

# Look up several videos in one batched call (served from cache when still fresh)
video_ids = ["dQw4w9WgXcQ", "jNQXAC9IVRw"]
details = client.get_video_details(video_ids)

# Searching is the costly operation: 100 quota units per call
results = client.search_videos("python tutorial", region="US")

Key Design Decisions

- Exponential backoff: Each retry doubles the wait time, so a struggling API is not hammered with immediate retries
- In-memory cache: Avoids redundant API calls within the same run
- Quota tracking: Proactively prevents quota exhaustion instead of discovering it mid-pipeline
- Batch-aware: The get_video_details method automatically chunks large ID lists

This client powers every API interaction at DailyWatch. Having retry logic and quota awareness in one place eliminated an entire class of intermittent failures from our pipeline.

This article is part of the Building DailyWatch series. Check out DailyWatch to see these techniques in action.