DEV Community

agenthustler
agenthustler

Posted on

Scraping Shipping Container Tracking: Maersk, MSC, and CMA CGM with Python

Global shipping container tracking is a massive data problem. Shippers, freight forwarders, and supply chain analysts need real-time visibility across multiple carriers. The big three (Maersk, MSC, CMA CGM) each have their own tracking portal, but there is no unified API across them.

Let's build a multi-carrier container tracker with Python.

Why Container Tracking Data?

  • Freight forwarders manage shipments across multiple carriers simultaneously
  • Supply chain teams need ETAs to plan warehouse receiving
  • Trade analysts track global shipping patterns and port congestion
  • E-commerce businesses want accurate delivery estimates

Setting Up

pip install requests beautifulsoup4 pandas
Enter fullscreen mode Exit fullscreen mode

The Multi-Carrier Tracker

import requests
from bs4 import BeautifulSoup
from datetime import datetime
from dataclasses import dataclass
import time

# ScraperAPI key handed to CarrierTracker and sent as the `api_key` query
# parameter on every fetch. "YOUR_KEY" is a placeholder value.
SCRAPER_API_KEY = "YOUR_KEY"

@dataclass
class ContainerEvent:
    """One entry in a container's tracking history, stored as scraped text."""
    timestamp: str  # date/time text exactly as it appears on the page (not parsed)
    location: str  # location text for the event
    status: str  # carrier's status/description text for the event
    vessel: str = ""  # vessel text when the tracker extracts one; "" otherwise

@dataclass
class ContainerTracking:
    """Tracking summary for one container as reported by one carrier."""
    container_id: str  # container number that was looked up
    carrier: str  # carrier display name, or "Unknown" when no carrier matched
    origin: str  # origin text scraped from the page; "" when the element is absent
    destination: str  # destination text scraped from the page; "" when absent
    current_status: str  # latest status text; "Unknown" / "Not Found" act as sentinels
    eta: str  # ETA text as scraped (not parsed into a date)
    events: list[dict[str, str]]  # one dict per event, built with vars(ContainerEvent)

class CarrierTracker:
    """Scrape carrier container-tracking pages through the ScraperAPI proxy."""

    def __init__(self, api_key: str):
        """Store the ScraperAPI key that is sent with every fetch."""
        self.api_key = api_key

    def _fetch(self, url: str, render: bool = True) -> str:
        """Return the body of *url* fetched through ScraperAPI.

        Args:
            url: Target page to retrieve.
            render: Sent as the ``render`` query parameter ("true"/"false");
                ScraperAPI uses it to toggle JavaScript rendering.

        Returns:
            The response body as text.

        Raises:
            requests.HTTPError: The proxy answered with a 4xx/5xx status.
            requests.RequestException: Connection failure or timeout.
        """
        params = {"api_key": self.api_key, "url": url, "render": str(render).lower()}
        resp = requests.get("https://api.scraperapi.com", params=params, timeout=90)
        # Without this check an error page (bad key, quota exhausted, upstream
        # failure) would be parsed like a tracking page and silently produce
        # an empty result.
        resp.raise_for_status()
        return resp.text

    def _text(self, soup, sel):
        """Return the stripped text of the first match for CSS selector *sel*.

        Args:
            soup: Any object exposing ``select_one`` (a BeautifulSoup document
                or tag).
            sel: CSS selector to look up.

        Returns:
            The matched element's text with surrounding whitespace removed,
            or "" when nothing matches.
        """
        el = soup.select_one(sel)
        return el.get_text(strip=True) if el else ""
Enter fullscreen mode Exit fullscreen mode

Maersk Tracking

    def track_maersk(self, container_id: str) -> ContainerTracking:
        """Scrape the Maersk tracking page for *container_id*.

        Args:
            container_id: Container number inserted into the tracking URL.

        Returns:
            A ContainerTracking with carrier "Maersk". ``current_status`` is
            the page's ``.current-status`` text; when that is missing it falls
            back to the first scraped event's status, and finally to "Unknown"
            so callers can tell that nothing was found.

        Raises:
            Whatever ``self._fetch`` raises for the request.
        """
        url = f"https://www.maersk.com/tracking/{container_id}"
        html = self._fetch(url)
        soup = BeautifulSoup(html, "html.parser")

        events = [
            ContainerEvent(
                timestamp=self._text(event_el, ".event-date"),
                location=self._text(event_el, ".event-location"),
                status=self._text(event_el, ".event-status"),
            )
            for event_el in soup.select(".tracking-event")
        ]

        # An empty status used to be returned as-is, which never equals the
        # "Unknown" sentinel the other carrier trackers use for "no data".
        current_status = (
            self._text(soup, ".current-status")
            or (events[0].status if events else "")
            or "Unknown"
        )

        return ContainerTracking(
            container_id=container_id, carrier="Maersk",
            origin=self._text(soup, ".origin-port"),
            destination=self._text(soup, ".destination-port"),
            current_status=current_status,
            eta=self._text(soup, ".eta-date"),
            # Events are stored as plain dicts rather than dataclass instances.
            events=[vars(e) for e in events]
        )
Enter fullscreen mode Exit fullscreen mode

MSC Tracking

    def track_msc(self, container_id: str) -> ContainerTracking:
        """Scrape the MSC tracking page for *container_id*.

        Each row of ``.tracking-table`` with at least three cells becomes one
        event (timestamp, location, status taken from the first three cells);
        shorter rows are skipped. ``current_status`` is the status of the
        first event found, or "Unknown" when there are no events.
        """
        page = self._fetch(
            f"https://www.msc.com/track-a-shipment?trackingNumber={container_id}"
        )
        doc = BeautifulSoup(page, "html.parser")

        history = []
        for tr in doc.select(".tracking-table tbody tr"):
            tds = tr.select("td")
            if len(tds) < 3:
                continue
            when, where, what = (td.get_text(strip=True) for td in tds[:3])
            history.append(ContainerEvent(timestamp=when, location=where, status=what))

        latest = history[0].status if history else "Unknown"
        return ContainerTracking(
            container_id=container_id,
            carrier="MSC",
            origin=self._text(doc, ".origin"),
            destination=self._text(doc, ".destination"),
            current_status=latest,
            eta=self._text(doc, ".eta"),
            # Events are handed back as plain dicts.
            events=[vars(item) for item in history],
        )
Enter fullscreen mode Exit fullscreen mode

CMA CGM Tracking

    def track_cma_cgm(self, container_id: str) -> ContainerTracking:
        """Scrape the CMA CGM tracking page for *container_id*.

        Every ``.timeline-event`` element becomes one event, including its
        vessel text. ``current_status`` is the status of the first event
        found, or "Unknown" when the page yields no events.
        """
        page = self._fetch(
            f"https://www.cma-cgm.com/ebusiness/tracking/search?SearchBy=Container&Reference={container_id}"
        )
        doc = BeautifulSoup(page, "html.parser")

        history = [
            ContainerEvent(
                timestamp=self._text(node, ".date"),
                location=self._text(node, ".location"),
                status=self._text(node, ".description"),
                vessel=self._text(node, ".vessel"),
            )
            for node in doc.select(".timeline-event")
        ]

        if history:
            latest = history[0].status
        else:
            latest = "Unknown"

        return ContainerTracking(
            container_id=container_id,
            carrier="CMA CGM",
            origin=self._text(doc, ".pol"),
            destination=self._text(doc, ".pod"),
            current_status=latest,
            eta=self._text(doc, ".eta-value"),
            # Events are handed back as plain dicts.
            events=[vars(item) for item in history],
        )
Enter fullscreen mode Exit fullscreen mode

Unified Tracking Interface

    def track(self, container_id: str, carrier: str = None) -> ContainerTracking:
        """Track a container, probing each carrier when none is specified.

        Args:
            container_id: Container number to look up.
            carrier: Optional carrier name. Matching ignores case and any
                non-alphanumeric characters, so "CMA CGM", "cma-cgm" and
                "cmacgm" are equivalent. An unrecognised name falls through
                to auto-detection.

        Returns:
            The named carrier's result when *carrier* is recognised (errors
            from that tracker propagate). Otherwise the first carrier result
            that contains events or a real status, or a placeholder with
            carrier "Unknown" and status "Not Found" when none does.
        """
        carrier_map = {
            "maersk": self.track_maersk,
            "msc": self.track_msc,
            "cmacgm": self.track_cma_cgm,
        }

        if carrier:
            key = "".join(ch for ch in carrier.lower() if ch.isalnum())
            tracker = carrier_map.get(key)
            if tracker:
                return tracker(container_id)

        for attempt, tracker_fn in enumerate(carrier_map.values()):
            if attempt:
                # Pause between probes, including after a failed one, and
                # never after the final attempt.
                time.sleep(2)
            try:
                result = tracker_fn(container_id)
            except Exception:
                # Deliberate best-effort: a failure on one carrier must not
                # stop the remaining carriers from being tried.
                continue
            # An empty status means the page did not match, just like the
            # "Unknown" sentinel; accepting it would end probing too early.
            if result.events or result.current_status not in ("", "Unknown"):
                return result

        return ContainerTracking(
            container_id=container_id, carrier="Unknown",
            origin="", destination="", current_status="Not Found",
            eta="", events=[]
        )

def track_multiple(container_ids: list[str]):
    """Track each container in turn, print a summary, and return the results.

    Args:
        container_ids: Container numbers to look up with carrier auto-detection.

    Returns:
        The tracking results, in the same order as *container_ids*.
    """
    tracker = CarrierTracker(SCRAPER_API_KEY)
    results = []

    for index, cid in enumerate(container_ids):
        if index:
            # Pause between containers only; no delay is needed after the last.
            time.sleep(3)
        print(f"Tracking: {cid}")
        result = tracker.track(cid)
        print(f"  Carrier: {result.carrier}")
        print(f"  Route: {result.origin} -> {result.destination}")
        print(f"  Status: {result.current_status}")
        print(f"  ETA: {result.eta}")
        print(f"  Events: {len(result.events)}")
        results.append(result)

    return results

if __name__ == "__main__":
    # Demo run; guarded so importing this module does not fire network requests.
    track_multiple(["MSKU1234567", "MSCU7654321", "CMAU9876543"])
Enter fullscreen mode Exit fullscreen mode

Scaling Up

For tracking hundreds of containers across carriers, ScraperAPI handles proxy rotation and JavaScript rendering that these shipping portals require. ThorData residential proxies work well for carrier sites that block datacenter IPs. Monitor your tracking pipeline reliability with ScrapeOps.

What You Can Build

  1. Multi-carrier tracking dashboard for freight forwarders
  2. ETA prediction engine using historical transit times
  3. Port congestion monitor from aggregated vessel data
  4. Supply chain visibility API for e-commerce platforms

Container tracking is a B2B problem where companies pay real money for real-time visibility. The fragmentation across carriers is your opportunity.

Top comments (0)