Global shipping container tracking is a massive data problem. Shippers, freight forwarders, and supply chain analysts need real-time visibility across multiple carriers. The big three (Maersk, MSC, CMA CGM) each have their own tracking portal, but there is no unified API that covers all of them.
Let's build a multi-carrier container tracker with Python.
Why Container Tracking Data?
- Freight forwarders manage shipments across multiple carriers simultaneously
- Supply chain teams need ETAs to plan warehouse receiving
- Trade analysts track global shipping patterns and port congestion
- E-commerce businesses want accurate delivery estimates
Setting Up
pip install requests beautifulsoup4 pandas
The Multi-Carrier Tracker
import requests
from bs4 import BeautifulSoup
from datetime import datetime
from dataclasses import dataclass
import time
# ScraperAPI key passed to CarrierTracker by track_multiple(); "YOUR_KEY" is a
# placeholder that must be replaced with a real key before running.
SCRAPER_API_KEY = "YOUR_KEY"
@dataclass
class ContainerEvent:
    """One row of a container's tracking history, as scraped from a carrier page.

    All values are kept as the raw text extracted from the page; nothing is
    parsed or normalised here.
    """

    # Date/time text of the event, exactly as shown by the carrier.
    timestamp: str
    # Place text of the event.
    location: str
    # Carrier's description of what happened.
    status: str
    # Vessel text; stays empty for trackers that do not extract it.
    vessel: str = ""
@dataclass
class ContainerTracking:
    """Carrier-independent summary of a single container lookup.

    Every field is required and holds the text scraped from the carrier page
    (or a fixed fallback chosen by the tracker that built the record).
    """

    # Container number that was looked up.
    container_id: str
    # Display name of the carrier that produced this record.
    carrier: str
    # Origin and destination text from the page.
    origin: str
    destination: str
    # Latest known status text.
    current_status: str
    # Estimated-arrival text, unparsed.
    eta: str
    # Tracking history; the trackers in this file store one plain dict per
    # ContainerEvent (built with vars()).
    events: list
class CarrierTracker:
    """Container tracker that scrapes carrier tracking pages through ScraperAPI."""

    def __init__(self, api_key: str):
        """Remember the ScraperAPI key that is sent with every fetch.

        Args:
            api_key: Key forwarded as the ``api_key`` request parameter.
        """
        self.api_key = api_key
def _fetch(self, url: str, render: bool = True) -> str:
    """Download *url* through the ScraperAPI proxy endpoint and return the body.

    Args:
        url: Address of the carrier page to retrieve.
        render: Sent to ScraperAPI as ``render=true``/``render=false`` to
            request (or skip) JavaScript rendering of the page.

    Returns:
        The response body as text.

    Raises:
        requests.HTTPError: If ScraperAPI answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
    """
    params = {"api_key": self.api_key, "url": url, "render": str(render).lower()}
    # Rendered pages can be slow to produce, hence the generous timeout.
    resp = requests.get("https://api.scraperapi.com", params=params, timeout=90)
    # Without this check an error page (bad key, quota exhausted, upstream
    # failure) would be parsed as if it were the carrier's tracking page and
    # produce an empty but seemingly valid result.
    resp.raise_for_status()
    return resp.text
def _text(self, soup, sel):
el = soup.select_one(sel)
return el.get_text(strip=True) if el else ""
Maersk Tracking
def track_maersk(self, container_id: str) -> ContainerTracking:
    """Scrape the Maersk tracking page for one container.

    NOTE(review): the CSS selectors below encode the page markup this scraper
    assumes; confirm them against the live site.

    Args:
        container_id: Container number appended to the Maersk tracking URL.

    Returns:
        A ContainerTracking with carrier "Maersk". ``current_status`` is the
        page's status banner when present, otherwise the status of the first
        scraped event, otherwise "Unknown".
    """
    url = f"https://www.maersk.com/tracking/{container_id}"
    soup = BeautifulSoup(self._fetch(url), "html.parser")

    events = [
        ContainerEvent(
            timestamp=self._text(event_el, ".event-date"),
            location=self._text(event_el, ".event-location"),
            status=self._text(event_el, ".event-status"),
        )
        for event_el in soup.select(".tracking-event")
    ]

    # _text() yields "" when the banner is missing. Report "Unknown" in that
    # case, like the MSC and CMA CGM trackers do, so that track() does not
    # mistake an empty Maersk page for a successful lookup.
    current_status = self._text(soup, ".current-status")
    if not current_status:
        current_status = (events[0].status if events else "") or "Unknown"

    return ContainerTracking(
        container_id=container_id,
        carrier="Maersk",
        origin=self._text(soup, ".origin-port"),
        destination=self._text(soup, ".destination-port"),
        current_status=current_status,
        eta=self._text(soup, ".eta-date"),
        # Stored as plain dicts rather than ContainerEvent instances.
        events=[vars(e) for e in events],
    )
MSC Tracking
def track_msc(self, container_id: str) -> ContainerTracking:
    """Scrape the MSC "track a shipment" page for one container.

    NOTE(review): the CSS selectors below encode the page markup this scraper
    assumes; confirm them against the live site.

    Args:
        container_id: Container number sent as the ``trackingNumber`` query
            parameter.

    Returns:
        A ContainerTracking with carrier "MSC". ``current_status`` is the
        status of the first table row, or "Unknown" when no usable row exists.
    """
    url = f"https://www.msc.com/track-a-shipment?trackingNumber={container_id}"
    soup = BeautifulSoup(self._fetch(url), "html.parser")

    # One list of <td> cells per table row; rows with fewer than three cells
    # cannot supply timestamp/location/status and are skipped.
    cell_rows = (row.select("td") for row in soup.select(".tracking-table tbody tr"))
    events = [
        ContainerEvent(
            timestamp=cells[0].get_text(strip=True),
            location=cells[1].get_text(strip=True),
            status=cells[2].get_text(strip=True),
        )
        for cells in cell_rows
        if len(cells) >= 3
    ]

    if events:
        latest_status = events[0].status
    else:
        latest_status = "Unknown"

    return ContainerTracking(
        container_id=container_id,
        carrier="MSC",
        origin=self._text(soup, ".origin"),
        destination=self._text(soup, ".destination"),
        current_status=latest_status,
        eta=self._text(soup, ".eta"),
        # Stored as plain dicts rather than ContainerEvent instances.
        events=[vars(e) for e in events],
    )
CMA CGM Tracking
def track_cma_cgm(self, container_id: str) -> ContainerTracking:
    """Scrape the CMA CGM eBusiness tracking search for one container.

    NOTE(review): the CSS selectors below encode the page markup this scraper
    assumes; confirm them against the live site.

    Args:
        container_id: Container number sent as the ``Reference`` query
            parameter.

    Returns:
        A ContainerTracking with carrier "CMA CGM". ``current_status`` is the
        status of the first timeline entry, or "Unknown" when there is none.
        This is the only tracker here that fills in the event's vessel.
    """
    url = f"https://www.cma-cgm.com/ebusiness/tracking/search?SearchBy=Container&Reference={container_id}"
    soup = BeautifulSoup(self._fetch(url), "html.parser")

    events = [
        ContainerEvent(
            timestamp=self._text(entry, ".date"),
            location=self._text(entry, ".location"),
            status=self._text(entry, ".description"),
            vessel=self._text(entry, ".vessel"),
        )
        for entry in soup.select(".timeline-event")
    ]

    if events:
        latest_status = events[0].status
    else:
        latest_status = "Unknown"

    return ContainerTracking(
        container_id=container_id,
        carrier="CMA CGM",
        origin=self._text(soup, ".pol"),
        destination=self._text(soup, ".pod"),
        current_status=latest_status,
        eta=self._text(soup, ".eta-value"),
        # Stored as plain dicts rather than ContainerEvent instances.
        events=[vars(e) for e in events],
    )
Unified Tracking Interface
def track(self, container_id: str, carrier: str = None, *, retry_delay: float = 2.0) -> ContainerTracking:
    """Track a container with a named carrier, or probe every carrier in turn.

    Args:
        container_id: Container number to look up.
        carrier: Optional carrier name. Case, spaces and punctuation are
            ignored, so "CMA CGM", "cma-cgm" and "cmacgm" are equivalent.
            An unrecognised name falls back to probing all carriers.
        retry_delay: Seconds to wait between successive probes when the
            carrier has to be auto-detected.

    Returns:
        The result of the named carrier's tracker, or when probing, the first
        result whose status is neither empty nor "Unknown". If no carrier
        recognises the container, a placeholder record with carrier "Unknown"
        and status "Not Found" is returned.

    Raises:
        Exception: Whatever the selected tracker raises when *carrier* names
            a known carrier. Errors raised while probing are suppressed.
    """
    carrier_map = {
        "maersk": self.track_maersk,
        "msc": self.track_msc,
        "cmacgm": self.track_cma_cgm,
    }

    if carrier:
        # Reduce the name to letters and digits so common spellings all hit
        # the same map key.
        key = "".join(ch for ch in carrier.lower() if ch.isalnum())
        tracker = carrier_map.get(key)
        if tracker:
            return tracker(container_id)

    for attempt, tracker_fn in enumerate(carrier_map.values()):
        if attempt:
            # Pause only between probes; there is nothing to wait for before
            # the first one or after the last.
            time.sleep(retry_delay)
        try:
            result = tracker_fn(container_id)
        except Exception:
            # Best-effort detection: a carrier whose page cannot be fetched
            # or parsed is simply treated as "not this carrier".
            continue
        # A page that does not know the container may yield an empty status
        # instead of "Unknown"; neither counts as a hit, otherwise the first
        # carrier probed would always win.
        if result.current_status not in ("", "Unknown"):
            return result

    return ContainerTracking(
        container_id=container_id,
        carrier="Unknown",
        origin="",
        destination="",
        current_status="Not Found",
        eta="",
        events=[],
    )
def track_multiple(container_ids: list[str]):
    """Track each container in turn and print a short report for it.

    A CarrierTracker is created from the module-level SCRAPER_API_KEY, the
    carrier is auto-detected for every ID, and the function pauses three
    seconds after each container.

    Args:
        container_ids: Container numbers to look up, processed in order.
    """
    client = CarrierTracker(SCRAPER_API_KEY)
    for container_id in container_ids:
        # Announce the lookup before the (potentially slow) network calls.
        print(f"Tracking: {container_id}")
        info = client.track(container_id)
        report = (
            f" Carrier: {info.carrier}",
            f" Route: {info.origin} -> {info.destination}",
            f" Status: {info.current_status}",
            f" ETA: {info.eta}",
            f" Events: {len(info.events)}",
        )
        for line in report:
            print(line)
        time.sleep(3)
# Demo invocation with three sample container IDs. This is a top-level call,
# so it executes (and performs network requests) whenever the file is run or
# imported.
track_multiple(["MSKU1234567", "MSCU7654321", "CMAU9876543"])
Scaling Up
For tracking hundreds of containers across carriers, ScraperAPI handles proxy rotation and JavaScript rendering that these shipping portals require. ThorData residential proxies work well for carrier sites that block datacenter IPs. Monitor your tracking pipeline reliability with ScrapeOps.
What You Can Build
- Multi-carrier tracking dashboard for freight forwarders
- ETA prediction engine using historical transit times
- Port congestion monitor from aggregated vessel data
- Supply chain visibility API for e-commerce platforms
Container tracking is a B2B problem where companies pay real money for real-time visibility. The fragmentation across carriers is your opportunity.
Top comments (0)