DEV Community

ahmet gedik
ahmet gedik

Posted on

Implementing Sitemap Ping for Faster Crawling

When you update content frequently, you want search engines to crawl your new pages quickly. Here's how I implement sitemap generation and ping on TopVideoHub, where new trending videos are added every few hours.

Generating Dynamic Sitemaps

With thousands of video pages across 9 regions, a single sitemap file isn't practical. I use a sitemap index that references multiple sub-sitemaps:

/**
 * Builds the XML sitemap index and the paginated video sitemaps.
 *
 * Every URL is rooted at BASE_URL. Each video sitemap page holds at most
 * MAX_URLS_PER_SITEMAP entries, and the index lists one sub-sitemap per page.
 */
class SitemapGenerator {
    private const string BASE_URL = 'https://topvideohub.com';
    private const int MAX_URLS_PER_SITEMAP = 5000;

    public function __construct(private readonly \PDO $db) {}

    /**
     * Render the sitemap index document.
     *
     * The index references the static pages sitemap, one video sitemap per
     * page of MAX_URLS_PER_SITEMAP rows in the `videos` table, and the
     * categories sitemap.
     *
     * @return string The complete <sitemapindex> XML document.
     */
    public function generateIndex(): string {
        $xml = '<?xml version="1.0" encoding="UTF-8"?>' . "\n";
        $xml .= '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . "\n";

        // Static pages sitemap
        $xml .= $this->sitemapEntry('sitemap-pages.xml');

        // Video sitemaps (paginated). Drivers may return the count as a
        // string, so normalise it before doing arithmetic.
        $totalVideos = (int) $this->db->query(
            "SELECT COUNT(*) FROM videos"
        )->fetchColumn();
        $pages = (int) ceil($totalVideos / self::MAX_URLS_PER_SITEMAP);

        for ($i = 1; $i <= $pages; $i++) {
            $xml .= $this->sitemapEntry("sitemap-videos-{$i}.xml");
        }

        // Category sitemaps per region
        $xml .= $this->sitemapEntry('sitemap-categories.xml');

        $xml .= '</sitemapindex>';
        return $xml;
    }

    /**
     * Render one page of the video sitemap.
     *
     * @param int $page 1-based page number; page N covers rows
     *                  (N-1)*MAX_URLS_PER_SITEMAP onward, newest fetched first.
     * @return string The complete <urlset> XML document for that page.
     * @throws \InvalidArgumentException If $page is less than 1.
     */
    public function generateVideoSitemap(int $page): string {
        if ($page < 1) {
            // A page below 1 would yield a negative OFFSET, which is invalid SQL.
            throw new \InvalidArgumentException("Sitemap page must be >= 1, got {$page}");
        }

        $offset = ($page - 1) * self::MAX_URLS_PER_SITEMAP;

        // PDO::query() does not accept bound parameters, so the statement has
        // to be prepared. LIMIT/OFFSET are bound explicitly as integers
        // because some drivers quote untyped parameters as strings, which is
        // a syntax error in a LIMIT clause.
        $stmt = $this->db->prepare(
            "SELECT video_id, title, thumbnail_url, published_at
             FROM videos
             ORDER BY fetched_at DESC
             LIMIT ? OFFSET ?"
        );
        $stmt->bindValue(1, self::MAX_URLS_PER_SITEMAP, \PDO::PARAM_INT);
        $stmt->bindValue(2, $offset, \PDO::PARAM_INT);
        $stmt->execute();
        $videos = $stmt->fetchAll(\PDO::FETCH_ASSOC);

        $xml = '<?xml version="1.0" encoding="UTF-8"?>' . "\n";
        $xml .= '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"' . "\n";
        $xml .= '  xmlns:video="http://www.google.com/schemas/sitemap-video/1.1">' . "\n";

        foreach ($videos as $v) {
            // rawurlencode() keeps the id safe both as a URL path segment and
            // as XML text (it leaves no &, <, > or quotes behind).
            $loc = self::BASE_URL . "/watch/" . rawurlencode((string) $v['video_id']);

            $xml .= "  <url>\n";
            $xml .= "    <loc>" . $loc . "</loc>\n";

            // Skip <lastmod> rather than emit a bogus epoch date when the
            // stored timestamp is missing or cannot be parsed.
            $publishedTs = strtotime((string) $v['published_at']);
            if ($publishedTs !== false) {
                $xml .= "    <lastmod>" . date('Y-m-d', $publishedTs) . "</lastmod>\n";
            }

            $xml .= "    <changefreq>weekly</changefreq>\n";
            $xml .= "    <video:video>\n";
            $xml .= "      <video:thumbnail_loc>" . self::xml((string) $v['thumbnail_url']) . "</video:thumbnail_loc>\n";
            $xml .= "      <video:title>" . self::xml((string) $v['title']) . "</video:title>\n";
            $xml .= "    </video:video>\n";
            $xml .= "  </url>\n";
        }

        $xml .= '</urlset>';
        return $xml;
    }

    /**
     * Render a single <sitemap> entry for the index, stamped with the current time.
     *
     * @param string $filename Sitemap file name relative to BASE_URL.
     */
    private function sitemapEntry(string $filename): string {
        return "  <sitemap>\n"
            . "    <loc>" . self::BASE_URL . "/" . $filename . "</loc>\n"
            . "    <lastmod>" . date('Y-m-d\TH:i:sP') . "</lastmod>\n"
            . "  </sitemap>\n";
    }

    /**
     * Escape a value for use as XML text content.
     */
    private static function xml(string $value): string {
        return htmlspecialchars($value, ENT_XML1 | ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    }
}
Enter fullscreen mode Exit fullscreen mode

Pinging Search Engines

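After generating new sitemaps, ping Google and Bing. (Note: Google announced the deprecation of its sitemap ping endpoint in June 2023, and Bing retired anonymous sitemap pings in 2022, so verify that these endpoints still respond before relying on them. IndexNow, covered below, is the supported alternative for Bing.)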

/**
 * Notifies search engines of a changed sitemap by requesting each engine's
 * ping URL with the sitemap URL appended as a query parameter.
 */
class SitemapPinger {
    private const array PING_URLS = [
        'google' => 'https://www.google.com/ping?sitemap=',
        'bing'   => 'https://www.bing.com/ping?sitemap=',
    ];

    /**
     * Ping every configured engine and report the outcome per engine.
     *
     * A failure for one engine never prevents the others from being pinged.
     *
     * @param string $sitemapUrl Absolute URL of the sitemap to announce.
     * @return array<string, array{success: bool, code: int, error: ?string}>
     *         Keyed by engine name. `code` is the final HTTP status (0 when no
     *         response was received) and `error` holds the cURL error message
     *         on transport failure, null otherwise.
     */
    public static function ping(string $sitemapUrl): array {
        $results = [];

        foreach (self::PING_URLS as $engine => $pingBase) {
            $url = $pingBase . urlencode($sitemapUrl);

            $ch = curl_init($url);
            if ($ch === false) {
                // curl_init() can fail; record it instead of passing false
                // on to curl_setopt_array().
                $results[$engine] = [
                    'success' => false,
                    'code'    => 0,
                    'error'   => 'curl_init() failed',
                ];
                continue;
            }

            curl_setopt_array($ch, [
                CURLOPT_RETURNTRANSFER => true,
                CURLOPT_TIMEOUT => 10,
                CURLOPT_FOLLOWLOCATION => true,
            ]);

            $body = curl_exec($ch);
            $httpCode = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
            // Capture the transport error before the handle is closed.
            $error = $body === false ? curl_error($ch) : null;
            curl_close($ch);

            $results[$engine] = [
                // A transfer that failed midway is not a success even if a
                // 2xx status line was already received.
                'success' => $body !== false && $httpCode >= 200 && $httpCode < 300,
                'code'    => $httpCode,
                'error'   => $error,
            ];
        }

        return $results;
    }
}
Enter fullscreen mode Exit fullscreen mode

Integration with Cron

Ping happens automatically after each video fetch on TopVideoHub:

// At the end of fetch_videos.php
if ($newVideosCount > 0) {
    // Regenerate the sitemap index. LOCK_EX keeps concurrent runs from
    // interleaving their writes to the same file.
    $generator = new SitemapGenerator($db);
    $written = file_put_contents('public/sitemap.xml', $generator->generateIndex(), LOCK_EX);

    if ($written === false) {
        // Do not announce a sitemap that failed to update.
        echo "Sitemap write failed; skipping search engine ping\n";
    } else {
        // Ping search engines
        $results = SitemapPinger::ping('https://topvideohub.com/sitemap.xml');
        foreach ($results as $engine => $result) {
            $status = $result['success'] ? 'OK' : 'FAIL';
            echo "Ping {$engine}: {$status} (HTTP {$result['code']})\n";
        }
    }
}
Enter fullscreen mode Exit fullscreen mode

IndexNow for Instant Indexing

For even faster indexing, implement IndexNow (supported by Bing and Yandex):

/**
 * Minimal client that submits changed URLs to the IndexNow endpoint.
 */
class IndexNow {
    private const string ENDPOINT = 'https://api.indexnow.org/indexnow';

    /** Maximum number of URLs sent in a single submission. */
    private const int MAX_URLS_PER_REQUEST = 10000;

    public function __construct(
        private readonly string $apiKey,
        private readonly string $host = 'topvideohub.com',
    ) {}

    /**
     * Submit a batch of URLs in a single POST request.
     *
     * Only the first MAX_URLS_PER_REQUEST URLs are sent; any extras are
     * silently dropped, so callers with larger sets must batch them.
     *
     * @param array $urls URLs to submit. Keys are ignored.
     * @return bool True when the endpoint answered with a 2xx status. False
     *              for an empty list (no request is made), an unencodable
     *              payload, a transport failure, or a non-2xx response.
     */
    public function submit(array $urls): bool {
        // array_values() guarantees a JSON array even if the caller passes a
        // string-keyed or sparse array.
        $urlList = array_slice(array_values($urls), 0, self::MAX_URLS_PER_REQUEST);
        if ($urlList === []) {
            return false;
        }

        $payload = json_encode([
            'host'    => $this->host,
            'key'     => $this->apiKey,
            'urlList' => $urlList,
        ]);
        if ($payload === false) {
            // e.g. invalid UTF-8 in a URL; there is nothing valid to send.
            return false;
        }

        $ch = curl_init(self::ENDPOINT);
        if ($ch === false) {
            return false;
        }

        curl_setopt_array($ch, [
            CURLOPT_POST           => true,
            CURLOPT_POSTFIELDS     => $payload,
            CURLOPT_HTTPHEADER     => ['Content-Type: application/json'],
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_TIMEOUT        => 15,
        ]);

        $response = curl_exec($ch);
        $code = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
        curl_close($ch);

        return $response !== false && $code >= 200 && $code < 300;
    }
}
Enter fullscreen mode Exit fullscreen mode

This approach helps new trending video pages get discovered and indexed within hours rather than days, although the final indexing decision always rests with each search engine.

Top comments (0)