DEV Community

Eko Priyanto
Eko Priyanto

Posted on

Database sekolah Indonesia

Database sekolah Indonesia, dibuat dengan cara scraping dari API tidak resmi: https://api-sekolah-indonesia.vercel.app/sekolah/s?sekolah=&perPage=100

Berikut skrip multi-cURL untuk menghasilkan file CSV dari API tersebut.

<?php

declare(strict_types=1);

/**
 * Scrapes the unofficial Indonesian school API page by page and writes every
 * school record to a CSV file located next to this script.
 *
 * Pages are downloaded in parallel batches with curl_multi. A page whose
 * download fails (transport error, HTTP 429/5xx, or a body that is not JSON)
 * is retried a few times; pages that still fail are reported at the end
 * instead of being dropped silently. The scrape stops after the first batch
 * in which no page returned any rows.
 */

/**
 * Downloads the given pages in parallel.
 *
 * @param string    $baseUrl URL prefix to which the page number is appended.
 * @param list<int> $pages   Page numbers to request.
 *
 * @return array<int, array<mixed>|null> Decoded JSON per page number, or null
 *                                       when the page could not be fetched or
 *                                       decoded and should be retried.
 */
function fetchPages(string $baseUrl, array $pages): array
{
    $mh = curl_multi_init();
    $handles = [];
    $results = [];

    foreach ($pages as $page) {
        $ch = curl_init();
        if ($ch === false) {
            // Could not create a handle; treat the page as failed so it is retried.
            $results[$page] = null;
            continue;
        }

        curl_setopt_array($ch, [
            CURLOPT_URL => $baseUrl . $page,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_TIMEOUT => 30,
        ]);

        curl_multi_add_handle($mh, $ch);
        $handles[$page] = $ch;
    }

    // Drive all transfers to completion.
    $active = 0;
    do {
        $status = curl_multi_exec($mh, $active);

        // curl_multi_select() can return -1 immediately on some systems;
        // sleep briefly in that case so the loop does not spin at 100% CPU.
        if ($active > 0 && curl_multi_select($mh, 1.0) === -1) {
            usleep(100000);
        }
    } while ($active > 0 && $status === CURLM_OK);

    foreach ($handles as $page => $ch) {
        $body = curl_multi_getcontent($ch);
        $code = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $data = (is_string($body) && $body !== '') ? json_decode($body, true) : null;

        // HTTP code 0 means no response was received at all (timeout, DNS, ...).
        $transient = $code === 0 || $code === 429 || $code >= 500;
        $results[$page] = (!$transient && is_array($data)) ? $data : null;

        curl_multi_remove_handle($mh, $ch);
        curl_close($ch);
    }

    curl_multi_close($mh);

    return $results;
}

set_time_limit(0);

// CONFIG
$baseUrl    = "https://api-sekolah-indonesia.vercel.app/sekolah/s?sekolah=&perPage=100&page=";
$maxBatch   = 10; // number of parallel requests per batch
$maxRetries = 3;  // extra attempts for pages that failed to download
$output     = __DIR__ . "/sekolah.csv";

// CSV column name => key of the same value in an API row.
$columns = [
    'npsn'      => 'npsn',
    'sekolah'   => 'sekolah',
    'bentuk'    => 'bentuk',
    'status'    => 'status',
    'alamat'    => 'alamat_jalan',
    'kecamatan' => 'kecamatan',
    'kabupaten' => 'kabupaten_kota',
    'provinsi'  => 'propinsi',
    'lintang'   => 'lintang',
    'bujur'     => 'bujur',
];

// Open the CSV file; abort early if the target is not writable.
$fp = fopen($output, 'w');
if ($fp === false) {
    echo "Gagal membuka file: $output\n";
    exit(1);
}

// CSV header. Separator, enclosure and escape are passed explicitly (they are
// the historical defaults) so the output stays the same while avoiding the
// implicit-default deprecation in recent PHP versions.
fputcsv($fp, array_keys($columns), ',', '"', '\\');

$page = 1;
$running = true;
$failedPages = [];

while ($running) {

    $pending = range($page, $page + $maxBatch - 1);
    $results = [];
    $endPage = PHP_INT_MAX; // lowest page known to contain no rows

    // Fetch the batch, retrying only the pages that failed.
    for ($attempt = 0; $attempt <= $maxRetries && $pending !== []; $attempt++) {
        if ($attempt > 0) {
            sleep($attempt); // simple linear back-off between retries
        }

        $failed = [];
        foreach (fetchPages($baseUrl, $pending) as $p => $data) {
            if ($data === null) {
                $failed[] = $p;
                continue;
            }

            $results[$p] = $data;
            if (empty($data['dataSekolah'])) {
                $endPage = min($endPage, $p);
            }
        }

        // Pages after a known-empty page are past the end of the data set,
        // so there is no point in retrying them.
        $pending = [];
        foreach ($failed as $p) {
            if ($p < $endPage) {
                $pending[] = $p;
            }
        }
    }

    foreach ($pending as $p) {
        $failedPages[] = $p;
        echo "Page $p gagal diunduh\n";
    }

    // Write results in page order; retries may have filled them out of order.
    ksort($results);
    $running = false;

    foreach ($results as $p => $data) {
        $rows = $data['dataSekolah'] ?? null;
        if (!is_array($rows) || $rows === []) {
            continue;
        }

        $running = true;

        foreach ($rows as $row) {
            if (!is_array($row)) {
                continue;
            }

            $fields = [];
            foreach ($columns as $key) {
                // Missing, null or non-scalar values become an empty cell
                // instead of raising warnings or type errors in trim().
                $value = $row[$key] ?? '';
                $fields[] = is_scalar($value) ? trim((string) $value) : '';
            }

            if (fputcsv($fp, $fields, ',', '"', '\\') === false) {
                fclose($fp);
                echo "Gagal menulis ke file: $output\n";
                exit(1);
            }
        }

        echo "Page $p selesai\n";
    }

    $page += $maxBatch;

    if ($running) {
        echo "Batch selesai, lanjut ke page $page\n";
    }
}

fclose($fp);

if ($failedPages !== []) {
    echo "Peringatan: page berikut gagal diunduh dan tidak masuk CSV: "
        . implode(', ', $failedPages) . "\n";
}

echo "Selesai! File: $output\n";
Enter fullscreen mode Exit fullscreen mode

Top comments (0)