DEV Community

wangzhi
wangzhi

Posted on

A web crawler for scraping the examples on the official ECharts website, implemented with Puppeteer

I have had some free time these days and have been thinking about whether the use of ECharts configuration options can be simplified. For now it is just a rough idea: so far only the generation of Storybook stories from the official examples has been implemented, and I am still thinking about how to simplify the configuration options themselves.

Project directory structure

  • /bots
    • /bots.mjs
    • /template.mjs
    • /package.json
    • /assests/

package.json content

{
  "name": "bots",
  "private": true,
  "version": "0.0.0",
  "type": "module",
  "scripts": {
    "run": "node bots/bots.mjs"
  },
  "dependencies": {
    "puppeteer": "^22.6.3"
  },
  "devDependencies": {}
}

Enter fullscreen mode Exit fullscreen mode

bots.mjs content

import puppeteer from "puppeteer";
import fs from "node:fs";
import {
  storiesTpl,
  storiesArgs,
  generOptions,
  generOptionsWithFn,
} from "./template.mjs";

// URL of the ECharts examples index page; the crawler navigates here first.
const ECHARTS_BASE_URL = "https://echarts.apache.org/examples/en/index.html";

/**
 * Lower-cases the whole string and then upper-cases its first character.
 *
 * @param str - The text to normalize.
 * @returns The normalized text, or "" when `str` is falsy or has length 0.
 */
function capitalizeFirstLetter(str) {
  const isBlank = !str || str.length === 0;
  if (isBlank) return "";

  const lowered = str.toLowerCase();
  const head = lowered.charAt(0);
  const tail = lowered.slice(1);

  return `${head.toUpperCase()}${tail}`;
}

/**
 * Resolves with the first newly created page target whose URL contains
 * "editor". The listener is detached as soon as the promise settles so that
 * listeners do not pile up across repeated calls.
 *
 * @param browser - The Puppeteer browser to watch for new targets.
 * @returns A promise for the editor page.
 */
function waitForEditorPage(browser) {
  return new Promise((resolve, reject) => {
    const onTargetCreated = async (target) => {
      if (target.type() !== "page") return;

      try {
        const targetPage = await target.page();
        if (targetPage && targetPage.url().includes("editor")) {
          browser.off("targetcreated", onTargetCreated);
          resolve(targetPage);
        }
      } catch (error) {
        browser.off("targetcreated", onTargetCreated);
        reject(error);
      }
    };

    browser.on("targetcreated", onTargetCreated);
  });
}

/**
 * Reads the example source shown in the editor page and adapts it for the
 * generated stories file.
 *
 * @param editorPage - The opened example editor page.
 * @returns The adapted example source text.
 */
async function readExampleSource(editorPage) {
  // A very large viewport is used so the editor's text layer holds the whole
  // example rather than only the part that fits on screen.
  await editorPage.setViewport({ width: 40000, height: 20000 });
  await editorPage.waitForSelector(".ace_text-layer");

  // Fixed pause before reading the text layer, after the selector appears.
  await new Promise((resolve) => setTimeout(resolve, 3000));

  const content = await editorPage.evaluate(
    () => document.querySelector(".ace_text-layer").innerText
  );

  return (
    content
      .replace(/\[\]/g, "[] as any")
      .replace(/<click to see more...>/g, "")
      // Word boundaries keep identifiers that merely contain "var" intact.
      .replace(/\bvar\b/g, "let")
  );
}

/**
 * Entry point: opens the examples index page, walks every chart category and
 * every example in it, reads each example's source from its editor page and
 * writes one `<namespace>.stories.ts` file per category into ./bots/assests/.
 * The browser is always closed, even when crawling fails.
 */
(async function () {
  const browser = await puppeteer.launch();

  try {
    const page = await browser.newPage();

    // Navigate the page to a URL
    await page.goto(ECHARTS_BASE_URL);

    // Set screen size
    await page.setViewport({ width: 1080, height: 1024 });

    const searchResultSelector = ".example-list-panel > div";

    const results = await page.$$(searchResultSelector);

    for (const element of results) {
      // Build the namespace from the category heading.
      const ele = await element.$(".chart-type-head");

      if (!ele) {
        console.warn("category without .chart-type-head, skipped");
        continue;
      }

      const heading = await ele.evaluate((el) => el.textContent);

      // The last whitespace-separated token of the heading is dropped, then
      // every newline and slash is stripped from the remaining tokens.
      const headingTokens = heading.split(" ").filter(Boolean);

      const namespace = headingTokens
        .slice(0, headingTokens.length - 1)
        .map((item) => item.replace(/\n/g, "").replace(/\//g, ""))
        .filter(Boolean)
        .join("");

      console.log(`${namespace} start`);

      const instances = await element.$$(".row .example-list-item");

      const components = [];

      for (const instance of instances) {
        const titleElement = await instance.$(".example-title");
        const link = await instance.$(".example-link");

        if (!titleElement || !link) {
          console.warn(`${namespace}: example without title or link, skipped`);
          continue;
        }

        const subTitle = await titleElement.evaluate((el) => el.textContent);

        // Remove "+", "(", ")" and "-" from each word, then join the words
        // with only their first letter capitalized.
        const title = subTitle
          .split(" ")
          .filter(Boolean)
          .map((item) => capitalizeFirstLetter(item.replace(/[+()-]/g, "")))
          .join("");

        // Start listening before the click so the new tab is not missed.
        const newPagePromise = waitForEditorPage(browser);

        await link.click();

        const newPage = await newPagePromise;

        try {
          const content = await readExampleSource(newPage);

          const options = content.includes("myChart")
            ? generOptionsWithFn({ options: content })
            : generOptions({ options: content });

          components.push({ options, title });
        } finally {
          // Close the editor tab even if extraction failed.
          await newPage.close();
        }
      }

      const args = components
        // Examples whose generated code contains "$" are left out.
        .filter(({ options }) => !options.includes("$"))
        .map(({ options, title }) =>
          storiesArgs({ options: options, name: title })
        )
        .join("\r\n");

      const scripts = storiesTpl({
        namespace: `Charts/${namespace}`,
        components: args,
      });

      fs.writeFileSync(`./bots/assests/${namespace}.stories.ts`, scripts);
      console.log(`${namespace} end`);
    }
  } finally {
    // Without this the launched browser keeps the Node process alive.
    await browser.close();
  }
})().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});

Enter fullscreen mode Exit fullscreen mode

template.mjs content

/**
 * Renders the source text of a Storybook stories file.
 *
 * The header disables the lint rules listed in it and type checking via
 * `// @ts-nocheck`, then declares the `meta` default export and the `Story`
 * type; the rendered story exports are appended after it.
 *
 * @param namespace - Storybook title, interpolated into `meta.title`.
 * @param components - Already rendered story exports, appended verbatim.
 * @returns The complete stories file content.
 */
export const storiesTpl = ({ namespace, components }) => `
/* eslint-disable @typescript-eslint/no-unused-vars */
/* eslint-disable @typescript-eslint/no-explicit-any */
/* eslint-disable prefer-const */
// @ts-nocheck

import type { Meta, StoryObj } from "@storybook/react";
// your components
import { Charts } from "./Charts";
import * as echarts from 'echarts'

const ROOT_PATH="https://echarts.apache.org/"

const meta = {
  title: "${namespace}",
  component: Charts,
  parameters: {
    layout: "centered",
  },
  tags: ["autodocs"],
  // More on argTypes: https://storybook.js.org/docs/api/argtypes
  argTypes: {},
} satisfies Meta<typeof Charts>;

export default meta;
type Story = StoryObj<typeof meta>;
${components}
`;

/**
 * Renders one named story export whose `args` object holds the given
 * pre-rendered property text.
 *
 * @param name - Identifier used for the exported story constant.
 * @param options - Property text placed inside `args`.
 * @returns The story export as source text.
 */
export const storiesArgs = ({ name, options }) => {
  const lines = [
    " ;",
    `export const ${name}: Story = {`,
    "    args: {",
    `      ${options},`,
    "    },",
    "  };",
  ];

  return lines.join("\n");
};

/**
 * Wraps example source in a `getEchartsInstance` function property that
 * receives `myChart`, runs the source and returns `option`.
 *
 * @param options - Example source text placed inside the function body.
 * @returns The property as source text.
 */
export const generOptionsWithFn = ({ options }) => {
  const bodyLines = [
    "getEchartsInstance: function(myChart: any) {",
    "  let option;",
    `  ${options};`,
    "  return option",
    "}",
  ];

  return bodyLines.join("\n");
};

/**
 * Wraps example source in an `options` property whose value is an
 * immediately invoked function that runs the source and returns `option`.
 *
 * @param options - Example source text placed inside the function body.
 * @returns The property as source text.
 */
export const generOptions = ({ options }) => {
  const bodyLines = [
    "options:(function() {",
    "    let option;",
    `    ${options};`,
    "    return option",
    "})()",
  ];

  return bodyLines.join("\n");
};

Enter fullscreen mode Exit fullscreen mode

After running the script, wait for a while, and the .stories.ts files will be generated in the bots/assests directory (the directory name is spelled "assests" in the code).

FAQ

  1. Unfortunately, so far this is just an example and there is still a considerable way to go.

Top comments (0)