DEV Community

Scrapfly for Scrapfly

Posted on • Originally published at scrapfly.io on

How to Scrape Vestiaire Collective for Fashion Product Data

How to Scrape Vestiaire Collective for Fashion Product Data

How to Scrape Vestiaire Collective for Fashion Product Data

Vestiaire Collective is a luxury fashion resale platform from France. It's a popular web scraping target as it's one of the biggest second-hand markets for luxury fashion items.

In this tutorial, we'll take a quick look at how to scrape Vestiaire Collective using Python. In this guide we'll cover:

  • Scrape Vestiaire Collective product listing data.
  • Find product listings using Vestiaire Collective sitemaps.

This is a very easy scraper as we'll be using hidden web data scraping to effortlessly collect product and seller data.

<!--kg-card-end: markdown--><!--kg-card-begin: markdown-->

Why Scrape Vestiaire Collective?

Vestiaire Collective is a major exchange for luxury fashion items. Scraping this website can be useful for a number of reasons:

  • Luxury fashion market analysis
  • Competitive analysis
  • Market prediction

For more on web scraping uses see our web scraping use case hub.

<!--kg-card-end: markdown--><!--kg-card-begin: markdown-->

Scrape Preview

We'll be scraping the whole product dataset available on Vestiaire Collective which includes:

  • Product details like name, descriptions and features.
  • Product media (photos, videos).
  • Product pricing.
  • Seller details.

Here's an example dataset we'll be collecting with our Python scraper:

Example Product Dataset

{
  "id": "32147447",
  "type": "product",
  "name": "Sweatshirt",
  "price": {
    "currency": "CAD",
    "cents": 23033,
    "formatted": "CDN$230.33"
  },
  "isLocal": true,
  "description": "Worn once anine bing tiger sweatshirt sz M in excellent condition",
  "likeCount": 3,
  "path": "/women-clothing/knitwear/anine-bing/beige-cotton-anine-bing-knitwear-32147447.shtml",
  "sold": false,
  "reserved": false,
  "negotiable": true,
  "inStock": false,
  "measurementFormatted": "Size: 8 US",
  "receipt": false,
  "available": true,
  "consignment": false,
  "prohibited": false,
  "localizedDescription": "Worn once anine bing tiger sweatshirt sz M in excellent condition",
  "originalDescription": "Worn once anine bing tiger sweatshirt sz M in excellent condition",
  "originalDescriptionLanguage": "en",
  "metadata": {
    "title": "Sweatshirt Anine Bing Beige size 8 US in Cotton - 32147447",
    "description": "Buy your sweatshirt Anine Bing on Vestiaire Collective, the luxury consignment store online. Second-hand Sweatshirt Anine Bing Beige in Cotton available. 32147447",
    "keywords": "Anine Bing Cotton Knitwear"
  },
  "warehouse": {
    "name": "Brooklyn",
    "localizedName": "Brooklyn"
  },
  "pictures": [
    {
      "alt": "Sweatshirt Anine Bing",
      "path": "32147447-1_2.jpg"
    },
    {
      "alt": "Buy Anine Bing Sweatshirt online",
      "path": "32147447-2_2.jpg"
    },
    {
      "alt": "Luxury Anine Bing Knitwear Women ",
      "path": "32147447-3_2.jpg"
    },
    {
      "alt": "Second hand Clothing Women ",
      "path": "32147447-4_2.jpg"
    },
    {
      "alt": "Sweatshirt Anine Bing",
      "path": "32147447-5_2.jpg"
    }
  ],
  "size": {
    "id": "7",
    "type": "size",
    "size": "8",
    "standard": "US",
    "localizedStandard": "US"
  },
  "brand": {
    "id": "5344",
    "type": "brand",
    "name": "Anine Bing",
    "localizedName": "anine bing",
    "url": {
      "original": "http://vestiairecollective.com/anine-bing/",
      "path": "/anine-bing/",
      "url": "http://vestiairecollective.com/anine-bing/"
    }
  },
  "material": {
    "id": "2",
    "type": "material",
    "name": "Cotton",
    "localizedName": "Cotton"
  },
  "color": {
    "id": "2",
    "type": "color",
    "name": "Beige",
    "localizedName": "Beige"
  },
  "condition": {
    "id": "",
    "type": "condition",
    "description": "Very good condition"
  },
  "universe": {
    "id": "1",
    "type": "universe",
    "name": "Women",
    "localizedName": "Women"
  },
  "category": {
    "id": "56",
    "type": "category",
    "name": "Knitwear",
    "localizedName": "Knitwear",
    "parent": {
      "id": "2",
      "type": "category",
      "name": "Clothing",
      "localizedName": "Clothing"
    }
  },
  "subcategory": {
    "id": "17",
    "type": "subcategory",
    "name": "Sweatshirts",
    "localizedName": "Sweatshirts"
  },
  "season": {
    "id": "3",
    "type": "season",
    "name": "All seasons",
    "localizedName": "All seasons"
  },
  "model": {
    "id": "0",
    "type": "model",
    "name": "",
    "localizedName": ""
  },
  "seller": {
    "id": "9797796",
    "type": "user",
    "firstname": "kate",
    "username": "kate9797796",
    "hyperwalletActive": false,
    "alreadyDepositedAProduct": false,
    "mood": "",
    "country": "United States",
    "countryISO": "US",
    "civility": {
      "name": "miss",
      "localizedName": "miss",
      "idGender": 3
    },
    "language": {
      "name": "en",
      "localizedName": "en",
      "code": "en"
    },
    "hasWallet": false,
    "badges": [
      "recommended",
      "direct-shipping",
      "expert-seller"
    ],
    "statistics": {
      "productsWished": 0,
      "productsSold": 126,
      "productsListed": 585,
      "productsBought": 0,
      "passRate": 90,
      "usuallyShipsWithin": "1-2 days"
    },
    "sellerRating": {
      "badge": "Expert",
      "goals": {
        "conformity": 1,
        "cx": 0,
        "shipping": 0.93,
        "volume": 32,
        "tags": {
          "volume": true,
          "shipping": true,
          "conformity": true
        }
      },
      "goalsThresholds": [
        {
          "category": "volume",
          "max_value": 5,
          "thresholds": [
            {
              "label": "Trusted",
              "value": 2
            },
            {
              "label": "Expert",
              "value": 5
            }
          ]
        },
        {
          "category": "conformity",
          "max_value": 1,
          "thresholds": [
            {
              "label": "Trusted",
              "value": 0.8
            },
            {
              "label": "Expert",
              "value": 0.9
            }
          ]
        },
        {
          "category": "shipping",
          "max_value": 1,
          "thresholds": [
            {
              "label": "Trusted",
              "value": 0.8
            },
            {
              "label": "Expert",
              "value": 0.9
            }
          ]
        }
      ],
      "achievementsGoals": [
        {
          "category": "volume",
          "achievements": [
            {
              "badge": "Trusted"
            },
            {
              "badge": "Expert"
            }
          ],
          "tip": "Achieved"
        },
        {
          "category": "conformity",
          "achievements": [
            {
              "badge": "Trusted"
            },
            {
              "badge": "Expert"
            }
          ],
          "tip": "Achieved"
        },
        {
          "category": "shipping",
          "achievements": [
            {
              "badge": "Trusted"
            },
            {
              "badge": "Expert"
            }
          ],
          "tip": "Achieved"
        }
      ]
    },
    "picture": {
      "path": "/profil/missing_avatar.gif"
    },
    "social": {
      "nbFollowers": 225,
      "nbFollows": 7,
      "productsLiked": 331,
      "communityRank": 6914,
      "followed": false
    },
    "vacation": {
      "active": false
    },
    "segment": "C2C"
  },
  "creationDate": "2023-03-30T20:34:48Z",
  "meshLinks": {
    "topCategory": {
      "name": "Women Clothing",
      "localizedName": "Women Clothing",
      "url": {
        "url": "http://vestiairecollective.com//women-clothing/",
        "path": "/women-clothing/"
      }
    },
    "category": {
      "name": "Knitwear",
      "localizedName": "Knitwear",
      "url": {
        "url": "http://vestiairecollective.com//women-clothing/knitwear/",
        "path": "/women-clothing/knitwear/"
      }
    },
    "categoryBrand": {
      "name": "Anine Bing Knitwear",
      "localizedName": "Anine Bing Knitwear",
      "url": {
        "url": "http://vestiairecollective.com//women-clothing/knitwear/anine-bing/",
        "path": "/women-clothing/knitwear/anine-bing/"
      }
    },
    "categoryBrandModelMaterial": {
      "name": "Anine Bing Cotton Knitwear",
      "localizedName": "Anine Bing Cotton Knitwear",
      "url": {
        "url": "http://vestiairecollective.com//women-clothing/knitwear/anine-bing/cotton/",
        "path": "/women-clothing/knitwear/anine-bing/cotton/"
      }
    }
  },
  "alternateVersions": [
    {
      "language": "de",
      "path": "/damen-kleidung/pullover/anine-bing/beige-baumwolle-anine-bing-pullover-32147447.shtml"
    },
    {
      "language": "x-default",
      "path": "/women-clothing/knitwear/anine-bing/beige-cotton-anine-bing-knitwear-32147447.shtml"
    },
    {
      "language": "us",
      "path": "/women-clothing/knitwear/anine-bing/beige-cotton-anine-bing-knitwear-32147447.shtml"
    },
    {
      "language": "en",
      "path": "/women-clothing/knitwear/anine-bing/beige-cotton-anine-bing-knitwear-32147447.shtml"
    },
    {
      "language": "es",
      "path": "/mujer-ropa/jerseis-chalecos/anine-bing/jerseis-chalecos-anine-bing-de-algodon-beige-32147447.shtml"
    },
    {
      "language": "fr",
      "path": "/vetements-femme/pulls-gilets/anine-bing/pullgilet-anine-bing-en-coton-beige-32147447.shtml"
    },
    {
      "language": "it",
      "path": "/donna-abbigliamento/maglioni-gilet/anine-bing/maglioni-gilet-anine-bing-beige-cotone-32147447.shtml"
    }
  ],
  "shouldBeGone": false,
  "indexation": {
    "index": true,
    "follow": true,
    "crawlPagination": false
  },
  "buyerFees": [
    {
      "rateType": "FLAT",
      "value": 2500,
      "description": "",
      "cost": {
        "currency": "CAD",
        "cents": 2500,
        "formatted": "CDN$25"
      }
    }
  ],
  "dutyAndTax": {
    "currency": "CAD",
    "cents": 0,
    "formatted": "CDN$0"
  },
  "flags": [
    "direct-shipping"
  ]
}

Enter fullscreen mode Exit fullscreen mode

<!--kg-card-end: markdown--><!--kg-card-begin: markdown-->

Setup

To scrape this target we'll need a few Python packages commonly used in web scraping. Since we'll be using the hidden web data scraping approach all we need is two packages:

  • httpx - powerful HTTP client which we'll be using to retrieve the HTML pages.
  • parsel - HTML parser which we'll be using to extract hidden JSON datasets.

These packages can be installed using Python's pip console command:

$ pip install httpx parsel

Enter fullscreen mode Exit fullscreen mode

For Scrapfly users there's also a Scrapfly SDK version of each code example. The SDK can be installed using pip as well:

$ pip install "scrapfly-sdk[all]"

Enter fullscreen mode Exit fullscreen mode

<!--kg-card-end: markdown--><!--kg-card-begin: markdown-->

Scrape Product Data

Let's start by taking a look at a single product page and how we can scrape it using Python. For example, let's take this product page:

/beige-cotton-anine-bing-knitwear-32147447.shtml

We could parse the page HTML using CSS selectors or XPath, but since Vestiaire Collective uses the Next.js JavaScript framework we can extract the dataset directly from the page source:

How to Scrape Vestiaire Collective for Fashion Product Data

We can find this by inspecting the page source and looking for a unique product identifier like the name or id (ctrl+f). In the example above we can see it's under the <script id="__NEXT_DATA__"> HTML element.

This is called hidden web data scraping and it's a really simple and effective way to scrape data from websites that use javascript frameworks like next.js. To scrape it all we have to do:

  1. Retrieve the product HTML page.
  2. Find the hidden JSON dataset using CSS selectors and parsel.
  3. Load JSON as Python dictionary using json.loads.
  4. Select the product fields.

In practical Python this would look something like this:

Python

ScrapFly

import asyncio
import json
import httpx
from parsel import Selector

# create HTTP client with default headers that look like a web browser and enable HTTP2 version
client = httpx.AsyncClient(
    follow_redirects=True,
    http2=True,
    headers={
        # Chrome user agent strings start with "Mozilla/5.0"
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36",
        # quality values (q=...) must be in the 0-1 range
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.9",
        "Accept-Encoding": "gzip, deflate, br",
    },
)

def find_hidden_data(html) -> dict:
    """Extract the hidden Next.js dataset embedded in the page HTML.

    Args:
        html: HTML source of the page.

    Returns:
        The parsed JSON contents of the ``<script id="__NEXT_DATA__">`` element.

    Raises:
        ValueError: if the page has no ``__NEXT_DATA__`` script element.
    """
    # the id selector and the ::text pseudo-element are written without spaces,
    # otherwise the expression is not a valid CSS selector
    data = Selector(html).css("script#__NEXT_DATA__::text").get()
    if data is None:
        raise ValueError("could not find the __NEXT_DATA__ script element in the page")
    return json.loads(data)

async def scrape_product(url: str):
    """Scrape a single product page and return its product dataset.

    The page is retrieved with the module-level HTTP client, its hidden page
    dataset is parsed and the value stored under props -> pageProps -> product
    is returned.
    """
    page = await client.get(url)
    hidden_data = find_hidden_data(page.text)
    # the page dataset holds more than the product, keep only the product part
    return hidden_data["props"]["pageProps"]["product"]

# example scrape run: scrape one product page and print its dataset
product_url = "https://www.vestiairecollective.com/women-clothing/knitwear/anine-bing/beige-cotton-anine-bing-knitwear-32147447.shtml"
print(asyncio.run(scrape_product(product_url)))


import asyncio
import json
from scrapfly import ScrapeConfig, ScrapflyClient, ScrapeApiResponse

# Scrapfly client shared by the scrape functions below
scrapfly = ScrapflyClient(
    key="YOUR SCRAPFLY KEY",
    max_concurrency=10,
)

def find_hidden_data(result: ScrapeApiResponse) -> dict:
    """Extract the hidden NEXT_DATA dataset from the scraped page HTML.

    Args:
        result: Scrapfly scrape result of the page.

    Returns:
        The parsed JSON contents of the ``<script id="__NEXT_DATA__">`` element.

    Raises:
        ValueError: if the page has no ``__NEXT_DATA__`` script element.
    """
    # the id selector and the ::text pseudo-element are written without spaces,
    # otherwise the expression is not a valid CSS selector
    data = result.selector.css("script#__NEXT_DATA__::text").get()
    if data is None:
        raise ValueError("could not find the __NEXT_DATA__ script element in the page")
    return json.loads(data)

async def scrape_product(url: str) -> dict:
    """scrape a single Vestiaire Collective product page for product data"""
    config = ScrapeConfig(
        url=url,
        cache=True,  # use cache while developing to speed up scraping for repeated script runs
        asp=True,  # Anti-Scraping Protection bypass allows to scrape protected pages
    )
    result = await scrapfly.async_scrape(config)
    # keep only the product part of the hidden page dataset
    return find_hidden_data(result)["props"]["pageProps"]["product"]

# example run of 1 product scrape
print(asyncio.run(scrape_product("https://www.vestiairecollective.com/women-clothing/knitwear/anine-bing/beige-cotton-anine-bing-knitwear-32147447.shtml")))

Enter fullscreen mode Exit fullscreen mode

In just a few lines of Python code, we extracted the whole product dataset which includes all of the product details and seller information!

Next up, let's take a look at how to find product listings using Vestiaire Collective sitemaps.

<!--kg-card-end: markdown--><!--kg-card-begin: markdown-->

Finding Products

Vestiaire Collective has an extensive sitemap suite that can be used to find all of the product listings. So, to find product pages we'll be scraping sitemaps.

The Vestiaire Collective sitemap index is available at:

/sitemaps/https_sitemap-en.xml

It contains sitemaps split into various categories, such as by brand, new listings and item type (clothing, shoes):

<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
  <sitemap>
    <!-- sitemap url and category clues, this one is for brands -->
    <loc>https://www.vestiairecollective.com/sitemaps/https_en-brands-1.xml</loc>
    <!-- when the sitemap was updated -->
    <lastmod>2023-04-07</lastmod> 
  </sitemap>
  <sitemap>
    <loc>https://www.vestiairecollective.com/sitemaps/https_en-new_items-1.xml</loc>
    <lastmod>2023-04-07</lastmod>
  </sitemap>
  ...
</sitemapindex>

Enter fullscreen mode Exit fullscreen mode

Each of these sitemaps contains 50 000 product listings.

For our example, let's scrape the newest listings which can be found on the new_items.xml sitemaps.

The new_items-1.xml sitemap contains the newest 50_000 items. Let's see how to scrape it:

Python

ScrapFly

import asyncio
import json
from typing import Dict, List

import httpx
from parsel import Selector

# HTTP/2 client with default headers that look like a web browser
client = httpx.AsyncClient(
    follow_redirects=True,
    http2=True,
    headers={
        # Chrome user agent strings start with "Mozilla/5.0"
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36",
        # quality values (q=...) must be in the 0-1 range
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.9",
        "Accept-Encoding": "gzip, deflate, br",
    },
)

def find_hidden_data(html) -> dict:
    """Extract the hidden Next.js dataset embedded in the page HTML.

    Args:
        html: HTML source of the page.

    Returns:
        The parsed JSON contents of the ``<script id="__NEXT_DATA__">`` element.

    Raises:
        ValueError: if the page has no ``__NEXT_DATA__`` script element.
    """
    # the id selector and the ::text pseudo-element are written without spaces,
    # otherwise the expression is not a valid CSS selector
    data = Selector(html).css("script#__NEXT_DATA__::text").get()
    if data is None:
        raise ValueError("could not find the __NEXT_DATA__ script element in the page")
    return json.loads(data)

async def scrape_product(url: str):
    """Scrape a single product page and return its product dataset.

    Returns None (after printing a notice) when the request went through a
    308 redirect, which marks a product that is no longer available.
    """
    page = await client.get(url)
    # look for the first 308 hop in the redirect history of the response
    gone = next((hop for hop in page.history if hop.status_code == 308), None)
    if gone is not None:
        print(f"product {gone.url} is no longer available")
        return None
    hidden_data = find_hidden_data(page.text)
    # the page dataset holds more than the product, keep only the product part
    return hidden_data["props"]["pageProps"]["product"]

async def scrape_sitemap(url: str, max_pages: int = 100) -> List[Dict]:
    """Scrape a Vestiaire Collective sitemap and the products it lists.

    Args:
        url: URL of a sitemap that lists product pages.
        max_pages: maximum number of product pages to scrape.

    Returns:
        Datasets of the scraped products. Products for which
        ``scrape_product`` returned ``None`` (no longer available) are left
        out, so the result only contains product dictionaries.
    """
    # retrieve sitemap
    print(f"scraping sitemap page: {url}")
    response_sitemap = await client.get(url)
    product_urls = Selector(response_sitemap.text).css("url>loc::text").getall()

    print(f"found {len(product_urls)} products in the sitemap: {url}\n scraping the first {max_pages} products")
    # scrape products concurrently using asyncio
    product_scrapes = [asyncio.create_task(scrape_product(product_url)) for product_url in product_urls[:max_pages]]
    products = await asyncio.gather(*product_scrapes)
    # drop the None placeholders of unavailable products
    return [product for product in products if product is not None]

# example scrape run: scrape the first 5 products of the new items sitemap
sitemap_url = "https://www.vestiairecollective.com/sitemaps/https_en-new_items-1.xml"
print(asyncio.run(scrape_sitemap(sitemap_url, max_pages=5)))


import asyncio
from typing import Dict, List

from scrapfly import ScrapeConfig, ScrapflyClient, ScrapeApiResponse

# NOTE: find_hidden_data() used below is the helper from the ScrapFly product scraping example
scrapfly = ScrapflyClient(key="YOUR SCRAPFLY KEY", max_concurrency=10)

async def scrape_sitemap(url: str, max_pages: int = 100) -> List[Dict]:
    """Collect product datasets for the product pages listed in a sitemap.

    Only the first ``max_pages`` listed product pages are scraped; pages that
    were redirected with a 308 are reported and skipped.
    """
    print(f"scraping sitemap page: {url}")
    sitemap = await scrapfly.async_scrape(ScrapeConfig(url=url, asp=True))
    listed_urls = sitemap.selector.css("url>loc::text").getall()

    print(f"found {len(listed_urls)} products in the sitemap: {url}\n scraping the first {max_pages} products")
    to_scrape = [ScrapeConfig(url=listed_url, asp=True) for listed_url in listed_urls[:max_pages]]
    scraped = []
    async for page in scrapfly.concurrent_scrape(to_scrape):
        # Vestiaire Collective redirects to product category if product is no longer available (sold, deleted etc.)
        was_redirected = any(hop['http_code'] == 308 for hop in page.context['redirects'])
        if was_redirected:
            print(f"Product page {page.scrape_config.url} is no longer available")
        else:
            hidden_data = find_hidden_data(page)
            scraped.append(hidden_data['props']['pageProps']['product'])
    return scraped

# example scrape: scrape the first 10 newest listings
asyncio.run(scrape_sitemap("https://www.vestiairecollective.com/sitemaps/https_en-new_items-1.xml", max_pages=10))

Enter fullscreen mode Exit fullscreen mode

Above, we've used simple XML parsing using parsel to extract URLs from the new listings sitemap. Then we scrape hidden web data of each product like we've done in the previous chapter.

Sitemaps are great for finding scrape targets quickly and efficiently. Though to further scale our scraper up let's take a look at how to avoid blocking using Scrapfly SDK.

<!--kg-card-end: markdown--><!--kg-card-begin: markdown-->

Avoiding Blocking with ScrapFly

Vestiaire Collective can be difficult to scrape at scale as it blocks web scraping using the Cloudflare anti-scraping service. So, to scale up our scrapers, we'll need to use proxies or other tools that avoid scraper blocking, such as the Scrapfly API.

How to Scrape Vestiaire Collective for Fashion Product Data
Scrapfly service does the heavy lifting for you!

Scrapfly API is a perfect tool for scaling up web scrapers and avoiding being blocked. It's a drop-in replacement for the tools we used in this guide and comes with scraper power-up features like:

All these tools can be easily accessed through Python SDK:

from scrapfly import ScrapeConfig, ScrapflyClient

client = ScrapflyClient(key="YOUR SCRAPFLY KEY")
result = client.scrape(ScrapeConfig(
    url="https://www.vestiairecollective.com/women-clothing/knitwear/anine-bing/beige-cotton-anine-bing-knitwear-32147447.shtml",
    # enable scraper blocking service bypass
    asp=True,
    # optional - render javascript using headless browsers:
    render_js=True,
))
print(result.content)

Enter fullscreen mode Exit fullscreen mode

For more on web scraping Vestiaire Collective with ScrapFly check out the Full Scraper Code section.

<!--kg-card-end: markdown--><!--kg-card-begin: markdown-->

FAQ

To wrap up our guide on how to scrape Vestiaire Collective, let's take a look at some frequently asked questions.

Is it legal to scrape Vestiaire Collective?

Yes. All of the data we scraped in this tutorial is publicly available, which is perfectly legal to scrape. However, attention should be paid when using scraped seller data as it can be protected by GDPR or copyright in Europe.

Can Vestiaire Collective be crawled?

Yes. Crawling is a form of web scraping where the scraper discovers product listings on its own, and Vestiaire Collective offers many discovery points such as recommendations, search and sitemaps.

Summary

In this quick tutorial, we took a look at how to scrape Vestiaire Collective using Python. We covered how to use the hidden web data scraping approach to quickly extract product datasets from HTML pages. To find the products we've covered how to use sitemaps to quickly collect all of the product listings by category.

To avoid blocking we've taken a look at Scrapfly API scaling solution which can be used to scale your scraping projects to collect public datasets like this one in a matter of minutes!

<!--kg-card-end: markdown--><!--kg-card-begin: markdown-->
Get Your FREE API KeyDiscover ScrapFly
<!--kg-card-end: markdown--><!--kg-card-begin: markdown-->

Full Scraper Code

Here's the full Vestiaire Collective product scraper using Python and Scrapfly Python SDK:

💙 This code should only be used as a reference. To scrape data from Vestiaire Collective at scale you'll need to adjust it to your preferences and environment

import asyncio
import os
import json
from pathlib import Path
from typing import Dict, List
from scrapfly import ScrapeConfig, ScrapflyClient, ScrapeApiResponse

# the API key is read from the SCRAPFLY_KEY environment variable
scrapfly = ScrapflyClient(
    key=os.environ["SCRAPFLY_KEY"],
    max_concurrency=10,
)

def find_hidden_data(result: ScrapeApiResponse) -> dict:
    """Extract the hidden NEXT_DATA dataset from the scraped page HTML.

    Args:
        result: Scrapfly scrape result of the page.

    Returns:
        The parsed JSON contents of the ``<script id="__NEXT_DATA__">`` element.

    Raises:
        ValueError: if the page has no ``__NEXT_DATA__`` script element.
    """
    # the id selector and the ::text pseudo-element are written without spaces,
    # otherwise the expression is not a valid CSS selector
    data = result.selector.css("script#__NEXT_DATA__::text").get()
    if data is None:
        raise ValueError("could not find the __NEXT_DATA__ script element in the page")
    return json.loads(data)

async def scrape_product(url: str) -> dict:
    """scrape a single Vestiaire Collective product page for product data"""
    # cache and asp are both switched on for the product page request
    config = ScrapeConfig(url=url, cache=True, asp=True)
    result = await scrapfly.async_scrape(config)
    # keep only the product part of the hidden page dataset
    return find_hidden_data(result)["props"]["pageProps"]["product"]

async def scrape_sitemap(url: str, max_pages: int = 100) -> List[Dict]:
    """Collect product datasets for the product pages listed in a sitemap.

    Only the first ``max_pages`` listed product pages are scraped; pages that
    were redirected with a 308 are reported and skipped.
    """
    print(f"scraping sitemap page: {url}")
    sitemap = await scrapfly.async_scrape(ScrapeConfig(url=url, asp=True))
    listed_urls = sitemap.selector.css("url>loc::text").getall()

    print(f"found {len(listed_urls)} products in the sitemap: {url}\n scraping the first {max_pages} products")
    to_scrape = [ScrapeConfig(url=listed_url, asp=True) for listed_url in listed_urls[:max_pages]]
    scraped = []
    async for page in scrapfly.concurrent_scrape(to_scrape):
        # Vestiaire Collective redirects to product category if product is no longer available (sold, deleted etc.)
        was_redirected = any(hop["http_code"] == 308 for hop in page.context["redirects"])
        if was_redirected:
            print(f"Product page {page.scrape_config.url} is no longer available")
        else:
            hidden_data = find_hidden_data(page)
            scraped.append(hidden_data["props"]["pageProps"]["product"])
    return scraped

async def example_run():
    """
    this example run will scrape example product and sitemap for 5 newest items
    save them to ./results/product.json and ./results/sitemap.json respectively
    (the results directory is created next to this file)
    """
    out_dir = Path(__file__).parent / "results"
    out_dir.mkdir(exist_ok=True)

    product = await scrape_product("https://www.vestiairecollective.com/women-clothing/knitwear/anine-bing/beige-cotton-anine-bing-knitwear-32147447.shtml")
    # ensure_ascii=False keeps non-ASCII characters as they are, so the files are written
    # explicitly as UTF-8 instead of relying on the platform's default encoding
    out_dir.joinpath("product.json").write_text(json.dumps(product, indent=2, ensure_ascii=False), encoding="utf-8")

    sitemap_products = await scrape_sitemap("https://www.vestiairecollective.com/sitemaps/https_en-new_items-1.xml", max_pages=5)
    out_dir.joinpath("sitemap.json").write_text(json.dumps(sitemap_products, indent=2, ensure_ascii=False), encoding="utf-8")

# run the example only when the file is executed as a script
if __name__ == "__main__":
    asyncio.run(example_run())

Enter fullscreen mode Exit fullscreen mode

<!--kg-card-end: markdown--><!--kg-card-begin: html-->{<br> &quot;@context&quot;: &quot;<a href="https://schema.org">https://schema.org</a>&quot;,<br> &quot;@type&quot;: &quot;FAQPage&quot;,<br> &quot;mainEntity&quot;: [<br> {<br> &quot;@type&quot;: &quot;Question&quot;,<br> &quot;name&quot;: &quot;Is it legal to scrape Vestiaire Collective?&quot;,<br> &quot;acceptedAnswer&quot;: {<br> &quot;@type&quot;: &quot;Answer&quot;,<br> &quot;text&quot;: &quot;<p>Yes. All of the data we scraped in this tutorial is available publically which is perfectly legal to scrape. However, attention should be paid when using scraped seller data as it can be protected by GDPR or copyright in Europe.</p>&quot;<br> }<br> },<br> {<br> &quot;@type&quot;: &quot;Question&quot;,<br> &quot;name&quot;: &quot;Can Vestiaire Collective be crawled?&quot;,<br> &quot;acceptedAnswer&quot;: {<br> &quot;@type&quot;: &quot;Answer&quot;,<br> &quot;text&quot;: &quot;<p>Yes. Crawling is a form of web scraping where the scraper discovers product listing on it&#39;s own and Visetiaire Collective offers many discovery points such as recommendations, search and sitemaps.</p>&quot;<br> }<br> }<br> ]<br> }<!--kg-card-end: html-->

Top comments (0)