DEV Community

Emerson Fernandes
Emerson Fernandes

Posted on

Scraping all air conditioners from the Mercado Livre ("Free Market") sales site

from bs4 import BeautifulSoup
import requests
from time import sleep
import csv
import re
import math

# Browser-like User-Agent sent with every request made by this script.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36"
}

# Fetch the first results page once, only to read the total number of listings.
# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.get(
    "https://lista.mercadolivre.com.br/ar-condicionado#D[A:ar%20condicionado]",
    headers=headers,
    timeout=30,
)
soup = BeautifulSoup(response.content, "html.parser")

quantity_tag = soup.find(
    'span',
    class_='ui-search-search-result__quantity-results shops-custom-secondary-font',
)
if quantity_tag is None:
    # Fail with a readable message instead of an AttributeError on None.
    raise RuntimeError(
        "Could not find the result counter on the search page "
        f"(HTTP status {response.status_code}); the page layout may have changed."
    )

# The counter text starts with a number that uses '.' as thousands separator.
# Pull the leading digit group with a regex rather than slicing up to the first
# space: slicing drops a digit when there is no space (str.find returns -1) and
# yields an empty string when the text starts with whitespace.
quantity_match = re.search(r'\d[\d.]*', quantity_tag.get_text())
if quantity_match is None:
    raise RuntimeError(
        f"Result counter has no number in it: {quantity_tag.get_text()!r}"
    )
total_results = int(quantity_match.group().replace('.', ''))

# Number of result pages to visit, assuming 50 listings per page.
items = math.ceil(total_results / 50)
index = 1



# Column-oriented result store: the three lists stay index-aligned, with one
# entry per scraped listing.
dictItems = {'Title': [], 'Price': [], 'Link': []}

# Listings per result page; must match the divisor used to compute `items`.
RESULTS_PER_PAGE = 50

# Class patterns are loop-invariant, so compile them once up front.
LISTING_CLASS = re.compile('ui-search-layout__item shops__layout-item')
TITLE_CLASS = re.compile('ui-search-item__title shops__item-title')
PRICE_CLASS = re.compile('andes-money-amount__fraction')

for page in range(1, items + 1):
    # The URL takes the 1-based position of the first listing on the page
    # (1, 51, 101, ...). Derive it from the page number so each iteration
    # fetches a different page instead of re-downloading the first one.
    first_result = (page - 1) * RESULTS_PER_PAGE + 1
    nextPage = f'https://lista.mercadolivre.com.br/eletrodomesticos/ar-ventilacao/ar-condicionado/ar-condicionado_Desde_{first_result}_NoIndex_True'
    response = requests.get(nextPage, headers=headers, timeout=30)
    soup = BeautifulSoup(response.content, "html.parser")

    for listing in soup.find_all('li', class_=LISTING_CLASS):
        title_tag = listing.find('h2', class_=TITLE_CLASS)
        price_tag = listing.find('span', class_=PRICE_CLASS)
        link_tag = listing.find('a', href=True)

        # Skip entries that lack any of the three fields, so the columns stay
        # aligned and a single odd listing does not abort the whole run.
        if title_tag is None or price_tag is None or link_tag is None:
            continue

        dictItems['Title'].append(title_tag.get_text())
        dictItems['Price'].append(price_tag.get_text())
        # The href attribute is already a plain string; store it directly.
        dictItems['Link'].append(link_tag['href'])
Enter fullscreen mode Exit fullscreen mode

Top comments (0)