DEV Community

Emerson Fernandes
Emerson Fernandes

Posted on

1

Scraping all air conditioners from the Mercado Livre ("Free Market") sales site

from bs4 import BeautifulSoup
import requests
from time import sleep
import csv
import re
import math

# Scrape the title, price and link of every air-conditioner listing found in
# the Mercado Livre search results, page by page, into dictItems.

# Browser-like User-Agent sent with every request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36"
    }

# Page size this script assumes, both when turning the result count into a
# page count and when computing each page's starting offset.
ITEMS_PER_PAGE = 50
# Seconds to wait for a response before giving up on a request.
REQUEST_TIMEOUT = 30
# Seconds to pause between consecutive page requests.
PAGE_DELAY = 1

# Compiled once here instead of on every loop iteration.
ITEM_CLASS = re.compile('ui-search-layout__item shops__layout-item')
TITLE_CLASS = re.compile('ui-search-item__title shops__item-title')
PRICE_CLASS = re.compile('andes-money-amount__fraction')

response = requests.get("https://lista.mercadolivre.com.br/ar-condicionado#D[A:ar%20condicionado]", headers=headers, timeout=REQUEST_TIMEOUT)
response.raise_for_status()
soup = BeautifulSoup(response.content, "html.parser")

# Read the total number of results from the counter element. The first
# whitespace-separated token is taken as the number and '.' is stripped from
# it (presumably a thousands separator, e.g. "12.345 resultados").
quantityTag = soup.find('span', class_='ui-search-search-result__quantity-results shops-custom-secondary-font')
quantityParts = quantityTag.get_text().split() if quantityTag is not None else []
if not quantityParts:
    raise RuntimeError('Could not find the result counter on the search page')
items = quantityParts[0].replace('.', '')
# From here on, items holds the number of result pages to visit.
items = math.ceil(int(items) / ITEMS_PER_PAGE)
index = 1

dictItems = {'Title': [], 'Price': [], 'Link': []}

for page in range(1, items + 1):
    # Offset of the first listing on this page: 1, 51, 101, ...
    # Without advancing it, every request would fetch the first page again.
    index = (page - 1) * ITEMS_PER_PAGE + 1
    nextPage = f'https://lista.mercadolivre.com.br/eletrodomesticos/ar-ventilacao/ar-condicionado/ar-condicionado_Desde_{index}_NoIndex_True'
    response = requests.get(nextPage, headers=headers, timeout=REQUEST_TIMEOUT)
    if not response.ok:
        # Stop paging but keep everything collected so far.
        break
    soup = BeautifulSoup(response.content, "html.parser")
    ar = soup.find_all('li', class_=ITEM_CLASS)
    if not ar:
        # A page without listings means there is nothing further to fetch.
        break

    for listing in ar:
        titleTag = listing.find('h2', class_=TITLE_CLASS)
        priceTag = listing.find('span', class_=PRICE_CLASS)
        linkTag = listing.find('a')
        link = linkTag.get('href') if linkTag is not None else None
        if titleTag is None or priceTag is None or link is None:
            # Skip cards that lack one of the fields so the three lists in
            # dictItems always stay the same length.
            continue
        dictItems['Title'].append(titleTag.get_text())
        dictItems['Price'].append(priceTag.get_text())
        dictItems['Link'].append(link)

    # Be polite to the server between page requests.
    sleep(PAGE_DELAY)
Enter fullscreen mode Exit fullscreen mode

Top comments (0)