DEV Community

J
J

Posted on

I Don't Know How To Describe.

from datetime import datetime
from logger_config import Logger_config
import asyncio
import aiohttp

class Trademe_Scraper:
    """Concurrently scrapes paginated property listings from the Trade Me API.

    Each page is fetched with ``scrape``; ``Gather`` fans out one task per
    page; ``run`` drives the event loop and applies a simple row-count
    integrity check before returning the accumulated listings.
    """

    # Minimum number of listings for a run to be considered complete.
    MIN_LISTINGS = 31990
    # Pages fetched per run: 11 pages * 3000 rows comfortably covers
    # MIN_LISTINGS.  TODO(review): the original upper bound was garbled
    # ("range(1,/)") -- confirm the real page count against the API.
    PAGE_COUNT = 11

    def __init__(self):
        self.logger = Logger_config().get_logger()
        # TODO(review): the original source had a redacted value here
        # ("??? Who I AM").  The headers target api.trademe.co.nz and the
        # query params are page/rows, so a property search endpoint is
        # assumed -- confirm the real URL before deploying.
        self.url = 'https://api.trademe.co.nz/v1/search/property/residential.json'
        self.headers = {
            'authority': 'api.trademe.co.nz',
            'method': 'GET',
            # Fixed typo: original read 'pplication/json'.
            'accept': 'application/json, text/plain, */*',
            'accept-encoding': 'gzip, deflate, br',
            'accept-language': 'en,en-US;q=0.9',
            'cache-control': 'max-age=0',
            'newrelic': 'eyJ2IjpbMCwx=XSwiZCI6eyJ0eSI6IkJyb3dzZXIiLCJhYyI6IjQzODYzOCIsImFwIjoiMzgwMDc2Nzg0IiwiaWQiOiIxOGZjMWRkNzM1YTE4MmViIiwidHIiOiI4NGRiNzIyODE2Yjk1NzhjZDEzMTIxYjc3MGQ3MzQwMCIsInRpIjoxNjcwMjg3Mjg0NzU2fX0=',
            'origin': 'https://www.trademe.co.nz',
            'referer': 'https://www.trademe.co.nz/',
            'sec-ch-ua': '"Not?A_Brand";v="8", "Chromium";v="108", "Google Chrome";v="108"',
            'sec-ch-ua-mobile': '?0',
            'sec-ch-ua-platform': '"Windows"',
            'sec-fetch-dest': 'empty',
            'sec-fetch-mode': 'cors',
            'sec-fetch-site': 'some-site',
            'upgrade-insecure-requests': '1',
        }
        self.total_batches = []   # all listings accumulated across pages
        self.box = 0              # count of pages successfully processed
        self.timestamp = datetime.utcnow()  # single timestamp for the whole run

    @staticmethod
    def _convert_field(record, key):
        """Convert ``record[key]`` from '/Date(ms)/' form to ISO-8601, in place.

        Trade Me timestamps arrive like ``"/Date(1670287284756)/"``; the
        millisecond epoch is sliced out ([6:-2]) and converted.  Missing or
        malformed values are left untouched -- best-effort, matching the
        original code's silent-skip behaviour, but only data errors are
        swallowed (not arbitrary exceptions).
        """
        try:
            millis = int(record[key][6:-2])
            record[key] = datetime.utcfromtimestamp(millis / 1000.0).isoformat()
        except (KeyError, IndexError, TypeError, ValueError, OverflowError, OSError):
            pass

    async def scrape(self, ID):
        """Fetch one page of listings and append them to ``total_batches``.

        ID: 1-based page number, passed as the ``page`` query parameter.
        On a non-200 response the page is logged as critical and skipped.
        """
        params = {
            'page': f'{ID}',
            'rows': '3000',
        }
        async with aiohttp.ClientSession() as session:
            async with session.get(url=self.url, headers=self.headers, params=params) as r:
                if r.status != 200:
                    self.logger.critical(f"Scraping ERROR {ID}")
                    return
                json_data = await r.json()
                batch = json_data['List']
                for listing in batch:
                    listing["G_Date"] = self.timestamp
                    for field in ("StartDate", "EndDate", "AsAt"):
                        self._convert_field(listing, field)
                    # Original only converted OpenHomes[0] and [1];
                    # generalized to every open-home entry.
                    for open_home in listing.get("OpenHomes") or []:
                        self._convert_field(open_home, "Start")
                        self._convert_field(open_home, "End")
                self.total_batches.extend(batch)
                self.box += 1
                self.logger.info(f"box: {self.box}, len: {len(self.total_batches)}")

    async def Gather(self):
        """Fan out one scrape task per page and await them all concurrently."""
        tasks = [self.scrape(page) for page in range(1, self.PAGE_COUNT + 1)]
        await asyncio.gather(*tasks)

    def run(self):
        """Run the full scrape; return the listings, or False on shortfall.

        A run is considered successful only when more than MIN_LISTINGS
        rows were collected (simple data-integrity check).
        """
        asyncio.run(self.Gather())
        if len(self.total_batches) > self.MIN_LISTINGS:
            self.logger.info(f"Scraping Success, return total_batches. box: {self.box}, {len(self.total_batches)}")
            return self.total_batches
        self.logger.critical(
            f"Scraping unsuccess, Not meet batch requirements. box: {self.box}, {len(self.total_batches)}")
        return False
Enter fullscreen mode Exit fullscreen mode

Top comments (0)