from datetime import datetime
from logger_config import Logger_config
import asyncio
import aiohttp
class Trademe_Scraper:
    """Asynchronously scrape residential property listings from the Trade Me API.

    One ``scrape`` task is spawned per results page; each task pulls a page of
    listings, converts the API's ``/Date(<ms>)/`` timestamp fields to ISO-8601
    strings in place, and appends the listings to ``total_batches``.
    ``run()`` drives the whole fan-out and returns the accumulated listings,
    or ``False`` if fewer rows than expected were collected.
    """

    # Pages fetched are 1..LAST_PAGE inclusive.
    # NOTE(review): the original source was garbled here (`range(1,/)`);
    # 11 pages x 3000 rows is consistent with the 31990-row integrity
    # threshold below — confirm against the live result count.
    LAST_PAGE = 11
    # Rows requested per page (API `rows` query parameter).
    ROWS_PER_PAGE = 3000
    # Minimum total rows for the run to be considered successful.
    MIN_EXPECTED_ROWS = 31990

    def __init__(self):
        self.logger = Logger_config().get_logger()
        # NOTE(review): the URL literal was corrupted in the original source;
        # this is the standard Trade Me residential-search endpoint implied by
        # the `authority` header below — confirm before deploying.
        self.url = 'https://api.trademe.co.nz/v1/search/property/residential.json'
        # Browser-like headers so the API serves us as it would the website.
        self.headers = {
            'authority': 'api.trademe.co.nz',
            'method': 'GET',
            'accept': 'application/json, text/plain, */*',  # fixed typo: was 'pplication/...'
            'accept-encoding': 'gzip, deflate, br',
            'accept-language': 'en,en-US;q=0.9',
            'cache-control': 'max-age=0',
            'newrelic': 'eyJ2IjpbMCwx=XSwiZCI6eyJ0eSI6IkJyb3dzZXIiLCJhYyI6IjQzODYzOCIsImFwIjoiMzgwMDc2Nzg0IiwiaWQiOiIxOGZjMWRkNzM1YTE4MmViIiwidHIiOiI4NGRiNzIyODE2Yjk1NzhjZDEzMTIxYjc3MGQ3MzQwMCIsInRpIjoxNjcwMjg3Mjg0NzU2fX0=',
            'origin': 'https://www.trademe.co.nz',
            'referer': 'https://www.trademe.co.nz/',
            'sec-ch-ua': '"Not?A_Brand";v="8", "Chromium";v="108", "Google Chrome";v="108"',
            'sec-ch-ua-mobile': '?0',
            'sec-ch-ua-platform': '"Windows"',
            'sec-fetch-dest': 'empty',
            'sec-fetch-mode': 'cors',
            'sec-fetch-site': 'some-site',
            'upgrade-insecure-requests': '1',
        }
        self.total_batches = []   # all listings collected across pages
        self.box = 0              # number of pages successfully processed
        # Scrape-time marker stamped onto every listing as G_Date.
        # NOTE: naive UTC datetime object, not an ISO string.
        self.timestamp = datetime.utcnow()

    @staticmethod
    def _ms_to_iso(raw):
        """Convert a millisecond epoch value (str or int) to an ISO-8601 string."""
        return datetime.utcfromtimestamp(int(raw) / 1000.0).isoformat()

    def _normalize_dates(self, listing):
        """Stamp G_Date and convert the /Date(<ms>)/ fields of one listing in place.

        Conversion is best-effort: listings missing a field, or carrying a
        malformed value, keep that field untouched (matches the original's
        swallow-and-continue behaviour, but with narrowed exceptions).
        """
        # Exceptions that a missing/malformed date field can raise below.
        _skippable = (KeyError, TypeError, ValueError, OSError, OverflowError)
        listing["G_Date"] = self.timestamp
        for key in ("StartDate", "EndDate", "AsAt"):
            try:
                # '/Date(1670287284756)/'[6:-2] -> '1670287284756'
                listing[key] = self._ms_to_iso(listing[key][6:-2])
            except _skippable:
                pass
        # Generalized: the original only converted OpenHomes[0] and [1];
        # iterate them all.
        for open_home in listing.get("OpenHomes") or []:
            for key in ("Start", "End"):
                try:
                    open_home[key] = self._ms_to_iso(open_home[key][6:-2])
                except _skippable:
                    pass

    async def scrape(self, ID):
        """Fetch results page ``ID``, normalize its listings, and accumulate them.

        Logs a critical message (and collects nothing) on a failed response.
        """
        params = {
            'page': f'{ID}',
            'rows': str(self.ROWS_PER_PAGE),
        }
        async with aiohttp.ClientSession() as session:
            async with session.get(url=self.url, headers=self.headers, params=params) as r:
                if not r:  # aiohttp responses are falsy on 4xx/5xx status
                    self.logger.critical(f"Scraping ERROR {ID}")
                    return
                json_data = await r.json()
                batch = json_data['List']
                for listing in batch:
                    self._normalize_dates(listing)
                self.total_batches.extend(batch)
                self.box += 1
                self.logger.info(f"box: {self.box}, len: {len(self.total_batches)}")

    async def Gather(self):
        """Run one ``scrape`` task per page (1..LAST_PAGE) concurrently."""
        await asyncio.gather(*(self.scrape(page) for page in range(1, self.LAST_PAGE + 1)))

    def run(self):
        """Execute the full scrape; return the listings, or False on shortfall.

        Returns:
            list: ``total_batches`` when more than MIN_EXPECTED_ROWS listings
            were collected (simple data-integrity check), otherwise ``False``.
        """
        asyncio.run(self.Gather())
        if len(self.total_batches) > self.MIN_EXPECTED_ROWS:
            self.logger.info(f"Scraping Success, return total_batches. box: {self.box}, {len(self.total_batches)}")
            return self.total_batches
        self.logger.critical(
            f"Scraping unsuccess, Not meet batch requirements. box: {self.box}, {len(self.total_batches)}")
        return False