Hello!
Because I'm taking a web scraping course, I decided to write a small script that extracts the response from the rather unusual XML-wrapped JSON served by https://mapas.race.es and generates either a single JSON file or several CSV files, split into the following categories (a quick sketch of how the wrapper is handled follows the list, and the full script comes after that):
- Incidents
- Cameras
- Radars
- BlackPoints
- OilStations
- Parking
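
The "rare" part is the response format: the ASMX endpoint doesn't return plain JSON, it returns JSON wrapped inside an XML `<string xmlns="http://tempuri.org/">` element, so the payload has to be unwrapped before `json.loads` can parse it. Here is a stripped-down sketch of just that step (the full script below adds the extra request headers, which the server may require):

```python
import json
import requests

# Same endpoint the full script uses, asking for every category
demo_url = ("https://mapas.race.es/WebServices/srvRace.asmx/ObtenerDatos"
            "?pstrIncidencias=1&pstrCamaras=1&pstrRadares=1"
            "&pstrGasolineras=1&pstrPuntosNegros=1&pstrParking=1")

resp = requests.get(demo_url, headers={"referer": "https://mapas.race.es/"})

# The JSON payload sits inside <string xmlns="http://tempuri.org/">...</string>,
# so strip the XML wrapper before parsing
payload = resp.text.split('<string xmlns="http://tempuri.org/">')[1].split("</string>")[0]
data = json.loads(payload)
print(list(data.keys()))  # expected: Incidencias, Camaras, Radares, Gasolineras, PuntosNegros, Parking
```

And here is the full script: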
```python
import requests
import json
import pandas as pd

formatData = "json"  # "json" or "csv"
nameFile = "race"

# Category toggles: 1 = include, 0 = skip
incidents = 1
cameras = 1
radars = 1
oilStations = 1
blackPoints = 1
parkings = 1

url = "https://mapas.race.es/WebServices/srvRace.asmx/ObtenerDatos?pstrIncidencias=" + \
    str(incidents) + "&pstrCamaras=" + str(cameras) + "&pstrRadares=" + str(radars) + \
    "&pstrGasolineras=" + str(oilStations) + "&pstrPuntosNegros=" + \
    str(blackPoints) + "&pstrParking=" + str(parkings)

headers = {
    "authority": "infocar.dgt.es",
    "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36",
    "host": "mapas.race.es",
    "referer": "https://mapas.race.es/",
}
response = requests.get(url, headers=headers)
initialText = response.text

# The JSON payload comes wrapped in an ASMX XML <string> element, so extract it first
splitText = initialText.split('<string xmlns="http://tempuri.org/">')
jsonToLoad = splitText[1].split("</string>")[0]
jsonRequest = json.loads(jsonToLoad)

items = {}
incidentsItems = []
camerasItems = []
radarsItems = []
oilStationsItems = []
blackPointsItems = []
parkingsItems = []
def get_object(type, item, id=None, image=None):
    """Map a raw item from the API (Spanish keys) to a simplified dict."""
    if type == "incidents":
        return {
            "id": str(id),
            "lat": str(item["Latitud"]),
            "lng": str(item["Longitud"]),
            "type": str(item["Tipo"]),
            "date": item["Fecha"],
            "reason": item["Causa"],
            "level": item["Nivel"],
            "province": item["Provincia"],
            "poblation": item["Poblacion"],
            "street": item["Carretera"]
        }
    if type == "cameras":
        return {
            "id": str(item["Id"]),
            "lat": str(item["Latitud"]),
            "lng": str(item["Longitud"]),
            "image": image
        }
    if type in ("radars", "oilStations", "blackPoints", "parkings"):
        return {
            "id": str(item["Id"]),
            "lat": str(item["Latitud"]),
            "lng": str(item["Longitud"]),
        }
if incidents == 1:
    i = 1
    for itemIncidence in jsonRequest["Incidencias"]:
        obj = get_object('incidents', itemIncidence, str(i))
        incidentsItems.append(obj)
        i += 1
    items["incidents"] = incidentsItems

if cameras == 1:
    for itemCameras in jsonRequest["Camaras"]:
        image = "http://infocar.dgt.es/etraffic/data/camaras/" + \
            str(itemCameras['Id']) + ".jpg"
        obj = get_object('cameras', itemCameras, "", image)
        camerasItems.append(obj)
    items["cameras"] = camerasItems

if radars == 1:
    for itemRadar in jsonRequest["Radares"]:
        obj = get_object('radars', itemRadar)
        radarsItems.append(obj)
    items["radars"] = radarsItems

if oilStations == 1:
    for ItemsOilStation in jsonRequest["Gasolineras"]:
        obj = get_object('oilStations', ItemsOilStation)
        oilStationsItems.append(obj)
    items["oilStations"] = oilStationsItems

if blackPoints == 1:
    for itemBlackPoint in jsonRequest["PuntosNegros"]:
        obj = get_object('blackPoints', itemBlackPoint)
        blackPointsItems.append(obj)
    items["blackPoints"] = blackPointsItems

if parkings == 1:
    for itemParking in jsonRequest["Parking"]:
        obj = get_object('parkings', itemParking)
        parkingsItems.append(obj)
    items["parkings"] = parkingsItems
if formatData == "json":
    # Everything goes into a single JSON file
    with open(nameFile + '.' + formatData, "w") as f:
        f.write(json.dumps(items, indent=2))
elif formatData == "csv":
    # One CSV file per category (empty if that category was skipped)
    incidentsDF = pd.DataFrame(items.get("incidents", []))
    camerasDF = pd.DataFrame(items.get("cameras", []))
    radarsDF = pd.DataFrame(items.get("radars", []))
    oilStationsDF = pd.DataFrame(items.get("oilStations", []))
    blackPointsDF = pd.DataFrame(items.get("blackPoints", []))
    parkingsDF = pd.DataFrame(items.get("parkings", []))
    incidentsDF.to_csv(nameFile + "_incidents." + formatData, index=False)
    camerasDF.to_csv(nameFile + "_cameras." + formatData, index=False)
    radarsDF.to_csv(nameFile + "_radars." + formatData, index=False)
    oilStationsDF.to_csv(nameFile + "_oilStations." + formatData, index=False)
    blackPointsDF.to_csv(nameFile + "_blackPoints." + formatData, index=False)
    parkingsDF.to_csv(nameFile + "_parkings." + formatData, index=False)

print(formatData + " file(s) generated")
```
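
Splitting the raw text works, but it would break if the wrapper ever changed slightly. A slightly more robust alternative (just a sketch, assuming the endpoint keeps returning a single `<string>` element whose text is the JSON payload) is to parse the XML with the standard library and read the element's text:

```python
import json
import xml.etree.ElementTree as ET

import requests

resp = requests.get(url, headers=headers)  # same url and headers as in the script above

# Parse the ASMX wrapper instead of splitting on raw text; the root <string>
# element's text is the JSON payload the rest of the script consumes.
root = ET.fromstring(resp.text)
if root.text is None:
    raise ValueError("Unexpected response: no JSON payload inside the XML wrapper")
jsonRequest = json.loads(root.text)
```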
For most readers this won't be directly useful, since the data only covers Spain, but as a curiosity you can see how it's done, and I'd welcome suggestions to improve it!
Don't be too harsh; I'm still learning Python...