Air Quality Index
I've recently been experimenting with fetching data related to the AQI in China.
A good resource is http://aqicn.org, which presents air pollution data clearly, city by city, so I wrote a Python script to crawl the data from it.
#-*-coding: utf-8 -*- | |
#!/usr/bin/python | |
from datetime import datetime | |
import pytz | |
import requests | |
from bs4 import BeautifulSoup | |
class NetworkException(Exception):
    """Raised when the HTTP request for a city's page does not return status 200."""
def get_aqi(city, timeout=30):
    """Scrape the current readings shown on the aqicn.org page for ``city``.

    Args:
        city: Path segment appended to ``http://aqicn.org/city/`` to build
            the page URL.
        timeout: Value passed to ``requests.get`` as its ``timeout``
            argument, so an unresponsive server cannot block the caller
            indefinitely.

    Returns:
        A dict with the keys ``aqi``, ``pm25``, ``pm10``, ``o3``, ``no2``,
        ``so2``, ``co``, ``temp``, ``dew``, ``pressure``, ``humidity``,
        ``wind`` and ``timestamp``.  Each reading is the ``.string`` of the
        page element with the matching id, or ``None`` when the page has no
        such element.  ``timestamp`` is the time of the call (not a time
        read from the page), formatted as ``YYYY-MM-DD HH:MM:SS`` in the
        first timezone pytz lists for country code ``cn``.

    Raises:
        NetworkException: If the response status code is not 200.
    """
    base_url = 'http://aqicn.org/city/'
    city_url = base_url + city
    headers = {
        'User-Agent': 'Mozilla/5.0'
    }
    r = requests.get(city_url, headers=headers, timeout=timeout)
    if r.status_code != 200:
        raise NetworkException(
            'GET %s returned HTTP %s' % (city_url, r.status_code))

    soup = BeautifulSoup(r.content, 'html.parser')

    def text_of(element_id):
        # find() returns None for an id the page does not contain; report
        # that reading as None instead of failing the whole call.
        node = soup.find(id=element_id)
        return node.string if node is not None else None

    # Result key -> id of the HTML element that holds the value.
    element_ids = {
        'aqi': 'aqiwgtvalue',
        'pm25': 'cur_pm25',
        'pm10': 'cur_pm10',
        'o3': 'cur_o3',
        'no2': 'cur_no2',
        'so2': 'cur_so2',
        'co': 'cur_co',
        'temp': 'cur_t',
        'dew': 'cur_d',
        'pressure': 'cur_p',
        'humidity': 'cur_h',
        'wind': 'cur_w',
    }
    readings = dict(
        (key, text_of(element_id)) for key, element_id in element_ids.items())

    tz = pytz.timezone(pytz.country_timezones('cn')[0])
    readings['timestamp'] = datetime.now(tz).strftime("%Y-%m-%d %H:%M:%S")
    return readings
Another website is http://www.stateair.net/, which reports AQI readings from the US consulates in major Chinese cities, i.e. Beijing, Shanghai, Guangzhou, Chengdu and Shenyang. Similarly, the code to fetch data from it is shown below.
import requests | |
from bs4 import BeautifulSoup | |
class NetworkException(Exception):
    """Raised when the HTTP request for a city's feed does not return status 200."""
# Lower-case city name -> number used as the file name of that city's RSS feed.
cities = {'beijing': '1', 'chengdu': '2', 'guangzhou': '3', 'shanghai': '4', 'shenyang': '5'}


def get_pm25_24h(city, timeout=30):
    """Return the readings listed in the stateair.net RSS feed for ``city``.

    Args:
        city: City name; matched case-insensitively against ``cities``.
        timeout: Value passed to ``requests.get`` as its ``timeout``
            argument, so an unresponsive server cannot block the caller
            indefinitely.

    Returns:
        A list with one dict per ``<item>`` in the feed, in feed order.
        Each dict has the keys ``title``, ``link``, ``description``,
        ``Param``, ``Conc``, ``AQI``, ``Desc`` and ``ReadingDateTime``,
        holding the ``.string`` of the child tag of that name, or ``None``
        when the item has no such tag.

        If ``city`` is not a key of ``cities``, no request is made and the
        string ``city + " not available."`` is returned instead of a list.

    Raises:
        NetworkException: If the response status code is not 200.
    """
    city_code = cities.get(city.lower())
    if not city_code:
        return city + " not available."

    base_url = 'http://www.stateair.net/web/rss/1/'
    feed_url = base_url + city_code + '.xml'
    r = requests.get(feed_url, timeout=timeout)
    if r.status_code != 200:
        raise NetworkException(
            'GET %s returned HTTP %s' % (feed_url, r.status_code))

    soup = BeautifulSoup(r.content, 'xml')
    # Child tags copied out of every <item>; the tag name doubles as the
    # key in the resulting dict.
    fields = ('title', 'link', 'description', 'Param', 'Conc', 'AQI',
              'Desc', 'ReadingDateTime')
    data = []
    for item in soup.find_all('item'):
        record = {}
        for field in fields:
            # find() returns None for a tag the item lacks; keep the item
            # and report that field as None rather than failing the call.
            node = item.find(field)
            record[field] = node.string if node is not None else None
        data.append(record)
    return data
Both code snippets above use Python and handy packages like requests and Beautiful Soup.
The stateair website also provides historical data for download in CSV format, so I used that data to create a simple visualization page. Check it out at https://chuanjin.github.io/pm25/; the source code is available from the GitHub repo.
Last but not least, I found the pm25.in website, which provides real-time data for all Chinese cities. Even better, they have opened their API to developers, and I also created a Python library based on it. The code is available from https://github.com/chuanjin/python-aqi-zh
Top comments (0)