Fetch all the data and save it to a CSV file
Imported libraries
- import requests
- from bs4 import BeautifulSoup
- import csv
The main function
```python
def main():
    city_list = get_all_citys()
    header = ['city', 'AQI', 'pm2.5/h', 'pm10/h', 'co/h', 'no2/h', 'o3/h', 'o3/8h', 'so2/h']
    # Open the file for writing; it is created automatically if it does not exist
    with open('china_city_aqi.csv', 'w', encoding='utf-8', newline='') as f:
        # Get a writer object
        writer = csv.writer(f)
        # Write the header row
        writer.writerow(header)
        # enumerate yields the index along with each city
        for i, city in enumerate(city_list):
            city_name = city[0]
            city_pinyin = city[1]
            city_aqi = get_city_aqi(city_pinyin)
            # List concatenation: the city name followed by its AQI values
            row = [city_name] + city_aqi
            writer.writerow(row)
            if (i + 1) % 10 == 0:
                print('Processed {} of {} records'.format(i + 1, len(city_list)))


if __name__ == '__main__':
    main()
```
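One row is written per city, with the city name prepended to the eight AQI values. A quick way to sanity-check the result is to read the file back with `csv.reader`; this is a minimal sketch, assuming `china_city_aqi.csv` has already been produced by a run of `main()`:

```python
import csv

# Read the CSV back and print the header plus the first few rows
with open('china_city_aqi.csv', 'r', encoding='utf-8', newline='') as f:
    reader = csv.reader(f)
    header = next(reader)          # the first row is the header written by main()
    print(header)
    for row in list(reader)[:3]:   # peek at the first three data rows
        print(row)
```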
Fetch every city's name and link: get_all_citys()
```python
def get_all_citys():
    # Home page URL
    url = 'http://pm25.in/'
    # Fetch the HTML of the whole page
    r = requests.get(url, timeout=30)
    soup = BeautifulSoup(r.text, 'lxml')
    city_div = soup.find('div', {'class': 'all'}).find('div', {'class': 'bottom'})
    city_link = city_div.find_all('a')
    city_list = []
    for link in city_link:
        city_name = link.text
        # href="/beijing": drop the leading slash to get the pinyin slug
        city_pinyin = link['href'][1:]
        # Store each city as a (name, pinyin) tuple
        city_list.append((city_name, city_pinyin))
    return city_list
```
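The two chained `find()` calls narrow the search to the `bottom` div nested inside the `all` div before collecting every `<a>` tag. The same parsing logic can be exercised offline; the HTML fragment below is a hypothetical stand-in modeled on the `href="/beijing"` pattern noted in the comment, not the live page markup:

```python
from bs4 import BeautifulSoup

# Hypothetical fragment mimicking the structure the scraper expects
html = '''
<div class="all">
  <div class="bottom">
    <a href="/beijing">北京</a>
    <a href="/shanghai">上海</a>
  </div>
</div>
'''
soup = BeautifulSoup(html, 'lxml')
city_div = soup.find('div', {'class': 'all'}).find('div', {'class': 'bottom'})
for link in city_div.find_all('a'):
    print(link.text, link['href'][1:])  # e.g. 北京 beijing
```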
Fetch a specific city's data: get_city_aqi(city)
```python
def get_city_aqi(city):
    url = 'http://pm25.in/' + city
    r = requests.get(url, timeout=30)
    soup = BeautifulSoup(r.text, 'lxml')
    div_list = soup.find_all('div', {'class': 'span1'})

    city_aqi = []
    # Eight values to collect in total
    for i in range(8):
        div_content = div_list[i]
        # strip() removes leading/trailing whitespace
        caption = div_content.find('div', {'class': 'caption'}).text.strip()
        value = div_content.find('div', {'class': 'value'}).text.strip()
        # city_aqi.append((caption, value))
        city_aqi.append(value)
    return city_aqi
```
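Each `span1` block pairs a `caption` with a `value`, and the loop keeps only the values so each row lines up with the header written in `main()`. A small offline sketch of that extraction follows; the fragment is hypothetical, built only from the class names the code above relies on:

```python
from bs4 import BeautifulSoup

# Hypothetical fragment with the caption/value structure the scraper expects
html = '''
<div class="span1"><div class="caption">AQI</div><div class="value"> 58 </div></div>
<div class="span1"><div class="caption">PM2.5/1h</div><div class="value"> 41 </div></div>
'''
soup = BeautifulSoup(html, 'lxml')
for div in soup.find_all('div', {'class': 'span1'}):
    caption = div.find('div', {'class': 'caption'}).text.strip()
    value = div.find('div', {'class': 'value'}).text.strip()  # strip() drops the padding spaces
    print(caption, value)
```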