"""Data cleaning for the city AQI data set."""
import pandas as pd
import matplotlib.pyplot as plt

# Configure matplotlib once at import time:
#   - 'SimHei' is set as the sans-serif font (presumably so the Chinese chart
#     title renders instead of showing missing-glyph boxes);
#   - 'axes.unicode_minus' is turned off so minus signs are drawn with the
#     ASCII hyphen, which the chosen font is expected to contain.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})
def main():
    """Clean the city AQI data and chart the 10 cities with the lowest AQI.

    Reads ``china_city_aqi.csv`` from the current working directory, keeps
    only the rows whose ``AQI`` value is greater than 0, draws a bar chart
    (``city`` on the x axis, ``AQI`` on the y axis) of the 10 rows with the
    smallest AQI, saves the figure as ``top_10_aqi.png`` and finally displays
    it with ``plt.show()``.

    Returns:
        None. The function is used for its side effects (image file + window).
    """
    # Number of cities to chart; must agree with the chart title and the
    # output file name, both of which say "10".
    top_n = 10

    aqi_data = pd.read_csv('china_city_aqi.csv')

    # Data cleaning: keep only rows with AQI > 0 (non-positive values are
    # presumably placeholders for missing readings -- confirm with the data).
    clean_aqi_data = aqi_data[aqi_data['AQI'] > 0]

    # Ascending sort puts the lowest (best) AQI first; take exactly top_n rows
    # so the plot shows 10 cities as labelled, not 50.
    top10 = clean_aqi_data.sort_values(by=['AQI']).head(top_n)

    top10.plot(kind='bar', x='city', y='AQI',
               title='空气最好的10个城市', figsize=(20, 10))
    # Save before show(): showing first can leave an empty figure to save.
    plt.savefig('top_10_aqi.png')
    plt.show()
# Run the analysis only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()