# pandas分析数据 (Analyzing data with pandas)


'''
数据清洗
'''
import pandas as pd
import matplotlib.pyplot as plt

# Configure matplotlib once for the whole script:
# - use the SimHei font for sans-serif text (the chart title below is Chinese)
# - axes.unicode_minus=False makes tick labels use the ASCII hyphen for minus
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})


def main():
    """Plot the 10 cities with the lowest AQI from ``china_city_aqi.csv``.

    Reads ``china_city_aqi.csv`` from the current working directory, keeps
    only the rows whose ``AQI`` value is greater than 0, draws a bar chart of
    the 10 rows with the smallest ``AQI`` (x axis: ``city``), writes the
    figure to ``top_10_aqi.png`` and then shows it.
    """
    aqi_data = pd.read_csv('china_city_aqi.csv')
    # Handy while exploring the file:
    #   aqi_data.head(2), aqi_data.tail(2), aqi_data.info()

    # Data cleaning: keep only rows with AQI > 0.
    clean_aqi_data = aqi_data[aqi_data['AQI'] > 0]

    # Ascending sort puts the lowest AQI first; take exactly 10 rows so the
    # data matches the chart title and the output file name.
    top10 = clean_aqi_data.sort_values(by=['AQI']).head(10)

    top10.plot(kind='bar', x='city', y='AQI', title='空气最好的10个城市', figsize=(20, 10))
    # Save before show(): the figure may be gone once the window is closed.
    plt.savefig('top_10_aqi.png')
    plt.show()

    # The 10 rows with the highest AQI; computed but not plotted or saved yet.
    bottom10 = clean_aqi_data.sort_values(by=['AQI'], ascending=False).head(10)

    # To export a result without the index column:
    #   top10.to_csv('top10_aqi.csv', index=False)


# Run the analysis only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
# 文章目录 (table-of-contents residue from the original web page)