边听莫烦老师的课边敲下来的
以下代码稍有些乱,可以点击上面的链接观看视频,简介里有他的 GitHub 链接,里面有代码;不过由于版本更新,视频中的代码有的会报错,可以参考我的改一下(2021.10 不会报错)
# 类比字典
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# import matplotlib
def class1() -> None:
    """Lesson 1: create a Series and a DataFrame and print basic metadata."""
    s1 = pd.Series([1, 3, 6, np.nan, 44, 1])
    print(s1)

    datas1 = pd.date_range('20211008', periods=6)
    print(datas1)

    df1 = pd.DataFrame(
        np.random.randn(6, 4),
        index=datas1,
        columns=['a', 'b', 'c', 'd'],
    )
    print(df1)
    # A DataFrame can also be built from a dict of columns,
    # e.g. {'A': np.array(...)} (one entry per column).
    print(df1.dtypes)
    print(df1.columns)  # column names
    print(df1.describe())  # mean, quartiles, min/max, ...
    # Sorting: see DataFrame.sort_values / sort_index.


def class2() -> None:
    """Lesson 2: select and filter data from a DataFrame."""
    datas2 = pd.date_range('20211008', periods=6)
    df2 = pd.DataFrame(
        np.arange(24).reshape((6, 4)),
        index=datas2,
        columns=['a', 'b', 'c', 'd'],
    )
    # Column access: item syntax and attribute syntax are equivalent.
    print(df2['a'], df2.a)

    # Row slicing: by position and by date label give the same rows here.
    print(df2[0:3])
    print(df2['20211008':'20211010'])

    # Select by label: loc
    print(df2.loc['20211009'])
    print(df2.loc[:, ['a', 'b']])  # all rows, columns a and b

    # Select by position: iloc
    print(df2.iloc[[1, 3], [2, 3]])

    # Mixed selection with .ix is deprecated; use .loc for label-based
    # indexing or .iloc for positional indexing instead.

    # Boolean indexing
    print(df2[df2.a < 8])


def class3() -> None:
    """Lesson 3: modify values and add columns."""
    datas3 = pd.date_range('20211008', periods=6)
    df3 = pd.DataFrame(
        np.arange(24).reshape((6, 4)),
        index=datas3,
        columns=['a', 'b', 'c', 'd'],
    )

    # Change a single value, by position and by label.
    df3.iloc[2, 2] = 1234
    df3.loc['20211012', 'b'] = 4321
    print(df3)

    # Conditional assignment on column 'a'. A single .loc call is used
    # instead of chained indexing (df3.a[mask] = 0), which may write to a
    # temporary copy and leave df3 unchanged.
    # To zero the entire matching rows instead: df3[df3.a >= 16] = 0
    df3.loc[df3.a >= 16, 'a'] = 0
    print(df3)

    # Add new columns (a Series is aligned on the index).
    df3['f'] = np.nan
    df3['e'] = pd.Series(
        [1, 2, 3, 4, 5, 6],
        index=pd.date_range('20211008', periods=6),
    )
    print(df3)


def class4() -> None:
    """Lesson 4: handle missing data (drop, fill, detect)."""
    datas = pd.date_range('20211008', periods=6)
    df = pd.DataFrame(
        np.arange(24).reshape((6, 4)),
        index=datas,
        columns=['a', 'b', 'c', 'd'],
    )
    # NaN cannot live in an integer column; cast the two affected columns
    # explicitly rather than relying on silent upcasting during assignment.
    df = df.astype({'b': float, 'c': float})
    df.iloc[0, 1] = np.nan
    df.iloc[1, 2] = np.nan
    print(df)

    # Drop rows containing any NaN (how='all' drops only all-NaN rows).
    print(df.dropna(axis=0, how='any'))

    # Fill missing values.
    print(df.fillna(value=0))

    # Detect missing values: element-wise mask, then a single boolean.
    print(df.isnull())
    print(df.isnull().values.any())


def class5() -> None:
    """Lesson 5: read a CSV file and save it as a pickle.

    Reads 'student.csv' and writes 'student.pickle', both relative to the
    current working directory.
    """
    data5 = pd.read_csv('student.csv')  # load
    print(data5)
    data5.to_pickle('student.pickle')  # save


def class6() -> None:
    """Lesson 6: combine DataFrames with concat."""
    # Concatenating
    df61 = pd.DataFrame(np.ones((3, 4)) * 0, columns=['a', 'b', 'c', 'd'])
    df62 = pd.DataFrame(np.ones((3, 4)) * 1, columns=['a', 'b', 'c', 'd'])
    df63 = pd.DataFrame(np.ones((3, 4)) * 2, columns=['a', 'b', 'c', 'd'])
    print(df62)
    # axis=0 stacks vertically, axis=1 horizontally.
    res6 = pd.concat([df61, df62, df63], axis=0, ignore_index=True)
    print(res6)

    # join: 'inner' or 'outer'
    df64 = pd.DataFrame(
        np.ones((3, 4)) * 0, columns=['a', 'b', 'c', 'd'], index=[1, 2, 3]
    )
    df65 = pd.DataFrame(
        np.ones((3, 4)) * 1, columns=['b', 'c', 'd', 'e'], index=[2, 3, 4]
    )
    # Outer join keeps every column; missing cells become NaN.
    print(pd.concat([df64, df65], join='outer', ignore_index=True, sort=False))
    # Inner join keeps only the columns common to both frames.
    print(pd.concat([df64, df65], join='inner', ignore_index=True))

    # The join_axes argument has been removed from concat; reindex_like()
    # aligns df65 to df64's index and columns before concatenating.
    print(pd.concat([df64, df65.reindex_like(df64)], axis=1))
    # Appending a single row: build a Series and concat it.


def class7() -> None:
    """Lesson 7: merge two DataFrames on a key column."""
    # Merging two frames by key/keys (as in a database join).
    left7 = pd.DataFrame({
        'key': ['ko', 'k1', 'k2', 'k3'],
        'a': ['a0', 'a1', 'a2', 'a3'],
        'b': ['b0', 'b1', 'b2', 'b3'],
    })
    right7 = pd.DataFrame({
        'key': ['ko', 'k1', 'k2', 'k3'],
        'c': ['c0', 'c1', 'c2', 'c3'],
        'd': ['d0', 'd1', 'd2', 'd3'],
    })
    # Separate the two frames with a newline (the published text had lost
    # the backslash and printed a literal 'n').
    print(left7, '\n', right7)
    print(pd.merge(left7, right7, on='key'))
    # With two key columns use on=['key1', 'key2'].
    # how defaults to 'inner'; 'left', 'right' and 'outer' are also available.
    # indicator=True adds a column showing where each row came from.


def class8() -> None:
    """Lesson 8: plot a Series and a DataFrame with matplotlib."""
    # Plotting a Series
    data81 = pd.Series(np.random.randn(1000), index=np.arange(1000))
    data81 = data81.cumsum()
    data81.plot()
    plt.show()

    # Plotting a DataFrame
    data82 = pd.DataFrame(
        np.random.randn(1000, 4),
        index=np.arange(1000),
        columns=list("ABCD"),
    )
    data82 = data82.cumsum()
    data82.plot()
    plt.show()

    # Other plot methods: bar, hist, scatter, box, ...
    # Scatter plot: draw two series on the same axes by passing ax=.
    ax = data82.plot.scatter(x='A', y='B', color='DarkBlue', label='Class 1')
    data82.plot.scatter(x='A', y='C', color='DarkGreen', label='Class 2', ax=ax)
    plt.show()


if __name__ == '__main__':
    # Call the lesson you want to run here, e.g. class1().
    pass
本文发布于:2024-02-02 10:22:50,感谢您对本站的认可!
本文链接:https://www.4u4v.net/it/170684056943167.html
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,我们将在24小时内删除。
留言与评论(共有 0 条评论) |