基于streamlit的可视化交互简单数据分析、数据预处理模板

阅读: 评论:0

基于streamlit的可视化交互简单数据分析、数据预处理模板

基于streamlit的可视化交互简单数据分析、数据预处理模板

# NOTE(review): the extracted source had lost its line breaks and several
# identifiers were truncated (`_input`, `lumns`, `upby`, `_csv`).  They are
# restored below as `st.text_input`, `df.columns`, `df.groupby` and
# `df.to_csv` -- confirm against the original article.
# The names `st`, `pd`, `os` and `Image` are expected to be imported at the top
# of the file (streamlit, pandas, os, PIL.Image).

_YES = "是"
_NO_YES = ("否", "是")
_ROW_COUNT_OPTIONS = [5, 10, 20, 50, 100]


def _coerce_fill_value(raw, column):
    """Return ``raw`` as a float when ``column`` is numeric and ``raw`` parses.

    Text inputs always yield strings; filling a numeric column with a string
    would silently turn it into an object column, so a numeric value is
    preferred whenever the text can be parsed as one.
    """
    if pd.api.types.is_numeric_dtype(column):
        try:
            return float(raw)
        except ValueError:
            return raw
    return raw


def _show_overview(df, n):
    """Optionally render the first ``n`` rows, the description and the dtypes."""
    if st.selectbox("是否显示表和描述", _NO_YES) == _YES:
        st.dataframe(df.head(n))
        st.write("描述", df.describe())
    else:
        st.write("不显示")

    if st.selectbox("是否显示列信息", _NO_YES) == _YES:
        st.write(df.dtypes)
    else:
        st.write("不显示")


def _handle_duplicates(df):
    """Optionally report and drop duplicated rows; return the resulting frame."""
    if st.selectbox("查看重复数据", _NO_YES) != _YES:
        st.write("不查看")
        return df

    st.write("重复数据数量", df.duplicated().sum())
    if st.selectbox("删除重复数据", _NO_YES) == _YES:
        df = df.drop_duplicates()
        st.write("重复数据数量", df.duplicated().sum())
    else:
        st.write("不删除")
    return df


def _convert_column_type(df):
    """Optionally convert one column to datetime or to a user-typed dtype."""
    if st.selectbox("是否转换类型", _NO_YES) != _YES:
        st.write("不转换")
        return df

    kind = st.selectbox("转换类型", ("无", "时间格式", "自定义类型"))
    if kind == "无":
        return df

    column = st.selectbox("转换的列", list(df.columns))
    if kind == "时间格式":
        date_format = st.selectbox(
            "格式", ("", "%Y", "%Y%m", "%Y%m%d", "%m%d", "%d")
        )
        if st.selectbox("是否转换", _NO_YES) == _YES:
            # The empty option means "no explicit format": pass None so pandas
            # infers it instead of coercing every value to NaT.
            df[column] = pd.to_datetime(
                df[column], format=date_format or None, errors='coerce'
            )
    elif kind == "自定义类型":
        target_dtype = st.text_input("转换格式默认int", "int")
        if st.selectbox("是否转换", _NO_YES) == _YES:
            df[column] = df[column].astype(target_dtype)

    # f-string instead of `+` so non-string column labels do not raise.
    st.write(f"{column}格式为:", df[column].dtypes)
    return df


def _show_groupby(df):
    """Optionally display an aggregation of one column grouped by others."""
    if st.selectbox("是否选取聚合groupby", _NO_YES) != _YES:
        st.write("不显示")
        return

    columns = list(df.columns)
    group_cols = st.multiselect("选择聚合列", columns)
    target_col = st.selectbox("选择作用列", columns)
    method = st.selectbox("选择方法", ["无", "mean", "max", "min", "sum"])
    if method == "无":
        return

    limit = st.select_slider("滑动数量", options=_ROW_COUNT_OPTIONS)
    if not group_cols:
        # Nothing to group by yet; the original referenced an unbound result
        # here and raised NameError.
        return
    result = df.groupby(group_cols)[target_col].agg(method)
    st.write(result.head(limit))


def _fill_missing(df):
    """Optionally inspect null counts and fill nulls in one column."""
    columns = list(df.columns)
    null_cols = st.multiselect("查看列空值", columns)
    if null_cols:
        st.write(df[null_cols].isnull().sum())

    mode = st.selectbox("是否自动填充空值默认mean", ("否", "自动填充", "手动填充"))
    if mode == "自动填充":
        column = st.selectbox("填充列", columns)
        if st.selectbox("是否填充", _NO_YES) == _YES:
            # Call mean(): the original passed the bound method itself.
            df[column] = df[column].fillna(df[column].mean())
            st.write(f"{column}列的空值:", df[column].isnull().sum())
    elif mode == "手动填充":
        column = st.selectbox("填充列", columns)
        raw_value = st.text_input("手动填充默认null", 'null')
        if st.selectbox("查看列信息", ("是", "否")) == _YES:
            st.write(f"{column}列的空值:", df[column].isnull().sum())
            st.write(df[column].describe())
        if st.selectbox("是否填充", _NO_YES) == _YES:
            # Use the value the user typed; the original filled with the mean
            # and ignored the text input entirely.
            fill_value = _coerce_fill_value(raw_value, df[column])
            df[column] = df[column].fillna(fill_value)
            st.write(f"{column}列的空值:", df[column].isnull().sum())
    return df


def _show_and_save(df):
    """Optionally render the processed table and write it to disk."""
    if st.selectbox("显示表", _NO_YES) == _YES:
        table_num = st.select_slider("滑动显示表", options=_ROW_COUNT_OPTIONS)
        # Honour this slider; the original reused the earlier row count.
        st.dataframe(df.head(table_num))
        st.write("描述", df.describe())
        st.write("信息", df.dtypes)

    if st.selectbox("保存二维表", _NO_YES) != _YES:
        return

    address = st.text_input("地址", "K:/")
    file_name = st.text_input("地址", "文件名")
    save_format = st.selectbox("文件类型", ("csv", "excle"))
    if st.selectbox("是否保存", _NO_YES) == _YES:
        target = f"{address}{file_name}.{save_format}"
        # NOTE(review): the data is written as CSV whatever type is selected,
        # as in the original -- confirm whether "excle" should use to_excel.
        df.to_csv(target)
        stats = os.stat(target)
        st.write("保存成功!文件大小为:%.1f" % (stats.st_size / 1024 / 1024), "MB")
        st.write(target)


def PS(PS: str) -> None:
    """Render the interactive data-preprocessing page.

    The page is drawn only when ``PS`` equals "数据预处理".  It loads a file
    from a user-supplied path and then offers, step by step, optional widgets
    for previewing the data, dropping duplicates, converting a column's type,
    viewing a group-by aggregation, filling nulls, and saving the result.
    Each step is gated by a "否"/"是" selector so that only requested steps
    run.

    Args:
        PS: Name of the page selected by the caller.
    """
    if PS != "数据预处理":
        return

    st.write(PS)
    image = Image.open('E:/桌面/叶脉处理法.png')
    st.image(image, caption='叶脉处理法', use_column_width=True)

    read = st.sidebar.selectbox("读取文件类型", ("csv", "excle"))
    title = st.text_input('输入地址', '')
    if not title:
        # No path entered yet (first render): skip loading instead of letting
        # read_csv fail on a path consisting only of the extension.
        return

    source = f"{title}.{read}"
    # NOTE(review): the file is parsed as CSV whatever type is selected, as in
    # the original -- confirm whether "excle" should use read_excel.
    df = pd.read_csv(source, encoding="GBK")
    stats = os.stat(source)
    st.write("读取文件成功!文件大小为:%.1f" % (stats.st_size / 1024 / 1024), "MB")
    st.write(stats)

    n = st.select_slider("滑动显示数量", options=_ROW_COUNT_OPTIONS)
    _show_overview(df, n)
    df = _handle_duplicates(df)
    df = _convert_column_type(df)
    _show_groupby(df)
    df = _fill_missing(df)
    _show_and_save(df)

上一次已经完成并发布了基于streamlit的可视化交互机器学习模板。这次的数据预处理要做的内容非常多,因此打算分批分次完成;目前主要是数据转换和数据清洗。提取特征值自由度高、难度较大,所以会逐步突破。

叶脉处理方法

叶脉处理方法是我想出来的一个流程思路,可以对应整个数据预处理或者数据提取特征值的过程。

因为在整个处理过程中,有很多步骤是必备的,或者说使用率较高,所以类似于决策树,将重要的分支放在前面,这样整体效率较高。

但streamlit框架是不断刷新的,所以从整体来看,这个流程类似于决策树又不完全是决策树:要保留一定的自由度,而不是每一步都运行。因此在运行时考虑用“是否显示”或者“是否处理”来限制各个步骤,这样既可以减少显示长度,也可以减少开销。整体流程会参考这个图,它是通过思考得出的一种解,但并非最优。

本文发布于:2024-02-05 06:18:48,感谢您对本站的认可!

本文链接:https://www.4u4v.net/it/170726082963771.html

版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,我们将在24小时内删除。

标签:数据   模板   简单   streamlit
留言与评论(共有 0 条评论)
   
验证码:

Copyright ©2019-2022 Comsenz Inc.Powered by ©

网站地图1 网站地图2 网站地图3 网站地图4 网站地图5 网站地图6 网站地图7 网站地图8 网站地图9 网站地图10 网站地图11 网站地图12 网站地图13 网站地图14 网站地图15 网站地图16 网站地图17 网站地图18 网站地图19 网站地图20 网站地图21 网站地图22 网站地图23