"""Turn categorical columns into 0/1 indicator (one-hot) columns.

Two approaches are shown on the same small sample table:

* ``sklearn.preprocessing.OneHotEncoder`` -- encodes *every* column it is
  given and returns an unlabeled matrix.
* ``pandas.get_dummies`` -- encodes only the string columns and keeps
  readable ``<column>_<value>`` labels.
"""

import pandas as pd


def build_sample_frame():
    """Return the three-row demo table with id, sex, level and score columns."""
    return pd.DataFrame({
        'id': [321313, 246852, 447902],
        'sex': ['male', 'Female', 'Female'],
        'level': ['high', 'low', 'middle'],
        'score': [1, 2, 3],
    })


def split_id_and_features(frame):
    """Split *frame* into its id column and the columns to be encoded.

    Returns:
        A ``(id_data, raw_convert_data)`` pair: ``id_data`` is a one-column
        DataFrame holding ``id``; ``raw_convert_data`` holds every column
        after the first one.
    """
    id_data = frame[['id']]  # double brackets keep this a DataFrame
    raw_convert_data = frame.iloc[:, 1:]
    return id_data, raw_convert_data


def encode_with_sklearn(id_data, raw_convert_data):
    """One-hot encode every column of *raw_convert_data* with OneHotEncoder.

    The encoded columns carry positional integer labels (0, 1, 2, ...)
    because the encoder returns a plain matrix without column names.

    Returns:
        ``id_data`` joined column-wise with the encoded indicator columns.
    """
    # Imported here so the pandas-only path works without scikit-learn.
    # LabelEncoder (string -> integer codes) is not needed: OneHotEncoder
    # is applied directly to the string columns.
    from sklearn.preprocessing import OneHotEncoder

    model_enc = OneHotEncoder()
    # fit_transform returns a sparse matrix; densify it for the DataFrame.
    encoded = model_enc.fit_transform(raw_convert_data).toarray()
    # Reuse the source index so the column-wise concat aligns row by row
    # even when the input frame does not use the default 0..n-1 index.
    encoded_frame = pd.DataFrame(encoded, index=raw_convert_data.index)
    return pd.concat((id_data, encoded_frame), axis=1)


def encode_with_pandas(id_data, raw_convert_data):
    """One-hot encode the string columns of *raw_convert_data* with pandas.

    Numeric columns (``score`` in the sample data) pass through unchanged.

    Returns:
        ``id_data`` joined column-wise with the encoded frame.
    """
    # dtype=int keeps the indicators as 0/1; pandas >= 2.0 would otherwise
    # emit True/False, which does not match the documented output.
    dummies = pd.get_dummies(raw_convert_data, dtype=int)
    return pd.concat((id_data, dummies), axis=1)


def main():
    """Run the demo and print each intermediate table."""
    # Build the data.
    df = build_sample_frame()
    print(df)
    #        id     sex   level  score
    # 0  321313    male    high      1
    # 1  246852  Female     low      2
    # 2  447902  Female  middle      3

    # Separate the id column from the columns to convert.
    id_data, raw_convert_data = split_id_and_features(df)
    print(raw_convert_data)
    #       sex   level  score
    # 0    male    high      1
    # 1  Female     low      2
    # 2  Female  middle      3

    # Convert the categorical columns to indicator variables with sklearn,
    # then recombine them with the id column.
    df_all = encode_with_sklearn(id_data, raw_convert_data)
    print(df_all)
    #        id    0    1    2    3    4    5    6    7
    # 0  321313  0.0  1.0  1.0  0.0  0.0  1.0  0.0  0.0
    # 1  246852  1.0  0.0  0.0  1.0  0.0  0.0  1.0  0.0
    # 2  447902  1.0  0.0  0.0  0.0  1.0  0.0  0.0  1.0

    # Do the same conversion with pandas' get_dummies.
    df_all2 = encode_with_pandas(id_data, raw_convert_data)
    print(df_all2)
    #        id  score  sex_Female  sex_male  level_high  level_low  level_middle
    # 0  321313      1           0         1           1          0             0
    # 1  246852      2           1         0           0          1             0
    # 2  447902      3           1         0           0          0             1


if __name__ == "__main__":
    main()