one_hot_encoder和label_encoder
one_hot_encoder和label_encoder可以处理类别类型的特征。
import pandas as pd


def one_hot_encoder(df, nan_as_category=True):
    """One-hot encode every object-dtype column of *df*.

    Args:
        df: Input DataFrame. It is not modified; ``pd.get_dummies`` returns
            a new frame.
        nan_as_category: Forwarded to ``pd.get_dummies`` as ``dummy_na``.
            When true, each encoded column also gets an indicator column
            for missing values.

    Returns:
        A ``(encoded_df, new_columns)`` tuple, where ``new_columns`` lists
        the columns of ``encoded_df`` that were not present in ``df``, in
        the order they appear in ``encoded_df``.
    """
    # A set keeps the membership test below O(1) per column.
    original_columns = set(df.columns)
    categorical_columns = [col for col in df.columns if df[col].dtype == 'object']
    df = pd.get_dummies(df, columns=categorical_columns, dummy_na=nan_as_category)
    new_columns = [c for c in df.columns if c not in original_columns]
    return df, new_columns


def label_encoder(df):
    """Replace every object-dtype column of *df* with integer label codes.

    Note:
        The columns are overwritten on the DataFrame that was passed in, so
        the caller's frame is modified; the same object is also returned.

    Args:
        df: Input DataFrame.

    Returns:
        A ``(df, categorical_columns)`` tuple, where ``categorical_columns``
        lists the names of the columns that were encoded.
    """
    # Imported here so that one_hot_encoder stays usable when scikit-learn
    # is not installed.
    from sklearn.preprocessing import LabelEncoder

    categorical_columns = [col for col in df.columns if df[col].dtype == 'object']
    for col in categorical_columns:
        # Values are cast to str before fitting, so missing values are
        # stringified and receive a label code of their own.
        df[col] = LabelEncoder().fit_transform(df[col].astype('str'))
    return df, categorical_columns