Les pandas suppriment les valeurs aberrantes pour plusieurs colonnes
def cap_data(df):
for col in df.columns:
print("capping the ",col)
if (((df[col].dtype)=='float64') | ((df[col].dtype)=='int64')):
percentiles = df[col].quantile([0.01,0.99]).values
df[col][df[col] <= percentiles[0]] = percentiles[0]
df[col][df[col] >= percentiles[1]] = percentiles[1]
else:
df[col]=df[col]
return df
final_df=cap_data(df)
Real Raccoon