Combiner les classes rares (modalités peu fréquentes)
# Names of every object-dtype column in `dataset`; these are the columns the
# loop below treats as categorical.
categorical_features = [
    feature for feature in dataset.columns if dataset[feature].dtype == 'O'
]

# Rare-label grouping: within each categorical column, every label whose
# share of the counted rows is 0.01 or less is replaced by the single
# placeholder 'Rare_var'; labels above that share are left untouched.
for feature in categorical_features:
    column = dataset[feature]

    # Relative frequency of each label (fractions rather than raw counts).
    temp = column.value_counts(normalize=True)

    # Despite the name, temp_df is an Index: the labels whose relative
    # frequency is strictly greater than 0.01.
    temp_df = temp.loc[temp.gt(0.01)].index

    # np.where(condition, value_if_true, value_if_false):
    #   condition true  -> keep the row's existing label
    #   condition false -> write 'Rare_var'
    # NOTE(review): value_counts skips NaN by default, so missing values are
    # never in temp_df and are also rewritten to 'Rare_var' - confirm that
    # this is intended (or impute missing values before this step).
    is_frequent = column.isin(temp_df)
    dataset[feature] = np.where(is_frequent, column, 'Rare_var')
Lazy long python