# Classification de texte (text classification)
from sklearn.calibration import CalibratedClassifierCV
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.pipeline import Pipeline
# Text-classification pipeline: n-gram counts -> TF-IDF weights -> calibrated
# linear classifier trained with SGD.
# X_train, y_train, X_test and y_test are not defined in this snippet; they are
# expected to be provided by earlier code (presumably raw text documents and
# their labels -- confirm against the caller).
model = Pipeline([
    # Word n-grams of length 1 through 10; terms appearing in fewer than 5
    # documents or in more than 70% of documents are dropped.
    # NOTE(review): n-grams up to length 10 can make the vocabulary very
    # large -- confirm this upper bound is intentional.
    ('vect', CountVectorizer(ngram_range=(1, 10), min_df=5, max_df=.70)),
    ('tfidf', TfidfTransformer(norm='l1')),
    # The wrapped estimator is passed positionally on purpose: the keyword was
    # renamed from `base_estimator` to `estimator` in scikit-learn 1.2 (and
    # the old name removed in 1.4), while the first positional slot is the
    # estimator in every release.
    ('clf', CalibratedClassifierCV(
        SGDClassifier(
            penalty='elasticnet',
            alpha=0.001,
            max_iter=500,
            l1_ratio=.1,
            random_state=45,
        ),
        method='isotonic',
    )),
])
model.fit(X_train, y_train)

# Predictions on both splits so train and test scores can be compared.
test_y_pred = model.predict(X_test)
train_y_pred = model.predict(X_train)

# scikit-learn metrics take (y_true, y_pred) in that order; accuracy is
# symmetric, but keeping the order consistent with the reports below avoids
# mistakes if the metric is ever swapped for an asymmetric one.
print(f'Train accuracy {accuracy_score(y_train, train_y_pred)}')
print(f'Test accuracy {accuracy_score(y_test, test_y_pred)}')

print('\nTrain Report\n')
print(classification_report(y_train, train_y_pred))
print('Test Report\n')
print(classification_report(y_test, test_y_pred))
# Lazy Leopard