Выводы:
Выводы:
1
2
3
4
# Build a bag-of-words matrix from `corpus` (defined outside this snippet).
vectorizer = CountVectorizer()
# Learn the vocabulary and encode the documents in a single call.
bow = vectorizer.fit_transform(corpus)
# Notebook-style display of the fitted vocabulary
# (presumably a token -> column-index mapping; confirm against the scikit-learn docs).
vectorizer.vocabulary_
# Convert the result to a dense array for display; the output printed after
# this snippet is a 4 x 16 array of 0/1 counts.
bow.toarray()
1
2
3
4
array([[0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1],
[0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0],
[0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1],
[1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0]])
Выводы:
Выводы:
Выводы:
1
2
3
4
5
6
7
8
# Three different base classifiers; the two that accept a seed are pinned
# to random_state=1.
clf1 = LogisticRegression(random_state=1)
clf2 = RandomForestClassifier(random_state=1, n_estimators=50)
clf3 = GaussianNB()

# Combine them in a voting ensemble configured for hard voting.
eclf = VotingClassifier(
    voting="hard",
    estimators=[
        ("lr", clf1),
        ("rf", clf2),
        ("gnb", clf3),
    ],
)
1
2
3
4
5
6
7
8
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
# Bagging ensemble of 1500 decision trees, seeded for reproducibility.
tree = DecisionTreeClassifier()
bagging_clf = BaggingClassifier(
    # `base_estimator` was renamed to `estimator` in scikit-learn 1.2 and the
    # old keyword was later removed; the rest of this file already uses the
    # 1.2+ API (e.g. OneHotEncoder(sparse_output=...)).
    estimator=tree,
    n_estimators=1500,
    random_state=42,
)
# X_train / y_train are defined outside this snippet.
bagging_clf.fit(X_train, y_train)
Выводы:
1
2
3
4
5
6
7
8
9
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import StackingRegressor
# Meta-learner that combines the base estimators' predictions: a small,
# subsampled gradient-boosting model with a fixed seed.
final_estimator = GradientBoostingRegressor(
    random_state=42,
    n_estimators=25,
    subsample=0.5,
    min_samples_leaf=25,
    max_features=1,
)

# `estimators` (the list of base regressors) is defined outside this snippet.
reg = StackingRegressor(
    final_estimator=final_estimator,
    estimators=estimators,
)
Выводы:
Выводы:
Выводы:
Выводы:
![](https://onnx.ai/sklearn-onnx/_images/blockdiag-0e2bbe287bfc020181cb7981832f234c906f12a6.png){: .align-center style="width: 60%;"}
1
2
3
4
5
6
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.decomposition import PCA
# Two-step pipeline: PCA first, then an SVC on the PCA output.
pipe = Pipeline(
    steps=[
        ("reduce_dim", PCA()),
        ("clf", SVC()),
    ]
)
# `iris` is loaded outside this snippet.
pipe.fit(iris.data, iris.target)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
# Columns are addressed by position; the trailing comments give the
# corresponding column names.
numeric_features = [0, 1, 2]  # ["vA", "vB", "vC"]
categorical_features = [3, 4]  # ["vcat", "vcat2"]

# The original snippet wrote `LogisticRegression(C=0.01, ...)`; a bare `...`
# after a keyword argument is a SyntaxError, so the placeholder is dropped.
# Add any further hyperparameters here as keyword arguments.
classifier = LogisticRegression(C=0.01)

# Numeric branch: median imputation followed by standardisation.
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
])

# Categorical branch: sparse one-hot encoding (unknown categories ignored),
# then reduced to a single component with truncated SVD.
categorical_transformer = Pipeline(steps=[
    ('onehot', OneHotEncoder(sparse_output=True, handle_unknown='ignore')),
    ('tsvd', TruncatedSVD(n_components=1, algorithm='arpack', tol=1e-4)),
])

# Route each column group through its own branch.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features),
    ])

# NOTE(review): the step name 'precprocessor' looks like a typo for
# 'preprocessor'. It is kept as-is because step names are runtime keys
# (named_steps, `<step>__<param>` in parameter grids) that other code may use.
model = Pipeline(steps=[
    ('precprocessor', preprocessor),
    ('classifier', classifier),
])
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# Numeric branch: fill gaps with the column median, then standardise.
simple_imputer = SimpleImputer(strategy='median')
std_scaler = StandardScaler()
pipe_num = Pipeline([('imputer', simple_imputer), ('scaler', std_scaler)])

# Categorical branch: fill gaps with the literal 'unknown', then one-hot
# encode to a dense array.
s_imputer = SimpleImputer(strategy='constant', fill_value='unknown')
# `sparse=` was renamed to `sparse_output=` in scikit-learn 1.2 and the old
# keyword was later removed; this matches the OneHotEncoder call used
# elsewhere in this file.
ohe_encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
pipe_cat = Pipeline([('imputer', s_imputer), ('encoder', ohe_encoder)])

# Split the columns of `features` by dtype: non-object columns go to the
# numeric branch, object columns to the categorical branch.
col_transformer = ColumnTransformer([
    ('num_preproc', pipe_num,
     [x for x in features.columns if features[x].dtype != 'object']),
    ('cat_preproc', pipe_cat,
     [x for x in features.columns if features[x].dtype == 'object']),
])

# `model`, `features`, `features_train`, `target_train` and `features_test`
# are defined outside this snippet.
final_pipe = Pipeline([('preproc', col_transformer),
                       ('model', model)])
final_pipe.fit(features_train, target_train)
preds = final_pipe.predict(features_test)
1
2
3
4
5
6
7
8
9
10
11
12
from numpy.random import randint
from sklearn.base import BaseEstimator, TransformerMixin
class CustomTransformer(BaseEstimator, TransformerMixin):
    """Stateless transformer that appends a ``random_int`` column.

    Intended as a minimal example of a custom pipeline step: ``fit`` learns
    nothing and ``transform`` adds one column of random integers.
    """

    def fit(self, X, y=None):
        """Do nothing and return ``self`` so the step can be chained."""
        return self

    def transform(self, X, y=None):
        """Return a copy of ``X`` with an extra ``random_int`` column.

        The new column holds one integer per row, drawn by
        ``numpy.random.randint(0, 10, n_rows)`` (low inclusive, high
        exclusive). ``y`` is accepted for API compatibility and ignored.
        """
        # Perform arbitrary transformation on a copy: assigning the column
        # directly on the argument would silently modify the caller's frame.
        X = X.copy()
        X["random_int"] = randint(0, 10, X.shape[0])
        return X
1
2
3
4
5
6
7
8
9
10
11
12
import pandas as pd
from sklearn.pipeline import Pipeline
# Small 3 x 3 demo frame.
df = pd.DataFrame(
    {
        "a": [1, 2, 3],
        "b": [4, 5, 6],
        "c": [7, 8, 9],
    }
)

# Single-step pipeline wrapping the custom transformer.
pipe = Pipeline(steps=[("use_custom_transformer", CustomTransformer())])

# Fit the step and collect its output, which carries the extra
# "random_int" column added by CustomTransformer.transform.
transformed_df = pipe.fit_transform(df)
Выводы: