Delete 3217-classification-lr-example3.py

116f766e · Chauhan Chauhan · 6446b759 · 6446b759
Commit 116f766e authored Apr 23, 2023 by Chauhan Chauhan
--- a/3217-classification-lr-example3.py
+++ b/3217-classification-lr-example3.py
-
-import numpy as np
-import matplotlib.pyplot as plt
-import pandas as pd
-
-from sklearn import datasets
-from sklearn.decomposition import PCA
-from sklearn.linear_model import LogisticRegression
-from sklearn.pipeline import Pipeline
-from sklearn.model_selection import GridSearchCV
-from sklearn.preprocessing import StandardScaler
-
-# Define a pipeline to search for the best combination of PCA truncation
-# and classifier regularization.
-pca = PCA()
-# Define a Standard Scaler to normalize inputs
-scaler = StandardScaler()
-
-# set the tolerance to a large value to make the example faster
-logistic = LogisticRegression(max_iter=10000, tol=0.1)
-pipe = Pipeline(steps=[("scaler", scaler), ("pca", pca), ("logistic", logistic)])
-
-X_digits, y_digits = datasets.load_digits(return_X_y=True)
-
-
-
-
-
-print (X_digits.shape)
-print (y_digits.shape)
-
-# Parameters of pipelines can be set using '__' separated parameter names:
-param_grid = {
-    "pca__n_components": [5, 15, 30, 45, 60],
-    "logistic__C": np.logspace(-1, 1, 1),
-}
-search = GridSearchCV(pipe, param_grid, n_jobs=2,cv=5)
-search.fit(X_digits, y_digits)
-print("Best parameter (CV score=%0.3f):" % search.best_score_)
-print(search.best_params_)
-
-# Plot the PCA spectrum
-pca.fit(X_digits)
-
-fig, (ax0, ax1) = plt.subplots(nrows=2, sharex=True, figsize=(6, 6))
-ax0.plot(
-    np.arange(1, pca.n_components_ + 1), pca.explained_variance_ratio_, "+", linewidth=2
-)
-ax0.set_ylabel("PCA explained variance ratio")
-
-ax0.axvline(
-    search.best_estimator_.named_steps["pca"].n_components,
-    linestyle=":",
-    label="n_components chosen",
-)
-ax0.legend(prop=dict(size=12))
-
-# For each number of components, find the best classifier results
-results = pd.DataFrame(search.cv_results_)
-print (results)
-components_col = "param_pca__n_components"
-best_clfs = results.groupby(components_col).apply(
-    lambda g: g.nlargest(1, "mean_test_score")
-)
-
-best_clfs.plot(
-    x=components_col, y="mean_test_score", yerr="std_test_score", legend=False, ax=ax1
-)
-ax1.set_ylabel("Classification accuracy (val)")
-ax1.set_xlabel("n_components")
-
-plt.xlim(-1, 70)
-
-plt.tight_layout()
-plt.show()