diff --git a/3217-classification-lr-example1.py b/3217-classification-lr-example1.py
deleted file mode 100644
index 2e52436c3f817deed0347943c3859e055ddb9370..0000000000000000000000000000000000000000
--- a/3217-classification-lr-example1.py
+++ /dev/null
@@ -1,43 +0,0 @@
-# Adapted from the scikit-learn examples
-
-import matplotlib.pyplot as plt
-from sklearn.linear_model import LogisticRegression
-from sklearn import datasets
-from sklearn.inspection import DecisionBoundaryDisplay
-
-# Import some data from a predefined dataset
-iris = datasets.load_iris()
-X = iris.data[:, :2]  # we only take the first two features
-Y = iris.target
-# Print the shapes of X and Y, and the target values
-print(X.shape)
-print(Y)
-print(Y.shape)
-
-
-# Create an instance of the logistic regression classifier and fit the data
-logreg = LogisticRegression(C=1)
-logreg.fit(X, Y)
-
-_, ax = plt.subplots(figsize=(4, 3))
-DecisionBoundaryDisplay.from_estimator(
-    logreg,
-    X,
-    cmap=plt.cm.Paired,
-    ax=ax,
-    response_method="auto",
-    plot_method="pcolormesh",
-    shading="auto",
-    xlabel="Sepal length",
-    ylabel="Sepal width",
-    eps=0.5,
-)
-
-# Plot the training points
-plt.scatter(X[:, 0], X[:, 1], c=Y, edgecolors="k", cmap=plt.cm.Paired)
-
-
-plt.xticks(())
-plt.yticks(())
-
-plt.show()
diff --git a/3217-classification-lr-example2.py b/3217-classification-lr-example2.py
deleted file mode 100644
index 8896f7e5c4680a3dfe44e9a69d2b1f8e6dd66659..0000000000000000000000000000000000000000
--- a/3217-classification-lr-example2.py
+++ /dev/null
@@ -1,44 +0,0 @@
-from sklearn import datasets, linear_model
-from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, f1_score
-import matplotlib.pyplot as plt
-
-X_digits, y_digits = datasets.load_digits(return_X_y=True)
-X_digits = X_digits / X_digits.max()  # scale pixel values to [0, 1]
-
-n_samples = len(X_digits)
-
-ratio = 0.9
-print(n_samples)
-# Training data: the first 90% of the samples
-X_train = X_digits[: int(ratio * n_samples)]
-y_train = y_digits[: int(ratio * n_samples)]
-print(X_train.shape)
-
-
-# Test data: the remaining 10%
-
-X_test = X_digits[int(ratio * n_samples) :]
-y_test = y_digits[int(ratio * n_samples) :]
-print(X_test.shape)
-
-logistic = linear_model.LogisticRegression(max_iter=1000)
-
-print(
-    "LogisticRegression score: %f"
-    % logistic.fit(X_train, y_train).score(X_test, y_test))
-
-# Compare the actual test labels against the predicted labels
-
-predictions = logistic.predict(X_test)
-
-# print(predictions)
-# print(y_test)
-# Get the macro-averaged F1 score
-
-print(f1_score(y_test, predictions, average='macro'))
-
-# Get the confusion matrix
-cm = confusion_matrix(y_test, predictions, labels=logistic.classes_)
-disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=logistic.classes_)
-disp.plot()
-plt.show()
diff --git a/3217-classification-lr-example3.py b/3217-classification-lr-example3.py
deleted file mode 100644
index c7b464de4a26735759d96a65b1f46f64485770ae..0000000000000000000000000000000000000000
--- a/3217-classification-lr-example3.py
+++ /dev/null
@@ -1,75 +0,0 @@
-
-import numpy as np
-import matplotlib.pyplot as plt
-import pandas as pd
-
-from sklearn import datasets
-from sklearn.decomposition import PCA
-from sklearn.linear_model import LogisticRegression
-from sklearn.pipeline import Pipeline
-from sklearn.model_selection import GridSearchCV
-from sklearn.preprocessing import StandardScaler
-
-# Define a pipeline to search for the best combination of PCA truncation
-# and classifier regularization.
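-# Because the scaler and PCA sit inside the Pipeline, GridSearchCV re-fits
-# them on each training fold, so no information leaks from held-out folds.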
-pca = PCA()
-# Define a standard scaler to normalize the inputs
-scaler = StandardScaler()
-
-# Set the tolerance to a large value to make the example faster
-logistic = LogisticRegression(max_iter=10000, tol=0.1)
-pipe = Pipeline(steps=[("scaler", scaler), ("pca", pca), ("logistic", logistic)])
-
-X_digits, y_digits = datasets.load_digits(return_X_y=True)
-
-
-
-
-
-print(X_digits.shape)
-print(y_digits.shape)
-
-# Parameters of pipelines can be set using '__'-separated parameter names:
-param_grid = {
-    "pca__n_components": [5, 15, 30, 45, 60],
-    "logistic__C": np.logspace(-1, 1, 1),  # a single value, C=0.1, keeps the search fast
-}
-search = GridSearchCV(pipe, param_grid, n_jobs=2, cv=5)
-search.fit(X_digits, y_digits)
-print("Best parameter (CV score=%0.3f):" % search.best_score_)
-print(search.best_params_)
-
-# Plot the PCA spectrum
-pca.fit(X_digits)
-
-fig, (ax0, ax1) = plt.subplots(nrows=2, sharex=True, figsize=(6, 6))
-ax0.plot(
-    np.arange(1, pca.n_components_ + 1), pca.explained_variance_ratio_, "+", linewidth=2
-)
-ax0.set_ylabel("PCA explained variance ratio")
-
-ax0.axvline(
-    search.best_estimator_.named_steps["pca"].n_components,
-    linestyle=":",
-    label="n_components chosen",
-)
-ax0.legend(prop=dict(size=12))
-
-# For each number of components, find the best classifier results
-results = pd.DataFrame(search.cv_results_)
-print(results)
-components_col = "param_pca__n_components"
-best_clfs = results.groupby(components_col).apply(
-    lambda g: g.nlargest(1, "mean_test_score")
-)
-
-best_clfs.plot(
-    x=components_col, y="mean_test_score", yerr="std_test_score", legend=False, ax=ax1
-)
-ax1.set_ylabel("Classification accuracy (val)")
-ax1.set_xlabel("n_components")
-
-plt.xlim(-1, 70)
-
-plt.tight_layout()
-plt.show()
diff --git a/3217-classification-lr-example4.py b/3217-classification-lr-example4.py
deleted file mode 100644
index 0ea5ae675780c2dae877adc3dca768c029b69bb9..0000000000000000000000000000000000000000
--- a/3217-classification-lr-example4.py
+++ /dev/null
@@ -1,59 +0,0 @@
-
-import matplotlib.pyplot as plt
-import numpy as np
-from sklearn import datasets, linear_model
-from sklearn.metrics import mean_squared_error, r2_score
-
-# Load the diabetes dataset
-diabetes_X, diabetes_y = datasets.load_diabetes(return_X_y=True)
-
-print(diabetes_X.shape)
-# Use only one feature (column 2, body mass index)
-feature_to_use = 2
-diabetes_X = diabetes_X[:, np.newaxis, feature_to_use]
-print(diabetes_X.shape)
-
-test_samples = 20
-
-# Split the data into training/testing sets
-diabetes_X_train = diabetes_X[:-test_samples]
-diabetes_X_test = diabetes_X[-test_samples:]
-
-# Split the targets into training/testing sets
-diabetes_y_train = diabetes_y[:-test_samples]
-diabetes_y_test = diabetes_y[-test_samples:]
-
-
-
-
-# Create a linear regression object
-regr = linear_model.LinearRegression()
-
-
-# Train the model using the training sets
-regr.fit(diabetes_X_train, diabetes_y_train)
-
-# Make predictions using the testing set
-diabetes_y_pred = regr.predict(diabetes_X_test)
-
-print(diabetes_y_train.shape)
-
-print(diabetes_y_test.shape)
-
-# The coefficients
-print("Coefficients: \n", regr.coef_)
-# The mean squared error
-print("Mean squared error: %.2f" % mean_squared_error(diabetes_y_test, diabetes_y_pred))
-# The coefficient of determination: 1 is perfect prediction
-print("Coefficient of determination: %.2f" % r2_score(diabetes_y_test, diabetes_y_pred))
-
-# Plot outputs
-plt.scatter(diabetes_X_test, diabetes_y_test, color="black")  # ground-truth test labels
-plt.scatter(diabetes_X_test, diabetes_y_pred, color="red")  # predicted test labels
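-# With a single input feature, the red predicted points lie exactly on the
-# fitted regression line plotted below.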
-
-plt.plot(diabetes_X_test, diabetes_y_pred, color="blue", linewidth=3)  # fitted regression line
-
-plt.xticks(())
-plt.yticks(())
-
-plt.show()
diff --git a/3217-classification-lr-example5.py b/3217-classification-lr-example5.py
deleted file mode 100644
index fa7506699351e8ba3e9036de4caf39cb72018eb9..0000000000000000000000000000000000000000
--- a/3217-classification-lr-example5.py
+++ /dev/null
@@ -1,34 +0,0 @@
-# Import the scikit-learn dataset library
-from sklearn import datasets
-from sklearn.model_selection import train_test_split
-from sklearn import svm, metrics
-
-
-
-# Load the dataset
-cancer = datasets.load_breast_cancer()
-
-# Print the names of the features
-print("Features: ", cancer.feature_names)
-
-# Print the class labels ('malignant', 'benign')
-print("Labels: ", cancer.target_names)
-
-# Print the shape of the feature data
-print(cancer.data.shape)
-
-
-# Split the dataset into training and test sets
-X_train, X_test, y_train, y_test = train_test_split(cancer.data, cancer.target, test_size=0.2)  # 80% training and 20% test
-
-
-# Create an SVM classifier with a linear kernel
-clf = svm.SVC(kernel='linear')
-
-# Train the model using the training set
-clf.fit(X_train, y_train)
-
-# Predict the response for the test dataset
-y_pred = clf.predict(X_test)
-
-print("Accuracy:", metrics.accuracy_score(y_test, y_pred))
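Of the five deleted scripts, only example2 evaluates beyond a single score (macro F1 plus a confusion matrix); example5 reports accuracy alone, which can flatter a classifier on the class-imbalanced breast-cancer data (212 malignant vs. 357 benign samples). A minimal sketch of the same follow-up for example5, reusing its clf, X_test, and y_test names purely for illustration:

    import matplotlib.pyplot as plt
    from sklearn.metrics import ConfusionMatrixDisplay, f1_score

    # clf, X_test, y_test come from the deleted 3217-classification-lr-example5.py
    y_pred = clf.predict(X_test)

    # Macro-averaging weights both classes equally, regardless of support
    print("Macro F1:", f1_score(y_test, y_pred, average="macro"))

    # Rows are true classes, columns are predicted classes
    ConfusionMatrixDisplay.from_predictions(y_test, y_pred)
    plt.show()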