diff --git a/3217-example1.py b/3217-example1.py
new file mode 100644
index 0000000000000000000000000000000000000000..2e52436c3f817deed0347943c3859e055ddb9370
--- /dev/null
+++ b/3217-example1.py
@@ -0,0 +1,43 @@
+# Adapted from the scikit-learn documentation examples
+
+import matplotlib.pyplot as plt
+from sklearn.linear_model import LogisticRegression
+from sklearn import datasets
+from sklearn.inspection import DecisionBoundaryDisplay
+
+# Import some data from a predefined dataset
+iris = datasets.load_iris()
+X = iris.data[:, :2]  # we only take the first two features
+Y = iris.target
+# Print the shapes of X and Y, and the target values
+print(X.shape)
+print(Y)
+print(Y.shape)
+
+
+# Create an instance of the logistic regression classifier and fit the data
+logreg = LogisticRegression(C=1)
+logreg.fit(X, Y)
+
+_, ax = plt.subplots(figsize=(4, 3))
+DecisionBoundaryDisplay.from_estimator(
+    logreg,
+    X,
+    cmap=plt.cm.Paired,
+    ax=ax,
+    response_method="auto",
+    plot_method="pcolormesh",
+    shading="auto",
+    xlabel="Sepal length",
+    ylabel="Sepal width",
+    eps=0.5,
+)
+
+# Plot the training points
+plt.scatter(X[:, 0], X[:, 1], c=Y, edgecolors="k", cmap=plt.cm.Paired)
+
+
+plt.xticks(())
+plt.yticks(())
+
+plt.show()
diff --git a/3217-example2.py b/3217-example2.py
new file mode 100644
index 0000000000000000000000000000000000000000..8896f7e5c4680a3dfe44e9a69d2b1f8e6dd66659
--- /dev/null
+++ b/3217-example2.py
@@ -0,0 +1,45 @@
+from sklearn import datasets, linear_model
+from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, f1_score
+import matplotlib.pyplot as plt
+
+X_digits, y_digits = datasets.load_digits(return_X_y=True)
+X_digits = X_digits / X_digits.max()  # scale pixel values to [0, 1]
+
+n_samples = len(X_digits)
+
+ratio = 0.9  # fraction of the samples used for training
+print(n_samples)
+# Training data: first 90% of the samples
+X_train = X_digits[: int(ratio * n_samples)]
+y_train = y_digits[: int(ratio * n_samples)]
+print(X_train.shape)
+
+
+# Test data: remaining 10% of the samples
+
+X_test = X_digits[int(ratio * n_samples) :]
+y_test = y_digits[int(ratio * n_samples) :]
+print(X_test.shape)
+
+logistic = linear_model.LogisticRegression(max_iter=1000)
+
+print(
+    "LogisticRegression score: %f"
+    % logistic.fit(X_train, y_train).score(X_test, y_test)
+)
+
+# Compare the actual test labels with the predicted labels
+
+predictions = logistic.predict(X_test)
+
+# print(predictions)
+# print(y_test)
+# Get the macro-averaged F1 score
+
+print(f1_score(y_test, predictions, average="macro"))
+
+# Get and display the confusion matrix
+cm = confusion_matrix(y_test, predictions, labels=logistic.classes_)
+disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=logistic.classes_)
+disp.plot()
+plt.show()
diff --git a/3217-example3.py b/3217-example3.py
new file mode 100644
index 0000000000000000000000000000000000000000..c7b464de4a26735759d96a65b1f46f64485770ae
--- /dev/null
+++ b/3217-example3.py
@@ -0,0 +1,74 @@
+
+import numpy as np
+import matplotlib.pyplot as plt
+import pandas as pd
+
+from sklearn import datasets
+from sklearn.decomposition import PCA
+from sklearn.linear_model import LogisticRegression
+from sklearn.pipeline import Pipeline
+from sklearn.model_selection import GridSearchCV
+from sklearn.preprocessing import StandardScaler
+
+# Define a pipeline to search for the best combination of PCA truncation
+# and classifier regularization.
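+# The pipeline below chains the steps in order: standardize the inputs,
+# project them with PCA, then classify with logistic regression; GridSearchCV
+# tunes the PCA and classifier parameters jointly.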
+pca = PCA()
+# Define a standard scaler to normalize the inputs
+scaler = StandardScaler()
+
+# Set the tolerance to a large value to make the example run faster
+logistic = LogisticRegression(max_iter=10000, tol=0.1)
+pipe = Pipeline(steps=[("scaler", scaler), ("pca", pca), ("logistic", logistic)])
+
+X_digits, y_digits = datasets.load_digits(return_X_y=True)
+
+print(X_digits.shape)
+print(y_digits.shape)
+
+# Parameters of pipelines can be set using '__'-separated parameter names:
+param_grid = {
+    "pca__n_components": [5, 15, 30, 45, 60],
+    "logistic__C": np.logspace(-1, 1, 3),  # search C over 0.1, 1 and 10
+}
+search = GridSearchCV(pipe, param_grid, n_jobs=2, cv=5)
+search.fit(X_digits, y_digits)
+print("Best parameter (CV score=%0.3f):" % search.best_score_)
+print(search.best_params_)
+
+# Plot the PCA spectrum
+pca.fit(X_digits)
+
+fig, (ax0, ax1) = plt.subplots(nrows=2, sharex=True, figsize=(6, 6))
+ax0.plot(
+    np.arange(1, pca.n_components_ + 1), pca.explained_variance_ratio_, "+", linewidth=2
+)
+ax0.set_ylabel("PCA explained variance ratio")
+
+ax0.axvline(
+    search.best_estimator_.named_steps["pca"].n_components,
+    linestyle=":",
+    label="n_components chosen",
+)
+ax0.legend(prop=dict(size=12))
+
+# For each number of components, find the best classifier results
+results = pd.DataFrame(search.cv_results_)
+print(results)
+components_col = "param_pca__n_components"
+best_clfs = results.groupby(components_col).apply(
+    lambda g: g.nlargest(1, "mean_test_score")
+)
+
+best_clfs.plot(
+    x=components_col, y="mean_test_score", yerr="std_test_score", legend=False, ax=ax1
+)
+ax1.set_ylabel("Classification accuracy (val)")
+ax1.set_xlabel("n_components")
+
+plt.xlim(-1, 70)
+
+plt.tight_layout()
+plt.show()
diff --git a/3217-example4.py b/3217-example4.py
new file mode 100644
index 0000000000000000000000000000000000000000..0ea5ae675780c2dae877adc3dca768c029b69bb9
--- /dev/null
+++ b/3217-example4.py
@@ -0,0 +1,56 @@
+
+import matplotlib.pyplot as plt
+import numpy as np
+from sklearn import datasets, linear_model
+from sklearn.metrics import mean_squared_error, r2_score
+
+# Load the diabetes dataset
+diabetes_X, diabetes_y = datasets.load_diabetes(return_X_y=True)
+
+print(diabetes_X.shape)
+# Use only one feature (column 2, the body mass index)
+feature_to_use = 2
+diabetes_X = diabetes_X[:, np.newaxis, feature_to_use]
+print(diabetes_X.shape)
+
+test_samples = 20
+
+# Split the data into training/testing sets
+diabetes_X_train = diabetes_X[:-test_samples]
+diabetes_X_test = diabetes_X[-test_samples:]
+
+# Split the targets into training/testing sets
+diabetes_y_train = diabetes_y[:-test_samples]
+diabetes_y_test = diabetes_y[-test_samples:]
+
+
+# Create the linear regression object
+regr = linear_model.LinearRegression()
+
+# Train the model using the training sets
+regr.fit(diabetes_X_train, diabetes_y_train)
+
+# Make predictions using the testing set
+diabetes_y_pred = regr.predict(diabetes_X_test)
+
+print(diabetes_y_train.shape)
+print(diabetes_y_test.shape)
+
+# The coefficients
+print("Coefficients: \n", regr.coef_)
+# The mean squared error
+print("Mean squared error: %.2f" % mean_squared_error(diabetes_y_test, diabetes_y_pred))
+# The coefficient of determination: 1 is perfect prediction
+print("Coefficient of determination: %.2f" % r2_score(diabetes_y_test, diabetes_y_pred))
+
+# Plot outputs
+plt.scatter(diabetes_X_test, diabetes_y_test, color="black")  # ground-truth test labels
+plt.scatter(diabetes_X_test, diabetes_y_pred, color="red")  # predicted test labels
+
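+# Draw the fitted regression line through the predicted points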
color="blue", linewidth=3)#predicted test labels + +plt.xticks(()) +plt.yticks(()) + +plt.show() diff --git a/3217-example5.py b/3217-example5.py new file mode 100644 index 0000000000000000000000000000000000000000..fa7506699351e8ba3e9036de4caf39cb72018eb9 --- /dev/null +++ b/3217-example5.py @@ -0,0 +1,34 @@ +#Import scikit-learn dataset library +from sklearn import datasets +from sklearn.model_selection import train_test_split +from sklearn import svm, metrics + + + +#Load dataset +cancer = datasets.load_breast_cancer() + +# print the names of the features +print("Features: ", cancer.feature_names) + +# print the label type of cancer('malignant' 'benign') +print("Labels: ", cancer.target_names) + +# print data(feature)shape +print (cancer.data.shape) + + +# Split dataset into training set and test set +X_train, X_test, y_train, y_test = train_test_split(cancer.data, cancer.target, test_size=0.2) # 70% training and 30% test + + +#Create a svm Classifier +clf = svm.SVC(kernel='linear') # Linear Kernel + +#Train the model using the training sets +clf.fit(X_train, y_train) + +#Predict the response for test dataset +y_pred = clf.predict(X_test) + +print("Accuracy:",metrics.accuracy_score(y_test, y_pred))