Commit 6e0645c2 authored by jags

files for comp3217 lab

parent ea1dff47
# Logistic regression on the iris dataset (adapted from the scikit-learn examples)
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn import datasets
from sklearn.inspection import DecisionBoundaryDisplay
# Load the iris dataset, one of scikit-learn's bundled example datasets
iris = datasets.load_iris()
X = iris.data[:, :2] # we only take the first two features.
Y = iris.target
# Print the shape of X, the target values in Y, and the shape of Y
print(X.shape)
print(Y)
print(Y.shape)
# Create an instance of Logistic Regression Classifier and fit the data.
logreg = LogisticRegression(C=1)
logreg.fit(X, Y)
_, ax = plt.subplots(figsize=(4, 3))
DecisionBoundaryDisplay.from_estimator(
    logreg,
    X,
    cmap=plt.cm.Paired,
    ax=ax,
    response_method="auto",
    plot_method="pcolormesh",
    shading="auto",
    xlabel="Sepal length",
    ylabel="Sepal width",
    eps=0.5,
)
# Plot the training points
plt.scatter(X[:, 0], X[:, 1], c=Y, edgecolors="k", cmap=plt.cm.Paired)
plt.xticks(())
plt.yticks(())
plt.show()
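# A small illustrative addition (not in the original lab code): the fitted
# model can classify new sepal measurements. The sample values below are
# made up purely for demonstration.
new_samples = [[5.0, 3.5], [6.5, 2.8]]  # [sepal length, sepal width]
print(logreg.predict(new_samples))        # predicted class indices
print(logreg.predict_proba(new_samples))  # per-class probabilities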
# Logistic regression on the handwritten digits dataset
from sklearn import datasets, linear_model
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, f1_score
import matplotlib.pyplot as plt
X_digits, y_digits = datasets.load_digits(return_X_y=True)
X_digits = X_digits / X_digits.max()  # scale pixel values to [0, 1]
n_samples = len(X_digits)
ratio = 0.9
print(n_samples)
# Training data: the first 90% of the samples
X_train = X_digits[: int(ratio * n_samples)]
y_train = y_digits[: int(ratio * n_samples)]
print(X_train.shape)
# Test data: the remaining 10%
X_test = X_digits[int(ratio * n_samples) :]
y_test = y_digits[int(ratio * n_samples) :]
print(X_test.shape)
logistic = linear_model.LogisticRegression(max_iter=1000)
print(
    "LogisticRegression score: %f"
    % logistic.fit(X_train, y_train).score(X_test, y_test)
)
# Compare the predicted labels against the actual test labels
predictions = logistic.predict(X_test)
#print (predictions)
#print (y_test)
# Macro-averaged F1 score across the ten digit classes
print(f1_score(y_test, predictions, average='macro'))
# Compute and plot the confusion matrix
cm = confusion_matrix(y_test, predictions, labels=logistic.classes_)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=logistic.classes_)
disp.plot()
plt.show()
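# An optional follow-up (added for illustration): classification_report, also
# in sklearn.metrics, summarises per-class precision, recall and F1 in one
# call, complementing the single macro F1 printed above.
from sklearn.metrics import classification_report
print(classification_report(y_test, predictions))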
# Pipeline: StandardScaler -> PCA -> logistic regression, tuned with GridSearchCV
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import datasets
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
# Define a pipeline to search for the best combination of PCA truncation
# and classifier regularization.
pca = PCA()
# Define a Standard Scaler to normalize inputs
scaler = StandardScaler()
# set the tolerance to a large value to make the example faster
logistic = LogisticRegression(max_iter=10000, tol=0.1)
pipe = Pipeline(steps=[("scaler", scaler), ("pca", pca), ("logistic", logistic)])
X_digits, y_digits = datasets.load_digits(return_X_y=True)
print(X_digits.shape)
print(y_digits.shape)
# Parameters of pipelines can be set using '__' separated parameter names:
param_grid = {
    "pca__n_components": [5, 15, 30, 45, 60],
    "logistic__C": np.logspace(-1, 1, 1),  # a single value: C = 0.1
}
search = GridSearchCV(pipe, param_grid, n_jobs=2, cv=5)
search.fit(X_digits, y_digits)
print("Best parameter (CV score=%0.3f):" % search.best_score_)
print(search.best_params_)
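# A small illustrative addition: GridSearchCV refits the best pipeline on the
# full data (refit=True by default), so it can be used directly for prediction.
# Here it is applied to the first five digits, which it has already seen.
print(search.best_estimator_.predict(X_digits[:5]))
print(y_digits[:5])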
# Plot the PCA spectrum
pca.fit(X_digits)
fig, (ax0, ax1) = plt.subplots(nrows=2, sharex=True, figsize=(6, 6))
ax0.plot(
    np.arange(1, pca.n_components_ + 1), pca.explained_variance_ratio_, "+", linewidth=2
)
ax0.set_ylabel("PCA explained variance ratio")
ax0.axvline(
    search.best_estimator_.named_steps["pca"].n_components,
    linestyle=":",
    label="n_components chosen",
)
ax0.legend(prop=dict(size=12))
# For each number of components, find the best classifier results
results = pd.DataFrame(search.cv_results_)
print(results)
components_col = "param_pca__n_components"
best_clfs = results.groupby(components_col).apply(
    lambda g: g.nlargest(1, "mean_test_score")
)
best_clfs.plot(
    x=components_col, y="mean_test_score", yerr="std_test_score", legend=False, ax=ax1
)
ax1.set_ylabel("Classification accuracy (val)")
ax1.set_xlabel("n_components")
plt.xlim(-1, 70)
plt.tight_layout()
plt.show()
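# An optional check (added for illustration): the cumulative explained variance
# shows roughly how much of the data the chosen number of components retains.
# Note that pca here was refitted on the unscaled digits above, so this is an
# approximation relative to the scaled data used inside the pipeline.
cum_var = np.cumsum(pca.explained_variance_ratio_)
n_chosen = search.best_params_["pca__n_components"]
print("Variance retained by %d components: %.3f" % (n_chosen, cum_var[n_chosen - 1]))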
# Ordinary least squares regression on one feature of the diabetes dataset
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score
# Load the diabetes dataset
diabetes_X, diabetes_y = datasets.load_diabetes(return_X_y=True)
print(diabetes_X.shape)
# Use only one feature (column 2, the body mass index)
feature_to_use = 2
diabetes_X = diabetes_X[:, np.newaxis, feature_to_use]
print(diabetes_X.shape)
test_samples = 20
# Split the data into training/testing sets
diabetes_X_train = diabetes_X[:-test_samples]
diabetes_X_test = diabetes_X[-test_samples:]
# Split the targets into training/testing sets
diabetes_y_train = diabetes_y[:-test_samples]
diabetes_y_test = diabetes_y[-test_samples:]
# Create linear regression object
regr = linear_model.LinearRegression()
# Train the model using the training sets
regr.fit(diabetes_X_train, diabetes_y_train)
# Make predictions on the test set
diabetes_y_pred = regr.predict(diabetes_X_test)
print(diabetes_y_train.shape)
print(diabetes_y_test.shape)
# The coefficients
print("Coefficients: \n", regr.coef_)
# The mean squared error
print("Mean squared error: %.2f" % mean_squared_error(diabetes_y_test, diabetes_y_pred))
# The coefficient of determination: 1 is perfect prediction
print("Coefficient of determination: %.2f" % r2_score(diabetes_y_test, diabetes_y_pred))
# Plot outputs
plt.scatter(diabetes_X_test, diabetes_y_test, color="black")  # ground-truth test targets
plt.scatter(diabetes_X_test, diabetes_y_pred, color="red")  # predicted test targets
plt.plot(diabetes_X_test, diabetes_y_pred, color="blue", linewidth=3)  # fitted regression line
plt.xticks(())
plt.yticks(())
plt.show()
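# A hedged alternative (not in the original lab): train_test_split performs an
# equivalent split but shuffles first; test_size=20 holds out 20 samples, and
# random_state=0 is an arbitrary seed chosen here for reproducibility.
from sklearn.model_selection import train_test_split
X_tr, X_te, y_tr, y_te = train_test_split(diabetes_X, diabetes_y, test_size=20, random_state=0)
print(X_tr.shape, X_te.shape)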
# Linear SVM classification on the breast cancer Wisconsin dataset
# Import the scikit-learn dataset library
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn import svm, metrics
#Load dataset
cancer = datasets.load_breast_cancer()
# Print the names of the 30 features
print("Features: ", cancer.feature_names)
# Print the class labels ('malignant', 'benign')
print("Labels: ", cancer.target_names)
# Print the shape of the feature matrix
print(cancer.data.shape)
# Split dataset into training set and test set
X_train, X_test, y_train, y_test = train_test_split(cancer.data, cancer.target, test_size=0.2)  # 80% training and 20% test
# Create an SVM classifier with a linear kernel
clf = svm.SVC(kernel='linear')
#Train the model using the training sets
clf.fit(X_train, y_train)
# Predict the response for the test dataset
y_pred = clf.predict(X_test)
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))