Skip to content
Snippets Groups Projects
Commit 0f0ce335 authored by christosPro123's avatar christosPro123
Browse files

Removed unnecessary files, added report draft

parent f7165931
No related branches found
No related tags found
No related merge requests found
File added
%% Cell type:code id: tags:
``` python
import pandas as pd
import numpy as np
import sklearn
import scipy
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.impute import SimpleImputer
from sklearn.model_selection import GridSearchCV
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import GridSearchCV
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.model_selection import RandomizedSearchCV
```
%% Cell type:code id: tags:
``` python
# Load the labelled training set from CSV into a pandas DataFrame.
train_df = pd.read_csv('TrainingDataBinary.csv')
```
%% Cell type:code id: tags:
``` python
# DataFrame.info() prints its summary itself and returns None, so wrapping it
# in print() only appended a stray "None" line after the summary.
train_df.info()
```
%% Output
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6000 entries, 0 to 5999
Columns: 129 entries, 1 to 129
dtypes: float64(112), int64(17)
memory usage: 5.9 MB
None
%% Cell type:code id: tags:
``` python
# Histogram of column '129' (the column used as the target label further down)
# to check how its values are distributed.
plt.hist(train_df['129'])
```
%% Output
(array([3000., 0., 0., 0., 0., 0., 0., 0., 0.,
3000.]),
array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. ]),
<BarContainer object of 10 artists>)
%% Cell type:code id: tags:
``` python
# Column '129' is the label; every other column is a feature.
y = train_df['129']
X = train_df.drop(columns='129')

# +/-inf cannot be imputed or scaled, so overwrite those entries with 0.
X.replace([np.inf, -np.inf], 0, inplace=True)

# Fill remaining NaN values with the mean of their column.
imputer = SimpleImputer(strategy='mean')
X_imputed = imputer.fit_transform(X)

# Standardise the imputed features (the result is a NumPy array).
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_imputed)
```
%% Cell type:code id: tags:
``` python
# PCA: project the standardised features onto 100 principal components.
# random_state is pinned because, for a matrix of this shape, the default
# svd_solver='auto' can select the randomized SVD (depending on the
# scikit-learn version), which would make the components vary between runs.
pca = PCA(n_components=100, random_state=42)
X_pca = pca.fit_transform(X_scaled)
```
%% Cell type:code id: tags:
``` python
# Hold out 20% of the PCA-transformed samples for evaluation.
X_train, X_test, y_train, y_test = train_test_split(X_pca, y, test_size=0.2, random_state=42)
# Train the baseline model. The default max_iter=100 is too low for lbfgs on
# this data (the fit stopped with a ConvergenceWarning), so allow more
# iterations for the optimiser to converge.
log_reg = LogisticRegression(max_iter=1000)
log_reg.fit(X_train, y_train)
# Evaluate the model on the held-out test set.
y_pred = log_reg.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
print("Accuracy:", accuracy)
print("Classification Report:\n", report)
```
%% Output
Accuracy: 0.895
Classification Report:
precision recall f1-score support
0 0.86 0.94 0.90 588
1 0.93 0.86 0.89 612
accuracy 0.90 1200
macro avg 0.90 0.90 0.89 1200
weighted avg 0.90 0.90 0.89 1200
c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
%% Cell type:code id: tags:
``` python
# Confusion matrix of the held-out predictions; the layout is
# [[TN, FP], [FN, TP]] as described in the markdown cell below.
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", conf_matrix)
```
%% Output
Confusion Matrix:
[[550 38]
[ 88 524]]
%% Cell type:markdown id: tags:
Confusion matrix layout (rows = actual class, columns = predicted class):
[[True Negatives (TN), False Positives (FP)],
[False Negatives (FN), True Positives (TP)]]
%% Cell type:markdown id: tags:
Fine tuning
%% Cell type:code id: tags:
``` python
# Hyperparameter search space for LogisticRegression.
# Not every solver supports every penalty, so the space is split into one
# sub-grid per penalty that lists only the compatible solvers. Sampling from a
# single flat grid produced invalid pairs (e.g. lbfgs + l1, liblinear without
# a penalty), so 120 of the 300 fits failed and were scored as NaN.
C_values = np.logspace(-4, 4, 10)
max_iter_values = [100, 500, 1000]
param_dist = [
    {
        'penalty': ['l2'],
        'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'],
        'C': C_values,
        'max_iter': max_iter_values,
    },
    {
        'penalty': ['l1'],
        'solver': ['liblinear', 'saga'],
        'C': C_values,
        'max_iter': max_iter_values,
    },
    {
        # elasticnet is only supported by saga and needs an explicit l1_ratio
        # (leaving it as None raised a TypeError inside the solver).
        'penalty': ['elasticnet'],
        'solver': ['saga'],
        'l1_ratio': [0.25, 0.5, 0.75],
        'C': C_values,
        'max_iter': max_iter_values,
    },
    {
        # No regularisation: C is ignored in this case, so it is left out.
        # None replaces the string 'none', deprecated since scikit-learn 1.2.
        'penalty': [None],
        'solver': ['newton-cg', 'lbfgs', 'sag', 'saga'],
        'max_iter': max_iter_values,
    },
]
# Create the RandomizedSearchCV object with the logistic regression model,
# hyperparameters, and 3-fold cross-validation. random_state on the estimator
# keeps the sag/saga solvers (which shuffle the data) reproducible.
log_reg = LogisticRegression(random_state=42)
random_search = RandomizedSearchCV(log_reg, param_dist, n_iter=100, cv=3, n_jobs=-1, verbose=1, random_state=42)
# Fit the random search to the training data
random_search.fit(X_train, y_train)
# Check the best hyperparameters found
print("Best Parameters:", random_search.best_params_)
print("Best Score:", random_search.best_score_)
# Use the best estimator (refit on the full training split) for predictions
# and evaluation on the held-out test set
best_model = random_search.best_estimator_
y_pred = best_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)
print("Accuracy:", accuracy)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", report)
```
%% Output
Fitting 3 folds for each of 100 candidates, totalling 300 fits
c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py:378: FitFailedWarning:
120 fits failed out of a total of 300.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.
Below are more details about the failures:
--------------------------------------------------------------------------------
24 fits failed with the following error:
Traceback (most recent call last):
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 686, in _fit_and_score
estimator.fit(X_train, y_train, **fit_params)
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py", line 1162, in fit
solver = _check_solver(self.solver, self.penalty, self.dual)
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py", line 54, in _check_solver
raise ValueError(
ValueError: Solver newton-cg supports only 'l2' or 'none' penalties, got elasticnet penalty.
--------------------------------------------------------------------------------
15 fits failed with the following error:
Traceback (most recent call last):
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 686, in _fit_and_score
estimator.fit(X_train, y_train, **fit_params)
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py", line 1162, in fit
solver = _check_solver(self.solver, self.penalty, self.dual)
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py", line 71, in _check_solver
raise ValueError("penalty='none' is not supported for the liblinear solver")
ValueError: penalty='none' is not supported for the liblinear solver
--------------------------------------------------------------------------------
18 fits failed with the following error:
Traceback (most recent call last):
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 686, in _fit_and_score
estimator.fit(X_train, y_train, **fit_params)
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py", line 1162, in fit
solver = _check_solver(self.solver, self.penalty, self.dual)
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py", line 54, in _check_solver
raise ValueError(
ValueError: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.
--------------------------------------------------------------------------------
6 fits failed with the following error:
Traceback (most recent call last):
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 686, in _fit_and_score
estimator.fit(X_train, y_train, **fit_params)
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py", line 1291, in fit
fold_coefs_ = Parallel(n_jobs=self.n_jobs, verbose=self.verbose, prefer=prefer)(
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\utils\parallel.py", line 63, in __call__
return super().__call__(iterable_with_config)
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\joblib\parallel.py", line 1085, in __call__
if self.dispatch_one_batch(iterator):
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\joblib\parallel.py", line 901, in dispatch_one_batch
self._dispatch(tasks)
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\joblib\parallel.py", line 819, in _dispatch
job = self._backend.apply_async(batch, callback=cb)
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\joblib\_parallel_backends.py", line 208, in apply_async
result = ImmediateResult(func)
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\joblib\_parallel_backends.py", line 597, in __init__
self.results = batch()
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\joblib\parallel.py", line 288, in __call__
return [func(*args, **kwargs)
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\joblib\parallel.py", line 288, in <listcomp>
return [func(*args, **kwargs)
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\utils\parallel.py", line 123, in __call__
return self.function(*args, **kwargs)
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py", line 521, in _logistic_regression_path
alpha = (1.0 / C) * (1 - l1_ratio)
TypeError: unsupported operand type(s) for -: 'int' and 'NoneType'
--------------------------------------------------------------------------------
21 fits failed with the following error:
Traceback (most recent call last):
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 686, in _fit_and_score
estimator.fit(X_train, y_train, **fit_params)
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py", line 1162, in fit
solver = _check_solver(self.solver, self.penalty, self.dual)
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py", line 64, in _check_solver
raise ValueError(
ValueError: Only 'saga' solver supports elasticnet penalty, got solver=liblinear.
--------------------------------------------------------------------------------
12 fits failed with the following error:
Traceback (most recent call last):
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 686, in _fit_and_score
estimator.fit(X_train, y_train, **fit_params)
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py", line 1162, in fit
solver = _check_solver(self.solver, self.penalty, self.dual)
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py", line 54, in _check_solver
raise ValueError(
ValueError: Solver sag supports only 'l2' or 'none' penalties, got l1 penalty.
--------------------------------------------------------------------------------
9 fits failed with the following error:
Traceback (most recent call last):
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 686, in _fit_and_score
estimator.fit(X_train, y_train, **fit_params)
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py", line 1162, in fit
solver = _check_solver(self.solver, self.penalty, self.dual)
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py", line 54, in _check_solver
raise ValueError(
ValueError: Solver sag supports only 'l2' or 'none' penalties, got elasticnet penalty.
--------------------------------------------------------------------------------
6 fits failed with the following error:
Traceback (most recent call last):
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 686, in _fit_and_score
estimator.fit(X_train, y_train, **fit_params)
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py", line 1162, in fit
solver = _check_solver(self.solver, self.penalty, self.dual)
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py", line 54, in _check_solver
raise ValueError(
ValueError: Solver lbfgs supports only 'l2' or 'none' penalties, got elasticnet penalty.
--------------------------------------------------------------------------------
9 fits failed with the following error:
Traceback (most recent call last):
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 686, in _fit_and_score
estimator.fit(X_train, y_train, **fit_params)
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py", line 1162, in fit
solver = _check_solver(self.solver, self.penalty, self.dual)
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py", line 54, in _check_solver
raise ValueError(
ValueError: Solver newton-cg supports only 'l2' or 'none' penalties, got l1 penalty.
warnings.warn(some_fits_failed_message, FitFailedWarning)
c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_search.py:952: UserWarning: One or more of the test scores are non-finite: [ nan 0.87666667 0.92083333 nan nan 0.87416667
nan 0.87666667 0.864375 0.87645833 0.78208333 0.87854167
0.72333333 0.87854167 0.85645833 nan nan nan
0.85083333 0.72333333 0.5025 0.92020833 0.78208333 0.918125
0.86458333 0.87666667 nan 0.9225 0.90375 nan
0.78208333 nan 0.5025 nan nan nan
nan 0.78208333 nan 0.78208333 0.85645833 0.628125
0.918125 nan 0.49916667 0.85875 nan 0.49916667
nan nan 0.87791667 0.86520833 nan 0.9225
nan 0.918125 0.865625 0.84166667 nan 0.9225
0.90375 0.918125 0.87375 0.918125 0.864375 nan
nan 0.87666667 nan 0.90375 0.85625 0.62895833
nan nan 0.85625 nan nan 0.87854167
0.85645833 nan 0.87791667 0.90395833 0.87854167 nan
nan 0.87375 0.78208333 0.87666667 nan nan
0.78208333 0.90270833 nan nan 0.85625 nan
0.86583333 nan nan nan]
warnings.warn(
c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py:1173: FutureWarning: `penalty='none'`has been deprecated in 1.2 and will be removed in 1.4. To keep the past behaviour, set `penalty=None`.
warnings.warn(
c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py:1181: UserWarning: Setting penalty=None will ignore the C and l1_ratio parameters
warnings.warn(
Best Parameters: {'solver': 'newton-cg', 'penalty': 'none', 'max_iter': 100, 'C': 0.005994842503189409}
Best Score: 0.9225
Accuracy: 0.9325
Confusion Matrix:
[[557 31]
[ 50 562]]
Classification Report:
precision recall f1-score support
0 0.92 0.95 0.93 588
1 0.95 0.92 0.93 612
accuracy 0.93 1200
macro avg 0.93 0.93 0.93 1200
weighted avg 0.93 0.93 0.93 1200
c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\utils\optimize.py:210: ConvergenceWarning: newton-cg failed to converge. Increase the number of iterations.
warnings.warn(
%% Cell type:markdown id: tags:
Predict
%% Cell type:code id: tags:
``` python
# Load the test set (to be labelled by the tuned model) into a DataFrame.
test_data = pd.read_csv('TestingDataBinary.csv')
```
%% Cell type:code id: tags:
``` python
# Preprocessing: apply the transformers fitted on the training data, in the
# same order (inf replacement -> imputation -> scaling -> PCA).
# replace() returns a new frame here instead of mutating in place:
# `X_new = test_data` was only an alias, so the in-place version silently
# rewrote the raw test data that is later saved alongside the predictions.
X_new = test_data.replace([np.inf, -np.inf], 0)
# Impute the missing values in the features data
X_imputed_new = imputer.transform(X_new)
# Scale the features data
X_scaled_new = scaler.transform(X_imputed_new)
# Apply PCA transformation
X_pca_new = pca.transform(X_scaled_new)
```
%% Cell type:code id: tags:
``` python
# Use the best estimator from the search to predict labels for the new data
y_pred_new = best_model.predict(X_pca_new)
# Store the predictions in a new 'predicted_marker' column of the test frame
test_data['predicted_marker'] = y_pred_new
# Write the frame, including the predictions, to a new CSV file
# (index=False leaves the row index out of the file)
test_data.to_csv('TestingDataBinary_with_predictions.csv', index=False)
```
%% Cell type:code id: tags:
``` python
import pandas as pd
import numpy as np
import sklearn
import scipy
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.impute import SimpleImputer
from sklearn.model_selection import GridSearchCV
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import GridSearchCV
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.model_selection import RandomizedSearchCV
```
%% Cell type:code id: tags:
``` python
#Read CSV file as Pandas Dataframe
train_df = pd.read_csv('TrainingDataBinary.csv')
```
%% Cell type:code id: tags:
``` python
print(train_df.info())
```
%% Output
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6000 entries, 0 to 5999
Columns: 129 entries, 1 to 129
dtypes: float64(112), int64(17)
memory usage: 5.9 MB
None
%% Cell type:code id: tags:
``` python
# Create a histogram to show the distribution of a column
plt.hist(train_df['marker'])
plt.hist(train_df['129'])
```
%% Output
(array([3000., 0., 0., 0., 0., 0., 0., 0., 0.,
3000.]),
array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. ]),
<BarContainer object of 10 artists>)
%% Cell type:code id: tags:
``` python
scaler = StandardScaler()
# Separate the features from the target variable
X = train_df.drop('marker', axis=1)
y = train_df['marker']
X = train_df.drop('129', axis=1)
y = train_df['129']
#Fix infinite value error
# X[X == np.inf] = np.finfo('float64').max
X.replace([np.inf,-np.inf],0,inplace=True)
# Create a SimpleImputer object to replace NaN values with the mean value of the corresponding column
imputer = SimpleImputer(strategy='mean')
# Impute the missing values in the features data
X_imputed = imputer.fit_transform(X)
# Fit the scaler to the features data and transform the data
X_scaled = scaler.fit_transform(X_imputed)
# # The transformed data will be a numpy array, so you can convert it back to a DataFrame
# X_scaled_df = pd.DataFrame(X_scaled, columns=X.columns)
```
%% Cell type:code id: tags:
``` python
n_components = 100
pca = PCA(n_components=n_components)
principal_components = pca.fit_transform(X_scaled)
# Create a DataFrame with the loadings
loadings = pd.DataFrame(pca.components_.T, columns=[f'PC{i+1}' for i in range(n_components)], index=X.columns)
# Apply PCA to the scaled data
# pca = PCA(n_components=100)
# X_pca = pca.fit_transform(X_scaled)
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(pca, y, test_size=0.2,random_state=42)
# # Train the model on the training data
# lr.fit(X_train, y_train)
# # Predict the labels for the test data
# y_pred = lr.predict(X_test)
# # Evaluate the model performance
# print("Accuracy:", accuracy_score(y_test, y_pred))
#PCA
pca = PCA(n_components=100)
X_pca = pca.fit_transform(X_scaled)
```
%% Cell type:code id: tags:
``` python
X_test_pca = pca.transform(X_test_scaled)
clf = LogisticRegression(random_state=42)
clf.fit(X_train_pca, y_train)
#split data
X_train, X_test, y_train, y_test = train_test_split(X_pca, y, test_size=0.2, random_state=42)
#train the model
log_reg = LogisticRegression()
log_reg.fit(X_train, y_train)
y_pred = clf.predict(X_test_pca)
# Calculate and print the accuracy of the model
# 5. Evaluate the model on the testing set
y_pred = log_reg.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
print("Accuracy:", accuracy)
print("Classification Report:\n", report)
```
%% Cell type:code id: tags:
%% Output
``` python
# Read the test dataset
test_df = pd.read_csv('TestingDataBinary.csv')
```
Accuracy: 0.895
Classification Report:
precision recall f1-score support
0 0.86 0.94 0.90 588
1 0.93 0.86 0.89 612
accuracy 0.90 1200
macro avg 0.90 0.90 0.89 1200
weighted avg 0.90 0.90 0.89 1200
c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
%% Cell type:code id: tags:
``` python
# explained_variance_ratio = pca.explained_variance_ratio_
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", conf_matrix)
```
%% Output
# sorted_indices = np.argsort(explained_variance_ratio)[::-1]
Confusion Matrix:
[[550 38]
[ 88 524]]
# # Get the top 100 components
# top_100_indices = sorted_indices[:100]
# top_100_components = principal_components[:, top_100_indices]
# top_100_explained_variance_ratio = explained_variance_ratio[top_100_indices]
%% Cell type:markdown id: tags:
[[True Negatives (TN), False Positives (FP)],
[False Negatives (FN), True Positives (TP)]]
# print("Top 100 components:\n", top_100_components)
```
%% Cell type:markdown id: tags:
%% Output
Top 100 components:
[[ 3.72196354e+00 -5.87941588e+00 -4.02934784e-01 ... 4.15787367e-03
1.89567282e-03 2.81043971e-03]
[ 1.25401316e+00 -5.82245182e+00 -7.51607953e-01 ... 5.71178351e-04
1.64284342e-04 3.97691294e-03]
[ 1.24713154e+00 -5.82164239e+00 -7.59379345e-01 ... 3.40089202e-03
2.59366304e-04 4.28360451e-03]
...
[-6.89160079e-01 -5.50909843e+00 -4.69952506e-01 ... -2.71254494e-03
-9.03351989e-05 -2.02581895e-03]
[ 7.34703326e-01 -5.58643030e+00 -5.41845944e-01 ... -3.62008786e-03
-8.72999728e-05 -2.60358277e-03]
[ 7.35621169e-01 -5.58380312e+00 -5.36559421e-01 ... -3.46823833e-03
3.30081328e-04 -2.83803266e-03]]
Fine tuning
%% Cell type:code id: tags:
``` python
# Create a Logistic Regression model
lr = LogisticRegression()
# Define the parameter grid to search over
param_grid = {'C': [0.1, 1, 10, 100], 'solver': ['liblinear', 'lbfgs']}
# Create a GridSearchCV object and fit it to the data
grid_search = GridSearchCV(lr, param_grid, cv=5)
grid_search.fit(X_scaled, y)
# Print the best hyperparameters and the corresponding accuracy score
print("Best hyperparameters: ", grid_search.best_params_)
print("Best accuracy score: ", grid_search.best_score_)
param_dist = {
'penalty': ['l1', 'l2', 'elasticnet', 'none'],
'C': np.logspace(-4, 4, 10),
'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'],
'max_iter': [100, 500, 1000],
}
# Create the RandomizedSearchCV object with the logistic regression model, hyperparameters, and cross-validation
log_reg = LogisticRegression()
random_search = RandomizedSearchCV(log_reg, param_dist, n_iter=100, cv=3, n_jobs=-1, verbose=1, random_state=42)
# Fit the random search to the training data
random_search.fit(X_train, y_train)
# Check the best hyperparameters found
print("Best Parameters:", random_search.best_params_)
print("Best Score:", random_search.best_score_)
# Use the best estimator for predictions and evaluation
best_model = random_search.best_estimator_
y_pred = best_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)
print("Accuracy:", accuracy)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", report)
```
%% Output
c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\svm\_base.py:1244: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
warnings.warn(
c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\svm\_base.py:1244: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
warnings.warn(
c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\svm\_base.py:1244: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
Fitting 3 folds for each of 100 candidates, totalling 300 fits
c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py:378: FitFailedWarning:
120 fits failed out of a total of 300.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.
Below are more details about the failures:
--------------------------------------------------------------------------------
24 fits failed with the following error:
Traceback (most recent call last):
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 686, in _fit_and_score
estimator.fit(X_train, y_train, **fit_params)
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py", line 1162, in fit
solver = _check_solver(self.solver, self.penalty, self.dual)
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py", line 54, in _check_solver
raise ValueError(
ValueError: Solver newton-cg supports only 'l2' or 'none' penalties, got elasticnet penalty.
--------------------------------------------------------------------------------
15 fits failed with the following error:
Traceback (most recent call last):
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 686, in _fit_and_score
estimator.fit(X_train, y_train, **fit_params)
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py", line 1162, in fit
solver = _check_solver(self.solver, self.penalty, self.dual)
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py", line 71, in _check_solver
raise ValueError("penalty='none' is not supported for the liblinear solver")
ValueError: penalty='none' is not supported for the liblinear solver
--------------------------------------------------------------------------------
18 fits failed with the following error:
Traceback (most recent call last):
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 686, in _fit_and_score
estimator.fit(X_train, y_train, **fit_params)
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py", line 1162, in fit
solver = _check_solver(self.solver, self.penalty, self.dual)
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py", line 54, in _check_solver
raise ValueError(
ValueError: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.
--------------------------------------------------------------------------------
6 fits failed with the following error:
Traceback (most recent call last):
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 686, in _fit_and_score
estimator.fit(X_train, y_train, **fit_params)
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py", line 1291, in fit
fold_coefs_ = Parallel(n_jobs=self.n_jobs, verbose=self.verbose, prefer=prefer)(
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\utils\parallel.py", line 63, in __call__
return super().__call__(iterable_with_config)
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\joblib\parallel.py", line 1085, in __call__
if self.dispatch_one_batch(iterator):
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\joblib\parallel.py", line 901, in dispatch_one_batch
self._dispatch(tasks)
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\joblib\parallel.py", line 819, in _dispatch
job = self._backend.apply_async(batch, callback=cb)
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\joblib\_parallel_backends.py", line 208, in apply_async
result = ImmediateResult(func)
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\joblib\_parallel_backends.py", line 597, in __init__
self.results = batch()
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\joblib\parallel.py", line 288, in __call__
return [func(*args, **kwargs)
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\joblib\parallel.py", line 288, in <listcomp>
return [func(*args, **kwargs)
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\utils\parallel.py", line 123, in __call__
return self.function(*args, **kwargs)
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py", line 521, in _logistic_regression_path
alpha = (1.0 / C) * (1 - l1_ratio)
TypeError: unsupported operand type(s) for -: 'int' and 'NoneType'
--------------------------------------------------------------------------------
21 fits failed with the following error:
Traceback (most recent call last):
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 686, in _fit_and_score
estimator.fit(X_train, y_train, **fit_params)
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py", line 1162, in fit
solver = _check_solver(self.solver, self.penalty, self.dual)
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py", line 64, in _check_solver
raise ValueError(
ValueError: Only 'saga' solver supports elasticnet penalty, got solver=liblinear.
--------------------------------------------------------------------------------
12 fits failed with the following error:
Traceback (most recent call last):
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 686, in _fit_and_score
estimator.fit(X_train, y_train, **fit_params)
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py", line 1162, in fit
solver = _check_solver(self.solver, self.penalty, self.dual)
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py", line 54, in _check_solver
raise ValueError(
ValueError: Solver sag supports only 'l2' or 'none' penalties, got l1 penalty.
--------------------------------------------------------------------------------
9 fits failed with the following error:
Traceback (most recent call last):
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 686, in _fit_and_score
estimator.fit(X_train, y_train, **fit_params)
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py", line 1162, in fit
solver = _check_solver(self.solver, self.penalty, self.dual)
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py", line 54, in _check_solver
raise ValueError(
ValueError: Solver sag supports only 'l2' or 'none' penalties, got elasticnet penalty.
--------------------------------------------------------------------------------
6 fits failed with the following error:
Traceback (most recent call last):
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 686, in _fit_and_score
estimator.fit(X_train, y_train, **fit_params)
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py", line 1162, in fit
solver = _check_solver(self.solver, self.penalty, self.dual)
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py", line 54, in _check_solver
raise ValueError(
ValueError: Solver lbfgs supports only 'l2' or 'none' penalties, got elasticnet penalty.
--------------------------------------------------------------------------------
9 fits failed with the following error:
Traceback (most recent call last):
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 686, in _fit_and_score
estimator.fit(X_train, y_train, **fit_params)
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py", line 1162, in fit
solver = _check_solver(self.solver, self.penalty, self.dual)
File "c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py", line 54, in _check_solver
raise ValueError(
ValueError: Solver newton-cg supports only 'l2' or 'none' penalties, got l1 penalty.
warnings.warn(some_fits_failed_message, FitFailedWarning)
c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_search.py:952: UserWarning: One or more of the test scores are non-finite: [ nan 0.87666667 0.92083333 nan nan 0.87416667
nan 0.87666667 0.864375 0.87645833 0.78208333 0.87854167
0.72333333 0.87854167 0.85645833 nan nan nan
0.85083333 0.72333333 0.5025 0.92020833 0.78208333 0.918125
0.86458333 0.87666667 nan 0.9225 0.90375 nan
0.78208333 nan 0.5025 nan nan nan
nan 0.78208333 nan 0.78208333 0.85645833 0.628125
0.918125 nan 0.49916667 0.85875 nan 0.49916667
nan nan 0.87791667 0.86520833 nan 0.9225
nan 0.918125 0.865625 0.84166667 nan 0.9225
0.90375 0.918125 0.87375 0.918125 0.864375 nan
nan 0.87666667 nan 0.90375 0.85625 0.62895833
nan nan 0.85625 nan nan 0.87854167
0.85645833 nan 0.87791667 0.90395833 0.87854167 nan
nan 0.87375 0.78208333 0.87666667 nan nan
0.78208333 0.90270833 nan nan 0.85625 nan
0.86583333 nan nan nan]
warnings.warn(
c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\svm\_base.py:1244: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py:1173: FutureWarning: `penalty='none'`has been deprecated in 1.2 and will be removed in 1.4. To keep the past behaviour, set `penalty=None`.
warnings.warn(
c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\svm\_base.py:1244: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py:1181: UserWarning: Setting penalty=None will ignore the C and l1_ratio parameters
warnings.warn(
c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
Best hyperparameters: {'C': 100, 'solver': 'liblinear'}
Best accuracy score: 0.8968333333333334
Best Parameters: {'solver': 'newton-cg', 'penalty': 'none', 'max_iter': 100, 'C': 0.005994842503189409}
Best Score: 0.9225
Accuracy: 0.9325
Confusion Matrix:
[[557 31]
[ 50 562]]
Classification Report:
precision recall f1-score support
0 0.92 0.95 0.93 588
1 0.95 0.92 0.93 612
accuracy 0.93 1200
macro avg 0.93 0.93 0.93 1200
weighted avg 0.93 0.93 0.93 1200
c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\svm\_base.py:1244: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\utils\optimize.py:210: ConvergenceWarning: newton-cg failed to converge. Increase the number of iterations.
warnings.warn(
%% Cell type:markdown id: tags:
Predict
%% Cell type:code id: tags:
``` python
# Fit a logistic regression with C=100 and the liblinear solver; these are the
# values the recorded search output reports under "Best hyperparameters".
# NOTE(review): the recorded output also shows "Liblinear failed to converge"
# warnings -- consider raising max_iter; confirm before changing the model.
lr = LogisticRegression(C=100, solver='liblinear')
# Train the model on the training data
# (X_train / y_train are created earlier in the notebook, outside this cell)
lr.fit(X_train, y_train)
# Load the testing CSV into a DataFrame
test_data = pd.read_csv('TestingDataBinary.csv')
```
# Predict the labels for the test data
y_pred = lr.predict(X_test)
%% Cell type:code id: tags:
# Evaluate the model performance
print("Accuracy:", accuracy_score(y_test, y_pred))
```
``` python
# Preprocessing
# X_new is another name for the same DataFrame object as test_data (no copy is
# made), so the in-place replace below also changes test_data.
X_new = test_data
# Replace +inf and -inf with 0, modifying the DataFrame in place
X_new.replace([np.inf, -np.inf], 0, inplace=True)
%% Output
# Impute the missing values in the features data
X_imputed_new = imputer.transform(X_new)
Accuracy: 0.9158333333333334
# Scale the features data
X_scaled_new = scaler.transform(X_imputed_new)
c:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\svm\_base.py:1244: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
warnings.warn(
# Apply PCA transformation
X_pca_new = pca.transform(X_scaled_new)
```
%% Cell type:code id: tags:
``` python
# NOTE(review): this cell interleaves two separate prediction flows:
#   * test_df -> scaler -> rf_reg_selected -> submission_df ('AQI_Bucket')
#   * X_pca_new -> best_model -> test_data['predicted_marker'] -> CSV
# test_df, top_n, rf_reg_selected and best_model are not defined in this cell;
# verify which flow is actually intended before running it.

# Normalize the features with the already-fitted scaler (transform only, no refit)
test_df_scaled = scaler.transform(test_df)
# Keep the first `top_n` columns of the scaled array. This is a positional
# slice, not a ranking by importance; the original comment says top_n is 15,
# but its value is set elsewhere -- TODO confirm.
test_df_selected = test_df_scaled[:, :top_n]
# Use the best estimator for predictions on the new data
y_pred_new = best_model.predict(X_pca_new)
# Predict with rf_reg_selected on the selected columns (the original comment
# describes these as AQI scores for the test dataset)
test_predictions = rf_reg_selected.predict(test_df_selected)
# Save the predictions to a new column in the DataFrame
test_data['predicted_marker'] = y_pred_new
# Wrap the rf_reg_selected predictions in a one-column DataFrame; it is never
# written out here because the to_csv call below is commented out.
submission_df = pd.DataFrame({'AQI_Bucket': test_predictions})
# NOTE(review): if re-enabled, the path needs a raw string (r"...") or forward
# slashes -- "\U" in a normal string literal is an invalid escape.
# submission_df.to_csv("C:\Users\andre\Downloads\subs.csv", index=False)
# Save the updated DataFrame to a new CSV file
test_data.to_csv('TestingDataBinary_with_predictions.csv', index=False)
```
......
import pandas as pd
import numpy as np
import sklearn
import scipy
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
# Read the training and testing CSV files as pandas DataFrames.
train_df = pd.read_csv('TrainingDataBinary.csv')
test_df = pd.read_csv('TestingDataBinary.csv')

# Confirm the files were read by printing the first rows of each.
# head must be *called*: printing the bare attribute shows the bound-method
# repr rather than the five-row preview.
print(train_df.head())
print("----------------------------------")
print(test_df.head())

# Get the summary statistics of the data.
print(train_df.describe())

# Get the information about the columns of the DataFrame.
# info() writes its report to stdout itself and returns None, so it is not
# wrapped in print() (that would emit an extra "None" line).
train_df.info()

# Create a histogram to show the distribution of the 'marker' column.
# NOTE(review): the notebook version of this step reads column '129' --
# confirm that a 'marker' column exists in TrainingDataBinary.csv.
plt.hist(train_df['marker'])
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment