
Logistic Regression

Learn to classify with Logistic Regression


What is Logistic Regression?

Despite the name, logistic regression is used for classification, not regression!

It predicts the probability that a sample belongs to a class, as a value between 0 and 1.

Simple Example

code.py
from sklearn.linear_model import LogisticRegression
import numpy as np

# Predict if student passes (1) or fails (0) based on hours studied
hours = np.array([[1], [2], [3], [4], [5], [6], [7], [8]])
passed = np.array([0, 0, 0, 0, 1, 1, 1, 1])

model = LogisticRegression()
model.fit(hours, passed)

# Predict
new_student = np.array([[4.5]])
prediction = model.predict(new_student)
probability = model.predict_proba(new_student)

print(f"Prediction: {'Pass' if prediction[0] == 1 else 'Fail'}")
print(f"Probability: {probability[0][1]:.1%}")

How It Works

  1. Calculate linear combination: z = wx + b
  2. Apply sigmoid function: p = 1 / (1 + e^(-z))
  3. If p ≥ 0.5, predict class 1; else class 0
code.py
# The sigmoid function squashes any z into the range (0, 1)
import numpy as np
import matplotlib.pyplot as plt

z = np.linspace(-10, 10, 100)
sigmoid = 1 / (1 + np.exp(-z))

plt.plot(z, sigmoid)
plt.axhline(y=0.5, color='r', linestyle='--')
plt.xlabel('z')
plt.ylabel('Probability')
plt.title('Sigmoid Function')
plt.show()
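
To connect these steps back to the first example, we can recompute the pass probability for a 4.5-hour student by hand, reusing the model fitted on the hours/passed data above, and compare it with predict_proba:

code.py
# Recompute the probability by hand (hours/passed model from the first example)
w = model.coef_[0][0]        # learned weight
b = model.intercept_[0]      # learned bias

x = 4.5
z = w * x + b                    # Step 1: linear combination
p = 1 / (1 + np.exp(-z))         # Step 2: sigmoid
print(f"Manual probability:  {p:.3f}")
print(f"From predict_proba:  {model.predict_proba([[x]])[0][1]:.3f}")  # should match
print(f"Predicted class: {1 if p >= 0.5 else 0}")  # Step 3: apply the 0.5 threshold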

Binary Classification

code.py
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer

# Load data
data = load_breast_cancer()
X = data.data
y = data.target

# Split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Train
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Evaluate
accuracy = model.score(X_test, y_test)
print(f"Accuracy: {accuracy:.1%}")

Multiclass Classification

code.py
from sklearn.datasets import load_iris

# Load iris (3 classes)
iris = load_iris()
X = iris.data
y = iris.target

# Split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Train (handles multiclass automatically)
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Predict probabilities for each class
proba = model.predict_proba(X_test[:1])
print(f"Probabilities: {proba[0]}")
print(f"Classes: {iris.target_names}")

Predictions and Probabilities

code.py
# Get predictions
predictions = model.predict(X_test)

# Get probability for each class
probabilities = model.predict_proba(X_test)

# Example for first sample
print(f"Predicted class: {predictions[0]}")
print(f"Probabilities: {probabilities[0]}")
print(f"Most likely: {iris.target_names[predictions[0]]}")

Confusion Matrix

See where the model makes mistakes:

code.py
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

y_pred = model.predict(X_test)
cm = confusion_matrix(y_test, y_pred)

print("Confusion Matrix:")
print(cm)

# Visual
ConfusionMatrixDisplay.from_predictions(y_test, y_pred)
plt.title('Confusion Matrix')
plt.show()

Classification Metrics

code.py
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

y_pred = model.predict(X_test)

# Weighted averaging combines the per-class scores (iris has 3 classes)
print(f"Accuracy: {accuracy_score(y_test, y_pred):.1%}")
print(f"Precision: {precision_score(y_test, y_pred, average='weighted'):.1%}")
print(f"Recall: {recall_score(y_test, y_pred, average='weighted'):.1%}")
print(f"F1 Score: {f1_score(y_test, y_pred, average='weighted'):.1%}")

Definitions (a worked example follows):

  • Accuracy: Fraction of all predictions that are correct
  • Precision: Of the samples predicted positive, how many are actually positive
  • Recall: Of the actual positives, how many were correctly identified
  • F1: Harmonic mean of precision and recall
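
Worked example with hand-made labels (hypothetical values, just to show the arithmetic): 4 true positives, 2 false negatives, 1 false positive and 3 true negatives out of 10 samples.

code.py
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Hypothetical binary labels: 4 TP, 2 FN, 1 FP, 3 TN
y_true_toy = [1, 1, 1, 1, 1, 1, 0, 0, 0, 0]
y_pred_toy = [1, 1, 1, 1, 0, 0, 1, 0, 0, 0]

print(f"Accuracy:  {accuracy_score(y_true_toy, y_pred_toy):.2f}")   # (4 + 3) / 10 = 0.70
print(f"Precision: {precision_score(y_true_toy, y_pred_toy):.2f}")  # 4 / (4 + 1) = 0.80
print(f"Recall:    {recall_score(y_true_toy, y_pred_toy):.2f}")     # 4 / (4 + 2) = 0.67
print(f"F1 Score:  {f1_score(y_true_toy, y_pred_toy):.2f}")         # harmonic mean ≈ 0.73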

Classification Report

All metrics at once:

code.py
from sklearn.metrics import classification_report

print(classification_report(y_test, y_pred, target_names=iris.target_names))

Adjusting Threshold

For binary classification, the default threshold is 0.5, but you can change it:

code.py
# Get probabilities (assumes a binary classifier, e.g. the breast cancer model above)
proba = model.predict_proba(X_test)[:, 1]  # Probability of class 1

# Custom threshold
threshold = 0.7
custom_pred = (proba >= threshold).astype(int)

print(f"Default predictions: {model.predict(X_test[:5])}")
print(f"Custom predictions: {custom_pred[:5]}")

Feature Importance

code.py
# Coefficients show how strongly each feature pushes the prediction
# (comparable only if features are on similar scales; coef_[0] is the first class)
for feature, coef in zip(iris.feature_names, model.coef_[0]):
    print(f"{feature}: {coef:.3f}")

Complete Example

code.py
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score
from sklearn.datasets import load_breast_cancer
import numpy as np

# Load data
data = load_breast_cancer()
X = data.data
y = data.target

print(f"Features: {len(data.feature_names)}")
print(f"Classes: {data.target_names}")

# Split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Scale features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train
model = LogisticRegression(max_iter=1000)
model.fit(X_train_scaled, y_train)

# Predict
y_pred = model.predict(X_test_scaled)

# Evaluate
print(f"\n=== Results ===")
print(f"Accuracy: {accuracy_score(y_test, y_pred):.1%}")
print(f"\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=data.target_names))

# Top features
print("\nTop 5 Important Features:")
importance = np.abs(model.coef_[0])
top_indices = np.argsort(importance)[-5:][::-1]
for idx in top_indices:
    print(f"  {data.feature_names[idx]}: {model.coef_[0][idx]:.3f}")

Key Points

  • Logistic Regression is for classification
  • Outputs probabilities (0 to 1)
  • Default threshold is 0.5
  • Check confusion matrix for mistakes
  • Use precision/recall for imbalanced data
  • Scale features for better performance

What's Next?

Learn about model evaluation metrics in detail.
