Purpose

The purpose of this notebook is to create a Python class that implements the logistic regression algorithm using only NumPy.

Background

Logistic regression is an algorithm that is commonly used for binary classification problems. It predicts a probability and then applies a threshold to that probability to decide which class to assign. Although the algorithm can be extended to multi-class problems, only binary classification is considered in this notebook. The following (sigmoid) function transforms the output of the linear model into the range (0, 1):

$$\sigma(x) = \frac{1}{1+e^{-x}}$$
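As a minimal sketch (the function name is illustrative), this can be written in NumPy as:

```python
import numpy as np

def sigmoid(z):
    """Map real-valued inputs elementwise into (0, 1)."""
    return 1.0 / (1.0 + np.exp(-z))
```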

Visualising this function produces the following graph:

[Sigmoid curve plot; the code that generates it is in the Appendix.]

Cost Function

The cost function used for linear regression does not work for logistic regression: composing the squared-error cost with the sigmoid function makes it non-convex, producing many local minima, so gradient descent is not guaranteed to find the optimal weights. Therefore, the cross-entropy loss is used instead, producing the following cost function:

$$ \begin{align} J(\theta) = &- \frac{1}{m} \sum^m_{i=1} [y^{(i)} \log(h_{\theta}(x^{(i)})) + (1 - y^{(i)}) \log(1 - h_{\theta}(x^{(i)}))] \\ = &- \frac{1}{m} (y^T \log(h) + (1 - y)^T \log(1-h)) \end{align} $$

Where $ h = \sigma(X \theta)$.
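A minimal sketch of the vectorised form in NumPy (variable names and the epsilon guard are illustrative choices) might look like:

```python
import numpy as np

def cross_entropy_cost(theta, X, y):
    """Vectorised cross-entropy cost J(theta) for binary labels y in {0, 1}."""
    m = len(y)
    h = 1.0 / (1.0 + np.exp(-X @ theta))  # h = sigmoid(X theta)
    eps = 1e-12  # guards against log(0) when predictions saturate
    return -(y @ np.log(h + eps) + (1 - y) @ np.log(1 - h + eps)) / m
```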

Gradient Descent

Unlike basic linear regression, logistic regression does not have a closed-form solution, so gradient descent is used to find the weights that minimise the cost function. The partial derivative of the cost function with respect to each weight is as follows:

$$ \begin{align} \frac{\partial J}{\partial \theta_j} = & \frac{1}{m} \sum^m_{i=1} [h_{\theta}(x^{(i)}) - y^{(i)}] x_j^{(i)} \\ \nabla_{\theta} J = & \frac{1}{m} X^T [\sigma(X \theta) - y] \end{align} $$

This can be used to update the weights as follows:

$$ \begin{align} \theta_{t+1} = \theta_t - \alpha \nabla_{\theta} J \end{align} $$

Where $\alpha$ is the learning rate.
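As a sketch, one such update in NumPy (the learning-rate value is arbitrary) is:

```python
import numpy as np

def gradient_step(theta, X, y, alpha=0.1):
    """Perform one batch gradient-descent update of the weights."""
    m = len(y)
    h = 1.0 / (1.0 + np.exp(-X @ theta))  # predicted probabilities
    grad = X.T @ (h - y) / m              # gradient of J w.r.t. theta
    return theta - alpha * grad           # update rule
```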

Import Dependencies
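The implementation itself needs only NumPy; matplotlib is assumed here for the plot in the Appendix:

```python
import numpy as np
import matplotlib.pyplot as plt
```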

Logistic Regression Class

Note that batch gradient descent is used: each weight update is computed over the full training set.
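The following is a minimal sketch of such a class; the hyperparameter names (`alpha`, `n_iters`) and the intercept handling are illustrative assumptions, not a definitive implementation:

```python
import numpy as np

class LogisticRegression:
    """Binary logistic regression trained with batch gradient descent."""

    def __init__(self, alpha=0.1, n_iters=1000):
        self.alpha = alpha      # learning rate (assumed default)
        self.n_iters = n_iters  # number of gradient-descent iterations
        self.theta = None       # learned weights (including intercept)

    @staticmethod
    def _sigmoid(z):
        return 1.0 / (1.0 + np.exp(-z))

    @staticmethod
    def _add_intercept(X):
        # Prepend a column of ones so theta[0] acts as the bias term.
        return np.hstack([np.ones((X.shape[0], 1)), X])

    def fit(self, X, y):
        X = self._add_intercept(X)
        self.theta = np.zeros(X.shape[1])
        m = len(y)
        for _ in range(self.n_iters):
            h = self._sigmoid(X @ self.theta)
            grad = X.T @ (h - y) / m            # batch gradient of J
            self.theta -= self.alpha * grad     # gradient-descent update
        return self

    def predict_proba(self, X):
        return self._sigmoid(self._add_intercept(X) @ self.theta)

    def predict(self, X, threshold=0.5):
        return (self.predict_proba(X) >= threshold).astype(int)
```

A fixed iteration count is used for simplicity; a convergence check on the change in cost would be a natural extension.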

Toy Problem

A toy problem is created to test whether the algorithm is functioning as expected.
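As an illustrative sketch (the data are synthetic and hypothetical), two well-separated Gaussian blobs make a simple check, using the `LogisticRegression` sketch above:

```python
import numpy as np

# Two Gaussian blobs: class 0 centred at (-2, -2), class 1 at (2, 2).
rng = np.random.default_rng(0)
X = np.vstack([rng.normal(-2, 1, size=(50, 2)),
               rng.normal(2, 1, size=(50, 2))])
y = np.concatenate([np.zeros(50), np.ones(50)])

model = LogisticRegression(alpha=0.1, n_iters=1000).fit(X, y)
accuracy = (model.predict(X) == y).mean()
print(f"Training accuracy: {accuracy:.2f}")  # expected to be near 1.0
```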

Appendix

Create sigmoid plot for Background section
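A sketch of the plotting code, assuming matplotlib is available:

```python
import numpy as np
import matplotlib.pyplot as plt

x = np.linspace(-10, 10, 200)
sigma = 1.0 / (1.0 + np.exp(-x))

plt.plot(x, sigma)
plt.axhline(0.5, color="grey", linestyle="--", linewidth=0.8)  # decision threshold
plt.xlabel("$x$")
plt.ylabel(r"$\sigma(x)$")
plt.title("Sigmoid function")
plt.show()
```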