Linear Regression Implementation using Python

Jul 11, 2020

This article walks through implementing Linear Regression in Python and covers the following:

  1. Implementation of Linear Regression from scratch (Single Variable)
  2. Implementation of Linear Regression using Scikit-Learn (Single Variable)
  3. Multiple Linear Regression from scratch
  4. Multiple Linear Regression using Scikit-Learn

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Registers the 3D projection used for the multiple-regression scatter plot
from mpl_toolkits.mplot3d import Axes3D

%matplotlib inline
data = pd.read_csv('headbrain.csv')
print('Shape of Data', data.shape)
data.head()
[Output: first rows of headbrain.csv, including the Head Size(cm^3) and Brain Weight(grams) columns]
X = data['Head Size(cm^3)'].values
Y = data['Brain Weight(grams)'].values

Implementing Linear Regression from scratch
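
The slope b1 and intercept b0 below are the ordinary least squares estimates, computed directly from their closed-form definitions:

b1 = Σ(x_i − mean_x)(y_i − mean_y) / Σ(x_i − mean_x)²
b0 = mean_y − b1 × mean_x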

mean_x = np.mean(X)
mean_y = np.mean(Y)

m = len(X)

numer = 0
denom = 0

for i in range(m):
    numer += (X[i] - mean_x) * (Y[i] - mean_y)
    denom += (X[i] - mean_x) ** 2
    
b1 = numer/denom
b0 = mean_y - (b1 * mean_x)

print('b1 = ', b1, '\n', 'b0 = ', b0)

b1 = 0.26342933948939945
b0 = 325.57342104944223

Brain Weight = 325.573421049 + 0.263429339489 × Head Size
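
Plugging a head size into this fitted line gives a point prediction; a quick sketch (the 4000 cm³ value is an arbitrary illustration, not taken from the dataset):

head_size = 4000  # arbitrary head size in cm^3, for illustration only
pred_weight = b0 + b1 * head_size
print('Predicted brain weight =', pred_weight, 'grams')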

max_x = np.max(X) + 100
min_x = np.min(X) - 100

# Calculating line values x and y
x = np.linspace(min_x, max_x, 1000)
y = b0 + b1 * x
# Plotting the regression line
plt.figure(figsize=(11,6))
plt.plot(x, y, color='#58b970', label='Regression Line')
# Plotting the scatter points
plt.scatter(X, Y, c='#ef5423', label='Scatter Plot')

plt.xlabel('Head Size in cm^3')
plt.ylabel('Brain Weight in grams')
plt.legend()
plt.show()
[Figure: regression line over the head size vs. brain weight scatter plot]
# Calculating Root Mean Squared Error (RMSE)
rmse = 0
for i in range(m):
    y_pred = b0 + b1 * X[i]
    rmse += (Y[i] - y_pred) ** 2
rmse = np.sqrt(rmse/m)
print('RMSE = ', rmse)

RMSE = 72.1206213783709

# Calculating R2 score
ss_t = 0
ss_r = 0
for i in range(m):
    y_pred = b0 + b1 * X[i]
    ss_t += (Y[i] - mean_y) ** 2
    ss_r += (Y[i] - y_pred) ** 2
r2 = 1 - (ss_r/ss_t)
print('R2 Score = ', r2)

R2 Score = 0.6393117199570003
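
The two loops above can equally be written as vectorized NumPy expressions; a minimal sketch, assuming X, Y, b0, b1, and mean_y are still defined as above:

# Vectorized equivalents of the RMSE and R2 loops above
Y_pred = b0 + b1 * X
rmse_vec = np.sqrt(np.mean((Y - Y_pred) ** 2))
r2_vec = 1 - np.sum((Y - Y_pred) ** 2) / np.sum((Y - mean_y) ** 2)
print('RMSE =', rmse_vec)
print('R2 Score =', r2_vec)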

Scikit-Learn Approach

# scikit-learn expects a 2-D feature array, so reshape the 1-D array to (m, 1)
X = X.reshape((m, 1))
# Creating Model
reg = LinearRegression()
# Fitting training data
reg = reg.fit(X, Y)
# Y Prediction
Y_pred = reg.predict(X)

# Calculating RMSE and R2 Score
mse = mean_squared_error(Y, Y_pred)
rmse = np.sqrt(mse)
r2_score = reg.score(X, Y)

print('RMSE = ', np.sqrt(mse))
print('R2 Score =', r2_score)

RMSE = 72.1206213783709
R2 Score = 0.639311719957
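
Once fitted, the model can also predict for head sizes it has not seen; a brief sketch (again with an arbitrary 4000 cm³ input):

# Predict brain weight for a new head size (input must be 2-D)
new_head_size = np.array([[4000]])  # arbitrary value, for illustration only
print('Predicted brain weight =', reg.predict(new_head_size)[0], 'grams')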

Multiple Linear Regression

data = pd.read_csv('student.csv')
print('Shape of Data = ', data.shape)
data.head()

Shape of Data = (1000, 3)

[Output: first rows of student.csv, with Math, Reading, and Writing score columns]

Implementing from scratch

math = data['Math'].values
read = data['Reading'].values
write = data['Writing'].values

# Plotting the scores as a 3D scatter plot
fig = plt.figure(figsize=(18,10))
ax = fig.add_subplot(projection='3d')
ax.scatter(math, read, write, color='#ef1234')
plt.show()
[Figure: 3D scatter plot of math, reading, and writing scores]
m = len(math)
# Column of ones so the intercept is learned as part of B
x0 = np.ones(m)
X = np.array([x0, math, read]).T
# Initial coefficients
B = np.array([0.0, 0.0, 0.0])
Y = np.array(write)
alpha = 0.0001  # learning rate
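
With the column of ones prepended, the hypothesis is a single dot product, h = X·B = b0 + b1 × math + b2 × reading, so the intercept needs no special-case handling.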

def cost_function(X, Y, B):
    m = len(Y)
    J = np.sum((X.dot(B) - Y) ** 2)/(2 * m)
    return J

initial_cost = cost_function(X, Y, B)
print(initial_cost)

2470.11
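
This is the usual mean squared error cost halved: J(B) = (1/2m) Σ (x_i·B − y_i)². The factor of 1/2 only simplifies the gradient, which becomes (1/m) Xᵀ(X·B − Y), exactly what gradient_descent computes below.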

def gradient_descent(X, Y, B, alpha, iterations):
    cost_history = [0] * iterations
    m = len(Y)
    
    for iteration in range(iterations):
        # Hypothesis Values
        h = X.dot(B)
        # Difference between hypothesis and actual Y
        loss = h - Y
        # Gradient Calculation
        gradient = X.T.dot(loss) / m
        # Changing Values of B using Gradient
        B = B - alpha * gradient
        # New Cost Value
        cost = cost_function(X, Y, B)
        cost_history[iteration] = cost
        
    return B, cost_history


# 100000 Iterations
newB, cost_history = gradient_descent(X, Y, B, alpha, 100000)

# New Values of B
print(newB)

# Final Cost of new B
print(cost_history[-1])

[-0.47889172 0.09137252 0.90144884]
10.475123473539169

Score_writing = −0.47889172 + 0.09137252 × Score_math + 0.90144884 × Score_reading
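
A quick way to check that gradient descent converged is to plot the recorded cost history; a minimal sketch using the cost_history returned above:

# Visualize convergence of gradient descent
plt.figure(figsize=(11,6))
plt.plot(cost_history)
plt.xlabel('Iteration')
plt.ylabel('Cost J(B)')
plt.show()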

# Model Evaluation - RMSE
def rmse(Y, Y_pred):
    rmse = np.sqrt(sum((Y - Y_pred) ** 2) / len(Y))
    return rmse

# Model Evaluation - R2 Score
def r2_score(Y, Y_pred):
    mean_y = np.mean(Y)
    ss_tot = sum((Y - mean_y) ** 2)
    ss_res = sum((Y - Y_pred) ** 2)
    r2 = 1 - (ss_res / ss_tot)
    return r2

Y_pred = X.dot(newB)

print(rmse(Y, Y_pred))
print(r2_score(Y, Y_pred))

4.577143972727789
0.9097223273061553
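
Because ordinary least squares has a closed-form solution, B = (XᵀX)⁻¹XᵀY, the gradient-descent coefficients can be sanity-checked against a direct solver; a sketch using NumPy's least-squares routine on the same design matrix X:

# Closed-form least-squares solution for comparison with gradient descent
B_exact, *_ = np.linalg.lstsq(X, Y, rcond=None)
print(B_exact)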

Scikit-Learn Approach

# X and Y Values
X = np.array([math, read]).T
Y = np.array(write)

# Model Intialization
reg = LinearRegression()
# Data Fitting
reg = reg.fit(X, Y)
# Y Prediction
Y_pred = reg.predict(X)

# Model Evaluation
rmse = np.sqrt(mean_squared_error(Y, Y_pred))
r2 = reg.score(X, Y)

print(rmse)
print(r2)

4.572887051836439
0.9098901726717316
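
The fitted intercept and per-feature coefficients are exposed directly on the model and should land close to the gradient-descent values above:

print('Intercept =', reg.intercept_)
print('Coefficients =', reg.coef_)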

# 2-D projection: predictions plotted against the reading score only
plt.figure(figsize=(18,10))
plt.plot(read, Y_pred, color='#58b970', label='Regression Line')
plt.scatter(read, write, color='#ef1234', label='Data')
plt.legend()
plt.show()
[Figure: predicted writing scores plotted against reading scores, with actual data points]
