Linear Regression Implementation
Linear Regression Implementation using Python
Jul 11, 2020

This article will help you learn how to implement Linear Regression using Python. It covers the following topics:
- Implementation of Linear Regression from scratch (Single Variable)
- Implementation of Linear Regression using Scikit-Learn (Single Variable)
- Multiple Linear Regression from scratch
- Multiple Linear Regression using Scikit-Learn
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import sklearn
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from mpl_toolkits.mplot3d import Axes3D
from mpl_toolkits.mplot3d import axes3d
%matplotlib inline
# Read the head-size / brain-weight dataset and take a first look at it.
data = pd.read_csv('headbrain.csv')
print('Shape of Data', data.shape)
data.head()

# Single-variable setup: head size is the predictor, brain weight the target.
X, Y = (data[column].values
        for column in ('Head Size(cm^3)', 'Brain Weight(grams)'))
Implementing Linear Regression from scratch
# Closed-form least-squares fit of Y = b0 + b1 * X.
# The slope is the sum of the co-deviations of X and Y from their means
# divided by the sum of squared deviations of X; the intercept then follows
# from the fitted line passing through (mean_x, mean_y).
mean_x = np.mean(X)
mean_y = np.mean(Y)

# Total number of samples (also used by the evaluation code further down).
m = len(X)

# Vectorised sums replace the explicit per-sample loop.
x_dev = X - mean_x
numer = np.sum(x_dev * (Y - mean_y))
denom = np.sum(x_dev ** 2)

b1 = numer / denom
b0 = mean_y - (b1 * mean_x)
print('b1 = ', b1, '\n', 'b0 = ', b0)
b1 = 0.26342933948939945
b0 = 325.57342104944223
$$\text{BrainWeight} = 325.573421049 + 0.263429339489 \times \text{HeadSize}$$
# Extend the plotted range 100 units beyond the observed head sizes.
max_x = np.max(X) + 100
min_x = np.min(X) - 100

# 1000 evenly spaced x values and the fitted line's y value at each of them
x = np.linspace(min_x, max_x, num=1000)
y = b1 * x + b0

plt.figure(figsize=(11, 6))
# Fitted regression line
plt.plot(x, y, label='Regression Line', color='#58b970')
# Observed data points
plt.scatter(X, Y, label='Scatter Plot', c='#ef5423')
plt.xlabel('Head Size in cm3')
plt.ylabel('Brain Weight in grams')
plt.legend()
plt.show()

# Calculating Root Mean Squared Error: the square root of the mean squared
# residual between the observed Y and the fitted line b0 + b1 * X.
# Computed on whole arrays at once instead of one sample per loop iteration.
y_pred = b0 + b1 * X
rmse = np.sqrt(np.sum((Y - y_pred) ** 2) / m)
print('RMSE = ', rmse)
RMSE = 72.1206213783709
# Calculating R2 score: 1 - (residual sum of squares / total sum of squares).
# Computed on whole arrays at once instead of one sample per loop iteration.
y_pred = b0 + b1 * X
ss_t = np.sum((Y - mean_y) ** 2)
ss_r = np.sum((Y - y_pred) ** 2)
r2 = 1 - (ss_r / ss_t)
print('R2 Score = ', r2)
R2 Score = 0.6393117199570003
Scikit-Learn Approach
# scikit-learn expects a 2-D feature matrix, so turn the 1-D head-size
# vector into a single-column matrix with one row per sample.
X = X.reshape((m, 1))

# Create the model and fit it to the data (fit returns the estimator itself).
reg = LinearRegression().fit(X, Y)

# Predictions on the same data the model was fitted on
Y_pred = reg.predict(X)

# RMSE from scikit-learn's MSE, and R2 from the estimator's own score method
mse = mean_squared_error(Y, Y_pred)
rmse = np.sqrt(mse)
r2_score = reg.score(X, Y)
print('RMSE = ', rmse)
print('R2 Score =', r2_score)
RMSE = 72.1206213783709
R2 Score = 0.639311719957
Multiple Linear Regression
# Load the student scores dataset, report its dimensions and preview the
# first five rows.
data = pd.read_csv('student.csv')
print('Shape of Data = ', data.shape)
data.head(n=5)
Shape of Data = (1000, 3)

Implementing from scratch
# Pull the three score columns out as NumPy arrays.
math = data['Math'].values
read = data['Reading'].values
write = data['Writing'].values

# Plot the three scores against each other as a 3-D scatter plot.
fig = plt.figure(figsize=(18, 10))
# Create the 3-D axes through the figure so that they are registered with it:
# a bare Axes3D(fig) is no longer added to the figure automatically on recent
# Matplotlib releases, which leaves the rendered plot empty.
ax = fig.add_subplot(111, projection='3d')
ax.scatter(math, read, write, color='#ef1234')
ax.set_xlabel('Math')
ax.set_ylabel('Reading')
ax.set_zlabel('Writing')
plt.show()

# Number of samples.
m = len(math)

# Design matrix: a leading column of ones (the intercept term) followed by
# the math and reading scores, one row per sample.
x0 = np.ones(m)
X = np.transpose(np.array([x0, math, read]))

# Target vector: the writing scores.
Y = np.array(write)

# Gradient descent starts from all-zero coefficients.
B = np.array([0, 0, 0])

# Learning rate (step size) for gradient descent.
alpha = 0.0001
def cost_function(X, Y, B):
    """Return the halved mean squared error of the linear model ``X.dot(B)``.

    Args:
        X: Feature matrix; ``X.dot(B)`` must produce one prediction per
            entry of ``Y``.
        Y: Observed target values.
        B: Coefficient vector.

    Returns:
        ``sum((X.dot(B) - Y) ** 2) / (2 * len(Y))``.
    """
    m = len(Y)
    residuals = X.dot(B) - Y
    # The extra factor of 2 in the denominator cancels the 2 produced when
    # the squared error is differentiated.
    J = np.sum(residuals ** 2) / (2 * m)
    return J
# Cost of the starting point: evaluated with the initial coefficients B,
# before any gradient-descent step has been taken.
inital_cost = cost_function(X=X, Y=Y, B=B)
print(inital_cost)
2470.11
def gradient_descent(X, Y, B, alpha, iterations):
    """Fit linear-regression coefficients with batch gradient descent.

    Every iteration computes the gradient of the cost over the whole data
    set and moves the coefficients one step of size ``alpha`` against it.

    Args:
        X: Feature matrix; ``X.dot(B)`` must produce one prediction per
            entry of ``Y``.
        Y: Observed target values.
        B: Initial coefficient vector. It is not modified in place; each
            update builds a new array.
        alpha: Learning rate (step size).
        iterations: Number of update steps to perform.

    Returns:
        A tuple ``(B, cost_history)``: the coefficients after the last step
        and a list holding the cost measured after each step.
    """
    cost_history = [0] * iterations
    m = len(Y)

    for iteration in range(iterations):
        # Hypothesis values for the current coefficients
        h = X.dot(B)
        # Difference between hypothesis and actual Y
        loss = h - Y
        # Gradient of the cost with respect to B
        gradient = X.T.dot(loss) / m
        # Step against the gradient
        B = B - alpha * gradient
        # Record the cost reached with the updated coefficients
        cost = cost_function(X, Y, B)
        cost_history[iteration] = cost

    return B, cost_history
# Run 100000 iterations of gradient descent from the initial coefficients.
newB, cost_history = gradient_descent(X, Y, B, alpha, iterations=100000)

# Fitted coefficients, followed by the cost recorded on the last iteration
print(newB)
print(cost_history[-1])
[-0.47889172 0.09137252 0.90144884]
10.475123473539169
$$S_{\text{writing}} = -0.47889172 + 0.09137252 \times S_{\text{math}} + 0.90144884 \times S_{\text{reading}}$$
# Model Evaluation - RMSE
def rmse(Y, Y_pred):
    """Return the root mean squared error between ``Y`` and ``Y_pred``.

    Args:
        Y: Observed values (array-like).
        Y_pred: Predicted values, with the same shape as ``Y``.

    Returns:
        ``sqrt(sum((Y - Y_pred) ** 2) / len(Y))``. The sum runs over the
        first axis, so 1-D inputs yield a single number.
    """
    Y = np.asarray(Y)
    Y_pred = np.asarray(Y_pred)
    squared_errors = (Y - Y_pred) ** 2
    # np.sum along axis 0 matches what the builtin sum() does on an array,
    # without iterating element by element in Python.
    return np.sqrt(np.sum(squared_errors, axis=0) / len(Y))
# Model Evaluation - R2 Score
def r2_score(Y, Y_pred):
    """Return the coefficient of determination (R2) of ``Y_pred`` against ``Y``.

    Args:
        Y: Observed values (array-like).
        Y_pred: Predicted values, with the same shape as ``Y``.

    Returns:
        ``1 - ss_res / ss_tot``, where ``ss_res`` is the sum of squared
        residuals and ``ss_tot`` the sum of squared deviations of ``Y``
        from its mean. Both sums run over the first axis.
    """
    Y = np.asarray(Y)
    Y_pred = np.asarray(Y_pred)
    mean_y = np.mean(Y)
    # np.sum along axis 0 matches what the builtin sum() does on an array,
    # without iterating element by element in Python.
    ss_tot = np.sum((Y - mean_y) ** 2, axis=0)
    ss_res = np.sum((Y - Y_pred) ** 2, axis=0)
    r2 = 1 - (ss_res / ss_tot)
    return r2
# Predict with the gradient-descent coefficients, then report RMSE and R2
# on the same data.
Y_pred = np.dot(X, newB)
print(rmse(Y=Y, Y_pred=Y_pred))
print(r2_score(Y=Y, Y_pred=Y_pred))
4.577143972727789
0.9097223273061553
# X and Y values: math and reading scores as features, writing as target.
# No column of ones is needed here; LinearRegression fits the intercept
# itself by default.
X = np.array([math, read]).T
Y = np.array(write)
# Model initialization
reg = LinearRegression()
# Data fitting
reg = reg.fit(X, Y)
# Y prediction
Y_pred = reg.predict(X)
# Model evaluation. The RMSE value gets its own name so that the rmse()
# helper function defined earlier in the file is not overwritten by a float.
rmse_sklearn = np.sqrt(mean_squared_error(Y, Y_pred))
r2 = reg.score(X, Y)
print(rmse_sklearn)
print(r2)
4.572887051836439
0.9098901726717316
# Compare the model's predictions with the actual writing scores, both
# plotted against the reading score.
# Sort by reading score so the prediction curve is drawn left to right
# instead of zig-zagging between samples in file order.
order = np.argsort(read)
plt.figure(figsize=(18, 10))
plt.plot(read[order], Y_pred[order], color='#58b970', label='Regression Line')
plt.scatter(read, write, color='#ef1234', label='Data')
plt.xlabel('Reading')
plt.ylabel('Writing')
# Without this call the labels passed above are never displayed.
plt.legend()
plt.show()

Links referred to in this article:
Interested
In Our Blogs?
Subscribe To Get Notified When We Add More Blogs On Our Platform