gradient descent and linear regression

datasets.make_regression

1
2
3
4
5
6
7
import matplotlib.pyplot as plt
import numpy
from sklearn import datasets
from numpy import abs

# Synthetic data set: 100 samples with a single feature.
# noise=10 makes the points more scattered around the underlying line.
regressionData = datasets.make_regression(n_samples=100, n_features=1, noise=10)

y = mx + b and scatter plot

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# Starting guess for the line y = m*x + b (any values may be chosen).
init_m, init_b = 10, 10
learning_rate = 0.1

# The x axis runs from -5 to 5.
range1 = [-5, 5]

# Scatter plot of the generated samples.
plt.scatter(x=regressionData[0], y=regressionData[1], c='red', marker='*')
plt.xlim(range1)

# Draw the initial line y = m*x + b between the two ends of the x range.
plt.plot(range1, numpy.asarray(range1) * init_m + init_b)
plt.show()

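MSE (mean squared error)

$$\mathrm{MSE}(m, b) = \frac{1}{N} \sum_{i=1}^{N} \bigl(y_i - (m x_i + b)\bigr)^2$$

Goal: minimize the MSE with respect to m and b.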

Gradient descent

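Use gradient descent to find the minimum: repeatedly move m and b against the gradient of the MSE, scaled by the learning rate.

$$\frac{\partial\,\mathrm{MSE}}{\partial m} = \frac{1}{N} \sum_{i=1}^{N} -2\,x_i \bigl(y_i - (m x_i + b)\bigr), \qquad \frac{\partial\,\mathrm{MSE}}{\partial b} = \frac{1}{N} \sum_{i=1}^{N} -2 \bigl(y_i - (m x_i + b)\bigr)$$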

Code

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
def update_weight(m, b, X, Y, learning_rate):
    """Take one gradient-descent step on the MSE of the line ``y = m * x + b``.

    Args:
        m: Current slope.
        b: Current intercept.
        X: Indexable sequence of inputs; all ``len(X)`` samples are used.
        Y: Indexable sequence of targets, matched to ``X`` by index.
        learning_rate: Step size applied to the averaged gradient.

    Returns:
        The updated ``(m, b)`` pair.

    Raises:
        ZeroDivisionError: If ``X`` is empty.
    """
    m_deriv = 0
    b_deriv = 0
    N = len(X)
    for i in range(N):
        # The residual is shared by both partial derivatives.
        residual = Y[i] - (m * X[i] + b)
        m_deriv += -2 * X[i] * residual
        b_deriv += -2 * residual
    # Rebind instead of using ``-=`` so that a NumPy array passed in as
    # ``m`` or ``b`` is not modified in place for the caller.
    m = m - (m_deriv / N) * learning_rate
    b = b - (b_deriv / N) * learning_rate
    return m, b


def cost(m, b, X, Y):
    """Return the mean squared error of the line ``y = m * x + b``.

    Args:
        m: Slope of the line.
        b: Intercept of the line.
        X: Indexable sequence of inputs; all ``len(X)`` samples are used.
        Y: Indexable sequence of targets, matched to ``X`` by index.

    Returns:
        The sum of squared residuals divided by ``len(X)``.

    Raises:
        ZeroDivisionError: If ``X`` is empty.
    """
    N = len(X)
    # Accumulate under a name that does not shadow the function itself.
    squared_error = sum((Y[i] - (m * X[i] + b)) ** 2 for i in range(N))
    return squared_error / N

Note that differentiating the MSE leaves a constant factor of 2.
We can instead take 1/2 MSE as our cost function: the factor of 2 then cancels,
which makes the gradient more convenient to calculate without changing where the minimum is.

Cost function

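Use 1/2 MSE as the cost function:

$$J(m, b) = \frac{1}{2N} \sum_{i=1}^{N} \bigl(y_i - (m x_i + b)\bigr)^2$$

Goal: minimize J with respect to m and b.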

Gradient descent

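Use gradient descent to find the minimum; with the 1/2 factor the constant 2 no longer appears in the gradient:

$$\frac{\partial J}{\partial m} = \frac{1}{N} \sum_{i=1}^{N} -x_i \bigl(y_i - (m x_i + b)\bigr), \qquad \frac{\partial J}{\partial b} = \frac{1}{N} \sum_{i=1}^{N} -\bigl(y_i - (m x_i + b)\bigr)$$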

Code

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
def update_weight(m, b, X, Y, learning_rate):
    """Take one gradient-descent step on 1/2 MSE of the line ``y = m * x + b``.

    Because the cost is half the mean squared error, the per-sample
    derivative carries a factor of -1 rather than -2.

    Args:
        m: Current slope.
        b: Current intercept.
        X: Indexable sequence of inputs; all ``len(X)`` samples are used.
        Y: Indexable sequence of targets, matched to ``X`` by index.
        learning_rate: Step size applied to the averaged gradient.

    Returns:
        The updated ``(m, b)`` pair.

    Raises:
        ZeroDivisionError: If ``X`` is empty.
    """
    m_deriv = 0
    b_deriv = 0
    N = len(X)
    for i in range(N):
        # The residual is shared by both partial derivatives.
        residual = Y[i] - (m * X[i] + b)
        m_deriv += -1 * X[i] * residual
        b_deriv += -1 * residual
    # Rebind instead of using ``-=`` so that a NumPy array passed in as
    # ``m`` or ``b`` is not modified in place for the caller.
    m = m - (m_deriv / N) * learning_rate
    b = b - (b_deriv / N) * learning_rate
    return m, b


def cost(m, b, X, Y):
    """Return half the mean squared error of the line ``y = m * x + b``.

    Args:
        m: Slope of the line.
        b: Intercept of the line.
        X: Indexable sequence of inputs; all ``len(X)`` samples are used.
        Y: Indexable sequence of targets, matched to ``X`` by index.

    Returns:
        The sum of squared residuals divided by ``2 * len(X)``.

    Raises:
        ZeroDivisionError: If ``X`` is empty.
    """
    N = len(X)
    # Accumulate under a name that does not shadow the function itself.
    squared_error = sum((Y[i] - (m * X[i] + b)) ** 2 for i in range(N))
    return squared_error / (2 * N)

Iteration

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
# Gradient-descent driver: start from (init_m, init_b), take 30 update steps,
# and after each step print the cost and plot the current line over the data.
init_cost = cost(init_m, init_b, regressionData[0], regressionData[1])
print("init cost=", init_cost)

current_m = init_m
current_b = init_b
lr = 0.1
for _ in range(30):
    new_m, new_b = update_weight(current_m, current_b, regressionData[0], regressionData[1], lr)
    new_cost = cost(new_m, new_b, regressionData[0], regressionData[1])
    print("cost=", new_cost)
    # range1 is a plain list; convert it so the slope scales each endpoint
    # element-wise whatever numeric type new_m has.
    plt.plot(range1, new_m * numpy.array(range1) + new_b)
    plt.scatter(regressionData[0], regressionData[1], c='red', marker='*')
    plt.xlim(range1)
    plt.show()
    # The parameters just computed become the starting point of the next step.
    current_m = new_m
    current_b = new_b

Reference: ML Glossary, "Linear Regression"