【发布时间】:2020-05-03 21:06:00
【问题描述】:
我正在尝试实现一个多变量回归模型,其中均方误差作为成本函数,梯度下降来优化参数。超过 1000 次迭代,成本函数没有减少。我不确定我是否正确实现了渐变。另外,我怎样才能将偏见融入其中。我知道对于简单的线性模型,偏差是 y 截距,但我如何在这里实现它。
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import datasets
class LinearRegression:
def __init__(self, learning_rate=0.0001, n_iters=1000):
self.lr = learning_rate
self.n_iters = n_iters
#since we have three independent variable, we initialize three weights with zeros
self.weights = np.array([[0.0],[0.0],[0.0]])
def update_param(self, x_featureset, y_targets, weights):
"""
x_featureset - (160,3)
y_targets - (160,1)
predictions - (160,1)
weights - (3,1)
"""
predictions = self.predict(x_featureset, weights)
#extract the features
x1 = x_featureset[:,0]
x2 = x_featureset[:,1]
x3 = x_featureset[:,2]
#calculate partial derivatives
d_w1 = -x1*(y_targets - predictions)
d_w2 = -x2*(y_targets - predictions)
d_w3 = -x2*(y_targets - predictions)
#multiply derivative by learning rate and subtract from our weights
weights[0][0] -= (self.lr*np.mean(d_w1))
weights[1][0] -= (self.lr*np.mean(d_w2))
weights[2][0] -= (self.lr*np.mean(d_w3))
return weights
def cost_function(self, x_featureset, y_targets, weights):
"""
x_featureset - (160,3)
y_targets - (160,1)
predictions - (160,1)
weights - (3,1)
"""
total_observation = len(y_targets)
predictions = self.predict(x_featureset, weights)
sq_error = (y_targets-predictions)**2
return 1.0/(2*total_observation) * sq_error.sum()
def normalize(self, x_featureset):
"""
x_featureset - (160,3)
x_featureset.T - (3,160)
"""
for features in x_featureset.T:
fmean = np.mean(features)
frange = np.amax(features) - np.amin(features)
#vector subtraction
features -= fmean
#vector division
features /= frange
return x_featureset
def train(self, x, y):
cost_history = []
#nomalize independent variables
x = self.normalize(x)
for i in range(self.n_iters):
self.weights = self.update_param(x, y, self.weights)
cost = self.cost_function(x,y, self.weights)
cost_history.append(cost)
#log process
if i % 10 == 0:
print("cost: {}".format(cost))
def predict(self, x_featureset, weights):
"""
featureset - (160,3)
weights - (3,1)
predictions - (160,1)
"""
y_predicted = np.dot(x_featureset, weights)
return y_predicted
#generating sample data using sklearn
def generate_data():
x, y = datasets.make_regression(n_samples=200, n_features=3, noise=20, random_state=4)
x_train, x_test, y_train, y_test = train_test_split(x,y, test_size=0.2, random_state=1234)
return (x_train, x_test, y_train, y_test)
#create model instance
model = LinearRegression()
x_train, x_test, y_train, y_test = generate_data()
#fit the data
model.train(x_train, y_train)
【问题讨论】:
标签: python-3.x numpy scikit-learn regression linear-regression