[Posted]: 2017-02-20 14:07:23
[Problem description]:
import pandas as pd
import matplotlib.pyplot as plt
# I'm trying to implement the most basic functionality of LinearRegression
# myself, so I'm not using the sklearn version:
# from sklearn.linear_model import LinearRegression
dataframe = pd.read_fwf('brain_body.txt')  # link given below
x_values = dataframe[['Brain']]
y_values = dataframe[['Body']]
lr = LinearRegression(0.0001, 10)  # passing learning_rate and iterations
lr.fit(x_values, y_values)
# commented out because the values are insane
# plt.scatter(x_values, y_values)
# plt.plot(x_values, lr.predict(x_values))  # was clf.predict; the object is named lr
# plt.show()
Here is the class I wrote:
class LinearRegression:
    def __init__(self, learning_rate, iterations):
        self.b = 0  # b as in y = mx + b
        self.m = 0  # m as in y = mx + b
        self.learning_rate = learning_rate
        self.iterations = iterations

    def get_y(self, x):
        return self.m * float(x) + self.b

    def step_gradient(self, x_values, y_values):
        print()
        print("Values before: m =", self.m, " b =", self.b)
        m_gradient = 0
        b_gradient = 0
        N = float(len(x_values.iloc[:, 0]))  # .iloc instead of the deprecated .ix
        print('%11s' % "d(m)", '%11s' % "m_gradient", '%11s' % "d(b)", '%11s' % "b_gradient")
        for i in range(int(N)):
            x = x_values.iloc[i, 0]
            y = y_values.iloc[i, 0]
            # EDIT: I originally missed a * -1 in pm; the log below was produced
            # without it. But that alone wouldn't fix everything; adjusting the
            # learning rate does.
            pm = (y - self.get_y(x)) * x   # partial derivative w.r.t. m
            pb = (y - self.get_y(x)) * -1  # partial derivative w.r.t. b
            m_gradient += pm * 2 / N
            b_gradient += pb * 2 / N
            print('%11s' % pm, '%11s' % m_gradient, '%11s' % pb, '%11s' % b_gradient)
        self.m -= self.learning_rate * m_gradient  # adjust current m
        self.b -= self.learning_rate * b_gradient  # adjust current b
        print("Values after: m =", self.m, " b =", self.b)
        print()

    def fit(self, x_values, y_values):  # equivalent to train_model
        for i in range(self.iterations):
            self.step_gradient(x_values, y_values)

    def predict(self, x_values):  # equivalent to get_output
        predictions = []
        for x in x_values.iloc[:, 0]:  # .iloc instead of the deprecated .ix
            predictions.append(self.get_y(x))
        return predictions
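
For reference, assuming the cost being minimized is the mean squared error (as in the video mentioned below), the partial derivatives work out as:

E(m, b)  = (1/N) * Σ_i (y_i - (m*x_i + b))^2
∂E/∂m = -(2/N) * Σ_i x_i * (y_i - (m*x_i + b))
∂E/∂b = -(2/N) * Σ_i (y_i - (m*x_i + b))

Both carry a leading minus sign; that is the * -1 the EDIT comment above refers to. pm should be (y - self.get_y(x)) * x * -1, matching the form pb already has.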
I watched Siraj Raval's How to do Linear Regression the right way and did almost exactly what he does. I do understand what partial derivatives and gradient descent are, but I don't know what values the partial derivatives should take (or how to estimate them). Already in the first iteration the numbers go crazy:
Values before: m = 0 b = 0
d(m) m_gradient d(b) b_gradient
150.6325 4.85911290323 -44.5 -1.43548387097
7.44 5.09911290323 -15.5 -1.93548387097
10.935 5.45185483871 -8.1 -2.19677419355
196695.0 6350.45185484 -423.0 -15.8419354839
4341.435 6490.49814516 -119.5 -19.6967741935
3180.9 6593.10782258 -115.0 -23.4064516129
1456.306 6640.08543548 -98.2 -26.5741935484
5.72 6640.26995161 -5.5 -26.7516129032
243.02 6648.10930645 -58.0 -28.6225806452
2.72 6648.19704839 -6.4 -28.8290322581
0.404 6648.21008065 -4.0 -28.9580645161
5.244 6648.37924194 -5.7 -29.1419354839
6.6 6648.59214516 -6.6 -29.3548387097
0.0007 6648.59216774 -0.14 -29.3593548387
0.06 6648.59410323 -1.0 -29.3916129032
37.8 6649.81345806 -10.8 -29.74
24.6 6650.60700645 -12.3 -30.1367741935
10.71 6650.95249032 -6.3 -30.34
11723841.0 384839.371845 -4603.0 -178.823870968
0.0069 384839.372068 -0.3 -178.833548387
78394.9 387368.23981 -419.0 -192.349677419
341255.0 398376.465616 -655.0 -213.478709677
2.7475 398376.554245 -3.5 -213.591612903
1150.0 398413.651019 -115.0 -217.301290323
84.48 398416.376181 -25.6 -218.127096774
1.0 398416.408439 -5.0 -218.288387097
24.675 398417.204406 -17.5 -218.852903226
359720.0 410021.075374 -680.0 -240.788387097
84042.0 412732.107632 -406.0 -253.88516129
27625.0 413623.236665 -325.0 -264.369032258
9.225 413623.534245 -12.3 -264.765806452
81840.0 416263.534245 -1320.0 -307.346451613
38007648.0 1642316.69554 -5712.0 -491.604516129
13.65 1642317.13586 -3.9 -491.730322581
1217.2 1642356.40037 -179.0 -497.504516129
1960.0 1642419.62618 -56.0 -499.310967742
68.85 1642421.84715 -17.0 -499.859354839
0.12 1642421.85102 -1.0 -499.891612903
0.0092 1642421.85132 -0.4 -499.904516129
0.0025 1642421.8514 -0.25 -499.912580645
17.5 1642422.41591 -12.5 -500.315806452
122500.0 1646374.02882 -490.0 -516.122258065
30.25 1646375.00462 -12.1 -516.512580645
9712.5 1646688.31107 -175.0 -522.157741935
15700.0 1647194.76269 -157.0 -527.222258065
22950.4 1647935.09817 -440.0 -541.415806452
1893.725 1647996.18607 -179.5 -547.206129032
1.32 1647996.22865 -2.4 -547.283548387
4860.0 1648153.00285 -81.0 -549.896451613
75.6 1648155.44156 -21.0 -550.573870968
168.0896 1648160.8638 -39.2 -551.838387097
0.532 1648160.88096 -1.9 -551.899677419
0.09 1648160.88387 -1.2 -551.938387097
0.366 1648160.89567 -3.0 -552.03516129
0.01584 1648160.89619 -0.33 -552.045806452
34560.0 1649275.73489 -180.0 -557.852258065
75.0 1649278.15425 -25.0 -558.658709677
27040.0 1650150.41231 -169.0 -564.110322581
2.34 1650150.4878 -2.6 -564.194193548
18.468 1650151.08354 -11.4 -564.561935484
0.26 1650151.09193 -2.5 -564.642580645
213.444 1650157.97722 -50.4 -566.268387097
Values after: m = -165.015797722 b = 0.0566268387097
Values after 10 iterations: m = -1.76899770934e+22 b = 4.21166966984e+18
How do I do linear regression from scratch correctly?
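
The log above already points at the scale of the problem: the final m_gradient of the first pass is 1650157.97722, and 0.0001 * 1650157.97722 ≈ 165.016, which matches m = -165.015797722 after one pass. A slope of that size makes the residuals, and therefore the gradients, even larger on the next pass, so the values compound toward infinity. Below is a minimal sketch of one way to keep them bounded (an assumption on my part, not something from the question or the video): standardize both columns before fitting, so the gradient magnitudes no longer depend on the raw units of the data.

import pandas as pd

# Hypothetical preprocessing step: z-score both columns before fitting.
dataframe = pd.read_fwf('brain_body.txt')
x_values = (dataframe[['Brain']] - dataframe[['Brain']].mean()) / dataframe[['Brain']].std()
y_values = (dataframe[['Body']] - dataframe[['Body']].mean()) / dataframe[['Body']].std()

lr = LinearRegression(0.0001, 10)  # same hyperparameters as in the question
lr.fit(x_values, y_values)         # m and b now stay bounded instead of exploding

With the data standardized, the per-pass gradients are on the order of 1, so the updates become tiny rather than explosive; actually converging to a good fit then needs the sign fix plus a larger learning rate or more iterations.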
[Discussion]:
- This is somewhat interesting to me, because I'm taking the ML class on Coursera and was thinking about doing it in R and Python as well as the recommended Octave. Interpreting someone else's code is always a bit tricky... your odds of getting an answer would probably be better if you wrote out the math behind what you're doing. I have some ideas (e.g. a sign error in the pm and pb calculations), but it's hard to say, because I don't know what you're trying to implement.
- The sign changes because we can use either (y - current.y)^2 or (current.y - y)^2, since both give the same result; but the sign flips when you take the partial derivative.
- All I need are the best values of b and m, which give me the line y = mx + b as the approximation.
- I know what you need, but I'm just saying it's hard to interpret what you have as-is (at least for me). How about starting super simple: drop the class and just work through a basic gradient step. Use something like x = [1,2,3...n], y = [1,2,3,...n]. Initialize with something like m = -10, b = -5 and watch what each step does (it should get closer and closer to m = 1, b = 0)? See the sketch after this list.
- Thanks for the tip @Hendy! I should have done this long ago. I tried it on his lib and it only works with his csv; I swapped that csv for x = 1,2,3... and y = 1,2,3,..., and the results were terrible, though it did reasonably well with fewer records. Totally confused how 37+ records can push the power of e from -5 to +256...!
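
A minimal, self-contained sketch of the sanity check suggested above, assuming the sign-corrected gradients and plain Python lists instead of a DataFrame; with y = x the fit should approach m = 1, b = 0:

# One gradient step for y = mx + b under mean squared error.
# The signs here include the * -1 fix mentioned in the question's EDIT comment.
def step(m, b, xs, ys, lr):
    n = float(len(xs))
    m_grad = sum(-2.0 / n * (y - (m * x + b)) * x for x, y in zip(xs, ys))
    b_grad = sum(-2.0 / n * (y - (m * x + b)) for x, y in zip(xs, ys))
    return m - lr * m_grad, b - lr * b_grad

xs = list(range(1, 11))
ys = list(range(1, 11))   # y = x, so the optimum is m = 1, b = 0
m, b = -10.0, -5.0        # deliberately bad starting point, as suggested
for _ in range(1000):
    m, b = step(m, b, xs, ys, 0.01)
print(m, b)               # should end up close to 1 and 0

Flipping the sign on m_grad back (dropping the minus) pushes m further from 1 on every step, so even this tiny example blows up, which mirrors the behavior in the question.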
Tags: python-3.x linear-regression gradient-descent