【Question Title】: Multivariable regression model cost does not decrease
【Posted】: 2020-05-03 21:06:00
【Question Description】:

I am trying to implement a multivariable regression model with mean squared error as the cost function and gradient descent to optimize the parameters. Over 1000 iterations, the cost function does not decrease. I am not sure whether I have implemented the gradient correctly. Also, how can I incorporate the bias? I know that for a simple linear model the bias is the y-intercept, but how do I implement it here?
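For reference, the model with an explicit bias term and the mean-squared-error cost described above can be written as follows (a standard formulation, stated here to match the 1/(2m) scaling used in the code below):

$$\hat{y}^{(i)} = w_0 + w_1 x_1^{(i)} + w_2 x_2^{(i)} + w_3 x_3^{(i)}$$

$$J(w) = \frac{1}{2m} \sum_{i=1}^{m} \left(y^{(i)} - \hat{y}^{(i)}\right)^2, \qquad \frac{\partial J}{\partial w_k} = -\frac{1}{m} \sum_{i=1}^{m} x_k^{(i)} \left(y^{(i)} - \hat{y}^{(i)}\right)$$

with $x_0^{(i)} \equiv 1$, so the bias $w_0$ is updated like any other weight.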

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import datasets

class LinearRegression:
    def __init__(self, learning_rate=0.0001, n_iters=1000):
        self.lr = learning_rate
        self.n_iters = n_iters
        #since we have three independent variables, we initialize three weights with zeros
        self.weights = np.array([[0.0],[0.0],[0.0]])

    def update_param(self, x_featureset, y_targets, weights):
        """
        x_featureset - (160,3)
        y_targets - (160,1)
        predictions - (160,1)
        weights - (3,1)
        """
        predictions = self.predict(x_featureset, weights)

        #extract the features
        x1 = x_featureset[:,0]
        x2 = x_featureset[:,1]
        x3 = x_featureset[:,2]

        #calculate partial derivatives
        d_w1 = -x1*(y_targets - predictions)
        d_w2 = -x2*(y_targets - predictions)
        d_w3 = -x2*(y_targets - predictions)

        #multiply derivative by learning rate and subtract from our weights
        weights[0][0] -= (self.lr*np.mean(d_w1))
        weights[1][0] -= (self.lr*np.mean(d_w2))
        weights[2][0] -= (self.lr*np.mean(d_w3))

        return weights

    def cost_function(self, x_featureset, y_targets, weights):
        """
        x_featureset - (160,3)
        y_targets - (160,1)
        predictions - (160,1)
        weights - (3,1)
        """

        total_observation = len(y_targets)
        predictions = self.predict(x_featureset, weights)
        sq_error = (y_targets-predictions)**2
        return 1.0/(2*total_observation) * sq_error.sum()

    def normalize(self, x_featureset):
        """
        x_featureset - (160,3)
        x_featureset.T - (3,160)
        """
        for features in x_featureset.T:
            fmean = np.mean(features)
            frange = np.amax(features) - np.amin(features)

            #vector subtraction
            features -= fmean
            #vector division
            features /= frange

        return x_featureset

    def train(self, x, y):
        cost_history = []
        #normalize independent variables
        x = self.normalize(x)
        for i in range(self.n_iters):
            self.weights = self.update_param(x, y, self.weights)
            cost = self.cost_function(x,y, self.weights)
            cost_history.append(cost)
            #log progress
            if i % 10 == 0:
                print("cost: {}".format(cost))

    def predict(self, x_featureset, weights):
        """
        featureset - (160,3)
        weights - (3,1)
        predictions - (160,1)
        """
        y_predicted = np.dot(x_featureset, weights)
        return y_predicted

#generating sample data using sklearn
def generate_data():
    x, y = datasets.make_regression(n_samples=200, n_features=3, noise=20, random_state=4)
    x_train, x_test, y_train, y_test = train_test_split(x,y, test_size=0.2, random_state=1234)
    return (x_train, x_test, y_train, y_test)

#create model instance
model = LinearRegression()
x_train, x_test, y_train, y_test = generate_data()

#fit the data
model.train(x_train, y_train)

【Question Discussion】:

    Tags: python-3.x numpy scikit-learn regression linear-regression


    【Solution 1】:

    I suggest the code below, which performs multivariable regression for this model with an explicit bias weight:

    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt
    from sklearn.model_selection import train_test_split
    from sklearn import datasets
    
    class LinearRegression:
        def __init__(self, learning_rate=0.0001, n_iters=1000):
            self.lr = learning_rate
            self.n_iters = n_iters
            #since we have three independent variables plus a bias, we initialize four weights with zeros
            self.weights = np.array([[0.0],[0.0],[0.0],[0.0]])
    
        def update_param(self, x_featureset, y_targets, weights):
            """
            x_featureset - (160,3)
            y_targets - (160,1)
            predictions - (160,1)
            weights - (4,1)
            """
            predictions = self.predict(x_featureset, weights)
    
            #extract the features
            x1 = x_featureset[:,0]
            x2 = x_featureset[:,1]
            x3 = x_featureset[:,2]
    
            #calculate partial derivatives
            d_w0 = - (y_targets - predictions)
            d_w1 = -x1*(y_targets - predictions)
            d_w2 = -x2*(y_targets - predictions)
            d_w3 = -x3*(y_targets - predictions)
    
            #multiply derivative by learning rate and subtract from our weights
            weights[0][0] -= (self.lr * np.mean(d_w0))
            weights[1][0] -= (self.lr * np.mean(d_w1))
            weights[2][0] -= (self.lr * np.mean(d_w2))
            weights[3][0] -= (self.lr * np.mean(d_w3))
    
            return weights
    
        def cost_function(self, x_featureset, y_targets, weights):
            """
            x_featureset - (160,3)
            y_targets - (160,1)
            predictions - (160,1)
            weights - (4,1)
            """
    
            total_observation = len(y_targets)
            predictions = self.predict(x_featureset, weights)
            sq_error = (y_targets-predictions)**2
            return 1.0/(2*total_observation) * sq_error.sum()
    
        def normalize(self, x_featureset):
            """
            x_featureset - (160,3)
            x_featureset.T - (3,160)
            """
            for features in x_featureset.T:
                fmean = np.mean(features)
                frange = np.amax(features) - np.amin(features)
    
                #vector subtraction
                features -= fmean
                #vector division
                features /= frange
    
            return x_featureset
    
        def train(self, x, y):
            cost_history = []
            #normalize independent variables
            x = self.normalize(x)
            for i in range(self.n_iters):
                self.weights = self.update_param(x, y, self.weights)
                cost = self.cost_function(x,y, self.weights)
                cost_history.append(cost)
                #log progress
                if i % 10 == 0:
                    print("cost: {}".format(cost))
    
        def predict(self, x_featureset, weights):
            """
            featureset - (160,3)
            weights - (4,1)
            predictions - (160,1)
            """
            # Y = W0 + W1* X1 + W2 * X2 + W3 * X3 
            y_predicted = weights[0,:]+np.dot(x_featureset, weights[1:,:])
            return y_predicted
    
    #generating sample data using sklearn
    def generate_data():
        x, y = datasets.make_regression(n_samples=200, n_features=3, noise=20, random_state=4)
        x_train, x_test, y_train, y_test = train_test_split(x,y, test_size=0.2, random_state=1234)
        return (x_train, x_test, y_train, y_test)
    
    #create model instance
    model = LinearRegression()
    x_train, x_test, y_train, y_test = generate_data()
    
    #fit the data
    model.train(x_train, y_train)
    

    Output:

    cost: 980808.7969914433
    cost: 980757.9150537294
    cost: 980707.1372473323
    cost: 980656.4633691043
    cost: 980605.8932163038
    ...
    cost: 976371.2443830017
    cost: 976329.488097852
    cost: 976287.8181821259
    cost: 976246.2344681664
    

    Update:

    Tested with lr=0.001, since the learning rate above was too small to make visible progress, and ran 100000 iterations. I found that the model converges around the following cost values:

    cost: 959301.8925571552
    cost: 959298.6367338672
    cost: 959296.3380453996
    cost: 959294.9824055596
    cost: 959294.5560072181
    cost: 959295.0453167808
    cost: 959296.4370687702
    cost: 959298.7182605114
    cost: 959301.8761469286
    

    【Discussion】:

      【Solution 2】:

      First, there is a typo in one of the expressions in your code:

      d_w1 = -x1*(y_targets - predictions)
      d_w2 = -x2*(y_targets - predictions)
      d_w3 = -x2*(y_targets - predictions)
      

      It should be:

      d_w1 = -x1*(y_targets - predictions)
      d_w2 = -x2*(y_targets - predictions)
      d_w3 = -x3*(y_targets - predictions)
      

      Now this does change the cost a little, but I don't think it has converged to the Global Optimum. I will review and update if I can optimize it further.

      Output:

      cost: 980813.8909325758
      cost: 980813.8924092407
      cost: 980813.8963470139
      cost: 980813.9027458953
      cost: 980813.9116058851
      ...
      cost: 980825.2553460634
      cost: 980825.4930891211
      cost: 980825.733293287
      cost: 980825.9759585612
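
      A numerical gradient check is a quick way to catch typos like the one above: perturb each weight in turn and compare the finite-difference change in cost with the analytic derivative. A minimal sketch (the helper name is illustrative; it reuses the class's existing cost_function):

      def numeric_grad(model, x, y, weights, eps=1e-5):
          """Central-difference estimate of the gradient of the cost."""
          grad = np.zeros_like(weights)
          for i in range(weights.shape[0]):
              w_plus, w_minus = weights.copy(), weights.copy()
              w_plus[i, 0] += eps
              w_minus[i, 0] -= eps
              grad[i, 0] = (model.cost_function(x, y, w_plus)
                            - model.cost_function(x, y, w_minus)) / (2 * eps)
          return grad

      If the analytic update uses x2 where x3 belongs, this check reports a clear mismatch in the corresponding component.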
      

      【Discussion】:

      • Thanks for the input. Why is the cost so high? Is it because of a normalization problem?
      • Possibly. The weight updates during execution look fine. Since this is a linear regression model optimized by least squares, the high cost may mean that even the best fit underfits the training data (or does not generalize well). One would have to study the dataset first to say more.
      • That makes sense. Also, why do we need the weight W0 in the equation? Could you explain how it helps the model?
      • W0 is the intercept, or bias, added so that the model represents a full linear equation, Y = MX + C in matrix form, which is essentially what linear regression is. (The bias can also be folded into the weight vector, as the sketch below shows.)
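      Following up on the last comment: a common way to handle W0 is to append a column of ones to X, so the bias is learned like any other weight and the whole update vectorizes. A minimal sketch under that assumption (illustrative, not the answer author's code); note the reshape of y to a column vector, which avoids unintended NumPy broadcasting when y has shape (n,):

      import numpy as np

      def train_with_bias(x, y, lr=0.001, n_iters=1000):
          """Vectorized gradient descent with the bias folded into the weights."""
          m = x.shape[0]
          y = y.reshape(-1, 1)                   #column vector avoids (n,n) broadcasts
          xb = np.hstack([np.ones((m, 1)), x])   #prepend a ones column for W0
          w = np.zeros((xb.shape[1], 1))
          for _ in range(n_iters):
              error = xb @ w - y                 #(m,1) residuals
              w -= lr * (xb.T @ error) / m       #gradient of 1/(2m)*sum(error**2)
          return w                               #w[0,0] is the bias W0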