【Question Title】: K-Nearest Neighbors Model is not working. Passing list-likes to .loc or [] with any missing labels is no longer supported
【Posted】: 2021-04-24 04:33:18
【Question Description】:

The dataset can be found here: https://gofile.io/d/f8nBLL

I am trying to run a K-Nearest Neighbors model, but I am running into the following error:

KeyError: 'Passing list-likes to .loc or [] with any missing labels is no longer supported, see https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike'
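
For reference, pandas 1.0+ raises this KeyError whenever .loc (or []) receives a list of labels and at least one of them is not in the index. The only list-like .loc lookup in the code below is df.loc[np.random.permutation(len(df))], which implicitly assumes the index is exactly 0..len(df)-1; if numeric_cars was filtered or cleaned earlier so that its index has gaps (an assumption about how the dataset was prepared), some of those positional labels will be missing and the lookup fails. A minimal sketch of the trigger and two label-independent alternatives:

import numpy as np
import pandas as pd

# A frame whose index has a gap, e.g. after a row was dropped during cleaning
df = pd.DataFrame({'price': [5000, 6000, 7000]}, index=[0, 2, 3])

# Label-based lookup with a list containing a missing label (1) raises the
# KeyError quoted above on pandas >= 1.0:
# df.loc[np.random.permutation(len(df))]

# Position-based alternatives that do not depend on the index labels:
shuffled = df.iloc[np.random.permutation(len(df))].reset_index(drop=True)
shuffled = df.sample(frac=1, random_state=1).reset_index(drop=True)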

The code I am using is as follows:

def knn_train_test(variable_columns,target_columns,df,hyp_test=False):
    # Randomizing the Data
    np.random.seed(1)
    df=df.loc[np.random.permutation(len(df))]
    df = df.reset_index(drop=True)

    # Dividing Training 75% / Test 25% Dataset 
    rows=round(df.shape[0]*0.75)
    training=df[:rows]
    test=df[rows:]
    x_training=training[variable_columns]
    y_training=training[target_columns]
    x_test=test[variable_columns]
    y_test=test[target_columns]

    #Testing Multiple Hyper Parameters and Accuracy
    if test==True:
        hyper_params = [x for x in range(1,21)]
        # Append the first model's MSE values to this list.
        two_mse_values = list()
        # Append the second model's MSE values to this list.
        two_hyp_mse = dict()
        for hp in hyper_params:
            knn = KNeighborsRegressor(n_neighbors=hp) #Selecting the Model
            knn.fit(x_training, y_training)
            predictions = knn.predict(x_test)
            rmse = mean_squared_error(y_test, predictions,squared=False)
            two_mse_values.append(rmse)

        two_lowest_mse = two_mse_values[0]
        two_lowest_k = 1

        for k,mse in enumerate(two_mse_values):
            if rmse < two_lowest_mse:
                two_lowest_mse = rmse
                two_lowest_k = k + 1
        two_hyp_mse[two_lowest_k] = two_lowest_mse
        return (f'k:{two_lowest_k}, RMSE:{two_lowest_mse}')
    else:
        knn = KNeighborsRegressor() #Selecting the Model
        knn.fit(x_training, y_training)
        predictions = knn.predict(x_test)
        rmse = mean_squared_error(y_test, predictions)
        return rmse

variables=numeric_cars.drop('price',axis=1)
target=numeric_cars['price']

# For each column (minus `price`), train a model, return RMSE value
# and add to the dictionary `rmse_results`.
for col in variables.columns:
    rmse_val = knn_train_test(col, 'price', numeric_cars)
    rmse_results[col] = rmse_val

# Create a Series object from the dictionary so 
# we can easily view the results, sort, etc
rmse_results_series = pd.Series(rmse_results)
rmse_results_series.sort_values()
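
A separate detail in the driver loop above, independent of the KeyError: knn_train_test(col, 'price', numeric_cars) passes a single column name as a plain string, so training[variable_columns] inside the function returns a 1-D Series, while KNeighborsRegressor.fit expects a 2-D feature matrix. Wrapping the name in a one-element list keeps the selection a DataFrame. A sketch of the loop with that change, assuming variables and numeric_cars are defined as above and assuming the corrected function from Solution 1 below (where the branch test is hyp_test==True):

import pandas as pd

rmse_results = {}
for col in variables.columns:
    # [col] selects a one-column DataFrame, so x_training / x_test stay 2-D
    rmse_results[col] = knn_train_test([col], 'price', numeric_cars)

rmse_results_series = pd.Series(rmse_results)
print(rmse_results_series.sort_values())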

【Question Comments】:

    Tags: python function machine-learning data-science nearest-neighbor


    【Solution 1】:

    This code works for me:

    import pandas as pd
    import numpy as np
    import sklearn
    
    from sklearn.neighbors import KNeighborsRegressor
    from sklearn.metrics import mean_squared_error
    
    assert(pd.__version__ == '1.2.1')
    assert(np.__version__ == '1.18.5')
    assert(sklearn.__version__ == '0.23.1')
    
    def knn_train_test(variable_columns,target_columns,df,hyp_test=False):
        # Randomizing the Data
        np.random.seed(1)
        df=df.loc[np.random.permutation(len(df))]
        df = df.reset_index(drop=True)
    
        # Dividing Training 75% / Test 25% Dataset 
        rows=round(df.shape[0]*0.75)
        training=df[:rows]
        test=df[rows:]
        x_training=training[variable_columns]
        y_training=training[target_columns]
        x_test=test[variable_columns]
        y_test=test[target_columns]
    
        #Testing Multiple Hyper Parameters and Accuracy
        if hyp_test==True:
            hyper_params = [x for x in range(1,21)]
            # Append the first model's MSE values to this list.
            two_mse_values = list()
            # Append the second model's MSE values to this list.
            two_hyp_mse = dict()
            for hp in hyper_params:
                knn = KNeighborsRegressor(n_neighbors=hp) #Selecting the Model
                knn.fit(x_training, y_training)
                predictions = knn.predict(x_test)
                rmse = mean_squared_error(y_test, predictions,squared=False)
                two_mse_values.append(rmse)
    
            two_lowest_mse = two_mse_values[0]
            two_lowest_k = 1
    
            # track the k with the lowest RMSE
            for k,mse in enumerate(two_mse_values):
                if mse < two_lowest_mse:
                    two_lowest_mse = mse
                    two_lowest_k = k + 1
            two_hyp_mse[two_lowest_k] = two_lowest_mse
            return (f'k:{two_lowest_k}, RMSE:{two_lowest_mse}')
        else:
            knn = KNeighborsRegressor() #Selecting the Model
            knn.fit(x_training, y_training)
            predictions = knn.predict(x_test)
            rmse = mean_squared_error(y_test, predictions)
            return rmse
    
    
    file = '~/Downloads/numeric_cars.csv'
    
    df = pd.read_csv(file)
    
    variable_columns = ['normalized-losses', 'wheel-base']
    target_columns = ['price']
    
    
    
    print(knn_train_test(variable_columns, target_columns, df))
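
    The same function can also run the k = 1..20 search by setting hyp_test=True; a usage sketch with the same df and column lists as above (this branch returns a formatted string with the best k and its RMSE, while the default branch returns plain MSE because squared=False is only passed inside the hyperparameter loop):

    # Search k = 1..20 and report the k with the lowest RMSE
    print(knn_train_test(variable_columns, target_columns, df, hyp_test=True))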
    
    

    【Comments】:
