您可以使用 scipy 手动构造加权 KDE。只要将 `bw_method` 指定为标量,加权结果就能与 pandas 在原始数据上绘制的密度曲线吻合;如果保留默认的 `bw_method`,两者的拟合结果会不一致(默认带宽由样本量推算,而 scipy 在加权时使用的是由权重得到的有效样本量)。
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
# Fix the RNG seed so the synthetic sample is reproducible.
np.random.seed(410112)

# Raw data to check against: 40000 integer scores drawn from [1, 100).
df = pd.DataFrame(
    {'observed_scores': np.random.randint(low=1, high=100, size=40000)}
)

# Aggregated view (one row per distinct score plus its frequency), i.e. the
# value-counts form of the data that is assumed to be all you have access to.
df1 = df.groupby('observed_scores').size().reset_index(name='counts')
代码
def weighted_kde(y, weights, bw_method=None):
    """Evaluate a weighted Gaussian KDE on a 1000-point grid.

    The grid spans the observed data range extended by half of that range
    on each side. NaN samples (and their weights) are dropped before the
    estimator is fitted, because ``scipy.stats.gaussian_kde`` does not
    ignore NaN values itself.

    Parameters
    ----------
    y : array-like
        One-dimensional sample values (for example the distinct values of
        an aggregated data set).
    weights : array-like or None
        Weight of each sample in ``y`` (for example its count). ``None``
        gives every sample equal weight.
    bw_method : str, scalar, callable or None, optional
        Passed through to ``scipy.stats.gaussian_kde``. When weights are
        given, scipy's rule-of-thumb bandwidths are based on the effective
        sample size of the weights, so the default will generally differ
        from a KDE fitted to the un-aggregated data; pass a scalar to use
        the same factor in both cases.

    Returns
    -------
    ind : numpy.ndarray
        The 1000 evaluation points.
    density : numpy.ndarray
        The estimated density at each point of ``ind``.

    Raises
    ------
    ValueError
        If ``weights`` does not have the same shape as ``y``, or if no
        non-NaN samples remain.
    """
    values = np.asarray(y, dtype=float)
    keep = ~np.isnan(values)

    if weights is not None:
        weights = np.asarray(weights, dtype=float)
        if weights.shape != values.shape:
            raise ValueError("`weights` must have the same shape as `y`")
        # Keep weights aligned with the samples that survive NaN removal.
        weights = weights[keep]
    values = values[keep]

    lowest = values.min()
    highest = values.max()
    padding = 0.5 * (highest - lowest)
    ind = np.linspace(lowest - padding, highest + padding, 1000)

    gkde = stats.gaussian_kde(values, bw_method=bw_method, weights=weights)
    density = gkde.evaluate(ind)
    return ind, density
检查输出
# A scalar bandwidth factor, passed unchanged to both estimators below.
bw_method = 0.5
fig, ax = plt.subplots()

# Reference curve: pandas KDE computed on the underlying (raw) data.
df['observed_scores'].plot.density(
    ax=ax,
    bw_method=bw_method,
    label='pandas density',
    lw=2,
)

# Comparison curve: weighted KDE built from the aggregated counts only.
ind, y = weighted_kde(
    df1['observed_scores'],
    df1['counts'],
    bw_method=bw_method,
)
ax.plot(ind, y, label='Manual Weighted KDE', lw=2, linestyle='--')

ax.legend()
plt.show()