使用 LMFIT 将对数正态模型拟合到数据答案

【问题标题】：Fitting a log-normal model to data using LMFIT使用 LMFIT 将对数正态模型拟合到数据
【发布时间】：2019-07-10 14:40:19
【问题描述】：

我希望将对数正态曲线拟合到大致遵循对数正态分布的数据。

我拥有的数据来自激光衍射仪，该仪器测量喷雾的粒度分布。这段代码的最终目标是为我的数据重新创建this method，它使用了为XRD数据曲线拟合设计的OriginPro软件；类似的问题。我想将该方法集成到我自己的研究分析中，这是在 Python 中完成的。

我将this post 中的代码改编为（理想情况下）处理对数正态分布。我已经简化了我的代码，只处理数据中的第一个对数正态峰值，所以现在它只是试图适应一个对数正态分布。我提供的数据也被简化为只有一个峰值可以拟合。示例数据和代码在本文底部给出。

我之前有一些使用 LMFIT 进行模型拟合的经验，尽管我使用的是用户定义的状态空间模型进行时间建模和 LMFIT minimize() 函数。我什至不确定从哪里开始调试这段代码的曲线拟合组件。

谁能帮我弄清楚为什么我无法适应这些数据？请注意，我得到的结果是微不足道的（y=0 处的直线）。

在 Windows 7（笔记本电脑）和 10（台式机）上工作

在 CMD 窗口中运行 python -V 给出：

Python 3.5.3 :: Anaconda 4.1.1 (64-bit)

这是样本分布的数据：

sizes = np.array([  1.26500000e-01,   1.47000000e-01,   1.71500000e-01,
     2.00000000e-01,   2.33000000e-01,   2.72000000e-01,
     3.17000000e-01,   3.69500000e-01,   4.31000000e-01,
     5.02500000e-01,   5.86000000e-01,   6.83500000e-01,
     7.97000000e-01,   9.29000000e-01,   1.08300000e+00,
     1.26250000e+00,   1.47200000e+00,   1.71650000e+00,
     2.00100000e+00,   2.33300000e+00,   2.72050000e+00,
     3.17200000e+00,   3.69800000e+00,   4.31150000e+00,
     5.02700000e+00,   5.86100000e+00,   6.83300000e+00,
     7.96650000e+00,   9.28850000e+00,   1.08295000e+01,
     1.26265000e+01,   1.47215000e+01,   1.71640000e+01,
     2.00115000e+01,   2.33315000e+01,   2.72030000e+01,
     3.17165000e+01,   3.69785000e+01,   4.31135000e+01,
     5.02665000e+01,   5.86065000e+01,   6.83300000e+01,
     7.96670000e+01,   9.28850000e+01,   1.08296000e+02,
     1.26264000e+02,   1.47213000e+02,   1.71637500e+02,
     2.00114500e+02,   2.33316500e+02])

y_exp = np.array([ 0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.01,
    0.02,  0.03,  0.04,  0.06,  0.07,  0.08,  0.09,  0.1 ,  0.11,
    0.13,  0.19,  0.3 ,  0.48,  0.74,  1.1 ,  1.56,  2.11,  2.72,
    3.37,  3.99,  4.55,  4.99,  5.3 ,  5.48,  5.53,  5.48,  5.36,
    5.19,  4.97,  4.67,  4.28,  3.79,  3.18,  2.48,  1.73,  1.  ,
    0.35,  0.  ,  0.  ,  0.  ,  0.  ])

以下是函数：


def generate_model(spec):
    composite_model = None
    params = None
    x = spec['x']
    y = spec['y']
    x_min = np.min(x)
    x_max = np.max(x)
    x_range = x_max - x_min
    y_max = np.max(y)
    for i, basis_func in enumerate(spec['model']):
#        prefix = f'm{i}_'
        prefix = 'm{0}_'.format(i)
        model = getattr(models, basis_func['type'])(prefix=prefix)
        if basis_func['type'] in ['LognormalModel','GaussianModel', 'LorentzianModel', 'VoigtModel']: # for now VoigtModel has gamma constrained to sigma
            model.set_param_hint('sigma', min=1e-6, max=x_range)
            model.set_param_hint('center', min=x_min, max=x_max)
            model.set_param_hint('height', min=1e-6, max=1.1*y_max)
            model.set_param_hint('amplitude', min=1e-6)
            # default guess is horrible!! do not use guess()
            default_params = {
                prefix+'center': x_min + x_range * random.random(),
                prefix+'height': y_max * random.random(),
                prefix+'sigma': x_range * random.random()
                }
        else:
#            raise NotImplemented(f'model {basis_func["type"]} not implemented yet')
            raise NotImplemented('model {0} not implemented yet'.format(basis_func["type"])) 
        if 'help' in basis_func:  # allow override of settings in parameter
            for param, options in basis_func['help'].items():
                model.set_param_hint(param, **options)
        model_params = model.make_params(**default_params, **basis_func.get('params', {}))
        if params is None:
            params = model_params
        else:
            params.update(model_params)
        if composite_model is None:
            composite_model = model
        else:
            composite_model = composite_model + model
    return composite_model, params

def update_spec_from_peaks(spec, model_indicies, peak_widths=np.arange(1,10), **kwargs):
    x = spec['x']
    y = spec['y']
    x_range = np.max(x) - np.min(x)
    peak_indicies = signal.find_peaks_cwt(y, peak_widths)
    np.random.shuffle(peak_indicies)
#    for peak_indicie, model_indicie in zip(peak_indicies.tolist(), model_indicies):
    for peak_indicie, model_indicie in zip(peak_indicies, model_indicies):
        model = spec['model'][model_indicie]
        if model['type'] in ['LognormalModel','GaussianModel', 'LorentzianModel', 'VoigtModel']:
            params = {
                'height': y[peak_indicie],
                'sigma': x_range / len(x) * np.min(peak_widths),
                'center': x[peak_indicie]
            }
            if 'params' in model:
                model.update(params)
            else:
                model['params'] = params
        else:
#            raise NotImplemented(f'model {basis_func["type"]} not implemented yet')
            raise NotImplemented('model {0} not implemented yet'.format(model["type"])) 
    return peak_indicies

这里是主线：

spec = {
    'x': sizes,
    'y': y_exp,
    'model': [
        {
            'type': 'LognormalModel',
            'params': {'center': 20, 'height': 3, 'sigma': 1},
#            'help': {'center': {'min': 10, 'max': 30}}
        }]}

num_comp = list(range(0,len(spec['model'])))

peaks_found = update_spec_from_peaks(spec, num_comp, peak_widths=np.arange(1,10))

#For checking peak fitting
print(peaks_found)
fig, ax = plt.subplots()
ax.scatter(spec['x'], spec['y'], s=4)
for i in peaks_found:
    ax.axvline(x=spec['x'][i], c='black', linestyle='dotted')

model, params = generate_model(spec)

output = model.fit(spec['y'], params, x=spec['x'])

fig, gridspec = output.plot()

感谢您的帮助，祝您今天愉快。

艾萨克

【问题讨论】：

标签： python model-fitting lmfit

【解决方案1】：

关于 Stackoverflow 和一般问题解决的标准建议是将问题简化为显示问题的最小脚本。例如，参见https://stackoverflow.com/help/mcve。这种方法鼓励剥离问题，并且通常有助于指出问题在代码中的位置。这是解决问题的经典方法。

事实证明，您的脚本有很多额外的东西。精简到必需品会：

import numpy as np
from lmfit import models
import matplotlib.pyplot as plt

x = np.array([ 1.26500000e-01, 1.47000000e-01, 1.71500000e-01,
            2.00000000e-01, 2.33000000e-01, 2.72000000e-01,
            3.17000000e-01, 3.69500000e-01, 4.31000000e-01,
            5.02500000e-01, 5.86000000e-01, 6.83500000e-01,
            7.97000000e-01, 9.29000000e-01, 1.08300000e+00,
            1.26250000e+00, 1.47200000e+00, 1.71650000e+00,
            2.00100000e+00, 2.33300000e+00, 2.72050000e+00,
            3.17200000e+00, 3.69800000e+00, 4.31150000e+00,
            5.02700000e+00, 5.86100000e+00, 6.83300000e+00,
            7.96650000e+00, 9.28850000e+00, 1.08295000e+01,
            1.26265000e+01, 1.47215000e+01, 1.71640000e+01,
            2.00115000e+01, 2.33315000e+01, 2.72030000e+01,
            3.17165000e+01, 3.69785000e+01, 4.31135000e+01,
            5.02665000e+01, 5.86065000e+01, 6.83300000e+01,
            7.96670000e+01, 9.28850000e+01, 1.08296000e+02,
            1.26264000e+02, 1.47213000e+02, 1.71637500e+02,
            2.00114500e+02, 2.33316500e+02])

y = np.array([ 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.01, 0.02,
           0.03, 0.04, 0.06, 0.07, 0.08, 0.09, 0.1 , 0.11, 0.13, 0.19,
           0.3 , 0.48, 0.74, 1.1 , 1.56, 2.11, 2.72, 3.37, 3.99, 4.55,
           4.99, 5.3 , 5.48, 5.53, 5.48, 5.36, 5.19, 4.97, 4.67, 4.28,
           3.79, 3.18, 2.48, 1.73, 1.  , 0.35, 0.  , 0.  , 0.  , 0.  ])

model = models.LognormalModel()
params = model.make_params(center=20, sigma=3, amplitude=5)

result = model.fit(y, params, x=x)
print(result.fit_report())

plt.plot(x, y, label='data')
plt.plot(x, result.best_fit, label='fit')
plt.legend()
plt.show()

这可以运行并且即使不是很完美也可以提供不错的贴合度。

一般来说，我不鼓励您根据数据范围设置“参数提示”。谨慎使用设置此类限制的能力，并且仅在模型固有的情况下使用（例如，sigma<0 没有意义）。

我不知道你的代码使用什么随机数来设置初始值，但在我看来它确实很可能设置初始值，这是非常糟糕的选择。

【讨论】：

我尝试按原样使用代码的原因（来自父帖子中链接的源代码）是为了拥有一个由多个子模型组合而成的模型的灵活性。我想包括这个来提供上下文，但也想调试我无论如何都会使用的格式。然而，使用更简单的版本已经清楚地表明我遇到的问题是在sigma 的初始猜测中，初始猜测太低会导致不收敛。感谢您的帮助！
两点（可能重复我自己）：a) 在故障排除和寻求帮助时，将问题分解为最小的可能示例。您可能会在更复杂的代码中自己发现问题。 b）链接是一个非常好的文章，但我不同意它使用参数提示。参数提示属于模型，以帮助构建参数实例。提示不应依赖于数据。相反，创建参数，然后为数据集自定义它们的初始值和边界。最后，在设置界限时避免聪明或严格。谨慎使用它们。