【发布时间】:2017-01-20 14:55:18
【问题描述】:
我是 python 的初学者,但遇到了一个问题。 我想使用 lmfit 模块将函数拟合到 .csv 文件中的 3 组 (x, y) 数据,其中包含一些共享参数 (a,b,d) 和一个固定的单个参数 (c)。 我的数据格式如下: x1 y1 x2 y2 x3 y3 .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. ..
.. .. .. .. .. ..
这是一个指数函数的示例,以及我尝试拟合每个数据集并进行绘图的尝试。我在定义全局残差以及为 c 参数的每个数据集定义一个固定值时遇到问题。这是我到目前为止所得到的:
import numpy as np
import pandas as pd
from lmfit import minimize, Parameters, report_fit
import matplotlib.pyplot as plt
#Load data
df=pd.read_csv('mydata.csv')
df[['x1', 'y1', 'x2', 'y2', 'x3','y3']][:6]
# set up the data
xs = np.array(df[df.columns[0::2]])
ys = np.array(df[df.columns[1::2]])
#define function
def biexp(xs,a,b,c,d):
return a*np.exp(-xs*b*c)+(1-a)*np.exp(-xs*d*c)
#Define a function that calculates biexp for data set i
def biexp_dataset(params, i, xs):
a = params['a_%i' % (i+1)].value
b = params['b_%i' % (i+1)].value
c = params['c_%i' % (i+1)].value
d = params['d_%i' % (i+1)].value
return biexp(xs,a,b,c,d)
#Define the real function to minimize, which calculates the total residual for all fits, modeled by biexp function
"""Where I'm stuck"""
def objective(params, xs,ys):
nys, nxs= df.shape[1]/2
resid = 0.0*ys[:]
# make residual per data set
for i in range(nys):
resid = ys[:, i] - biexp_dataset(params,i,xs)
# now flatten this to a 1D array, as minimize() needs
return resid.flatten()
#Define parameters
fit_params = Parameters()
for i in range(df.shape[1]/2):
fit_params.add( 'a_%i' % (i+1), value=1, min=0, max=9,vary=True)
fit_params.add( 'b_%i' % (i+1), value=0.3, min=0.0, vary=True)
fit_params.add( 'd_%i' % (i+1), value=0.5, min=0.0, vary=True)
"""How can I define c for each data sets?"""
"""Iwant for x1,y1 => c1=2; x2,y2=>c2=0.4; x3,y3=>c3=5, for example"""
# constrain all values of a,b and d to have the same value
for i in (2, 3):
fit_params['a_%i' % i].expr='a_1'
fit_params['b_%i' % i].expr='b_1'
fit_params['d_%i' % i].expr='d_1'
# run the global fit to all the data sets
result=minimize(objective, fit_params, args=(xs, ys))
report_fit(result.params)
# plot the data sets and fits
plt.figure()
for i in range(df.shape[1]/2):
y_fit = biexp_dataset(fit_params, i, xs)
plt.plot(xs[:,i], ys[:, i], 'o', xs[:,i], y_fit, '-')
plt.show()
所以基本上我要问的是:如何为每个数据集定义一个固定的 c 参数值? c1=0.4;例如c2=1;c3=5
我在目标定义中做错了什么,因为当我运行代码时它指出:TypeError: can't multiply sequence by non-int of type 'float'
任何帮助将不胜感激......
更新所以我使用我想要拟合的真实函数编写了一些新代码(并且我已经使用 originLab 中的全局拟合选项进行了拟合)。因此,我的数据集共享相同的 D1、D2、tau1 和 tau2。然而,模型作为一个新变量固定,即实验时间(t_exps)。现在我的代码没有任何错误,但我得到的不是六个数据集的 6 条拟合曲线,而是 36 条曲线拟合。我迷路了......我的代码有什么问题:
import numpy as np
import pandas as pd
from lmfit import minimize, Parameters, report_fit
import matplotlib.pyplot as plt
#set up the data
df=pd.read_excel('BTMS_hex.xlsx')
df = df.dropna() #drop rows that have invalid values (extra line in data file)
# set up the data
xs = np.array(df[df.columns[0::2]]).astype(float)
ys = np.log(np.array(df[df.columns[1::2]]).astype(float))
t_exps = [0.04857, 0.0989, 0.14923, 0.1993, 0.49957, 0.99957]
#Create function
def karger(xs,D1,D2,tau1,tau2,texp):
term1=D1+D2+(1/(xs/texp))*((1/tau1)+(1/(tau2)))
term2=np.sqrt(((D1-D2+(1/(xs/texp))*((1/tau1)+(1/(tau2))))**2)+(4/(((xs/texp)**2)*tau1*tau2)))
DA=0.5*(term1-term2)
DB=0.5*(term1+term2)
P1=tau1/(tau1+tau2)
P2=1-P1
CB=(P1*D1+P2*D2-DA)/(DB-DA)
return np.log((1-CB)*np.exp(-xs*DA)+CB*np.exp(-xs*DB))
def karger_dataset(params, xs,texp):
D1 = params['D1'].value
D2 = params['D2'].value
tau1 = params['tau1'].value
tau2 = params['tau2'].value
return karger(xs, D1,D2,tau1,tau2,texp)
def objective(params, xs,ys):
resid = np.array([])
for i in range(xs.shape[1]):
y_pred = karger_dataset(params, xs[:,i],t_exps[i])
resid=np.concatenate((resid,(ys[:, i] - y_pred)))
return resid
#Define parameters
fit_params = Parameters()
fit_params.add( 'D1', value=2E-1,min=0, vary=False)
fit_params.add( 'D2', value=1E-2, min=0, vary=True)
fit_params.add( 'tau1', value=8.2, min=0, vary=True)
fit_params.add( 'tau2', value=0.5, min=0, vary=True)
# run the global fit to all the data sets
result=minimize(objective, fit_params, args=(xs, ys))
report_fit(result.params)
print result.residual
# plot the data sets and fits
plt.figure()
for i in range(df.shape[1]/2):
y_fit = karger_dataset(fit_params, xs,t_exps[i])
plt.plot(xs[:,i], ys[:, i], 'o', xs[:,i], y_fit, '-')
"plt.yscale('log')"
plt.axis([0,1600, -10,0.1])
plt.savefig('books_read.png')
【问题讨论】:
标签: curve-fitting curve least-squares