拟合数据¶
日期 | 2019-08-06(最后修改),2006-09-21(创建) |
---|
In [1]
import numpy as np
from numpy import pi, r_
import matplotlib.pyplot as plt
from scipy import optimize
# Generate data points with noise
num_points = 150
Tx = np.linspace(5., 8., num_points)
Ty = Tx
tX = 11.86*np.cos(2*pi/0.81*Tx-1.32) + 0.64*Tx+4*((0.5-np.random.rand(num_points))*np.exp(2*np.random.rand(num_points)**2))
tY = -32.14*np.cos(2*np.pi/0.8*Ty-1.94) + 0.15*Ty+7*((0.5-np.random.rand(num_points))*np.exp(2*np.random.rand(num_points)**2))
拟合数据¶
现在我们有两组数据:Tx 和 Ty,时间序列,以及 tX 和 tY,带有噪声的正弦数据。我们感兴趣的是找到正弦波的频率。
In [2]
# Fit the first set
fitfunc = lambda p, x: p[0]*np.cos(2*np.pi/p[1]*x+p[2]) + p[3]*x # Target function
errfunc = lambda p, x, y: fitfunc(p, x) - y # Distance to the target function
p0 = [-15., 0.8, 0., -1.] # Initial guess for the parameters
p1, success = optimize.leastsq(errfunc, p0[:], args=(Tx, tX))
time = np.linspace(Tx.min(), Tx.max(), 100)
plt.plot(Tx, tX, "ro", time, fitfunc(p1, time), "r-") # Plot of the data and the fit
# Fit the second set
p0 = [-15., 0.8, 0., -1.]
p2,success = optimize.leastsq(errfunc, p0[:], args=(Ty, tY))
time = np.linspace(Ty.min(), Ty.max(), 100)
plt.plot(Ty, tY, "b^", time, fitfunc(p2, time), "b-")
# Legend the plot
plt.title("Oscillations in the compressed trap")
plt.xlabel("time [ms]")
plt.ylabel("displacement [um]")
plt.legend(('x position', 'x fit', 'y position', 'y fit'))
ax = plt.axes()
plt.text(0.8, 0.07,
'x freq : %.3f kHz \n y freq : %.3f kHz' % (1/p1[1],1/p2[1]),
fontsize=16,
horizontalalignment='center',
verticalalignment='center',
transform=ax.transAxes)
plt.show()
成本函数的巧妙运用¶
假设你拥有相同的数据集:两个振荡现象的时间序列,但你知道这两个振荡的频率相同。成本函数的巧妙运用可以让你使用相同的频率,在一个拟合中拟合两组数据。其思想是,你返回一个“成本”数组,该数组是两组数据在参数选择上的成本的串联。因此,leastsq 例程同时优化两组数据。
In [3]
# Target function
fitfunc = lambda T, p, x: p[0]*np.cos(2*np.pi/T*x+p[1]) + p[2]*x
# Initial guess for the first set's parameters
p1 = r_[-15., 0., -1.]
# Initial guess for the second set's parameters
p2 = r_[-15., 0., -1.]
# Initial guess for the common period
T = 0.8
# Vector of the parameters to fit, it contains all the parameters of the problem, and the period of the oscillation is not there twice !
p = r_[T, p1, p2]
# Cost function of the fit, compare it to the previous example.
errfunc = lambda p, x1, y1, x2, y2: r_[
fitfunc(p[0], p[1:4], x1) - y1,
fitfunc(p[0], p[4:7], x2) - y2
]
# This time we need to pass the two sets of data, there are thus four "args".
p,success = optimize.leastsq(errfunc, p, args=(Tx, tX, Ty, tY))
time = np.linspace(Tx.min(), Tx.max(), 100) # Plot of the first data and the fit
plt.plot(Tx, tX, "ro", time, fitfunc(p[0], p[1:4], time),"r-")
# Plot of the second data and the fit
time = np.linspace(Ty.min(), Ty.max(),100)
plt.plot(Ty, tY, "b^", time, fitfunc(p[0], p[4:7], time),"b-")
# Legend the plot
plt.title("Oscillations in the compressed trap")
plt.xlabel("time [ms]")
plt.ylabel("displacement [um]")
plt.legend(('x position', 'x fit', 'y position', 'y fit'))
ax = plt.axes()
plt.text(0.8, 0.07,
'x freq : %.3f kHz' % (1/p[0]),
fontsize=16,
horizontalalignment='center',
verticalalignment='center',
transform=ax.transAxes)
Out[3]
简化语法¶
尤其是在交互式使用拟合时,optimize.leastsq 的标准语法可能会变得很长。使用以下脚本可以简化你的工作
In [4]
import numpy as np
from scipy import optimize
class Parameter:
def __init__(self, value):
self.value = value
def set(self, value):
self.value = value
def __call__(self):
return self.value
def fit(function, parameters, y, x = None):
def f(params):
i = 0
for p in parameters:
p.set(params[i])
i += 1
return y - function(x)
if x is None: x = np.arange(y.shape[0])
p = [param() for param in parameters]
return optimize.leastsq(f, p)
现在拟合变得非常容易,例如拟合高斯函数
In [5]
# giving initial parameters
mu = Parameter(7)
sigma = Parameter(3)
height = Parameter(5)
# define your function:
def f(x): return height() * np.exp(-((x-mu())/sigma())**2)
# fit! (given that data is an array with the data to fit)
data = 10*np.exp(-np.linspace(0, 10, 100)**2) + np.random.rand(100)
print fit(f, [mu, sigma, height], data)
在 [6]
gaussian = lambda x: 3*np.exp(-(30-x)**2/20.)
data = gaussian(np.arange(100))
plt.plot(data, '.')
X = np.arange(data.size)
x = np.sum(X*data)/np.sum(data)
width = np.sqrt(np.abs(np.sum((X-x)**2*data)/np.sum(data)))
max = data.max()
fit = lambda t : max*np.exp(-(t-x)**2/(2*width**2))
plt.plot(fit(X), '-')
输出[6]
拟合二维高斯¶
这是一个用于拟合二维高斯的稳健代码。它计算数据的矩来猜测优化例程的初始参数。对于更完整的高斯,一个可选的加性常数和旋转,请参见 http://code.google.com/p/agpy/source/browse/trunk/agpy/gaussfitter.py。它还允许指定已知的误差。
在 [7]
def gaussian(height, center_x, center_y, width_x, width_y):
"""Returns a gaussian function with the given parameters"""
width_x = float(width_x)
width_y = float(width_y)
return lambda x,y: height*np.exp(
-(((center_x-x)/width_x)**2+((center_y-y)/width_y)**2)/2)
def moments(data):
"""Returns (height, x, y, width_x, width_y)
the gaussian parameters of a 2D distribution by calculating its
moments """
total = data.sum()
X, Y = np.indices(data.shape)
x = (X*data).sum()/total
y = (Y*data).sum()/total
col = data[:, int(y)]
width_x = np.sqrt(np.abs((np.arange(col.size)-x)**2*col).sum()/col.sum())
row = data[int(x), :]
width_y = np.sqrt(np.abs((np.arange(row.size)-y)**2*row).sum()/row.sum())
height = data.max()
return height, x, y, width_x, width_y
def fitgaussian(data):
"""Returns (height, x, y, width_x, width_y)
the gaussian parameters of a 2D distribution found by a fit"""
params = moments(data)
errorfunction = lambda p: np.ravel(gaussian(*p)(*np.indices(data.shape)) -
data)
p, success = optimize.leastsq(errorfunction, params)
return p
这里是一个使用它的例子
在 [8]
# Create the gaussian data
Xin, Yin = np.mgrid[0:201, 0:201]
data = gaussian(3, 100, 100, 20, 40)(Xin, Yin) + np.random.random(Xin.shape)
plt.matshow(data, cmap=plt.cm.gist_earth_r)
params = fitgaussian(data)
fit = gaussian(*params)
plt.contour(fit(*np.indices(data.shape)), cmap=plt.cm.copper)
ax = plt.gca()
(height, x, y, width_x, width_y) = params
plt.text(0.95, 0.05, """
x : %.1f
y : %.1f
width_x : %.1f
width_y : %.1f""" %(x, y, width_x, width_y),
fontsize=16, horizontalalignment='right',
verticalalignment='bottom', transform=ax.transAxes)
输出[8]
在 [9]
# Define function for calculating a power law
powerlaw = lambda x, amp, index: amp * (x**index)
##########
# Generate data points with noise
##########
num_points = 20
# Note: all positive, non-zero data
xdata = np.linspace(1.1, 10.1, num_points)
ydata = powerlaw(xdata, 10.0, -2.0) # simulated perfect data
yerr = 0.2 * ydata # simulated errors (10%)
ydata += np.random.randn(num_points) * yerr # simulated noisy data
在 [10]
##########
# Fitting the data -- Least Squares Method
##########
# Power-law fitting is best done by first converting
# to a linear equation and then fitting to a straight line.
# Note that the `logyerr` term here is ignoring a constant prefactor.
#
# y = a * x^b
# log(y) = log(a) + b*log(x)
#
logx = np.log10(xdata)
logy = np.log10(ydata)
logyerr = yerr / ydata
# define our (line) fitting function
fitfunc = lambda p, x: p[0] + p[1] * x
errfunc = lambda p, x, y, err: (y - fitfunc(p, x)) / err
pinit = [1.0, -1.0]
out = optimize.leastsq(errfunc, pinit,
args=(logx, logy, logyerr), full_output=1)
pfinal = out[0]
covar = out[1]
print pfinal
print covar
index = pfinal[1]
amp = 10.0**pfinal[0]
indexErr = np.sqrt( covar[1][1] )
ampErr = np.sqrt( covar[0][0] ) * amp
##########
# Plotting data
##########
plt.clf()
plt.subplot(2, 1, 1)
plt.plot(xdata, powerlaw(xdata, amp, index)) # Fit
plt.errorbar(xdata, ydata, yerr=yerr, fmt='k.') # Data
plt.text(5, 6.5, 'Ampli = %5.2f +/- %5.2f' % (amp, ampErr))
plt.text(5, 5.5, 'Index = %5.2f +/- %5.2f' % (index, indexErr))
plt.title('Best Fit Power Law')
plt.xlabel('X')
plt.ylabel('Y')
plt.xlim(1, 11)
plt.subplot(2, 1, 2)
plt.loglog(xdata, powerlaw(xdata, amp, index))
plt.errorbar(xdata, ydata, yerr=yerr, fmt='k.') # Data
plt.xlabel('X (log scale)')
plt.ylabel('Y (log scale)')
plt.xlim(1.0, 11)
输出[10]
部分作者:GaelVaroquaux、Unknown[52]、Unknown[53]、jh、jesrl、keflavich、MartinSpacek、Unknown[54]、Brian Benjamin Maranville、Luke
附件