Commit 7e31b3cb authored by Neil Vaytet's avatar Neil Vaytet
Browse files

Updated the auto frame finder to use scipy find_peaks instead of custom peak finding function

parent e4324735
......@@ -17,6 +17,6 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# *********************************************************************
from wfm_processing import WFMProcessor, make_frame_shifts, get_frame_shifts
from data_handling import load, save_fits_stack, normalize
from get_wfm_windows import detect_peaks, check_peaks, get_wfm_windows
from .wfm_processing import WFMProcessor, make_frame_shifts, get_frame_shifts
from .data_handling import load, save_fits_stack, normalize
from .get_wfm_windows import detect_peaks, check_peaks, get_wfm_windows
from __future__ import division, print_function
import numpy as np
from scipy.ndimage import gaussian_filter1d
from scipy.signal import find_peaks
################################################################################
################################################################################
......@@ -10,17 +11,11 @@ from scipy.ndimage import gaussian_filter1d
Author: Neil Vaytet, European Spallation Source, <neil.vaytet@esss.se>
Date: 11/2018
Last modified: 05/2019 : now using scipy to find peaks
This file contains two functions (see individual function code for a list of
parameters):
- detect_peaks(): A peak/valley finding routine provided by Marcos Duarte
(https://github.com/demotu/BMC). It was slightly modified
to disable the plotting functionalities.
- get_wfm_windows(): Starting from the positions of the valleys, the edges
of WFM windows are found using various thresholds.
This file contains a method to automatically find the WFM frame edges, given
some spectrum of intensity/counts vs time of flight.
Examples:
......@@ -46,13 +41,15 @@ from scipy.ndimage import gaussian_filter1d
3. Changing the thresholds: there are two different thresholds.
- bg_threshold is used to find the global left and right edges of the
signal over the background. To determine the background, we begin from
the first (leftmost) point in the data and we iterate towards the
right. This first point whose value exceeds
`bg_threshold * (ymax - ymin)` (where `ymin` and `ymax` are the minimum
and maximum y values in the entire data set) is selected as the leading
edge. The trailing edge is found with the same procedure starting from
the right end and we iterating towards the left.
signal over the background. To determine the background, we histogram
the data in 50 intensity bins and assume that the background is the bin
that contains the intensity which is most common in the data.
The leading edge is the first point that exceeds the background value
is first point whose value exceeds:
background_value + bg_threshold * (ymax - background_value)
where ymax is the maximum intensity in the data.
The trialing edge is found in the same way, starting from the right
end of the spectrum.
The default value is bg_threshold = 0.05.
- win_threshold is used to find the left and right edges of each pulse
......@@ -77,161 +74,23 @@ from scipy.ndimage import gaussian_filter1d
################################################################################
################################################################################
# Peak detection routine by Marcos Duarte
#
# """Detect peaks in data based on their amplitude and other features."""
#
#
# __author__ = "Marcos Duarte, https://github.com/demotu/BMC"
# __version__ = "1.0.5"
# __license__ = "MIT"
def detect_peaks(x, mph=None, mpd=1, threshold=0, edge="rising",
kpsh=False, valley=False):
"""Detect peaks in data based on their amplitude and other features.
Parameters
----------
x : 1D array_like
data.
mph : {None, number}, optional (default = None)
detect peaks that are greater than minimum peak height (if parameter
`valley` is False) or peaks that are smaller than maximum peak height
(if parameter `valley` is True).
mpd : positive integer, optional (default = 1)
detect peaks that are at least separated by minimum peak distance (in
number of data).
threshold : positive number, optional (default = 0)
detect peaks (valleys) that are greater (smaller) than `threshold`
in relation to their immediate neighbors.
edge : {None, 'rising', 'falling', 'both'}, optional (default = 'rising')
for a flat peak, keep only the rising edge ('rising'), only the
falling edge ('falling'), both edges ('both'), or don't detect a
flat peak (None).
kpsh : bool, optional (default = False)
keep peaks with same height even if they are closer than `mpd`.
valley : bool, optional (default = False)
if True (1), detect valleys (local minima) instead of peaks.
Returns
-------
ind : 1D array_like
indeces of the peaks in `x`.
Notes
-----
The detection of valleys instead of peaks is performed internally by simply
negating the data: `ind_valleys = detect_peaks(-x)`
The function can handle NaN's
See this IPython Notebook [1]_.
References
----------
.. [1] http://nbviewer.ipython.org/github/demotu/BMC/blob/master/notebooks/DetectPeaks.ipynb
Examples
--------
>>> from detect_peaks import detect_peaks
>>> x = np.random.randn(100)
>>> x[60:81] = np.nan
>>> # detect all peaks and plot data
>>> ind = detect_peaks(x, show=True)
>>> print(ind)
>>> x = np.sin(2*np.pi*5*np.linspace(0, 1, 200)) + np.random.randn(200)/5
>>> # set minimum peak height = 0 and minimum peak distance = 20
>>> detect_peaks(x, mph=0, mpd=20, show=True)
>>> x = [0, 1, 0, 2, 0, 3, 0, 2, 0, 1, 0]
>>> # set minimum peak distance = 2
>>> detect_peaks(x, mpd=2, show=True)
>>> x = np.sin(2*np.pi*5*np.linspace(0, 1, 200)) + np.random.randn(200)/5
>>> # detection of valleys instead of peaks
>>> detect_peaks(x, mph=-1.2, mpd=20, valley=True, show=True)
>>> x = [0, 1, 1, 0, 1, 1, 0]
>>> # detect both edges
>>> detect_peaks(x, edge='both', show=True)
>>> x = [-2, 1, -2, 2, 1, 1, 3, 0]
>>> # set threshold = 2
>>> detect_peaks(x, threshold = 2, show=True)
Version history
---------------
'1.0.5':
The sign of `mph` is inverted if parameter `valley` is True
"""
x = np.atleast_1d(x).astype("float64")
if x.size < 3:
return np.array([], dtype=int)
if valley:
x = -x
if mph is not None:
mph = -mph
# find indices of all peaks
dx = x[1:] - x[:-1]
# handle NaN's
indnan = np.where(np.isnan(x))[0]
if indnan.size:
x[indnan] = np.inf
dx[np.where(np.isnan(dx))[0]] = np.inf
ine, ire, ife = np.array([[], [], []], dtype=int)
if not edge:
ine = np.where((np.hstack((dx, 0)) < 0) & (np.hstack((0, dx)) > 0))[0]
else:
if edge.lower() in ["rising", "both"]:
ire = np.where((np.hstack((dx, 0)) <= 0) & (np.hstack((0, dx)) > 0))[0]
if edge.lower() in ["falling", "both"]:
ife = np.where((np.hstack((dx, 0)) < 0) & (np.hstack((0, dx)) >= 0))[0]
ind = np.unique(np.hstack((ine, ire, ife)))
# handle NaN's
if ind.size and indnan.size:
# NaN's and values close to NaN's cannot be peaks
ind = ind[np.in1d(ind, np.unique(np.hstack((indnan, indnan-1, indnan+1))), invert=True)]
# first and last values of x cannot be peaks
if ind.size and ind[0] == 0:
ind = ind[1:]
if ind.size and ind[-1] == x.size-1:
ind = ind[:-1]
# remove peaks < minimum peak height
if ind.size and mph is not None:
ind = ind[x[ind] >= mph]
# remove peaks - neighbors < threshold
if ind.size and threshold > 0:
dx = np.min(np.vstack([x[ind]-x[ind-1], x[ind]-x[ind+1]]), axis=0)
ind = np.delete(ind, np.where(dx < threshold)[0])
# detect small peaks closer than minimum peak distance
if ind.size and mpd > 1:
ind = ind[np.argsort(x[ind])][::-1] # sort ind by peak height
idel = np.zeros(ind.size, dtype=bool)
for i in range(ind.size):
if not idel[i]:
# keep peaks with the same height if kpsh is True
idel = idel | (ind >= ind[i] - mpd) & (ind <= ind[i] + mpd) \
& (x[ind[i]] > x[ind] if kpsh else True)
idel[i] = 0 # Keep current peak
# remove the small peaks and sort back the indices by their occurrence
ind = np.sort(ind[~idel])
return ind
def check_peaks(peaks, nwindows):
"""
This function check if the number of peaks found is the correct one.
"""
nvalleys = len(peaks)-2
if nvalleys != (nwindows - 1):
print("Error: number of valleys should be {}! Found {}.".format(nwindows-1, nvalleys))
return True # Continue looping
if nvalleys > 2*(nwindows - 1):
print("Way too many valleys: should be {}, found {}.".format(nwindows-1, nvalleys))
return 3 # Continue looping
elif nvalleys > (nwindows - 1):
print("Too many valleys: should be {}, found {}.".format(nwindows-1, nvalleys))
return 2 # Continue looping
elif nvalleys < (nwindows - 1):
print("Too few valleys: should be {}, found {}.".format(nwindows-1, nvalleys))
return 1 # Continue looping
else:
print("OK: Found {} valleys.".format(nvalleys))
return False # Exit the loop
return 0 # Exit the loop
################################################################################
################################################################################
......@@ -330,23 +189,6 @@ def get_wfm_windows(data=None, filename=None, nwindows=6, bg_threshold=0.05,
hmin = np.amin(hist)
hmax = np.amax(hist)
# # Determine background threshold by looking for strong cumulative gradients
# i_bg = None
# y_normed = y / np.sum(y)
# sorted = np.argsort(y_normed)
# grad = np.gradient(y_normed[sorted])
# cumsum = np.cumsum(grad)
# for i in range(len(cumsum)):
# if cumsum[i] > bg_threshold:
# i_bg = i
# break
# if i_bg is not None:
# bg_value = y[sorted][i_bg]
# print("Background value is: {}".format(bg_value))
# else:
# bg_value = 0.05*(ymax - ymin)
# print("Warning: background value was not found in the conventional way")
# Find the leading and trailing edges; i.e. the leftmost and rightmost
# points that exceed the background value
i_start = 0
......@@ -359,60 +201,55 @@ def get_wfm_windows(data=None, filename=None, nwindows=6, bg_threshold=0.05,
if y[i] > bg_value:
i_end = i
break
print(i_start,i_end, lbound, rbound)
# Determine minimum peak distance (mpd):
# We know there should be 6 windows between the leading and trailing edges.
# Since the windows have approximately all the same size, we can estimate a
# minimum peak distance to be close to the distance between leading and trailing
# edges divided by 6 (but slightly less to be on the safe side).
# Note that for the `detect_peaks` function, mpd is in units of data index, not
# time-of-flight.
mpd = int(0.75 * float(i_end - i_start) / nwindows)
loop = True
# Fine the valleys using the scipy find_peaks tool.
# The prominence parameter is probably the most relevant here.
prominence = 0.05
delta_prominence = 1.5
loop = 2
max_loops = 10
nloops = 0
while loop:
while loop > 0:
nloops += 1
print("The minimum peak distance (mpd) is:",mpd)
# Find valleys using `detect_peaks` function from Marcos Duarte
peaks = detect_peaks(y, mpd=mpd, valley=True)
# Now filter out peaks that are between start and end
good_peaks = [i_start]
for p in peaks:
if (p > i_start+0.5*mpd) and (p < i_end-0.5*mpd):
good_peaks.append(p)
good_peaks.append(i_end)
loop = check_peaks(good_peaks, nwindows)
print("The peak prominence is:", prominence)
# Find valleys using `detect_peaks` function from scipy.signal
peaks, _ = find_peaks(-y[lbound:rbound+1], prominence=prominence*(ymax-ymin))
# Added start and end to peaks array
peaks = np.concatenate(([i_start], peaks+lbound, [i_end]))
# Check how many peaks were found
loop = check_peaks(peaks, nwindows)
# If loop is 3, it means way too many peaks were found, so we re-run
# the peak search having increased the prominence
if loop == 3:
print("Increasing the peak prominence")
prominence *= delta_prominence
# Try to remove peaks if there are too many
if (len(good_peaks)-2) > (nwindows - 1):
if loop == 2:
print("Too many peaks found. Attempting automatic dismissal")
# if the value at the bottom of the valley is over the lowest window
# average, then this is not a real valley
temp_peaks = good_peaks[:]
for p in range(1,len(good_peaks)-1):
rmean = np.average(y[good_peaks[p]:good_peaks[p+1]])
lmean = np.average(y[good_peaks[p-1]:good_peaks[p]])
mean = min(lmean,rmean)
if y[good_peaks[p]] > mean:
temp_peaks[p] = -100
print("Removed peak number {} at x,y = {},{}".format(p,x[good_peaks[p]],y[good_peaks[p]]))
good_peaks = []
for p in range(len(temp_peaks)):
if temp_peaks[p] != -100:
good_peaks.append(temp_peaks[p])
loop = check_peaks(good_peaks, nwindows)
# Try to reduce the minimum peak distance if there are too few peaks
if (len(good_peaks)-2) < (nwindows - 1):
print("Not enough peaks found. Reducing the minimal peak distance")
mpd = int(0.75 * mpd)
if mpd < 1:
print("The peak detection has failed with too few or too many peaks")
loop = False
removed_a_peak = True
while removed_a_peak:
removed_a_peak = False
for p in range(1, len(peaks)-1):
rmean = np.average(y[peaks[p]:peaks[p+1]])
lmean = np.average(y[peaks[p-1]:peaks[p]])
mean = min(lmean,rmean)
if y[peaks[p]] > mean:
print("Removed peak number {} at x,y = {},{}".format(p,x[peaks[p]],y[peaks[p]]))
peaks = np.delete(peaks, p)
removed_a_peak = True
break
# Check peaks again
loop = check_peaks(peaks, nwindows)
# Try to reduce the peak prominence if there are too few peaks
if loop == 1:
print("Reducing the peak prominence")
prominence /= delta_prominence
# Exit if maximum number of loops has been reached
if nloops > max_loops:
loop = False
loop = 0
# Now for each valley, iterate to one side starting from the valley center and
# find the window edge. We start from the first valley, which is the second
......@@ -429,21 +266,21 @@ def get_wfm_windows(data=None, filename=None, nwindows=6, bg_threshold=0.05,
redges = []
y_minus_background = y - bg_value
for p in range(1,len(good_peaks)-1):
for p in range(1,len(peaks)-1):
# Towards the right ===================
rmean = np.average(y_minus_background[good_peaks[p]:good_peaks[p+1]])
rmean = np.average(y_minus_background[peaks[p]:peaks[p+1]])
# Find left edge iterating towards the right
for i in range(good_peaks[p]+1,good_peaks[p+1]):
if (y_minus_background[i] - y_minus_background[good_peaks[p]]) >= (win_threshold*(rmean-y_minus_background[good_peaks[p]])):
for i in range(peaks[p]+1,peaks[p+1]):
if (y_minus_background[i] - y_minus_background[peaks[p]]) >= (win_threshold*(rmean-y_minus_background[peaks[p]])):
ledges.append(i)
break
# Towards the left =======================
lmean = np.average(y_minus_background[good_peaks[p-1]:good_peaks[p]])
lmean = np.average(y_minus_background[peaks[p-1]:peaks[p]])
# Find left edge iterating towards the right
for i in range(good_peaks[p]-1,good_peaks[p-1],-1):
if (y_minus_background[i] - y_minus_background[good_peaks[p]]) >= (win_threshold*(lmean-y_minus_background[good_peaks[p]])):
for i in range(peaks[p]-1,peaks[p-1],-1):
if (y_minus_background[i] - y_minus_background[peaks[p]]) >= (win_threshold*(lmean-y_minus_background[peaks[p]])):
redges.append(i)
break
......@@ -462,32 +299,36 @@ def get_wfm_windows(data=None, filename=None, nwindows=6, bg_threshold=0.05,
sizex = 20.0
fig.set_size_inches(sizex, ratio*sizex)
ax = fig.add_subplot(121)
ax.grid(True, color='gray', linestyle="dotted")
ax.set_axisbelow(True)
colors = ["r","g","b","magenta","cyan","orange"]
for i in range(len(ledges)):
ax.add_patch(Rectangle((x[ledges[i]], ymin), (x[redges[i]]-x[ledges[i]]), (ymax-ymin), facecolor=colors[i%6], alpha=0.5))
for p in range(len(good_peaks)-1):
rmean = np.average(y_minus_background[good_peaks[p]:good_peaks[p+1]])
ax.plot([x[good_peaks[p]],x[good_peaks[p+1]]],[rmean+bg_value,rmean+bg_value],color="lime", lw=2)
ax.plot([x[good_peaks[p]],x[good_peaks[p+1]]],[win_threshold*rmean+bg_value,win_threshold*rmean+bg_value], color="sienna", lw=2)
for p in range(len(peaks)-1):
rmean = np.average(y_minus_background[peaks[p]:peaks[p+1]])
ax.plot([x[peaks[p]],x[peaks[p+1]]],[rmean+bg_value,rmean+bg_value],color="lime", lw=2)
ax.plot([x[peaks[p]],x[peaks[p+1]]],[win_threshold*rmean+bg_value,win_threshold*rmean+bg_value], color="sienna", lw=2)
ax.plot(x, data[:,1], color="k", lw=2, label="Raw data")
ax.plot(x, y, color="lightgrey", lw=1, label="Smoothed data")
ax.plot([np.amin(x),np.amax(x)], [bg_value, bg_value], "--", color="pink", lw=1, label="Background threshold")
ax.plot([x[good_peaks[0]],x[good_peaks[1]]], [-ymax,-ymax], color="lime", label="Window mean", lw=2)
ax.plot([x[good_peaks[0]],x[good_peaks[1]]], [-ymax,-ymax], color="sienna", label="Window mean", lw=2)
for p in range(1,len(good_peaks)-1):
ax.plot(x[good_peaks[p]], y[good_peaks[p]], "o", color="r")
ax.plot(x[good_peaks[0]], y[good_peaks[0]], "o", color="deepskyblue", label="Leading edge")
ax.plot(x[good_peaks[-1]], y[good_peaks[-1]], "o", color="yellow", label="Trailing edge")
ax.plot([x[peaks[0]],x[peaks[1]]], [-ymax,-ymax], color="lime", label="Window mean", lw=2)
ax.plot([x[peaks[0]],x[peaks[1]]], [-ymax,-ymax], color="sienna", label="Window mean", lw=2)
for p in range(1,len(peaks)-1):
ax.plot(x[peaks[p]], y[peaks[p]], "o", color="r")
ax.plot(x[peaks[0]], y[peaks[0]], "o", color="deepskyblue", label="Leading edge")
ax.plot(x[peaks[-1]], y[peaks[-1]], "o", color="yellow", label="Trailing edge")
ax.set_xlabel("Arrival time at detector")
ax.set_ylabel("Amplitude")
ax.set_ylim([0.0,1.05*np.amax(data[:,1])])
ax.plot(x[good_peaks[0]], -ymax, "o", color="r",label="Valleys")
ax.plot(x[peaks[0]], -ymax, "o", color="r",label="Valleys")
ax.legend(loc=(0,1.02),ncol=4, fontsize=10)
ax2 = fig.add_subplot(122)
ax2.hist(y[lbound:rbound+1], bins=edges, orientation='horizontal', label="Amplitude frequency", facecolor='None')
ax2.set_axisbelow(True)
ax2.grid(True, color='gray', linestyle="dotted")
ax2.hist(y[lbound:rbound+1], bins=edges, orientation='horizontal', label="Amplitude frequency")
ax2.set_xlim([hmin,1.05*hmax])
ax2.set_ylim([0.0,1.05*np.amax(data[:,1])])
ax2.plot([hmin, hmax], [bg_raw, bg_raw], "--", color="b", lw=3, label="Background estimation")
ax2.plot([hmin, hmax], [bg_raw, bg_raw], "--", color="r", lw=3, label="Background estimation")
ax2.plot([hmin, hmax], [bg_value, bg_value], "--", color="pink", lw=1, label="Background threshold")
ax2.set_xlabel("Counts in background search")
ax2.set_ylabel("Amplitude")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment