Source code for madmom.evaluation.beats

# encoding: utf-8
# pylint: disable=no-member
# pylint: disable=invalid-name
# pylint: disable=too-many-arguments
"""
This module contains beat evaluation functionality.

The measures are described in [1]_; a Matlab implementation is available at:
http://code.soundsoftware.ac.uk/projects/beat-evaluation/repository

Notes
-----
Please note that this is a complete re-implementation which makes some
different design decisions. For example, the beat detections and annotations
are not quantised before being evaluated with F-measure, P-score and other
metrics. Hence these evaluation functions DO NOT report the exact same
results/scores. This approach was chosen because it is simpler and produces
more accurate results.

References
----------
.. [1] Matthew E. P. Davies, Norberto Degara, and Mark D. Plumbley,
       "Evaluation Methods for Musical Audio Beat Tracking Algorithms",
       Technical Report C4DM-TR-09-06,
       Centre for Digital Music, Queen Mary University of London, 2009.

"""

from __future__ import absolute_import, division, print_function

import warnings
import numpy as np

from . import (find_closest_matches, calc_errors, calc_absolute_errors,
               evaluation_io, MeanEvaluation)
from .onsets import OnsetEvaluation
from ..utils import suppress_warnings


class BeatIntervalError(Exception):
    """
    Exception to be raised whenever an interval cannot be computed.

    """
    # pylint: disable=super-init-not-called

    def __init__(self, value=None):
        if value is None:
            value = "At least two beats must be present to be able to " \
                    "calculate an interval."
        self.value = value

    def __str__(self):
        return repr(self.value)


@suppress_warnings
def load_beats(values, downbeats=False):
    """
    Load the beats from the given values or file.

    To make this function more universal, it also accepts lists or arrays.

    Parameters
    ----------
    values : str, file handle, list or numpy array
        Name / values to be loaded.
    downbeats : bool, optional
        Load downbeats instead of beats.

    Returns
    -------
    numpy array
        Beats.

    Notes
    -----
    Expected format: 'beat_time' [additional information will be ignored]

    """
    # load the beats from the given representation
    if values is None:
        # return an empty array
        values = np.zeros(0)
    elif isinstance(values, (list, np.ndarray)):
        # convert to numpy array if possible
        # Note: use array instead of asarray because of ndmin
        values = np.array(values, dtype=np.float, ndmin=1, copy=False)
    else:
        # try to load the data from file
        values = np.loadtxt(values, ndmin=1)
    if values.ndim > 1:
        if downbeats:
            # rows with a "1" in the 2nd column are the downbeats
            return values[values[:, 1] == 1][:, 0]
        else:
            # 1st column is the beat time, the rest is ignored
            return values[:, 0]
    return values


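# Usage sketch (illustrative, not part of the original module): loading beats
# from a plain list and extracting downbeats from a 2D array whose 2nd column
# holds the position of each beat inside the bar.
#
#     beats = load_beats([0.5, 1.0, 1.5])
#     # -> array([0.5, 1.0, 1.5])
#     downbeats = load_beats(np.array([[0.5, 1], [1.0, 2], [1.5, 1]]),
#                            downbeats=True)
#     # -> array([0.5, 1.5])

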
# function for generating sequence variations
def variations(sequence, offbeat=False, double=False, half=False,
               triple=False, third=False):
    """
    Create variations of the given beat sequence.

    Parameters
    ----------
    sequence : numpy array
        Beat sequence.
    offbeat : bool, optional
        Create an offbeat sequence.
    double : bool, optional
        Create a double tempo sequence.
    half : bool, optional
        Create half tempo sequences (includes offbeat version).
    triple : bool, optional
        Create triple tempo sequence.
    third : bool, optional
        Create third tempo sequences (includes offbeat versions).

    Returns
    -------
    list
        Beat sequence variations.

    """
    # create different variants of the annotations
    sequences = []
    # double/half and offbeat variation
    if double or offbeat:
        if len(sequence) == 0:
            # if we have an empty sequence, there's nothing to interpolate
            double_sequence = []
        else:
            # create a sequence with double tempo
            same = np.arange(0, len(sequence))
            # request one item less, otherwise we would extrapolate
            shifted = np.arange(0, len(sequence), 0.5)[:-1]
            double_sequence = np.interp(shifted, same, sequence)
        # same tempo, half a beat off
        if offbeat:
            sequences.append(double_sequence[1::2])
        # double/half tempo variations
        if double:
            # double tempo
            sequences.append(double_sequence)
    if half:
        # half tempo odd beats (i.e. 1,3,1,3,..)
        sequences.append(sequence[0::2])
        # half tempo even beats (i.e. 2,4,2,4,..)
        sequences.append(sequence[1::2])
    # triple/third tempo variations
    if triple:
        if len(sequence) == 0:
            # if we have an empty sequence, there's nothing to interpolate
            triple_sequence = []
        else:
            # create an annotation sequence with triple tempo
            same = np.arange(0, len(sequence))
            # request two items less, otherwise we would extrapolate
            shifted = np.arange(0, len(sequence), 1. / 3)[:-2]
            triple_sequence = np.interp(shifted, same, sequence)
        # triple tempo
        sequences.append(triple_sequence)
    if third:
        # third tempo 1st beat (1,4,3,2,..)
        sequences.append(sequence[0::3])
        # third tempo 2nd beat (2,1,4,3,..)
        sequences.append(sequence[1::3])
        # third tempo 3rd beat (3,2,1,4,..)
        sequences.append(sequence[2::3])
    # return
    return sequences


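# Usage sketch (illustrative, not part of the original module): variations of
# a steady one-beat-per-second annotation sequence.
#
#     ann = np.array([1., 2., 3., 4.])
#     variations(ann, offbeat=True)  # -> [array([1.5, 2.5, 3.5])]
#     variations(ann, half=True)     # -> [array([1., 3.]), array([2., 4.])]

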
# helper functions for beat evaluation
def calc_intervals(events, fwd=False):
    """
    Calculate the intervals of all events to the previous/next event.

    Parameters
    ----------
    events : numpy array
        Beat sequence.
    fwd : bool, optional
        Calculate the intervals towards the next event (instead of previous).

    Returns
    -------
    numpy array
        Beat intervals.

    Notes
    -----
    The sequence must be ordered. The first (last) interval will be set to
    the same value as the second (second to last) interval (when used in
    `fwd` mode).

    """
    # at least 2 events must be given to calculate an interval
    if len(events) < 2:
        raise BeatIntervalError
    interval = np.zeros_like(events)
    if fwd:
        interval[:-1] = np.diff(events)
        # set the last interval to the same value as the second last
        interval[-1] = interval[-2]
    else:
        interval[1:] = np.diff(events)
        # set the first interval to the same value as the second
        interval[0] = interval[1]
    # return
    return interval


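# Usage sketch (illustrative, not part of the original module): intervals to
# the previous event (default) vs. the next event (`fwd=True`).
#
#     calc_intervals(np.array([1., 2., 4.]))            # -> array([1., 1., 2.])
#     calc_intervals(np.array([1., 2., 4.]), fwd=True)  # -> array([1., 2., 2.])

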
def find_closest_intervals(detections, annotations, matches=None):
    """
    Find the closest annotated interval to each beat detection.

    Parameters
    ----------
    detections : list or numpy array
        Detected beats.
    annotations : list or numpy array
        Annotated beats.
    matches : list or numpy array
        Indices of the closest beats.

    Returns
    -------
    numpy array
        Closest annotated beat intervals.

    Notes
    -----
    The sequences must be ordered. To speed up the calculation, a list of
    pre-computed indices of the closest matches can be used.

    The function does NOT test if each detection has a surrounding interval,
    it always returns the closest interval.

    """
    # if no detections are given, return an empty interval array
    if len(detections) == 0:
        return np.zeros(0, dtype=np.float)
    # at least 2 annotations must be given to calculate an interval
    if len(annotations) < 2:
        raise BeatIntervalError
    # make sure the annotations and detections have a float dtype
    detections = np.asarray(detections, dtype=np.float)
    annotations = np.asarray(annotations, dtype=np.float)
    # init array
    closest_interval = np.ones_like(detections)
    # intervals
    # Note: it is faster if we combine the forward and backward intervals,
    #       but we need to take care of the sizes; intervals to the next
    #       annotation are always the same as those at the next index
    intervals = np.zeros(len(annotations) + 1)
    # intervals to the previous annotation
    intervals[1:-1] = np.diff(annotations)
    # interval of the first annotation to the left is the same as to the right
    intervals[0] = intervals[1]
    # interval of the last annotation to the right is the same as to the left
    intervals[-1] = intervals[-2]
    # determine the closest annotations
    if matches is None:
        matches = find_closest_matches(detections, annotations)
    # calculate the absolute errors
    errors = calc_errors(detections, annotations, matches)
    # if the errors are positive, the detection is after the annotation,
    # thus use the interval towards the next annotation
    closest_interval[errors > 0] = intervals[matches[errors > 0] + 1]
    # if the errors are 0 or negative, the detection is before the annotation
    # or at the same position; thus use the interval to the previous
    # annotation
    closest_interval[errors <= 0] = intervals[matches[errors <= 0]]
    # return the closest interval
    return closest_interval


def find_longest_continuous_segment(sequence_indices):
    """
    Find the longest consecutive segment in the given sequence.

    Parameters
    ----------
    sequence_indices : numpy array
        Indices of the beats.

    Returns
    -------
    length : int
        Length of the longest consecutive segment.
    start : int
        Start position of the longest continuous segment.

    """
    # continuous segments have consecutive indices, i.e. diffs != 1 are
    # boundaries between continuous segments; add 1 to get the correct index
    boundaries = np.nonzero(np.diff(sequence_indices) != 1)[0] + 1
    # add a start (index 0) and stop (length of correct detections) to the
    # segment boundary indices
    boundaries = np.concatenate(([0], boundaries, [len(sequence_indices)]))
    # lengths of the individual segments
    segment_lengths = np.diff(boundaries)
    # return the length and start position of the longest continuous segment
    length = int(np.max(segment_lengths))
    start_pos = int(boundaries[np.argmax(segment_lengths)])
    return length, start_pos


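# Usage sketch (illustrative, not part of the original module): the indices
# 0..2 form the longest run of consecutive values, starting at position 0.
#
#     find_longest_continuous_segment(np.array([0, 1, 2, 5, 6]))  # -> (3, 0)

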
def calc_relative_errors(detections, annotations, matches=None):
    """
    Errors of the detections relative to the closest annotated interval.

    Parameters
    ----------
    detections : list or numpy array
        Detected beats.
    annotations : list or numpy array
        Annotated beats.
    matches : list or numpy array
        Indices of the closest beats.

    Returns
    -------
    numpy array
        Errors relative to the closest annotated beat interval.

    Notes
    -----
    The sequences must be ordered! To speed up the calculation, a list of
    pre-computed indices of the closest matches can be used.

    """
    # if no detections are given, return an empty interval array
    if len(detections) == 0:
        return np.zeros(0, dtype=np.float)
    # at least 2 annotations must be given to calculate an interval
    if len(annotations) < 2:
        raise BeatIntervalError
    # make sure the annotations and detections have a float dtype
    detections = np.asarray(detections, dtype=np.float)
    annotations = np.asarray(annotations, dtype=np.float)
    # determine the closest annotations
    if matches is None:
        matches = find_closest_matches(detections, annotations)
    # calculate the absolute errors
    errors = calc_errors(detections, annotations, matches)
    # get the closest intervals
    intervals = find_closest_intervals(detections, annotations, matches)
    # return the relative errors
    return errors / intervals


# default beat evaluation parameter values
FMEASURE_WINDOW = 0.07
PSCORE_TOLERANCE = 0.2
CEMGIL_SIGMA = 0.04
GOTO_THRESHOLD = 0.175
GOTO_SIGMA = 0.1
GOTO_MU = 0.1
CONTINUITY_TEMPO_TOLERANCE = 0.175
CONTINUITY_PHASE_TOLERANCE = 0.175
INFORMATION_GAIN_BINS = 40


# evaluation functions for beat detection
def pscore(detections, annotations, tolerance=PSCORE_TOLERANCE):
    """
    Calculate the P-score accuracy for the given detections and annotations.

    The P-score is determined by taking the sum of the cross-correlation
    between two impulse trains, representing the detections and annotations,
    allowing for a tolerance of 20% of the median annotated interval [1]_.

    Parameters
    ----------
    detections : list or numpy array
        Detected beats.
    annotations : list or numpy array
        Annotated beats.
    tolerance : float, optional
        Evaluation tolerance (fraction of the median beat interval).

    Returns
    -------
    pscore : float
        P-score.

    Notes
    -----
    Contrary to the original implementation, which samples the two impulse
    trains with 100Hz, we do not quantise the annotations and detections but
    rather count all detections falling within the defined tolerance window.

    References
    ----------
    .. [1] M. McKinney, D. Moelants, M. Davies and A. Klapuri,
           "Evaluation of audio beat tracking and music tempo extraction
           algorithms",
           Journal of New Music Research, vol. 36, no. 1, 2007.

    """
    # neither detections nor annotations are given, perfect score
    if len(detections) == 0 and len(annotations) == 0:
        return 1.
    # either beat detections or annotations are empty, score 0
    if (len(detections) == 0) != (len(annotations) == 0):
        return 0.
    # at least 2 annotations must be given to calculate an interval
    if len(annotations) < 2:
        raise BeatIntervalError("At least 2 annotations are needed for "
                                "P-score.")
    # tolerance must be greater than 0
    if float(tolerance) <= 0:
        raise ValueError("`tolerance` must be greater than 0.")
    # make sure the annotations and detections have a float dtype
    detections = np.asarray(detections, dtype=np.float)
    annotations = np.asarray(annotations, dtype=np.float)
    # the error window is the given fraction of the median beat interval
    window = tolerance * np.median(np.diff(annotations))
    # errors
    errors = calc_absolute_errors(detections, annotations)
    # count the instances where the error is smaller or equal than the window
    p = len(detections[errors <= window])
    # normalize by the max number of detections/annotations
    p /= float(max(len(detections), len(annotations)))
    # return p-score
    return p


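# Usage sketch (illustrative, not part of the original module): with a median
# annotated interval of 1s, the tolerance window is 0.2s, so three of the
# four detections below count as correct.
#
#     pscore([0.99, 2.01, 3.0, 4.3], [1., 2., 3., 4.])  # -> 0.75

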
def cemgil(detections, annotations, sigma=CEMGIL_SIGMA):
    """
    Calculate the Cemgil accuracy for the given detections and annotations.

    Parameters
    ----------
    detections : list or numpy array
        Detected beats.
    annotations : list or numpy array
        Annotated beats.
    sigma : float, optional
        Sigma for Gaussian error function.

    Returns
    -------
    cemgil : float
        Cemgil beat tracking accuracy.

    References
    ----------
    .. [1] A.T. Cemgil, B. Kappen, P. Desain, and H. Honing,
           "On tempo tracking: Tempogram representation and Kalman filtering",
           Journal of New Music Research, vol. 28, no. 4, 2001.

    """
    # neither detections nor annotations are given, perfect score
    if len(detections) == 0 and len(annotations) == 0:
        return 1.
    # either beat detections or annotations are empty, score 0
    if (len(detections) == 0) != (len(annotations) == 0):
        return 0.
    # sigma must be greater than 0
    if float(sigma) <= 0:
        raise ValueError("`sigma` must be greater than 0.")
    # make sure the annotations and detections have a float dtype
    detections = np.asarray(detections, dtype=np.float)
    annotations = np.asarray(annotations, dtype=np.float)
    # determine the abs. errors of the detections to the closest annotations
    # Note: the original implementation searches for the closest matches of
    #       detections given the annotations. Since absolute errors > a usual
    #       beat interval produce high errors (and thus in turn add negligible
    #       values to the accuracy), it is safe to swap those two.
    errors = calc_absolute_errors(detections, annotations)
    # apply a Gaussian error function with the given std. dev. on the errors
    acc = np.exp(-(errors ** 2.) / (2. * (sigma ** 2.)))
    # and sum up the accuracy
    acc = np.sum(acc)
    # normalize by the mean of the number of detections and annotations
    acc /= 0.5 * (len(annotations) + len(detections))
    # return accuracy
    return acc


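# Usage sketch (illustrative, not part of the original module): perfect
# detections score 1.0; a detection off by exactly `sigma` (0.04s by default)
# contributes exp(-0.5) ~= 0.607 instead of 1.
#
#     cemgil([1., 2., 3.], [1., 2., 3.])    # -> 1.0
#     cemgil([1.04, 2., 3.], [1., 2., 3.])  # -> (0.607 + 1 + 1) / 3 ~= 0.869

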
def goto(detections, annotations, threshold=GOTO_THRESHOLD, sigma=GOTO_SIGMA,
         mu=GOTO_MU):
    """
    Calculate the Goto and Muraoka accuracy for the given detections and
    annotations.

    Parameters
    ----------
    detections : list or numpy array
        Detected beats.
    annotations : list or numpy array
        Annotated beats.
    threshold : float, optional
        Threshold.
    sigma : float, optional
        Allowed std. dev. of the errors in the longest segment.
    mu : float, optional
        Allowed mean of the errors in the longest segment.

    Returns
    -------
    goto : float
        Goto beat tracking accuracy.

    Notes
    -----
    [1]_ requires that the first correct beat detection must occur within the
    first 3/4 of the excerpt. In order to be able to deal with audio with
    varying tempo, this was altered such that the length of the longest
    continuously tracked segment must be at least 1/4 of the total length
    [2]_.

    References
    ----------
    .. [1] M. Goto and Y. Muraoka,
           "Issues in evaluating beat tracking systems",
           Working Notes of the IJCAI-97 Workshop on Issues in AI and Music -
           Evaluation and Assessment, 1997.
    .. [2] Matthew E. P. Davies, Norberto Degara, and Mark D. Plumbley,
           "Evaluation Methods for Musical Audio Beat Tracking Algorithms",
           Technical Report C4DM-TR-09-06,
           Centre for Digital Music, Queen Mary University of London, 2009.

    """
    # neither detections nor annotations are given, perfect score
    if len(detections) == 0 and len(annotations) == 0:
        return 1.
    # either beat detections or annotations are empty, score 0
    if (len(detections) == 0) != (len(annotations) == 0):
        return 0.
    # at least 2 annotations must be given to calculate an interval
    if len(annotations) < 2:
        raise BeatIntervalError("At least 2 annotations are needed for "
                                "Goto's score.")
    # threshold, sigma and mu must be greater than 0
    if float(threshold) <= 0 or float(sigma) <= 0 or float(mu) <= 0:
        raise ValueError("Threshold, sigma and mu must be positive.")
    # make sure the annotations and detections have a float dtype
    detections = np.asarray(detections, dtype=np.float)
    annotations = np.asarray(annotations, dtype=np.float)
    # get the indices of the closest detections to the annotations to
    # determine the longest continuous segment
    closest = find_closest_matches(annotations, detections)
    # keep only those which have abs(errors) <= threshold
    # Note: both the original paper and the Matlab implementation normalize
    #       by half a beat interval, thus our threshold is halved (same
    #       applies to sigma and mu)
    # errors of the detections relative to the surrounding annotation interval
    errors = calc_relative_errors(detections, annotations)
    # the absolute error must be smaller than the given threshold
    closest = closest[np.abs(errors[closest]) <= threshold]
    # get the length and start position of the longest continuous segment
    length, start = find_longest_continuous_segment(closest)
    # three conditions must be met to identify the segment as correct
    # 1) the length of the segment must be at least 1/4 of the total length
    # Note: the original paper requires that the first element must occur
    #       within the first 3/4 of the excerpt, but this was altered in the
    #       Matlab implementation to the above condition to be able to deal
    #       with audio with varying tempo
    if length < 0.25 * len(annotations):
        return 0.
    # errors of the longest segment
    segment_errors = errors[closest[start: start + length]]
    # 2) the mean of the errors must not exceed mu
    if np.mean(np.abs(segment_errors)) > mu:
        return 0.
    # 3) the std. deviation of the errors must not exceed sigma
    # Note: contrary to the original paper and in line with the Matlab code,
    #       we calculate the std. deviation based on the raw errors and not
    #       on their absolute values.
    if np.std(segment_errors) > sigma:
        return 0.
    # otherwise return 1
    return 1.


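# Usage sketch (illustrative, not part of the original module): Goto's score
# is binary; perfectly tracked beats yield 1, anything violating one of the
# three conditions yields 0.
#
#     goto([1., 2., 3., 4.], [1., 2., 3., 4.])  # -> 1.0

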
def cml(detections, annotations, phase_tolerance=CONTINUITY_PHASE_TOLERANCE,
        tempo_tolerance=CONTINUITY_TEMPO_TOLERANCE):
    """
    Calculate the cmlc and cmlt scores for the given detections and
    annotations.

    Parameters
    ----------
    detections : list or numpy array
        Detected beats.
    annotations : list or numpy array
        Annotated beats.
    phase_tolerance : float, optional
        Allowed phase tolerance.
    tempo_tolerance : float, optional
        Allowed tempo tolerance.

    Returns
    -------
    cmlc : float
        Longest continuous segment of correct detections normalized by the
        maximum length of both sequences (detections and annotations).
    cmlt : float
        Same as cmlc, but no continuity required.

    References
    ----------
    .. [1] S. Hainsworth,
           "Techniques for the automated analysis of musical audio",
           PhD. dissertation, Department of Engineering, Cambridge University,
           2004.
    .. [2] A.P. Klapuri, A. Eronen, and J. Astola,
           "Analysis of the meter of acoustic musical signals",
           IEEE Transactions on Audio, Speech and Language Processing,
           vol. 14, no. 1, 2006.

    """
    # neither detections nor annotations are given
    if len(detections) == 0 and len(annotations) == 0:
        return 1., 1.
    # either beat detections or annotations are empty, score 0
    if (len(detections) == 0) != (len(annotations) == 0):
        return 0., 0.
    # at least 2 annotations must be given to calculate an interval
    if len(annotations) < 2:
        raise BeatIntervalError("At least 2 annotations are needed for "
                                "continuity scores, %s given." % annotations)
    # TODO: remove this, see TODO below
    if len(detections) < 2:
        raise BeatIntervalError("At least 2 detections are needed for "
                                "continuity scores, %s given." % detections)
    # tolerances must be greater than 0
    if float(tempo_tolerance) <= 0 or float(phase_tolerance) <= 0:
        raise ValueError("Tempo and phase tolerances must be greater than 0.")
    # make sure the annotations and detections have a float dtype
    detections = np.asarray(detections, dtype=np.float)
    annotations = np.asarray(annotations, dtype=np.float)
    # determine the closest annotations to the detections
    closest = find_closest_matches(detections, annotations)
    # errors of the detections wrt. the annotations
    errors = calc_absolute_errors(detections, annotations, closest)
    # detection intervals
    det_interval = calc_intervals(detections)
    # annotation intervals (get those intervals at the correct positions)
    ann_interval = calc_intervals(annotations)[closest]
    # a detection is correct if it fulfills 2 conditions:
    # 1) it must match an annotation within a certain tolerance window, i.e.
    #    the phase must be correct
    correct_phase = detections[errors <= ann_interval * phase_tolerance]
    # Note: the initially cited technical report has an additional condition
    #       ii) on page 5 which requires the same condition to be true for the
    #       previous detection / annotation combination. We do not enforce
    #       this, since a) this condition is kind of pointless: why shouldn't
    #       we count a correct beat just because its predecessor is not? and
    #       b) the original Matlab implementation does not enforce it either
    # 2) the tempo, i.e. the intervals, must be within the tempo tolerance
    # TODO: as agreed with Matthew, this should only be enforced from the 2nd
    #       beat onwards
    correct_tempo = detections[abs(1 - (det_interval / ann_interval)) <=
                               tempo_tolerance]
    # combine the conditions
    correct = np.intersect1d(correct_phase, correct_tempo)
    # convert to indices
    correct_idx = np.searchsorted(detections, correct)
    # cmlc: longest continuous segment of detections normalized by the max.
    # length of both sequences (detections and annotations)
    length = float(max(len(detections), len(annotations)))
    longest, _ = find_longest_continuous_segment(correct_idx)
    cmlc = longest / length
    # cmlt: same but for all detections (no need for continuity)
    cmlt = len(correct) / length
    # return a tuple
    return cmlc, cmlt


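# Usage sketch (illustrative, not part of the original module): detections
# matching the annotations in both phase and tempo give perfect scores.
#
#     cml([1., 2., 3., 4.], [1., 2., 3., 4.])  # -> (1.0, 1.0)

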
def continuity(detections, annotations,
               phase_tolerance=CONTINUITY_PHASE_TOLERANCE,
               tempo_tolerance=CONTINUITY_TEMPO_TOLERANCE,
               offbeat=True, double=True, triple=True):
    """
    Calculate the cmlc, cmlt, amlc and amlt scores for the given detections
    and annotations.

    Parameters
    ----------
    detections : list or numpy array
        Detected beats.
    annotations : list or numpy array
        Annotated beats.
    phase_tolerance : float, optional
        Allowed phase tolerance.
    tempo_tolerance : float, optional
        Allowed tempo tolerance.
    offbeat : bool, optional
        Include offbeat variation.
    double : bool, optional
        Include double and half tempo variations (and offbeats thereof).
    triple : bool, optional
        Include triple and third tempo variations (and offbeats thereof).

    Returns
    -------
    cmlc : float
        Tracking accuracy, continuity at the correct metrical level required.
    cmlt : float
        Same as cmlc, continuity at the correct metrical level not required.
    amlc : float
        Same as cmlc, alternate metrical levels allowed.
    amlt : float
        Same as cmlt, alternate metrical levels allowed.

    See Also
    --------
    :func:`cml`

    """
    # neither detections nor annotations are given
    if len(detections) == 0 and len(annotations) == 0:
        return 1., 1., 1., 1.
    # either beat detections or annotations are empty, score 0
    if (len(detections) == 0) != (len(annotations) == 0):
        return 0., 0., 0., 0.
    # evaluate the correct tempo
    # Note: pass the tolerances as keyword arguments, since `cml` expects the
    #       phase tolerance before the tempo tolerance
    cmlc, cmlt = cml(detections, annotations,
                     phase_tolerance=phase_tolerance,
                     tempo_tolerance=tempo_tolerance)
    amlc = cmlc
    amlt = cmlt
    # speed up the calculation by skipping the other metrical levels if the
    # score is higher than 0.5 already. We must have tested the correct
    # metrical level already, otherwise the cmlc score would be lower.
    if cmlc > 0.5:
        return cmlc, cmlt, amlc, amlt
    # create different variants of the annotations:
    # Note: double also includes half, as does triple third, respectively
    sequences = variations(annotations, offbeat=offbeat, double=double,
                           half=double, triple=triple, third=triple)
    # evaluate these metrical variants
    for sequence in sequences:
        # if other metrical levels achieve higher accuracies, take these
        try:
            # Note: catch the BeatIntervalError here, because the beat
            #       variants could be too short for a valid interval
            #       calculation; this is ok, since we already have valid
            #       values for amlc & amlt
            c, t = cml(detections, sequence,
                       phase_tolerance=phase_tolerance,
                       tempo_tolerance=tempo_tolerance)
        except BeatIntervalError:
            c, t = np.nan, np.nan
        amlc = max(amlc, c)
        amlt = max(amlt, t)
    # return a tuple
    return cmlc, cmlt, amlc, amlt


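# Usage sketch (illustrative, not part of the original module): detections at
# double tempo fail at the correct metrical level (CML) but are credited at
# the allowed metrical levels (AML).
#
#     det = [1., 1.5, 2., 2.5, 3., 3.5, 4.]
#     continuity(det, [1., 2., 3., 4.])  # -> (0.0, 0.0, 1.0, 1.0)

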
def _histogram_bins(num_bins):
    """
    Helper function to generate the histogram bins used to calculate the
    error histogram of the information gain.

    Parameters
    ----------
    num_bins : int
        Number of histogram bins.

    Returns
    -------
    numpy array
        Histogram bin edges.

    Notes
    -----
    This function returns the bin edges for a histogram with one more bin
    than the requested number of bins, because the first and last bins are
    added together (to make the histogram circular) later on. For the same
    reason, the first and the last bin are only half as wide as the others.

    """
    # allow only even numbers and require at least 2 bins
    if num_bins % 2 != 0 or num_bins < 2:
        # Note: because of the implementation details of the histogram, the
        #       easiest way to make sure that an error of 0 is always mapped
        #       to the centre bin is to enforce an even number of bins
        raise ValueError("Number of error histogram bins must be even and "
                         "greater than 0")
    # since np.histogram accepts a sequence of bin edges, we just increase
    # the number of bins by 1, but need to apply an offset
    offset = 0.5 / num_bins
    # because the histogram is made circular by adding the last bin to the
    # first one (before the last bin is removed), increase the number of
    # bins by 2
    return np.linspace(-0.5 - offset, 0.5 + offset, num_bins + 2)


def _error_histogram(detections, annotations, histogram_bins):
    """
    Helper function to calculate the relative errors of the given detections
    and annotations and map them to a histogram with the given bin edges.

    Parameters
    ----------
    detections : list or numpy array
        Detected beats.
    annotations : list or numpy array
        Annotated beats.
    histogram_bins : numpy array
        Beat error histogram bin edges.

    Returns
    -------
    error_histogram : numpy array
        Beat error histogram.

    Notes
    -----
    The returned error histogram is circular, i.e. it contains 1 bin less
    than a histogram built normally with the given histogram bin edges. The
    values of the last and first bin are summed and mapped to the first bin.

    """
    # get the relative errors of the detections to the annotations
    errors = calc_relative_errors(detections, annotations)
    # map the relative beat errors to the range of -0.5..0.5
    errors = np.mod(errors + 0.5, -1) + 0.5
    # get bin counts for the given errors over the distribution
    histogram = np.histogram(errors, histogram_bins)[0].astype(np.float)
    # make the histogram circular by adding the last bin to the first one
    histogram[0] += histogram[-1]
    # return the histogram without the last bin
    return histogram[:-1]


def _entropy(error_histogram):
    """
    Helper function to calculate the entropy of the given error histogram.

    Parameters
    ----------
    error_histogram : numpy array
        Error histogram.

    Returns
    -------
    entropy : float
        Entropy of the error histogram.

    """
    # copy the error_histogram, because it must not be altered
    histogram = np.copy(error_histogram).astype(np.float)
    # normalize the histogram
    histogram /= np.sum(histogram)
    # set all 0 values to 1 to make the entropy calculation well-behaved
    histogram[histogram == 0] = 1.
    # calculate the entropy
    return -np.sum(histogram * np.log2(histogram))


def _information_gain(error_histogram):
    """
    Helper function to calculate the information gain of the given error
    histogram.

    Parameters
    ----------
    error_histogram : numpy array
        Error histogram.

    Returns
    -------
    information_gain : float
        Information gain.

    """
    # calculate the entropy of the error histogram
    if np.asarray(error_histogram).any():
        entropy = _entropy(error_histogram)
    else:
        # an empty error histogram has an entropy of 0
        entropy = 0.
    # return the information gain
    return np.log2(len(error_histogram)) - entropy


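# Illustrative sketch (not part of the original module): for `num_bins=4`,
# the extra edge and the half-width outer bins are visible; the 6 edges below
# define 5 bins, of which the first and last are merged later on.
#
#     _histogram_bins(4)
#     # -> array([-0.625, -0.375, -0.125,  0.125,  0.375,  0.625])

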
def information_gain(detections, annotations, num_bins=INFORMATION_GAIN_BINS):
    """
    Calculate the information gain for the given detections and annotations.

    Parameters
    ----------
    detections : list or numpy array
        Detected beats.
    annotations : list or numpy array
        Annotated beats.
    num_bins : int, optional
        Number of bins for the beat error histogram.

    Returns
    -------
    information_gain : float
        Information gain.
    error_histogram : numpy array
        Error histogram.

    References
    ----------
    .. [1] M. E. P. Davies, N. Degara and M. D. Plumbley,
           "Measuring the performance of beat tracking algorithms using a
           beat error histogram",
           IEEE Signal Processing Letters, vol. 18, no. 3, 2011.

    """
    # neither detections nor annotations are given, perfect score
    if len(detections) == 0 and len(annotations) == 0:
        # return the max. information gain and an empty error histogram
        return np.log2(num_bins), np.zeros(num_bins)
    # either beat detections or annotations are empty, score 0
    # Note: use "or" here, since we test both the detections against the
    #       annotations and vice versa during the evaluation process
    if len(detections) == 0 or len(annotations) == 0:
        # return an information gain of 0 and a uniform beat error histogram
        # Note: because swapped detections and annotations should return the
        #       same uniform histogram, the maximum length of the detections
        #       and annotations is chosen (instead of just the length of the
        #       annotations as in the Matlab implementation)
        max_length = max(len(detections), len(annotations))
        return 0., np.ones(num_bins) * max_length / float(num_bins)
    # at least 2 annotations and 2 detections must be given to calculate
    # the intervals
    if len(detections) < 2 or len(annotations) < 2:
        raise BeatIntervalError("At least 2 annotations and 2 detections are "
                                "needed for information gain.")
    # check if there are enough beat annotations for the number of bins
    if num_bins > len(annotations):
        warnings.warn("Not enough beat annotations (%d) for %d histogram "
                      "bins." % (len(annotations), num_bins))
    # create the bin edges for the error histogram
    histogram_bins = _histogram_bins(num_bins)
    # evaluate the detections against the annotations
    fwd_histogram = _error_histogram(detections, annotations, histogram_bins)
    fwd_ig = _information_gain(fwd_histogram)
    # if only a few (but correct) beats are detected, the errors can be
    # small; thus, also evaluate the annotations against the detections,
    # i.e. simulate a lot of false positive detections
    bwd_histogram = _error_histogram(annotations, detections, histogram_bins)
    bwd_ig = _information_gain(bwd_histogram)
    # only use the lower information gain
    if fwd_ig < bwd_ig:
        return fwd_ig, fwd_histogram
    return bwd_ig, bwd_histogram


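# Usage sketch (illustrative, not part of the original module): perfect
# detections put all relative errors into the single centre bin, so the
# entropy is 0 and the information gain reaches its maximum of log2(40).
#
#     ann = np.arange(1, 51, dtype=float)    # 50 beats, 1s apart
#     ig, hist = information_gain(ann, ann)  # ig == np.log2(40) ~= 5.32

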
# beat evaluation class
class BeatEvaluation(OnsetEvaluation):
    # this class inherits from OnsetEvaluation the Precision, Recall, and
    # F-measure evaluation stuff, but uses a different evaluation window
    """
    Beat evaluation class.

    Parameters
    ----------
    detections : str, list or numpy array
        Detected beats.
    annotations : str, list or numpy array
        Annotated ground truth beats.
    fmeasure_window : float, optional
        F-measure evaluation window [seconds].
    pscore_tolerance : float, optional
        P-score tolerance [fraction of the median beat interval].
    cemgil_sigma : float, optional
        Sigma of the Gaussian window for Cemgil accuracy.
    goto_threshold : float, optional
        Threshold for Goto error.
    goto_sigma : float, optional
        Sigma for Goto error.
    goto_mu : float, optional
        Mu for Goto error.
    continuity_phase_tolerance : float, optional
        Continuity phase tolerance.
    continuity_tempo_tolerance : float, optional
        Continuity tempo tolerance.
    information_gain_bins : int, optional
        Number of bins for the information gain beat error histogram.
    offbeat : bool, optional
        Include offbeat variation.
    double : bool, optional
        Include double and half tempo variations (and offbeats thereof).
    triple : bool, optional
        Include triple and third tempo variations (and offbeats thereof).
    skip : float, optional
        Skip the first `skip` seconds for evaluation.
    downbeats : bool, optional
        Evaluate downbeats instead of beats.

    Notes
    -----
    The `offbeat`, `double`, and `triple` variations of the beat sequences
    are used only for AMLc/AMLt.

    """
    METRIC_NAMES = [
        ('fmeasure', 'F-measure'),
        ('pscore', 'P-score'),
        ('cemgil', 'Cemgil'),
        ('goto', 'Goto'),
        ('cmlc', 'CMLc'),
        ('cmlt', 'CMLt'),
        ('amlc', 'AMLc'),
        ('amlt', 'AMLt'),
        ('information_gain', 'D'),
        ('global_information_gain', 'Dg')
    ]

    def __init__(self, detections, annotations,
                 fmeasure_window=FMEASURE_WINDOW,
                 pscore_tolerance=PSCORE_TOLERANCE,
                 cemgil_sigma=CEMGIL_SIGMA, goto_threshold=GOTO_THRESHOLD,
                 goto_sigma=GOTO_SIGMA, goto_mu=GOTO_MU,
                 continuity_phase_tolerance=CONTINUITY_PHASE_TOLERANCE,
                 continuity_tempo_tolerance=CONTINUITY_TEMPO_TOLERANCE,
                 information_gain_bins=INFORMATION_GAIN_BINS,
                 offbeat=True, double=True, triple=True, skip=0,
                 downbeats=False, **kwargs):
        # load the beat detections and annotations
        detections = load_beats(detections, downbeats)
        annotations = load_beats(annotations, downbeats)
        # if these are 2D, use only the first column (i.e. the time stamp)
        if detections.ndim > 1:
            detections = detections[:, 0]
        if annotations.ndim > 1:
            annotations = annotations[:, 0]
        # sort them
        detections = np.sort(detections)
        annotations = np.sort(annotations)
        # remove detections and annotations that are within the first `skip`
        # seconds
        # Note: skipping the first few seconds alters the results!
        if skip > 0:
            start_idx = np.searchsorted(detections, skip, 'right')
            detections = detections[start_idx:]
            start_idx = np.searchsorted(annotations, skip, 'right')
            annotations = annotations[start_idx:]
        # perform onset evaluation with the appropriate fmeasure_window
        super(BeatEvaluation, self).__init__(detections, annotations,
                                             window=fmeasure_window, **kwargs)
        # other scores
        self.pscore = pscore(detections, annotations, pscore_tolerance)
        self.cemgil = cemgil(detections, annotations, cemgil_sigma)
        self.goto = goto(detections, annotations, goto_threshold, goto_sigma,
                         goto_mu)
        # continuity scores
        # Note: pass the tolerances as keyword arguments, since `continuity`
        #       expects the phase tolerance before the tempo tolerance
        scores = continuity(detections, annotations,
                            phase_tolerance=continuity_phase_tolerance,
                            tempo_tolerance=continuity_tempo_tolerance,
                            offbeat=offbeat, double=double, triple=triple)
        self.cmlc, self.cmlt, self.amlc, self.amlt = scores
        # information gain stuff
        scores = information_gain(detections, annotations,
                                  information_gain_bins)
        self.information_gain, self.error_histogram = scores

    @property
    def global_information_gain(self):
        """Global information gain."""
        # Note: if only 1 file is evaluated, it is the same as the
        #       information gain
        return self.information_gain

    def tostring(self, **kwargs):
        """
        Format the evaluation metrics as a human readable string.

        Returns
        -------
        str
            Evaluation metrics formatted as a human readable string.

        """
        ret = ''
        if self.name is not None:
            ret += '%s\n  ' % self.name
        ret += 'F-measure: %.3f P-score: %.3f Cemgil: %.3f Goto: %.3f ' \
               'CMLc: %.3f CMLt: %.3f AMLc: %.3f AMLt: %.3f D: %.3f ' \
               'Dg: %.3f' % (self.fmeasure, self.pscore, self.cemgil,
                             self.goto, self.cmlc, self.cmlt, self.amlc,
                             self.amlt, self.information_gain,
                             self.global_information_gain)
        return ret


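# Usage sketch (illustrative, not part of the original module): evaluating a
# list of detections against annotations; a warning about the number of
# histogram bins is expected for such short sequences.
#
#     e = BeatEvaluation([0.99, 2.01, 3.0, 4.02], [1., 2., 3., 4.])
#     print(e.tostring())
#     # F-measure: 1.000 P-score: 1.000 Cemgil: ... Dg: ...

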
class BeatMeanEvaluation(MeanEvaluation):
    """
    Class for averaging beat evaluation scores.

    """
    METRIC_NAMES = BeatEvaluation.METRIC_NAMES

    @property
    def fmeasure(self):
        """F-measure."""
        return np.nanmean([e.fmeasure for e in self.eval_objects])

    @property
    def pscore(self):
        """P-score."""
        return np.nanmean([e.pscore for e in self.eval_objects])

    @property
    def cemgil(self):
        """Cemgil accuracy."""
        return np.nanmean([e.cemgil for e in self.eval_objects])

    @property
    def goto(self):
        """Goto accuracy."""
        return np.nanmean([e.goto for e in self.eval_objects])

    @property
    def cmlc(self):
        """CMLc."""
        return np.nanmean([e.cmlc for e in self.eval_objects])

    @property
    def cmlt(self):
        """CMLt."""
        return np.nanmean([e.cmlt for e in self.eval_objects])

    @property
    def amlc(self):
        """AMLc."""
        return np.nanmean([e.amlc for e in self.eval_objects])

    @property
    def amlt(self):
        """AMLt."""
        return np.nanmean([e.amlt for e in self.eval_objects])

    @property
    def information_gain(self):
        """Information gain."""
        return np.nanmean([e.information_gain for e in self.eval_objects])

    @property
    def error_histogram(self):
        """Error histogram."""
        if not self.eval_objects:
            # return an empty error histogram of length 0
            return np.zeros(0)
        # sum all error histograms to gather a global one
        return np.sum([e.error_histogram for e in self.eval_objects], axis=0)

    @property
    def global_information_gain(self):
        """Global information gain."""
        if len(self.error_histogram) == 0:
            # if the error histogram has length 0, the information gain is 0
            return 0.
        # calculate the information gain from the (global) error histogram
        return _information_gain(self.error_histogram)

    def tostring(self, **kwargs):
        """
        Format the evaluation metrics as a human readable string.

        Returns
        -------
        str
            Evaluation metrics formatted as a human readable string.

        """
        ret = ''
        if self.name is not None:
            ret += '%s\n  ' % self.name
        ret += 'F-measure: %.3f P-score: %.3f Cemgil: %.3f Goto: %.3f ' \
               'CMLc: %.3f CMLt: %.3f AMLc: %.3f AMLt: %.3f D: %.3f ' \
               'Dg: %.3f' % (self.fmeasure, self.pscore, self.cemgil,
                             self.goto, self.cmlc, self.cmlt, self.amlc,
                             self.amlt, self.information_gain,
                             self.global_information_gain)
        return ret


def add_parser(parser):
    """
    Add a beat evaluation sub-parser to an existing parser.

    Parameters
    ----------
    parser : argparse parser instance
        Existing argparse parser object.

    Returns
    -------
    sub_parser : argparse sub-parser instance
        Beat evaluation sub-parser.
    parser_group : argparse argument group
        Beat evaluation argument group.

    """
    import argparse
    # add beat evaluation sub-parser to the existing parser
    p = parser.add_parser(
        'beats', help='beat evaluation',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description='''
    This program evaluates pairs of files containing the beat annotations and
    detections. Suffixes can be given to filter them from the list of files.

    Each line represents a beat and must have the following format with values
    being separated by whitespace [brackets indicate optional values]:
    `beat_time [beat_inside_bar]`

    Lines starting with # are treated as comments and are ignored.

    To maintain compatibility with the original Matlab implementation, use the
    arguments '--skip 5 --no_triple'. Please note that the results can still
    differ because of the different implementation approach.
    ''')
    # set defaults
    p.set_defaults(eval=BeatEvaluation, sum_eval=None,
                   mean_eval=BeatMeanEvaluation)
    # file I/O
    evaluation_io(p, ann_suffix='.beats', det_suffix='.beats.txt')
    # parameters for sequence variants
    s = p.add_argument_group('sequence manipulation arguments')
    s.add_argument('--no_offbeat', dest='offbeat', action='store_false',
                   help='do not include offbeat evaluation')
    s.add_argument('--no_double', dest='double', action='store_false',
                   help='do not include double/half tempo evaluation')
    s.add_argument('--no_triple', dest='triple', action='store_false',
                   help='do not include triple/third tempo evaluation')
    s.add_argument('--skip', action='store', type=float, default=0,
                   help='skip first N seconds for evaluation '
                        '[default=%(default).3f]')
    s.add_argument('--downbeats', action='store_true',
                   help='evaluate only downbeats')
    # evaluation parameters
    g = p.add_argument_group('beat evaluation arguments')
    g.add_argument('--window', dest='fmeasure_window', action='store',
                   type=float, default=FMEASURE_WINDOW,
                   help='evaluation window for F-measure '
                        '[seconds, default=%(default).3f]')
    g.add_argument('--tolerance', dest='pscore_tolerance', action='store',
                   type=float, default=PSCORE_TOLERANCE,
                   help='evaluation tolerance for P-score '
                        '[default=%(default).3f]')
    g.add_argument('--sigma', dest='cemgil_sigma', action='store',
                   type=float, default=CEMGIL_SIGMA,
                   help='sigma for Cemgil accuracy [default=%(default).3f]')
    g.add_argument('--goto_threshold', action='store', type=float,
                   default=GOTO_THRESHOLD,
                   help='threshold for Goto error [default=%(default).3f]')
    g.add_argument('--goto_sigma', action='store', type=float,
                   default=GOTO_SIGMA,
                   help='sigma for Goto error [default=%(default).3f]')
    g.add_argument('--goto_mu', action='store', type=float, default=GOTO_MU,
                   help='µ for Goto error [default=%(default).3f]')
    g.add_argument('--phase_tolerance', dest='continuity_phase_tolerance',
                   action='store', type=float,
                   default=CONTINUITY_PHASE_TOLERANCE,
                   help='phase tolerance window for continuity accuracies '
                        '[default=%(default).3f]')
    g.add_argument('--tempo_tolerance', dest='continuity_tempo_tolerance',
                   action='store', type=float,
                   default=CONTINUITY_TEMPO_TOLERANCE,
                   help='tempo tolerance window for continuity accuracies '
                        '[default=%(default).3f]')
    g.add_argument('--bins', dest='information_gain_bins', action='store',
                   type=int, default=INFORMATION_GAIN_BINS,
                   help='number of histogram bins for information gain '
                        '[default=%(default)i]')
    # return the sub-parser and evaluation argument group
    return p, g