# encoding: utf-8
# pylint: disable=no-member
# pylint: disable=invalid-name
# pylint: disable=too-many-arguments
"""
This module contains tempo related functionality.
"""
from __future__ import absolute_import, division, print_function
import numpy as np
from madmom.processors import Processor
from madmom.audio.signal import smooth as smooth_signal
# tempo value reported by `detect_tempo` when the histogram is empty, i.e. no
# tempo can be determined (not a number)
NO_TEMPO = np.nan
# helper functions
def smooth_histogram(histogram, smooth):
    """
    Return a smoothed copy of the given interval histogram.

    Parameters
    ----------
    histogram : tuple
        Histogram given as a tuple of 2 numpy arrays: the strengths of the
        bins first, the corresponding delay values second.
    smooth : int or numpy array
        Smoothing kernel (size).

    Returns
    -------
    histogram_bins : numpy array
        Bins of the smoothed histogram.
    histogram_delays : numpy array
        Corresponding delays (passed through untouched).

    Notes
    -----
    If `smooth` is an integer, a Hamming window of that length will be used as
    a smoothing kernel.

    """
    # only the bin strengths are filtered ...
    smoothed_bins = smooth_signal(histogram[0], smooth)
    # ... the delay axis stays exactly as it was given
    delays = histogram[1]
    return smoothed_bins, delays
# interval detection
def interval_histogram_acf(activations, min_tau=1, max_tau=None):
    """
    Compute the interval histogram of the given (beat) activation function via
    auto-correlation as in [1]_.

    Parameters
    ----------
    activations : numpy array
        Beat activation function (must be one-dimensional).
    min_tau : int, optional
        Minimal delay for the auto-correlation function [frames].
    max_tau : int, optional
        Maximal delay for the auto-correlation function [frames]. If None,
        `len(activations) - min_tau` is used.

    Returns
    -------
    histogram_bins : numpy array
        Bins of the tempo histogram.
    histogram_delays : numpy array
        Corresponding delays [frames].

    Raises
    ------
    NotImplementedError
        If `activations` is not one-dimensional.

    Notes
    -----
    A delay of 0 frames is supported and yields the auto-correlation of the
    signal with its un-shifted self.

    References
    ----------
    .. [1] Sebastian Böck and Markus Schedl,
           "Enhanced Beat Tracking with Context-Aware Neural Networks",
           Proceedings of the 14th International Conference on Digital Audio
           Effects (DAFx), 2011.

    """
    if activations.ndim != 1:
        raise NotImplementedError('too many dimensions for autocorrelation '
                                  'interval histogram calculation.')
    # set the maximum delay
    if max_tau is None:
        max_tau = len(activations) - min_tau
    # test all possible delays
    taus = list(range(min_tau, max_tau + 1))
    bins = []
    # Note: this is faster than:
    #   corr = np.correlate(activations, activations, mode='full')
    #   bins = corr[len(activations) + min_tau - 1: len(activations) + max_tau]
    for tau in taus:
        # a slice ending at `-0` is empty, hence a delay of 0 frames must use
        # the complete signal as the un-shifted counterpart
        unshifted = activations[:-tau] if tau else activations
        bins.append(np.sum(np.abs(activations[tau:] * unshifted)))
    # return histogram
    return np.array(bins), np.array(taus)
def interval_histogram_comb(activations, alpha, min_tau=1, max_tau=None):
    """
    Build the interval histogram of a (beat) activation function with a bank
    of resonating comb filters as in [1]_.

    Parameters
    ----------
    activations : numpy array
        Beat activation function (one- or two-dimensional).
    alpha : float or numpy array
        Scaling factor for the comb filter; if only a single value is given,
        the same scaling factor for all delays is assumed.
    min_tau : int, optional
        Minimal delay for the comb filter [frames].
    max_tau : int, optional
        Maximal delay for the comb filter [frames]. If None,
        `len(activations) - min_tau` is used.

    Returns
    -------
    histogram_bins : numpy array
        Bins of the tempo histogram.
    histogram_delays : numpy array
        Corresponding delays [frames].

    Raises
    ------
    NotImplementedError
        If `activations` has more than two dimensions.

    References
    ----------
    .. [1] Sebastian Böck, Florian Krebs and Gerhard Widmer,
           "Accurate Tempo Estimation based on Recurrent Neural Networks and
           Resonating Comb Filters",
           Proceedings of the 16th International Society for Music Information
           Retrieval Conference (ISMIR), 2015.

    """
    # the comb filter bank is imported only when this method is actually used
    from madmom.audio.comb_filters import CombFilterbankProcessor
    # fall back to a maximum delay derived from the length of the activations
    upper_tau = len(activations) - min_tau if max_tau is None else max_tau
    # all delays covered by the filter bank
    delays = np.arange(min_tau, upper_tau + 1)
    # one backward comb filter per delay
    filterbank = CombFilterbankProcessor('backward', delays, alpha)
    # anything beyond two dimensions is not supported
    if activations.ndim not in (1, 2):
        raise NotImplementedError('too many dimensions for comb filter '
                                  'interval histogram calculation.')
    # filter the activations with the complete bank
    filtered = filterbank.process(activations)
    # mark, for every time step, the delay(s) with the highest filter output
    strongest = np.max(filtered, axis=-1)[..., np.newaxis]
    is_strongest = filtered == strongest
    # accumulate the winning outputs over time to obtain the bin values
    return np.sum(filtered * is_strongest, axis=0), delays
# helper functions
def dominant_interval(histogram, smooth=None):
    """
    Extract the dominant interval of the given histogram.

    Parameters
    ----------
    histogram : tuple
        Histogram (tuple of 2 numpy arrays, the first giving the strengths of
        the bins and the second corresponding delay values).
    smooth : int or numpy array, optional
        Smooth the histogram with the given kernel (size). No smoothing is
        applied if it is None, 0 or an empty array.

    Returns
    -------
    interval : int
        Dominant interval, i.e. the delay of the strongest histogram bin.

    Notes
    -----
    If `smooth` is an integer, a Hamming window of that length will be used as
    a smoothing kernel.

    """
    # decide whether to smooth; the truth value of a numpy array with more
    # than one element is ambiguous (raises a ValueError), thus kernels given
    # as arrays must be tested explicitly
    if isinstance(smooth, np.ndarray):
        apply_smoothing = smooth.size > 1 or bool(smooth.any())
    else:
        apply_smoothing = bool(smooth)
    # smooth the histogram bins
    if apply_smoothing:
        histogram = smooth_histogram(histogram, smooth)
    # return the dominant interval
    return histogram[1][np.argmax(histogram[0])]
# extract the tempo from a histogram
def detect_tempo(histogram, fps):
    """
    Extract the tempo from the given histogram.

    Parameters
    ----------
    histogram : tuple
        Histogram (tuple of 2 numpy arrays, the first giving the strengths of
        the bins and the second corresponding delay values).
    fps : float
        Frames per second.

    Returns
    -------
    tempi : numpy array
        Two-dimensional numpy array with the dominant tempi [bpm] (first
        column) and their relative strengths (second column), sorted by
        descending strength.

    Notes
    -----
    A histogram without any peak (e.g. a flat one) yields the tempo of its
    center bin with a strength of 1; an empty histogram yields `NO_TEMPO`
    with a strength of 0.

    """
    from scipy.signal import argrelmax
    # histogram of IBIs
    bins = np.asarray(histogram[0])
    # convert the histogram bin delays to tempi in beats per minute
    tempi = 60.0 * fps / np.asarray(histogram[1])
    # to get the two dominant tempi, just keep the peaks
    # use 'wrap' mode to also get peaks at the borders
    peaks = argrelmax(bins, mode='wrap')[0]
    # we need more than 1 peak to report multiple tempi
    if len(peaks) == 0:
        # a flat histogram has no peaks, use the center bin
        if len(bins):
            # Note: floor division is needed, a float is not a valid index
            ret = np.asarray([tempi[len(bins) // 2], 1.])
        else:
            # otherwise: no peaks, no tempo
            ret = np.asarray([NO_TEMPO, 0.])
    elif len(peaks) == 1:
        # report only the strongest tempo
        ret = np.asarray([tempi[peaks[0]], 1.])
    else:
        # sort the peaks in descending order of bin heights
        sorted_peaks = peaks[np.argsort(bins[peaks])[::-1]]
        # normalize their strengths; dividing out-of-place always yields
        # floats, whereas in-place division fails for integer bin counts
        heights = bins[sorted_peaks]
        strengths = heights / np.sum(heights)
        # return the tempi and their normalized strengths
        ret = np.column_stack((tempi[sorted_peaks], strengths))
    # return the tempi
    return np.atleast_2d(ret)
# tempo estimation processor class
class TempoEstimationProcessor(Processor):
    """
    Tempo Estimation Processor class.

    Parameters
    ----------
    method : {'comb', 'acf', 'dbn'}
        Method used for tempo estimation.
    min_bpm : float, optional
        Minimum tempo to detect [bpm].
    max_bpm : float, optional
        Maximum tempo to detect [bpm].
    act_smooth : float, optional (default: 0.14)
        Smooth the activation function over `act_smooth` seconds.
    hist_smooth : int, optional (default: 9)
        Smooth the tempo histogram over `hist_smooth` bins.
    alpha : float, optional
        Scaling factor for the comb filter.
    fps : float, optional
        Frames per second.

    Examples
    --------
    Create a TempoEstimationProcessor. The returned array represents the
    estimated tempi (given in beats per minute) and their relative strength.

    >>> proc = TempoEstimationProcessor(fps=100)
    >>> proc  # doctest: +ELLIPSIS
    <madmom.features.tempo.TempoEstimationProcessor object at 0x...>

    Call this TempoEstimationProcessor with the beat activation function
    obtained by RNNBeatProcessor to estimate the tempi.

    >>> from madmom.features.beats import RNNBeatProcessor
    >>> act = RNNBeatProcessor()('tests/data/audio/sample.wav')
    >>> proc(act)  # doctest: +NORMALIZE_WHITESPACE
    array([[ 176.47059,  0.47469],
           [ 117.64706,  0.17667],
           [ 240.     ,  0.15371],
           [  68.96552,  0.09864],
           [  82.19178,  0.09629]])

    """
    # default values for tempo estimation
    METHOD = 'comb'
    MIN_BPM = 40.
    MAX_BPM = 250.
    HIST_SMOOTH = 9
    ACT_SMOOTH = 0.14
    ALPHA = 0.79

    def __init__(self, method=METHOD, min_bpm=MIN_BPM, max_bpm=MAX_BPM,
                 act_smooth=ACT_SMOOTH, hist_smooth=HIST_SMOOTH, alpha=ALPHA,
                 fps=None, **kwargs):
        # pylint: disable=unused-argument
        # save variables
        self.method = method
        self.min_bpm = min_bpm
        self.max_bpm = max_bpm
        self.act_smooth = act_smooth
        self.hist_smooth = hist_smooth
        self.alpha = alpha
        self.fps = fps

    @property
    def min_interval(self):
        """Minimum beat interval [frames]."""
        # the fastest tempo corresponds to the shortest interval
        return int(np.floor(60. * self.fps / self.max_bpm))

    @property
    def max_interval(self):
        """Maximum beat interval [frames]."""
        # the slowest tempo corresponds to the longest interval
        return int(np.ceil(60. * self.fps / self.min_bpm))

    def process(self, activations, **kwargs):
        """
        Detect the tempi from the (beat) activations.

        Parameters
        ----------
        activations : numpy array
            Beat activation function.

        Returns
        -------
        tempi : numpy array
            Array with the dominant tempi [bpm] (first column) and their
            relative strengths (second column).

        """
        # smooth the activations (skipped if no smoothing length is set)
        if self.act_smooth is not None:
            act_smooth = int(round(self.fps * self.act_smooth))
            activations = smooth_signal(activations, act_smooth)
        # generate a histogram of beat intervals
        # Note: use the builtin `float`, the `np.float` alias was removed
        #       from NumPy
        histogram = self.interval_histogram(activations.astype(float))
        # smooth the histogram
        histogram = smooth_histogram(histogram, self.hist_smooth)
        # detect the tempi and return them
        return detect_tempo(histogram, self.fps)

    def interval_histogram(self, activations):
        """
        Compute the histogram of the beat intervals with the selected method.

        Parameters
        ----------
        activations : numpy array
            Beat activation function.

        Returns
        -------
        histogram_bins : numpy array
            Bins of the beat interval histogram.
        histogram_delays : numpy array
            Corresponding delays [frames].

        Raises
        ------
        ValueError
            If `method` is none of 'acf', 'comb' or 'dbn'.

        """
        # build the tempo (i.e. inter beat interval) histogram and return it
        if self.method == 'acf':
            return interval_histogram_acf(activations, self.min_interval,
                                          self.max_interval)
        elif self.method == 'comb':
            return interval_histogram_comb(activations, self.alpha,
                                           self.min_interval,
                                           self.max_interval)
        elif self.method == 'dbn':
            from .beats import DBNBeatTrackingProcessor
            # instantiate a DBN for beat tracking
            dbn = DBNBeatTrackingProcessor(min_bpm=self.min_bpm,
                                           max_bpm=self.max_bpm,
                                           num_tempi=None, fps=self.fps)
            # get the best state path by calling the viterbi algorithm
            path, _ = dbn.hmm.viterbi(activations.astype(np.float32))
            intervals = dbn.st.state_intervals[path]
            # get the counts of the bins
            bins = np.bincount(intervals,
                               minlength=dbn.st.intervals.max() + 1)
            # truncate everything below the minimum interval of the state space
            bins = bins[dbn.st.intervals.min():]
            # build a histogram together with the intervals and return it
            return bins, dbn.st.intervals
        else:
            raise ValueError('tempo estimation method unknown')

    def dominant_interval(self, histogram):
        """
        Extract the dominant interval of the given histogram.

        Parameters
        ----------
        histogram : tuple
            Histogram (tuple of 2 numpy arrays, the first giving the strengths
            of the bins and the second corresponding delay values).

        Returns
        -------
        interval : int
            Dominant interval.

        """
        # delegate to the module level function of the same name, using the
        # histogram smoothing configured for this processor
        return dominant_interval(histogram, self.hist_smooth)

    @staticmethod
    def add_arguments(parser, method=METHOD, min_bpm=MIN_BPM, max_bpm=MAX_BPM,
                      act_smooth=ACT_SMOOTH, hist_smooth=HIST_SMOOTH,
                      alpha=ALPHA):
        """
        Add tempo estimation related arguments to an existing parser.

        Parameters
        ----------
        parser : argparse parser instance
            Existing argparse parser.
        method : {'comb', 'acf', 'dbn'}
            Method used for tempo estimation.
        min_bpm : float, optional
            Minimum tempo to detect [bpm].
        max_bpm : float, optional
            Maximum tempo to detect [bpm].
        act_smooth : float, optional
            Smooth the activation function over `act_smooth` seconds.
        hist_smooth : int, optional
            Smooth the tempo histogram over `hist_smooth` bins.
        alpha : float, optional
            Scaling factor for the comb filter.

        Returns
        -------
        parser_group : argparse argument group
            Tempo argument parser group.

        Notes
        -----
        Parameters are included in the group only if they are not 'None'.

        """
        # add tempo estimation related options to the existing parser
        g = parser.add_argument_group('tempo estimation arguments')
        if method is not None:
            g.add_argument('--method', action='store', type=str,
                           default=method, choices=['acf', 'comb', 'dbn'],
                           help="which method to use [default=%(default)s]")
        if min_bpm is not None:
            g.add_argument('--min_bpm', action='store', type=float,
                           default=min_bpm,
                           help='minimum tempo [bpm, default=%(default).2f]')
        if max_bpm is not None:
            g.add_argument('--max_bpm', action='store', type=float,
                           default=max_bpm,
                           help='maximum tempo [bpm, default=%(default).2f]')
        if act_smooth is not None:
            g.add_argument('--act_smooth', action='store', type=float,
                           default=act_smooth,
                           help='smooth the activations over N seconds '
                                '[default=%(default).2f]')
        if hist_smooth is not None:
            g.add_argument('--hist_smooth', action='store', type=int,
                           default=hist_smooth,
                           help='smooth the tempo histogram over N bins '
                                '[default=%(default)d]')
        if alpha is not None:
            g.add_argument('--alpha', action='store', type=float,
                           default=alpha,
                           help='alpha for comb filter tempo estimation '
                                '[default=%(default).2f]')
        # return the argument group so it can be modified if needed
        return g
# helper function for writing the detected tempi to file
def write_tempo(tempi, filename, mirex=False):
    """
    Write the most dominant tempi and the relative strength to a file.

    Parameters
    ----------
    tempi : numpy array
        Array with the detected tempi (first column) and their strengths
        (second column).
    filename : str or file handle
        Output file.
    mirex : bool, optional
        Report the lower tempo first (as required by MIREX).

    Returns
    -------
    tempo_1 : float
        The most dominant tempo.
    tempo_2 : float
        The second most dominant tempo.
    strength : float
        Their relative strength.

    Notes
    -----
    If no tempo is given at all (empty `tempi`), the default values
    `0., 0., 1.` are written and returned.

    """
    # make the given tempi a 2d array
    tempi = np.array(tempi, ndmin=2)
    # Note: because of `ndmin=2` an empty input has the shape (1, 0), so its
    #       length is 1; use the size to recognise that nothing was detected
    num_tempi = len(tempi) if tempi.size else 0
    # default values
    t1, t2, strength = 0., 0., 1.
    # only one tempo was detected
    if num_tempi == 1:
        t1 = tempi[0][0]
        # generate a fake second tempo
        # the boundary of 68 bpm is taken from Tzanetakis 2013 ICASSP paper
        if t1 < 68:
            t2 = t1 * 2.
        else:
            t2 = t1 / 2.
    # consider only the two strongest tempi and strengths
    elif num_tempi > 1:
        t1, t2 = tempi[:2, 0]
        strength = tempi[0, 1] / sum(tempi[:2, 1])
    # for MIREX, the lower tempo must be given first
    if mirex and t1 > t2:
        # the strength always refers to the tempo reported first
        t1, t2, strength = t2, t1, 1. - strength
    # format as a numpy array
    out = np.array([t1, t2, strength], ndmin=2)
    # write to output
    np.savetxt(filename, out, fmt='%.2f\t%.2f\t%.2f')
    # also return the tempi & strength
    return t1, t2, strength