Source code for madmom.features

# encoding: utf-8
# pylint: disable=no-member
# pylint: disable=invalid-name
# pylint: disable=too-many-arguments
# pylint: disable=wrong-import-position
"""
This package includes high-level features. Your definition of "high" may
vary, but we define high-level features as the ones you want to evaluate (e.g.
onsets, beats, etc.). All lower-level features can be found in the
`madmom.audio` package.

Notes
-----
All features should be implemented as classes which inherit from Processor
(or provide a XYZProcessor(Processor) variant). This way, multiple Processor
objects can be chained/combined to achieve the wanted functionality.


"""

from __future__ import absolute_import, division, print_function

import numpy as np

from madmom.processors import Processor


class Activations(np.ndarray):
    """
    The Activations class extends a numpy ndarray with a frame rate (fps)
    attribute.

    Parameters
    ----------
    data : str, file handle or numpy array
        Either file name/handle to read the data from or array.
    fps : float, optional
        Frames per second (must be set if `data` is given as an array).
    sep : str, optional
        Separator between activation values (if read from file).
    dtype : numpy dtype
        Data-type the activations are stored/saved/kept.

    Attributes
    ----------
    fps : float
        Frames per second.

    Raises
    ------
    TypeError
        If `data` is neither a numpy array, a file name nor a file handle, or
        if no frame rate could be determined.

    Notes
    -----
    If a filename or file handle is given, an undefined or empty separator
    means that the file should be treated as a numpy binary file.
    Only binary files can store the frame rate of the activations.
    Text files should not be used for anything else but manual inspection
    or I/O with other programs.

    """
    # pylint: disable=super-on-old-class
    # pylint: disable=super-init-not-called
    # pylint: disable=attribute-defined-outside-init

    def __init__(self, data, fps=None, sep=None, dtype=np.float32):
        # this method is for documentation purposes only
        pass

    def __new__(cls, data, fps=None, sep=None, dtype=np.float32):
        import io
        # check the type of the given data
        if isinstance(data, np.ndarray):
            # cast to Activations
            obj = np.asarray(data, dtype=dtype).view(cls)
            obj.fps = fps
        elif isinstance(data, (str, io.IOBase)):
            # read from file or file handle
            obj = cls.load(data, fps, sep)
            # `load` always creates an instance with the default dtype, thus
            # cast explicitly if another dtype was requested; `astype` keeps
            # the subclass and `__array_finalize__` carries over the fps
            if obj.dtype != np.dtype(dtype):
                obj = obj.astype(dtype)
        else:
            raise TypeError("wrong input data for Activations")
        # frame rate must be set
        if obj.fps is None:
            raise TypeError("frame rate for Activations must be set")
        # return the object
        return obj

    def __array_finalize__(self, obj):
        # called by numpy whenever a new instance is created (explicit
        # construction, view casting or slicing/new-from-template)
        if obj is None:
            return
        # set default values here
        self.fps = getattr(obj, 'fps', None)

    @classmethod
    def load(cls, infile, fps=None, sep=None):
        """
        Load the activations from a file.

        Parameters
        ----------
        infile : str or file handle
            Input file name or file handle.
        fps : float, optional
            Frames per second; if set, it overwrites the saved frame rate.
        sep : str, optional
            Separator between activation values.

        Returns
        -------
        :class:`Activations` instance
            :class:`Activations` instance.

        Raises
        ------
        TypeError
            If no frame rate is given and none is stored in the file.

        Notes
        -----
        An undefined or empty separator means that the file should be treated
        as a numpy binary file.
        Only binary files can store the frame rate of the activations.
        Text files should not be used for anything else but manual inspection
        or I/O with other programs.

        """
        # load the activations
        if sep in [None, '']:
            # numpy binary format
            loaded = np.load(infile)
            if isinstance(loaded, np.lib.npyio.NpzFile):
                # .npz archives keep the underlying file open until they are
                # closed, so read everything needed inside a `with` block
                with loaded:
                    # set the frame rate if none is given
                    if fps is None:
                        fps = float(loaded['fps'])
                    # accessing the key reads the array into memory
                    data = loaded['activations']
            else:
                # plain .npy file, the array is returned directly
                data = loaded
        else:
            # simple text format
            data = np.loadtxt(infile, delimiter=sep)
        if data.ndim > 1 and data.shape[1] == 1:
            # flatten the array if it has only 1 real dimension
            data = data.flatten()
        # instantiate a new object
        return cls(data, fps)

    def save(self, outfile, sep=None, fmt='%.5f'):
        """
        Save the activations to a file.

        Parameters
        ----------
        outfile : str or file handle
            Output file name or file handle.
        sep : str, optional
            Separator between activation values if saved as text file.
        fmt : str, optional
            Format of the values if saved as text file.

        Raises
        ------
        ValueError
            If activations with more than two dimensions should be saved as
            text.

        Notes
        -----
        An undefined or empty separator means that the file should be treated
        as a numpy binary file.
        Only binary files can store the frame rate of the activations.
        Text files should not be used for anything else but manual inspection
        or I/O with other programs.

        If the activations are a 1D array, its values are interpreted as
        features of a single time step, i.e. all values are printed in a
        single line. If you want each value to appear in an individual line,
        use '\\n' as a separator.

        If the activations are a 2D array, the first axis corresponds to the
        time dimension, i.e. the features are separated by `sep` and the time
        steps are printed in separate lines. If you like to swap the
        dimensions, please use the `T` attribute.

        """
        # save the activations
        if sep in [None, '']:
            # numpy binary format
            npz = {'activations': self,
                   'fps': self.fps}
            np.savez(outfile, **npz)
        else:
            if self.ndim > 2:
                raise ValueError('Only 1D and 2D activations can be saved in '
                                 'human readable text format.')
            # simple text format; the frame rate is only written as a
            # comment header and is not read back by `load`
            header = "FPS:%f" % self.fps
            np.savetxt(outfile, np.atleast_2d(self), fmt=fmt, delimiter=sep,
                       header=header)
class ActivationsProcessor(Processor):
    """
    ActivationsProcessor processes a file and returns an Activations instance.

    Parameters
    ----------
    mode : {'r', 'w', 'in', 'out', 'load', 'save'}
        Mode of the Processor: read/write.
    fps : float, optional
        Frame rate of the activations (if set, it overwrites the saved frame
        rate).
    sep : str, optional
        Separator between activation values if saved as text file.

    Notes
    -----
    An undefined or empty ("") separator means that the file should be treated
    as a numpy binary file. Only binary files can store the frame rate of the
    activations.

    """

    def __init__(self, mode, fps=None, sep=None, **kwargs):
        # pylint: disable=unused-argument
        # additional keyword arguments are accepted but ignored
        self.mode = mode
        self.fps = fps
        self.sep = sep

    def process(self, data, output=None):
        """
        Depending on the mode, either loads the data stored in the given file
        and returns it as an Activations instance or save the data to the given
        output.

        Parameters
        ----------
        data : str, file handle or numpy array
            Data or file to be loaded (if `mode` is 'r') or data to be saved
            to file (if `mode` is 'w').
        output : str or file handle, optional
            output file (only in write-mode)

        Returns
        -------
        :class:`Activations` instance
            :class:`Activations` instance (only in read-mode); in write-mode
            the given `data` is returned unchanged.

        Raises
        ------
        ValueError
            If the processor's `mode` is not one of the supported values.

        """
        # pylint: disable=arguments-differ
        if self.mode in ('r', 'in', 'load'):
            return Activations.load(data, fps=self.fps, sep=self.sep)
        if self.mode in ('w', 'out', 'save'):
            # TODO: should we return the data or the Activations instance?
            Activations(data, fps=self.fps).save(output, sep=self.sep)
        else:
            # wrap the mode in a tuple so that any value (including tuples)
            # is formatted into the single placeholder
            raise ValueError("wrong mode %s; choose {'r', 'w', 'in', 'out', "
                             "'load', 'save'}" % (self.mode,))
        return data

    @staticmethod
    def add_arguments(parser):
        """
        Add options to save/load activations to an existing parser.

        Parameters
        ----------
        parser : argparse parser instance
            Existing argparse parser.

        Returns
        -------
        parser_group : argparse argument group
            Input/output argument parser group.

        """
        # add a group for the save/load options to the existing parser
        g = parser.add_argument_group('save/load the activations')
        # add options for saving and loading the activations
        g.add_argument('--save', action='store_true', default=False,
                       help='save the activations to file')
        g.add_argument('--load', action='store_true', default=False,
                       help='load the activations from file')
        g.add_argument('--sep', action='store', default=None,
                       help='separator for saving/loading the activations '
                            '[default: None, i.e. numpy binary format]')
        # return the argument group so it can be modified if needed
        return g
# finally import the submodules from . import onsets, beats, notes, tempo