Source code for madmom.features.chords

# encoding: utf-8
"""
This module contains chord recognition related functionality.

"""
from __future__ import absolute_import, division, print_function

from functools import partial

import numpy as np

from madmom.processors import SequentialProcessor


# dtype for numpy structured arrays that contain chord segments
CHORD_DTYPE = [('start', float), ('end', float), ('label', 'U32')]
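
# For illustration, a hand-built segment array in this dtype (a minimal
# sketch; the values are made up):
#
# >>> import numpy as np
# >>> np.array([(0.0, 1.6, 'F:maj')], dtype=CHORD_DTYPE)
# array([(0. , 1.6, 'F:maj')],
#       dtype=[('start', '<f8'), ('end', '<f8'), ('label', '<U32')])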


def load_chords(filename):
    """
    Load labelled chord segments from a file. Chord segments must be given
    in the following format, one chord label per line:

        <start_time> <end_time> <chord_label>

    All times should be given in seconds.

    Parameters
    ----------
    filename : str or file handle
        File containing the segments.

    Returns
    -------
    numpy structured array
        Structured array with columns 'start', 'end', and 'label',
        containing the start time, end time, and segment label,
        respectively.

    Notes
    -----
    Segment files cannot contain comments, because e.g. chord annotations
    can contain the '#' character! The maximum label length is 32
    characters.

    """
    return np.loadtxt(filename, comments=None, ndmin=1, dtype=CHORD_DTYPE,
                      converters={2: lambda x: x.decode()})
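
# A minimal usage sketch for `load_chords` (the file name and contents are
# made up). Note that the byte-decoding converter above targets NumPy
# versions whose `loadtxt` yields bytes; on newer NumPy versions the labels
# may already arrive as `str`.
#
# >>> with open('example.chords', 'w') as f:
# ...     _ = f.write('0.000\t1.600\tF:maj\n1.600\t2.500\tA:maj\n')
# >>> load_chords('example.chords')  # doctest: +SKIP
# array([(0. , 1.6, 'F:maj'), (1.6, 2.5, 'A:maj')],
#       dtype=[('start', '<f8'), ('end', '<f8'), ('label', '<U32')])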
def write_chords(chords, filename):
    """
    Write chord segments to a file.

    Parameters
    ----------
    chords : numpy structured array
        Chord segments, one per row (for the column definition, see notes).
    filename : str or file handle
        Output filename or handle.

    Returns
    -------
    numpy structured array
        Chord segments.

    Notes
    -----
    Chords are represented as a numpy structured array with three named
    columns: 'start' contains the start time in seconds, 'end' the end
    time in seconds, and 'label' the chord label.

    """
    np.savetxt(filename, chords, fmt=['%.3f', '%.3f', '%s'], delimiter='\t')
    return chords
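
# Round-trip sketch: build a segment array and write it as tab-separated
# text with millisecond precision (the file name is made up):
#
# >>> import numpy as np
# >>> segments = np.array([(0.0, 1.6, 'F:maj'), (1.6, 2.5, 'A:maj')],
# ...                     dtype=CHORD_DTYPE)
# >>> _ = write_chords(segments, 'example.chords')  # doctest: +SKIP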
def majmin_targets_to_chord_labels(targets, fps):
    """
    Converts a series of major/minor chord targets to human readable chord
    labels. Targets are assumed to be spaced equidistant in time as defined
    by the `fps` parameter (each target represents one 'frame').

    Ids 0-11 encode major chords starting with root 'A', 12-23 minor
    chords. Id 24 represents 'N', the no-chord class.

    Parameters
    ----------
    targets : iterable
        Iterable containing chord class ids.
    fps : float
        Frames per second. Consecutive targets of the same class are
        joined into a single labelled segment.

    Returns
    -------
    chord_labels : numpy structured array
        Structured array with columns 'start', 'end', and 'label',
        containing the start time, end time, and chord label of each
        segment.

    """
    # create a map of semitone index to semitone name (e.g. 0 -> A, 1 -> A#)
    pitch_class_to_label = ['A', 'A#', 'B', 'C', 'C#', 'D', 'D#', 'E', 'F',
                            'F#', 'G', 'G#']

    def pred_to_cl(pred):
        """
        Map a class id to a chord label:
        0..11 major chords, 12..23 minor chords, 24 no chord.
        """
        if pred == 24:
            return 'N'
        return '{}:{}'.format(pitch_class_to_label[pred % 12],
                              'maj' if pred < 12 else 'min')

    # get labels per frame
    spf = 1. / fps
    labels = [(i * spf, pred_to_cl(p)) for i, p in enumerate(targets)]

    # join same consecutive predictions
    prev_label = (None, None)
    uniq_labels = []
    for label in labels:
        if label[1] != prev_label[1]:
            uniq_labels.append(label)
            prev_label = label

    # end time of last label is one frame duration after
    # the last prediction time
    start_times, chord_labels = zip(*uniq_labels)
    end_times = start_times[1:] + (labels[-1][0] + spf,)

    return np.array(list(zip(start_times, end_times, chord_labels)),
                    dtype=CHORD_DTYPE)
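
# Sketch of the id-to-label conversion: four frame-wise targets at 10 fps,
# where id 24 is 'N' (no chord), id 0 is 'A:maj' and id 12 is 'A:min'.
# The two consecutive 0 targets are joined into one 'A:maj' segment:
#
# >>> majmin_targets_to_chord_labels([24, 0, 0, 12], fps=10)
# ... # doctest: +NORMALIZE_WHITESPACE
# array([(0. , 0.1, 'N'), (0.1, 0.3, 'A:maj'), (0.3, 0.4, 'A:min')],
#       dtype=[('start', '<f8'), ('end', '<f8'), ('label', '<U32')])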
class DeepChromaChordRecognitionProcessor(SequentialProcessor):
    """
    Recognise major and minor chords from deep chroma vectors [1]_ using a
    Conditional Random Field.

    Parameters
    ----------
    model : str
        File containing the CRF model. If None, use the model supplied
        with madmom.
    fps : float
        Frames per second. Must correspond to the fps of the incoming
        activations and the model.

    References
    ----------
    .. [1] Filip Korzeniowski and Gerhard Widmer,
           "Feature Learning for Chord Recognition: The Deep Chroma
           Extractor",
           Proceedings of the 17th International Society for Music
           Information Retrieval Conference (ISMIR), 2016.

    Examples
    --------
    To recognise chords in an audio file using the
    DeepChromaChordRecognitionProcessor you first need to create a
    madmom.audio.chroma.DeepChromaProcessor to extract the appropriate
    chroma vectors.

    >>> from madmom.audio.chroma import DeepChromaProcessor
    >>> dcp = DeepChromaProcessor()
    >>> dcp  # doctest: +ELLIPSIS
    <madmom.audio.chroma.DeepChromaProcessor object at ...>

    Then, create the DeepChromaChordRecognitionProcessor to decode a chord
    sequence from the extracted chromas:

    >>> decode = DeepChromaChordRecognitionProcessor()
    >>> decode  # doctest: +ELLIPSIS
    <madmom.features.chords.DeepChromaChordRecognitionProcessor object at ...>

    To transcribe the chords, you can either manually call the processors
    one after another,

    >>> chroma = dcp('tests/data/audio/sample2.wav')
    >>> decode(chroma)
    ... # doctest: +NORMALIZE_WHITESPACE +NORMALIZE_ARRAYS +IGNORE_UNICODE
    array([(0. , 1.6, u'F:maj'), (1.6, 2.5, u'A:maj'), (2.5, 4.1, u'D:maj')],
          dtype=[('start', '<f8'), ('end', '<f8'), ('label', '<U32')])

    or create a `SequentialProcessor` that connects them:

    >>> from madmom.processors import SequentialProcessor
    >>> chordrec = SequentialProcessor([dcp, decode])
    >>> chordrec('tests/data/audio/sample2.wav')
    ... # doctest: +NORMALIZE_WHITESPACE +NORMALIZE_ARRAYS +IGNORE_UNICODE
    array([(0. , 1.6, u'F:maj'), (1.6, 2.5, u'A:maj'), (2.5, 4.1, u'D:maj')],
          dtype=[('start', '<f8'), ('end', '<f8'), ('label', '<U32')])

    """

    def __init__(self, model=None, fps=10, **kwargs):
        from ..ml.crf import ConditionalRandomField
        from ..models import CHORDS_DCCRF
        crf = ConditionalRandomField.load(model or CHORDS_DCCRF[0])
        lbl = partial(majmin_targets_to_chord_labels, fps=fps)
        super(DeepChromaChordRecognitionProcessor, self).__init__((crf, lbl))
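
# Hedged sketch: the `model` parameter accepts a path to a custom CRF model
# file; 'my_chords_crf.pkl' below is a hypothetical file, not one shipped
# with madmom. With `model=None` (the default), the bundled model is used.
#
# >>> decode = DeepChromaChordRecognitionProcessor(  # doctest: +SKIP
# ...     model='my_chords_crf.pkl', fps=10)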
# functions necessary for CNNChordFeatureProcessor - they need to
# be outside of the class so the processor stays picklable
def _cnncfp_pad(data):
    """Pad the input with 11 zero frames on each side."""
    pad_data = np.zeros((11, 113))
    return np.vstack([pad_data, data, pad_data])


def _cnncfp_superframes(data):
    """Segment the input into superframes of 3 frames with a hop of 1."""
    from ..utils import segment_axis
    return segment_axis(data, 3, 1, axis=0)


def _cnncfp_avg(data):
    """Global average pooling over each superframe."""
    return data.mean((1, 2))
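
# Shape sketch for the helpers above, applied to a dummy array just to
# illustrate the geometry (in the real pipeline, `_cnncfp_superframes` and
# `_cnncfp_avg` operate on the network output, not on the raw spectrogram):
#
# >>> frames = np.ones((5, 113))
# >>> _cnncfp_pad(frames).shape                        # 11 zeros per side
# (27, 113)
# >>> _cnncfp_superframes(_cnncfp_pad(frames)).shape   # windows of 3, hop 1
# (25, 3, 113)
# >>> _cnncfp_avg(_cnncfp_superframes(_cnncfp_pad(frames))).shape
# (25,)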
class CNNChordFeatureProcessor(SequentialProcessor):
    """
    Extract learned features for chord recognition, as described in [1]_.

    References
    ----------
    .. [1] Filip Korzeniowski and Gerhard Widmer,
           "A Fully Convolutional Deep Auditory Model for Musical Chord
           Recognition",
           Proceedings of IEEE International Workshop on Machine Learning
           for Signal Processing (MLSP), 2016.

    Examples
    --------
    >>> proc = CNNChordFeatureProcessor()
    >>> proc  # doctest: +ELLIPSIS
    <madmom.features.chords.CNNChordFeatureProcessor object at 0x...>
    >>> features = proc('tests/data/audio/sample2.wav')
    >>> features.shape
    (41, 128)
    >>> features  # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
    array([[ 0.05798,  0.     , ...,  0.02757,  0.014  ],
           [ 0.06604,  0.     , ...,  0.02898,  0.00886],
           ...,
           [ 0.00655,  0.1166 , ...,  0.00651,  0.     ],
           [ 0.01476,  0.11185, ...,  0.00287,  0.     ]])

    """

    def __init__(self, **kwargs):
        from ..audio.signal import SignalProcessor, FramedSignalProcessor
        from ..audio.stft import ShortTimeFourierTransformProcessor
        from ..audio.spectrogram import LogarithmicFilteredSpectrogramProcessor
        from ..ml.nn import NeuralNetwork
        from ..models import CHORDS_CNN_FEAT

        # spectrogram computation
        sig = SignalProcessor(num_channels=1, sample_rate=44100)
        frames = FramedSignalProcessor(frame_size=8192, fps=10)
        stft = ShortTimeFourierTransformProcessor()  # caching FFT window
        spec = LogarithmicFilteredSpectrogramProcessor(
            num_bands=24, fmin=60, fmax=2600, unique_filters=True
        )
        # padding, neural network and global average pooling
        pad = _cnncfp_pad
        nn = NeuralNetwork.load(CHORDS_CNN_FEAT[0])
        superframes = _cnncfp_superframes
        avg = _cnncfp_avg
        # create processing pipeline
        super(CNNChordFeatureProcessor, self).__init__([
            sig, frames, stft, spec, pad, nn, superframes, avg
        ])


class CRFChordRecognitionProcessor(SequentialProcessor):
    """
    Recognise major and minor chords from learned features extracted by
    a convolutional neural network, as described in [1]_.

    Parameters
    ----------
    model : str
        File containing the CRF model. If None, use the model supplied
        with madmom.
    fps : float
        Frames per second. Must correspond to the fps of the incoming
        activations and the model.

    References
    ----------
    .. [1] Filip Korzeniowski and Gerhard Widmer,
           "A Fully Convolutional Deep Auditory Model for Musical Chord
           Recognition",
           Proceedings of IEEE International Workshop on Machine Learning
           for Signal Processing (MLSP), 2016.

    Examples
    --------
    To recognise chords using the CRFChordRecognitionProcessor, you first
    need to extract features using the CNNChordFeatureProcessor.

    >>> featproc = CNNChordFeatureProcessor()
    >>> featproc  # doctest: +ELLIPSIS
    <madmom.features.chords.CNNChordFeatureProcessor object at 0x...>

    Then, create the CRFChordRecognitionProcessor to decode a chord
    sequence from the extracted features:

    >>> decode = CRFChordRecognitionProcessor()
    >>> decode  # doctest: +ELLIPSIS
    <madmom.features.chords.CRFChordRecognitionProcessor object at 0x...>

    To transcribe the chords, you can either manually call the processors
    one after another,

    >>> feats = featproc('tests/data/audio/sample2.wav')
    >>> decode(feats)
    ... # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS +IGNORE_UNICODE
    ... # doctest: +NORMALIZE_ARRAYS
    array([(0. , 0.2, u'N'), (0.2, 1.6, u'F:maj'),
           (1.6, 2.4..., u'A:maj'), (2.4..., 4.1, u'D:min')],
          dtype=[('start', '<f8'), ('end', '<f8'), ('label', '<U32')])

    or create a `madmom.processors.SequentialProcessor` that connects them:

    >>> from madmom.processors import SequentialProcessor
    >>> chordrec = SequentialProcessor([featproc, decode])
    >>> chordrec('tests/data/audio/sample2.wav')
    ... # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS +IGNORE_UNICODE
    ... # doctest: +NORMALIZE_ARRAYS
    array([(0. , 0.2, u'N'), (0.2, 1.6, u'F:maj'),
           (1.6, 2.4..., u'A:maj'), (2.4..., 4.1, u'D:min')],
          dtype=[('start', '<f8'), ('end', '<f8'), ('label', '<U32')])

    """

    def __init__(self, model=None, fps=10, **kwargs):
        from ..ml.crf import ConditionalRandomField
        from ..models import CHORDS_CFCRF
        crf = ConditionalRandomField.load(model or CHORDS_CFCRF[0])
        lbl = partial(majmin_targets_to_chord_labels, fps=fps)
        super(CRFChordRecognitionProcessor, self).__init__((crf, lbl))
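
# End-to-end sketch tying the module together (the audio path and output
# file name are made up): extract CNN features, decode chords with the
# CRF, then save the segments with `write_chords`.
#
# >>> featproc = CNNChordFeatureProcessor()       # doctest: +SKIP
# >>> decode = CRFChordRecognitionProcessor()     # doctest: +SKIP
# >>> chords = decode(featproc('song.wav'))       # doctest: +SKIP
# >>> _ = write_chords(chords, 'song.chords')     # doctest: +SKIP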