# Source code for madmom.features.key

# encoding: utf-8
# pylint: disable=no-member
# pylint: disable=invalid-name
# pylint: disable=too-many-arguments
"""
This module contains key recognition related functionality.

"""

import numpy as np

from ..processors import SequentialProcessor


# Human-readable names for the 24 key classes predicted by the CNN:
# indices 0-11 are the major keys (starting at A), 12-23 the minor keys
# (starting at A).  `key_prediction_to_label` indexes into this list with
# the argmax of the network output, so the order here must match the
# ordering of the model's output units.
KEY_LABELS = ['A major', 'Bb major', 'B major', 'C major', 'Db major',
              'D major', 'Eb major', 'E major', 'F major', 'F# major',
              'G major', 'Ab major', 'A minor', 'Bb minor', 'B minor',
              'C minor', 'C# minor', 'D minor', 'D# minor', 'E minor',
              'F minor', 'F# minor', 'G minor', 'G# minor']


def key_prediction_to_label(prediction):
    """
    Convert key class id to a human-readable key name.

    Parameters
    ----------
    prediction : numpy array
        Array containing the probabilities of each key class.

    Returns
    -------
    str
        Human-readable key name.

    """
    # ensure a 2D view so both a raw probability vector and a batched
    # (1, 24) network output are handled the same way
    probabilities = np.atleast_2d(prediction)
    return KEY_LABELS[np.argmax(probabilities[0])]
def add_axis(x):
    """Return `x` with a new leading (length-1) axis prepended."""
    # indexing with None is equivalent to np.newaxis
    return x[None, ...]
class CNNKeyRecognitionProcessor(SequentialProcessor):
    """
    Recognise the global key of a musical piece using a Convolutional
    Neural Network as described in [1]_.

    Parameters
    ----------
    nn_files : list, optional
        List with trained CNN model files. Per default ('None'), an
        ensemble of networks will be used.

    References
    ----------
    .. [1] Filip Korzeniowski and Gerhard Widmer,
           "Genre-Agnostic Key Classification with Convolutional Neural
           Networks", In Proceedings of the 19th International Society for
           Music Information Retrieval Conference (ISMIR), Paris, France,
           2018.

    Examples
    --------
    Create a CNNKeyRecognitionProcessor and pass a file through it.
    The returned array represents the probability of each key class.

    >>> proc = CNNKeyRecognitionProcessor()
    >>> proc  # doctest: +ELLIPSIS
    <madmom.features.key.CNNKeyRecognitionProcessor object at 0x...>
    >>> proc('tests/data/audio/sample.wav') # doctest: +NORMALIZE_WHITESPACE
    array([[0.03426, 0.0331 , 0.02979, 0.04423, 0.04215, 0.0311 , 0.05225,
            0.04263, 0.04141, 0.02907, 0.03755, 0.09546, 0.0431 , 0.02792,
            0.02138, 0.05589, 0.03276, 0.02786, 0.02415, 0.04608, 0.05329,
            0.02804, 0.03868, 0.08786]])

    """

    def __init__(self, nn_files=None, **kwargs):
        # imports are deferred to instantiation time to keep module import
        # lightweight (matches the style used elsewhere in madmom)
        from ..audio.signal import SignalProcessor, FramedSignalProcessor
        from ..audio.stft import ShortTimeFourierTransformProcessor
        from ..audio.spectrogram import \
            LogarithmicFilteredSpectrogramProcessor
        from ..ml.nn import NeuralNetworkEnsemble
        from ..ml.nn.activations import softmax
        from ..models import KEY_CNN
        # spectrogram computation: mono signal at 44.1 kHz, framed at 5 fps
        # with 8192-sample windows, then a logarithmically filtered
        # magnitude spectrogram restricted to 65-2100 Hz
        signal_proc = SignalProcessor(num_channels=1, sample_rate=44100)
        framer = FramedSignalProcessor(frame_size=8192, fps=5)
        stft_proc = ShortTimeFourierTransformProcessor()  # caching FFT window
        log_spec = LogarithmicFilteredSpectrogramProcessor(
            num_bands=24, fmin=65, fmax=2100, unique_filters=True)
        # neural network: fall back to the bundled ensemble if no model
        # files were given
        network = NeuralNetworkEnsemble.load(nn_files or KEY_CNN)
        # chain everything into a single sequential processing pipeline
        super(CNNKeyRecognitionProcessor, self).__init__(
            [signal_proc, framer, stft_proc, log_spec, network, add_axis,
             softmax])