# Source code for madmom.features.notes

# encoding: utf-8
# pylint: disable=no-member
# pylint: disable=invalid-name
# pylint: disable=too-many-arguments
"""
This module contains note transcription related functionality.

Notes are stored as numpy arrays with the following column definition:

'note_time' 'MIDI_note' ['duration' ['MIDI_velocity']]

"""

from __future__ import absolute_import, division, print_function

import numpy as np

from madmom.processors import SequentialProcessor, ParallelProcessor
from madmom.utils import suppress_warnings


@suppress_warnings
def load_notes(filename):
    """
    Load the notes from a file.

    Parameters
    ----------
    filename : str or file handle
        Input file to load the notes from.

    Returns
    -------
    numpy array
        Notes, one per row (always an array with at least two dimensions).

    Notes
    -----
    The file format must be (duration and velocity being optional):

    'note_time' 'MIDI_note' ['duration' ['MIDI_velocity']]

    with one note per line and individual fields separated by whitespace.

    A file containing only a single note yields an array with a single row
    instead of a flat 1-D array, so the result always follows the
    one-note-per-row column layout.

    """
    # `ndmin=2` prevents `np.loadtxt` from squeezing a single-line file into
    # a 1-D array, which would break the documented row/column layout
    return np.loadtxt(filename, ndmin=2)
def expand_notes(notes, duration=0.6, velocity=100):
    """
    Pad the notes with default values so that all four columns are present.

    Parameters
    ----------
    notes : numpy array, shape (num_notes, 2..4)
        Notes, one per row (column definition see notes).
    duration : float, optional
        Duration used for all notes if `notes` has no duration column.
    velocity : int, optional
        Velocity used for all notes if `notes` has no velocity column.

    Returns
    -------
    numpy array
        Notes with four columns (including note duration and velocity).
        If `notes` already has four columns, it is returned unchanged.

    Raises
    ------
    ValueError
        If `notes` is not two-dimensional or has an unsupported number of
        columns.

    Notes
    -----
    The note columns format must be (duration and velocity being optional):

    'note_time' 'MIDI_note' ['duration' ['MIDI_velocity']]

    """
    if notes.ndim != 2:
        raise ValueError('unknown format for `notes`')
    num_notes, num_columns = notes.shape
    if num_columns not in (2, 3, 4):
        raise ValueError('unable to handle `notes` with %d columns' %
                         num_columns)
    # nothing to add if all columns are present already
    if num_columns == 4:
        return notes
    # start with the velocity in all missing columns ...
    padding = np.ones((num_notes, 4 - num_columns)) * velocity
    # ... and put the duration first if that column is missing as well
    if num_columns == 2:
        padding[:, 0] = duration
    # append the missing columns to the given ones
    return np.hstack((notes, padding))
def write_notes(notes, filename, fmt=None, delimiter='\t', header=''):
    """
    Write the notes to a file (as many columns as given).

    Parameters
    ----------
    notes : numpy array, shape (num_notes, 2)
        Notes, one per row (column definition see notes).
    filename : str or file handle
        Output filename or handle.
    fmt : list, optional
        Format of the fields (i.e. columns, see notes); only as many
        entries as `notes` has columns are used.
    delimiter : str, optional
        String or character separating the columns.
    header : str, optional
        Header to be written (as a comment).

    Returns
    -------
    numpy array
        Notes (the unchanged input).

    Raises
    ------
    ValueError
        If `notes` is not two-dimensional.

    Notes
    -----
    The note columns format must be (duration and velocity being optional):

    'note_time' 'MIDI_note' ['duration' ['MIDI_velocity']]

    """
    from ..utils import write_events
    # fall back to the default per-column formats if none are given
    column_formats = ['%.3f', '%d', '%.3f', '%d'] if fmt is None else fmt
    if notes.ndim != 2:
        raise ValueError('unknown format for `notes`')
    # use only as many formats as there are columns and join them to a
    # single format string describing a complete line
    num_columns = notes.shape[1]
    line_format = delimiter.join(column_formats[:num_columns])
    # write the notes and hand them back to the caller
    write_events(notes, filename, fmt=line_format, header=header)
    return notes
def write_midi(notes, filename, duration=0.6, velocity=100):
    """
    Write the notes to a MIDI file.

    Parameters
    ----------
    notes : numpy array, shape (num_notes, 2)
        Notes, one per row (column definition see notes).
    filename : str
        Output MIDI file.
    duration : float, optional
        Note duration if not defined by `notes`.
    velocity : int, optional
        Note velocity if not defined by `notes`.

    Returns
    -------
    numpy array
        Notes (including note length and velocity).

    Notes
    -----
    The note columns format must be (duration and velocity being optional):

    'note_time' 'MIDI_note' ['duration' ['MIDI_velocity']]

    """
    from ..utils.midi import process_notes
    # fill in the default duration and velocity where they are missing
    expanded = expand_notes(notes, duration, velocity)
    # the result of writing the expanded notes to the file is passed on
    return process_notes(expanded, filename)
def write_mirex_format(notes, filename, duration=0.6):
    """
    Write the frequencies of the notes to file (in MIREX format).

    Parameters
    ----------
    notes : numpy array, shape (num_notes, 2)
        Notes, one per row (column definition see notes).
    filename : str or file handle
        Output filename or handle.
    duration : float, optional
        Note duration if not defined by `notes`.

    Returns
    -------
    numpy array
        Notes in MIREX format.

    Notes
    -----
    The note columns format must be (duration and velocity being optional):

    'note_time' 'MIDI_note' ['duration' ['MIDI_velocity']]

    The output format required by MIREX is:

    'onset_time' 'offset_time' 'note_frequency'

    """
    from ..audio.filters import midi2hz
    # make sure a duration column is present
    expanded = expand_notes(notes, duration)
    # MIREX wants the offset time (onset + duration) instead of the duration
    # and the frequency instead of the MIDI note number
    onsets = expanded[:, 0]
    offsets = onsets + expanded[:, 2]
    frequencies = midi2hz(expanded[:, 1])
    mirex_notes = np.vstack((onsets, offsets, frequencies)).T
    # MIREX format: onset \t offset \t frequency
    write_notes(mirex_notes, filename, fmt=['%.3f', '%.3f', '%.1f'])
    return mirex_notes
# class for detecting notes with an RNN
class RNNPianoNoteProcessor(SequentialProcessor):
    """
    Processor to get a (piano) note activation function from a RNN.

    The processor chains a pre-processing stage (mono signal at 44.1 kHz,
    followed by filtered logarithmic spectrograms and their positive
    differences for three frame sizes, stacked into a single feature
    array) with a neural network loaded from the first `NOTES_BRNN` model.

    Examples
    --------
    Create a RNNPianoNoteProcessor and pass a file through the processor to
    obtain a note onset activation function (sampled with 100 frames per
    second).

    >>> proc = RNNPianoNoteProcessor()
    >>> proc  # doctest: +ELLIPSIS
    <madmom.features.notes.RNNPianoNoteProcessor object at 0x...>
    >>> act = proc('tests/data/audio/sample.wav')
    >>> act.shape
    (281, 88)
    >>> act  # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
    array([[-0.00014,  0.0002 , ..., -0.     ,  0.     ],
           [ 0.00008,  0.0001 , ...,  0.00006, -0.00001],
           ...,
           [-0.00005, -0.00011, ...,  0.00005, -0.00001],
           [-0.00017,  0.00002, ...,  0.00009, -0.00009]], dtype=float32)

    """

    def __init__(self, **kwargs):
        # pylint: disable=unused-argument
        from ..audio.signal import SignalProcessor, FramedSignalProcessor
        from ..audio.spectrogram import (
            FilteredSpectrogramProcessor, LogarithmicSpectrogramProcessor,
            SpectrogramDifferenceProcessor)
        from ..models import NOTES_BRNN
        from ..ml.nn import NeuralNetwork

        # the signal is always processed as mono with 44.1 kHz
        signal = SignalProcessor(num_channels=1, sample_rate=44100)
        # one parallel branch per frame size (multi-resolution spec & diff)
        branches = ParallelProcessor([])
        for size in (1024, 2048, 4096):
            # frames -> filtered spec -> log spec -> spec + positive diffs
            chain = (
                FramedSignalProcessor(frame_size=size, fps=100),
                FilteredSpectrogramProcessor(
                    num_bands=12, fmin=30, fmax=17000, norm_filters=True),
                LogarithmicSpectrogramProcessor(mul=5, add=1),
                SpectrogramDifferenceProcessor(
                    diff_ratio=0.5, positive_diffs=True,
                    stack_diffs=np.hstack),
            )
            branches.append(SequentialProcessor(chain))
        # stack the features of all branches into a single array
        features = SequentialProcessor((signal, branches, np.hstack))
        # the stacked features are then fed to the neural network
        network = NeuralNetwork.load(NOTES_BRNN[0])
        # chain pre-processing and network sequentially
        super(RNNPianoNoteProcessor, self).__init__((features, network))