# encoding: utf-8
# pylint: disable=no-member
# pylint: disable=invalid-name
# pylint: disable=too-many-arguments
"""
This module contains note evaluation functionality.

"""

from __future__ import absolute_import, division, print_function

import warnings
import numpy as np

from . import (evaluation_io, MultiClassEvaluation, SumEvaluation,
               MeanEvaluation)
from .onsets import onset_evaluation, OnsetEvaluation
from ..utils import suppress_warnings


@suppress_warnings
def load_notes(values):
    """
    Load the notes from the given values or file.

    Parameters
    ----------
    values : str, file handle, list of tuples or numpy array
        Notes values.

    Returns
    -------
    numpy array
        Notes.

    Notes
    -----
    Expected file/tuple/row format:

    'note_time' 'MIDI_note' ['duration' ['MIDI_velocity']]

    """
    # load the notes from the given representation
    if isinstance(values, (list, np.ndarray)):
        # convert to numpy array if possible
        # Note: use array instead of asarray because of ndmin
        return np.array(values, dtype=float, ndmin=2, copy=False)
    else:
        # try to load the data from file
        return np.loadtxt(values, ndmin=2)
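
# A minimal usage sketch for `load_notes` (the note values below are made up
# for illustration): a list of [onset, MIDI note] rows is returned as a 2D
# float array; a whitespace-separated text file with the same columns loads
# identically via np.loadtxt.
#
#     >>> notes = load_notes([[0.1, 60], [0.5, 62]])
#     >>> notes.shape
#     (2, 2)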

def remove_duplicate_notes(data):
    """
    Remove duplicate rows from the array.

    Parameters
    ----------
    data : numpy array
        Data.

    Returns
    -------
    numpy array
        Data array with duplicate rows removed.

    Notes
    -----
    This function removes only exact duplicates.

    """
    if data.size == 0:
        return data
    # found here: http://stackoverflow.com/questions/2828059/
    # find the unique rows
    order = np.ascontiguousarray(data).view(
        np.dtype((np.void, data.dtype.itemsize * data.shape[1])))
    unique = np.unique(order, return_index=True)[1]
    # only use the unique rows
    data = data[unique]
    # sort them by the first column and return them
    return data[data[:, 0].argsort()]
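
# Usage sketch (made-up values): exact duplicate rows are dropped and the
# result is sorted by onset time.
#
#     >>> data = np.array([[0.5, 62.], [0.1, 60.], [0.5, 62.]])
#     >>> remove_duplicate_notes(data).tolist()
#     [[0.1, 60.0], [0.5, 62.0]]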

# default note evaluation values
WINDOW = 0.025


# note onset evaluation function
def note_onset_evaluation(detections, annotations, window=WINDOW):
    """
    Determine the true/false positive/negative note onset detections.

    Parameters
    ----------
    detections : numpy array
        Detected notes.
    annotations : numpy array
        Annotated ground truth notes.
    window : float, optional
        Evaluation window [seconds].

    Returns
    -------
    tp : numpy array, shape (num_tp, 2)
        True positive detections.
    fp : numpy array, shape (num_fp, 2)
        False positive detections.
    tn : numpy array, shape (0, 2)
        True negative detections (empty, see notes).
    fn : numpy array, shape (num_fn, 2)
        False negative detections.
    errors : numpy array, shape (num_tp, 2)
        Errors of the true positive detections wrt. the annotations.

    Notes
    -----
    The expected note row format is:

    'note_time' 'MIDI_note' ['duration' ['MIDI_velocity']]

    The returned true negative array is empty, because this class is not of
    interest here; it would be orders of magnitude bigger than the true
    positive array.

    """
    # make sure the arrays have the correct types and dimensions
    detections = np.asarray(detections, dtype=float)
    annotations = np.asarray(annotations, dtype=float)
    # check dimensions
    if detections.ndim != 2 or annotations.ndim != 2:
        raise ValueError('detections and annotations must be 2D arrays')
    # init TP, FP, TN and FN arrays
    tp = np.zeros((0, 2))
    fp = np.zeros((0, 2))
    tn = np.zeros((0, 2))  # this will not be altered
    fn = np.zeros((0, 2))
    errors = np.zeros((0, 2))
    # if neither detections nor annotations are given
    if detections.size == 0 and annotations.size == 0:
        # return the arrays as is
        return tp, fp, tn, fn, errors
    # if only detections are given
    elif annotations.size == 0:
        # all detections are FP
        return tp, detections, tn, fn, errors
    # if only annotations are given
    elif detections.size == 0:
        # all annotations are FN
        return tp, fp, tn, annotations, errors
    # TODO: extend to also evaluate the duration and velocity of notes
    # for onset evaluation use only the onset time and MIDI note number
    detections = detections[:, :2]
    annotations = annotations[:, :2]
    # get a list of all notes detected / annotated
    notes = np.unique(np.concatenate((detections[:, 1],
                                      annotations[:, 1]))).tolist()
    # iterate over all notes
    for note in notes:
        # perform normal onset evaluation for each note individually
        det = detections[detections[:, 1] == note]
        ann = annotations[annotations[:, 1] == note]
        tp_, fp_, _, fn_, err_ = onset_evaluation(det[:, 0], ann[:, 0],
                                                  window)
        # select the matching detections / annotations and append them to the
        # respective arrays
        tp = np.vstack((tp, det[np.in1d(det[:, 0], tp_)]))
        fp = np.vstack((fp, det[np.in1d(det[:, 0], fp_)]))
        fn = np.vstack((fn, ann[np.in1d(ann[:, 0], fn_)]))
        # append the note number to the errors
        err_ = np.vstack((np.array(err_),
                          np.repeat(np.asarray([note]), len(err_)))).T
        errors = np.vstack((errors, err_))
    # check calculations
    if len(tp) + len(fp) != len(detections):
        raise AssertionError('bad TP / FP calculation')
    if len(tp) + len(fn) != len(annotations):
        raise AssertionError('bad FN calculation')
    if len(tp) != len(errors):
        raise AssertionError('bad errors calculation')
    # sort the arrays
    # Note: the errors must keep the same order as the TPs, so they must be
    #       sorted first (before the TPs themselves get sorted)
    errors = errors[tp[:, 0].argsort()]
    tp = tp[tp[:, 0].argsort()]
    fp = fp[fp[:, 0].argsort()]
    fn = fn[fn[:, 0].argsort()]
    # return the arrays
    return tp, fp, tn, fn, errors
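
# Usage sketch (hypothetical detections/annotations): with the default 25 ms
# window, the first detection matches the annotated MIDI note 60; the second
# detection is a false positive because its pitch differs from the annotated
# note 62, which in turn becomes a false negative.
#
#     >>> det = np.array([[0.11, 60.], [0.50, 64.]])
#     >>> ann = np.array([[0.10, 60.], [0.50, 62.]])
#     >>> tp, fp, tn, fn, errors = note_onset_evaluation(det, ann)
#     >>> tp.tolist(), fp.tolist(), fn.tolist()
#     ([[0.11, 60.0]], [[0.5, 64.0]], [[0.5, 62.0]])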

# for note evaluation with Precision, Recall, F-measure use the Evaluation
# class and just define the evaluation function
# TODO: extend to also report the measures without octave errors
class NoteEvaluation(MultiClassEvaluation):
    """
    Evaluation class for measuring Precision, Recall and F-measure of notes.

    Parameters
    ----------
    detections : str, list or numpy array
        Detected notes.
    annotations : str, list or numpy array
        Annotated ground truth notes.
    window : float, optional
        F-measure evaluation window [seconds].
    delay : float, optional
        Delay the detections `delay` seconds for evaluation.

    """

    def __init__(self, detections, annotations, window=WINDOW, delay=0,
                 **kwargs):
        # load the note detections and annotations
        detections = load_notes(detections)
        annotations = load_notes(annotations)
        # shift the detections if needed
        if delay != 0:
            detections[:, 0] += delay
        # evaluate
        tp, fp, tn, fn, errors = note_onset_evaluation(detections,
                                                       annotations, window)
        super(NoteEvaluation, self).__init__(tp, fp, tn, fn, **kwargs)
        self.errors = errors
        # save detections and annotations for the individual note evaluation
        self.detections = detections
        self.annotations = annotations
        self.window = window

    @property
    def mean_error(self):
        """Mean of the errors."""
        warnings.warn('mean_error is given for all notes, this will change!')
        if len(self.errors) == 0:
            return np.nan
        return np.mean(self.errors[:, 0])

    @property
    def std_error(self):
        """Standard deviation of the errors."""
        warnings.warn('std_error is given for all notes, this will change!')
        if len(self.errors) == 0:
            return np.nan
        return np.std(self.errors[:, 0])

    def tostring(self, notes=False, **kwargs):
        """
        Format the evaluation metrics as a human readable string.

        Parameters
        ----------
        notes : bool, optional
            Display detailed output for all individual notes.

        Returns
        -------
        str
            Evaluation metrics formatted as a human readable string.

        """
        ret = ''
        if self.name is not None:
            ret += '%s\n  ' % self.name
        # add statistics for the individual notes
        if notes:
            # determine which notes are present
            notes = []
            if self.tp.any():
                notes = np.append(notes, np.unique(self.tp[:, 1]))
            if self.fp.any():
                notes = np.append(notes, np.unique(self.fp[:, 1]))
            if self.tn.any():
                notes = np.append(notes, np.unique(self.tn[:, 1]))
            if self.fn.any():
                notes = np.append(notes, np.unique(self.fn[:, 1]))
            # evaluate them individually
            for note in sorted(np.unique(notes)):
                # detections and annotations for this note (onset times only)
                det = self.detections[self.detections[:, 1] == note][:, 0]
                ann = self.annotations[self.annotations[:, 1] == note][:, 0]
                name = 'MIDI note %s' % note
                e = OnsetEvaluation(det, ann, self.window, name=name)
                # append to the output string
                ret += '  %s\n' % e.tostring(notes=False)
        # normal formatting
        ret += 'Notes: %5d TP: %5d FP: %4d FN: %4d ' \
               'Precision: %.3f Recall: %.3f F-measure: %.3f ' \
               'Acc: %.3f mean: %5.1f ms std: %5.1f ms' % \
               (self.num_annotations, self.num_tp, self.num_fp, self.num_fn,
                self.precision, self.recall, self.fmeasure, self.accuracy,
                self.mean_error * 1000., self.std_error * 1000.)
        # return
        return ret
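
# Usage sketch for `NoteEvaluation` (inline arrays for illustration; file
# names or file handles work as well, see `load_notes`):
#
#     >>> e = NoteEvaluation([[0.11, 60]], [[0.10, 60]])
#     >>> e.num_tp, e.num_fp, e.num_fn
#     (1, 0, 0)
#     >>> e.fmeasure
#     1.0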

class NoteSumEvaluation(SumEvaluation, NoteEvaluation):
    """
    Class for summing note evaluations.

    """

    @property
    def errors(self):
        """Errors of the true positive detections wrt. the ground truth."""
        if not self.eval_objects:
            # return an empty array
            return np.zeros((0, 2))
        return np.concatenate([e.errors for e in self.eval_objects])

class NoteMeanEvaluation(MeanEvaluation, NoteSumEvaluation):
    """
    Class for averaging note evaluations.

    """

    @property
    def mean_error(self):
        """Mean of the errors."""
        warnings.warn('mean_error is given for all notes, this will change!')
        return np.nanmean([e.mean_error for e in self.eval_objects])

    @property
    def std_error(self):
        """Standard deviation of the errors."""
        warnings.warn('std_error is given for all notes, this will change!')
        return np.nanmean([e.std_error for e in self.eval_objects])

    def tostring(self, **kwargs):
        """
        Format the evaluation metrics as a human readable string.

        Returns
        -------
        str
            Evaluation metrics formatted as a human readable string.

        """
        # format with floats instead of integers
        ret = ''
        if self.name is not None:
            ret += '%s\n  ' % self.name
        ret += 'Notes: %5.2f TP: %5.2f FP: %5.2f FN: %5.2f ' \
               'Precision: %.3f Recall: %.3f F-measure: %.3f ' \
               'Acc: %.3f mean: %5.1f ms std: %5.1f ms' % \
               (self.num_annotations, self.num_tp, self.num_fp, self.num_fn,
                self.precision, self.recall, self.fmeasure, self.accuracy,
                self.mean_error * 1000., self.std_error * 1000.)
        return ret
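
# Aggregation sketch (hypothetical single-note evaluations): summing pools
# the raw TP/FP/FN arrays of all evaluations, while averaging takes the mean
# of each evaluation's individual metrics.
#
#     >>> e1 = NoteEvaluation([[0.1, 60]], [[0.1, 60]])   # 1 TP
#     >>> e2 = NoteEvaluation([[0.2, 62]], [[0.2, 64]])   # 1 FP, 1 FN
#     >>> NoteSumEvaluation([e1, e2]).num_tp
#     1
#     >>> NoteMeanEvaluation([e1, e2]).fmeasure
#     0.5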

def add_parser(parser):
    """
    Add a note evaluation sub-parser to an existing parser.

    Parameters
    ----------
    parser : argparse parser instance
        Existing argparse parser object.

    Returns
    -------
    sub_parser : argparse sub-parser instance
        Note evaluation sub-parser.
    parser_group : argparse argument group
        Note evaluation argument group.

    """
    import argparse
    # add note evaluation sub-parser to the existing parser
    p = parser.add_parser(
        'notes', help='note evaluation',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description='''
    This program evaluates pairs of files containing the note annotations and
    detections. Suffixes can be given to filter them from the list of files.

    Each line represents a note and must have the following format with
    values being separated by whitespace [brackets indicate optional values]:
    `onset_time MIDI_note [duration [velocity]]`

    Lines starting with # are treated as comments and are ignored.

    ''')
    # set defaults
    p.set_defaults(eval=NoteEvaluation, sum_eval=NoteSumEvaluation,
                   mean_eval=NoteMeanEvaluation)
    # file I/O
    evaluation_io(p, ann_suffix='.notes', det_suffix='.notes.txt')
    # evaluation parameters
    g = p.add_argument_group('note evaluation arguments')
    g.add_argument('-w', dest='window', action='store', type=float,
                   default=0.025,
                   help='evaluation window (+/- the given size) '
                        '[seconds, default=%(default)s]')
    g.add_argument('--delay', action='store', type=float, default=0.,
                   help='add given delay to all detections [seconds]')
    # return the sub-parser and evaluation argument group
    return p, g
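
# Wiring sketch (hypothetical top-level parser; within madmom this is done by
# the `evaluate` program, which registers one sub-parser per evaluation
# module):
#
#     >>> import argparse
#     >>> top_parser = argparse.ArgumentParser()
#     >>> sub_parsers = top_parser.add_subparsers()
#     >>> sub_parser, group = add_parser(sub_parsers)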