# encoding: utf-8
# pylint: disable=no-member
# pylint: disable=invalid-name
# pylint: disable=too-many-arguments
"""
This module contains global alignment evaluation functionality.
"""
from __future__ import absolute_import, division, print_function
import numpy as np
from . import EvaluationMixin
# constants for the data format
_TIME = 0
_SCORE_POS = 1
# constants for missed events/notes
_MISSED_NOTE_VAL = np.nan
# default settings
WINDOW = 0.25
HISTOGRAM_BINS = [0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 1.]
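

# NOTE: load_alignment() below raises AlignmentFormatError; this minimal
#       definition (an assumption, kept deliberately simple) is included
#       here so that the module is self-contained.
class AlignmentFormatError(Exception):
    """
    Exception to be raised whenever an alignment is not in the expected
    format, i.e. at least one row of two columns (time and score position).
    """
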
def load_alignment(values):
"""
Load the alignment from given values or file.
Parameters
----------
values : str, file handle, list or numpy array
Alignment values.
Returns
-------
numpy array
Time and score position columns.
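
    Examples
    --------
    Illustrative values (any (time, score position) pairs work the same way):

    >>> load_alignment([(0.0, 1.0), (0.5, 2.0)]).shape
    (2, 2)
    >>> load_alignment(None).tolist()
    [[0, -1]]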
"""
if values is None:
# return 'empty' alignment
return np.array([[0, -1]])
elif isinstance(values, (list, np.ndarray)):
values = np.atleast_2d(values)
else:
values = np.loadtxt(values, ndmin=2)
if values.shape[0] < 1 or values.shape[1] < 2 or len(values.shape) > 2:
raise AlignmentFormatError()
return values[:, :2]
def compute_event_alignment(alignment, ground_truth):
"""
This function finds the alignment outputs corresponding to each ground
truth alignment. In general, the alignment algorithm will output more
alignment positions than events in the score, e.g. if it is designed to
output the current alignment at constant intervals.
Parameters
----------
alignment : 2D numpy array
The score follower's resulting alignment. 2D array, first value is the
time in seconds, second value is the beat position.
ground_truth : 2D numpy array
Ground truth of the aligned performance. 2D array, first value is the
time in seconds, second value is the beat position. It can contain the
alignment positions for each individual note. In this case, the
deviation for each note is taken into account.
Returns
-------
numpy array
Array of the same size as `ground_truth`, with each row representing
        the alignment of the corresponding ground truth element.
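
    Examples
    --------
    A small, made-up case: the third ground truth event lies beyond the last
    alignment position and is therefore marked as missed (NaN):

    >>> al = np.array([[0.0, 1.0], [0.5, 1.5], [1.0, 2.0]])
    >>> gt = np.array([[0.1, 1.0], [0.9, 2.0], [1.5, 3.0]])
    >>> compute_event_alignment(al, gt).tolist()
    [[0.0, 1.0], [1.0, 2.0], [nan, nan]]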
"""
# find the spots where the alignment passes the score
gt_pos = ground_truth[:, _SCORE_POS]
al_pos = alignment[:, _SCORE_POS]
    # do not allow the alignment to move backwards: take the running maximum
    # of the score positions (in place, as the original loop did)
    np.maximum.accumulate(al_pos, out=al_pos)
# find corresponding indices
al_idxs = np.searchsorted(al_pos, gt_pos)
    # now, the number of indices in the alignment should correspond
    # to the number of aligned positions in the ground truth
assert len(al_idxs) == len(ground_truth)
    # add a dummy event at the very end of the alignment to be able to
    # process score events which were not reached by the tracker
dummy = [[_MISSED_NOTE_VAL] * alignment.shape[1]]
alignment = np.concatenate((alignment, dummy))
return alignment[al_idxs]
def _attr_name(histogram_bin):
"""
Returns the attribute name for the histogram bin.
Parameters
----------
    histogram_bin : float
Histogram bin.
Returns
-------
str
Attribute name for the `histogram_bin`.
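
    Examples
    --------
    E.g. for a bin bound of 0.25 seconds:

    >>> _attr_name(0.25)
    'below_0_25'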
"""
return 'below_{:.2f}'.format(histogram_bin).replace('.', '_')
def _label(histogram_bin):
"""
Returns the label for the histogram bin.
Parameters
----------
    histogram_bin : float
Histogram bin.
Returns
-------
str
Label for the `histogram_bin`.
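
    Examples
    --------
    E.g. for a bin bound of 0.25 seconds:

    >>> _label(0.25)
    '<0.25'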
"""
return '<{:.2f}'.format(histogram_bin)
def compute_metrics(event_alignment, ground_truth, window, err_hist_bins):
"""
    This function computes the evaluation metrics based on the paper [1]_
    plus a cumulative histogram of absolute errors.
Parameters
----------
event_alignment : 2D numpy array
Sequence alignment as computed by the score follower. 2D array, where
the first column is the alignment time in seconds and the second column
the position in beats. Needs to be the same length as `ground_truth`,
hence for each element in the ground truth the corresponding alignment
has to be available. Use the `compute_event_alignment()` function to
compute this.
ground_truth : 2D numpy array
Ground truth of the aligned performance. 2D array, first value is the
time in seconds, second value is the beat position. It can contain the
alignment positions for each individual note. In this case, the
deviation for each note is taken into account.
    window : float
        Tolerance window in seconds. Alignments that are off by less than
        this amount from the ground truth are considered correct.
err_hist_bins : list
List of error bounds for which the cumulative histogram of absolute
error will be computed (e.g. [0.1, 0.3] will give the percentage of
events aligned with an error smaller than 0.1 and 0.3).
Returns
-------
metrics : dict
        Some of the metrics described in [1]_, plus the error histogram.
References
----------
.. [1] Arshia Cont, Diemo Schwarz, Norbert Schnell and
Christopher Raphael,
"Evaluation of Real-Time Audio-to-Score Alignment",
Proceedings of the 8th International Conference on Music
Information Retrieval (ISMIR), 2007.
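
    Examples
    --------
    A small, made-up case: with a window of 0.25 s, the second event (4 s
    error) counts as misaligned:

    >>> al = np.array([[0.0, 1.0], [1.0, 2.0]])
    >>> gt = np.array([[0.1, 1.0], [5.0, 2.0]])
    >>> m = compute_metrics(compute_event_alignment(al, gt), gt, 0.25,
    ...                     [0.1, 0.5])
    >>> m['misalign_rate'], m['miss_rate']
    (0.5, 0.0)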
"""
abs_error = np.abs(event_alignment[:, _TIME] - ground_truth[:, _TIME])
missed = np.isnan(abs_error)
aligned_error = np.ma.array(abs_error, mask=missed)
with np.errstate(invalid='ignore'):
        # some numpy versions print an 'invalid value' warning here although
        # the NaNs are masked - the code still works correctly.
misaligned = aligned_error > window
correctly_aligned_error = np.ma.array(aligned_error, mask=misaligned)
pc_idx = float(correctly_aligned_error.mask[::-1].argmin())
# we have to typecast everything to float, if we don't the variables will
# be of type np.maskedarray
results = {'miss_rate': float(missed.mean()),
'misalign_rate': float(misaligned.mean()),
'avg_imprecision': float(correctly_aligned_error.mean()),
'stddev_imprecision': float(correctly_aligned_error.std()),
'avg_error': float(aligned_error.mean()),
'stddev_error': float(aligned_error.std()),
'piece_completion': float(
1.0 - pc_idx / correctly_aligned_error.mask.shape[0])}
# convert possibly masked values to NaN. A masked value can occur when
# computing the mean or stddev of values that are all masked
for k, v in results.items():
if v is np.ma.masked_singleton:
            results[k] = np.nan
# consider the case where EVERYTHING was missed or misaligned. the standard
# computation fails then.
if correctly_aligned_error.mask.all():
results['piece_completion'] = 0.0
err_hist, _ = np.histogram(aligned_error.compressed(),
bins=[-np.inf] + err_hist_bins + [np.inf])
cum_hist = np.cumsum(err_hist.astype(float) / aligned_error.shape[0])
# add the cumulative histogram value per value to the results
for hb, p in zip(err_hist_bins, cum_hist):
results[_attr_name(hb)] = p
return results
class AlignmentEvaluation(EvaluationMixin):
"""
Alignment evaluation class for beat-level alignments. Beat-level aligners
output beat positions for points in time, rather than computing a time step
for each individual event in the score. The following metrics are
available:
Parameters
----------
alignment : 2D numpy array or list of tuples
Computed alignment; first value is the time in seconds, second value is
the beat position.
ground_truth : 2D numpy array or list of tuples
Ground truth of the aligned file; first value is the time in seconds,
second value is the beat position. It can contain the alignment
positions for each individual event. In this case, the deviation for
each event is taken into account.
    window : float
        Tolerance window in seconds. Alignments that are off by less than
        this amount from the ground truth are considered correct.
name : str
Name to be displayed.
Attributes
----------
miss_rate : float
Percentage of missed events (events that exist in the reference score,
but are not reported).
misalign_rate : float
Percentage of misaligned events (events with an alignment that is off
by more than a defined `window`).
avg_imprecision : float
Average alignment error of non-misaligned events.
stddev_imprecision : float
Standard deviation of alignment error of non-misaligned events.
avg_error : float
Average alignment error.
stddev_error : float
Standard deviation of alignment error.
    piece_completion : float
        Percentage of events that were followed until the aligner hangs,
        i.e. from that point on there are only misaligned or missed events.
below_{x}_{yy} : float
Percentage of events that are aligned with an error smaller than x.yy
seconds.
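
    Examples
    --------
    A made-up alignment that is off by 0.1 s for both events, i.e. within
    the default 0.25 s window:

    >>> e = AlignmentEvaluation([(0.0, 1.0), (1.0, 2.0)],
    ...                         [(0.1, 1.0), (1.1, 2.0)])
    >>> e.miss_rate, e.misalign_rate
    (0.0, 0.0)
    >>> len(e)
    2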
"""
HISTOGRAM_METRICS = [(_attr_name(hb), _label(hb)) for hb in HISTOGRAM_BINS]
METRIC_NAMES = [
('misalign_rate', 'Misalign Rate'),
('miss_rate', 'Miss Rate'),
('piece_completion', 'Piece Completion'),
('avg_imprecision', 'Avg. Imprecision'),
('stddev_imprecision', 'Std. Dev. of Imprecision'),
('avg_error', 'Avg. Error'),
('stddev_error', 'Std. Dev. of Error'),
] + HISTOGRAM_METRICS
def __init__(self, alignment, ground_truth, window=WINDOW, name=None,
**kwargs):
# pylint: disable=unused-argument
alignment = load_alignment(alignment)
ground_truth = load_alignment(ground_truth)
self.name = name
self.window = window
self._length = len(ground_truth)
# compute all the evaluation metrics
metrics = compute_metrics(
compute_event_alignment(alignment, ground_truth),
ground_truth,
self.window,
HISTOGRAM_BINS
)
# MAGIC! This basically corresponds to doing
# self.misalign_rate = metrics['misalign_rate']
# for each metric
for attr_name, _ in self.METRIC_NAMES:
setattr(self, attr_name, metrics[attr_name])
def __len__(self):
"""Number of ground truth events."""
return self._length
    def tostring(self, histogram=False, **kwargs):
"""
Format the evaluation metrics as a human readable string.
Parameters
----------
histogram : bool
Also output the error histogram.
Returns
-------
str
Evaluation metrics formatted as a human readable string.
"""
ret = ''
if self.name is not None:
ret += '%s\n ' % self.name
ret += 'misalign-rate: %.3f miss-rate: %.3f piece-compl.: %.3f '\
'avg-imprecision: %.3f stddev-imprecision %.3f '\
'avg-error: %.3f stddev-error: %.3f' %\
(self.misalign_rate, self.miss_rate, self.piece_completion,
self.avg_imprecision, self.stddev_imprecision, self.avg_error,
self.stddev_error)
# also output the histogram
if histogram:
ret += '\n '
for attr_name, lbl in self.HISTOGRAM_METRICS:
ret += '{}: {:.2f} '.format(lbl, getattr(self, attr_name))
# return everything
return ret
def _combine_metrics(eval_objects, piecewise):
"""
Combine the metrics of the given evaluation objects.
Parameters
----------
eval_objects : list
Evaluation objects.
piecewise : bool
        If 'True', all evaluation objects are weighted equally; if 'False',
        the evaluation objects are weighted by the number of their events.
Returns
-------
dict
Combined metrics.
"""
if not eval_objects:
raise AssertionError('cannot handle empty eval_objects list yet')
metrics = {}
if piecewise:
total_weight = len(eval_objects)
else:
total_weight = sum(len(e) for e in eval_objects)
for e in eval_objects:
for name, val in e.metrics.items():
if isinstance(val, np.ndarray) or not np.isnan(val):
weight = 1.0 if piecewise else float(len(e))
metrics[name] = \
metrics.get(name, 0.) + (weight / total_weight) * val
# return combined metrics
return metrics
class AlignmentSumEvaluation(AlignmentEvaluation):
"""
Class for averaging alignment evaluation scores, considering the lengths
of the aligned pieces. For a detailed description of the available metrics,
refer to AlignmentEvaluation.
Parameters
----------
eval_objects : list
Evaluation objects.
name : str
Name to be displayed.
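
    Examples
    --------
    A made-up example: one perfectly aligned and one completely misaligned
    single-event piece average to a misalign rate of 0.5:

    >>> e1 = AlignmentEvaluation([(0.0, 1.0)], [(0.0, 1.0)])
    >>> e2 = AlignmentEvaluation([(0.0, 1.0)], [(1.0, 1.0)])
    >>> AlignmentSumEvaluation([e1, e2]).misalign_rate
    0.5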
"""
# pylint: disable=super-init-not-called
def __init__(self, eval_objects, name=None):
        self.name = name or 'weighted mean for %d files' % len(eval_objects)
self.window = eval_objects[0].window
self._length = sum(len(e) for e in eval_objects)
metrics = _combine_metrics(eval_objects, piecewise=False)
for attr_name, _ in self.METRIC_NAMES:
setattr(self, attr_name, metrics[attr_name])
class AlignmentMeanEvaluation(AlignmentEvaluation):
"""
Class for averaging alignment evaluation scores, averaging piecewise (i.e.
ignoring the lengths of the pieces). For a detailed description of the
available metrics, refer to AlignmentEvaluation.
Parameters
----------
eval_objects : list
Evaluation objects.
name : str
Name to be displayed.
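
    Examples
    --------
    A made-up example contrasting the piecewise mean with the event-weighted
    mean of `AlignmentSumEvaluation`: with one misaligned single-event piece
    and one correctly aligned two-event piece, the piecewise mean weights
    both pieces equally, whereas the weighted mean weights them 1:2:

    >>> e1 = AlignmentEvaluation([(0.0, 1.0)], [(1.0, 1.0)])
    >>> e2 = AlignmentEvaluation([(0.0, 1.0), (1.0, 2.0)],
    ...                          [(0.0, 1.0), (1.0, 2.0)])
    >>> AlignmentMeanEvaluation([e1, e2]).misalign_rate
    0.5
    >>> round(AlignmentSumEvaluation([e1, e2]).misalign_rate, 2)
    0.33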
"""
# pylint: disable=super-init-not-called
def __init__(self, eval_objects, name=None):
        self.name = name or 'piecewise mean for %d files' % len(eval_objects)
self.window = eval_objects[0].window
self._length = len(eval_objects)
metrics = _combine_metrics(eval_objects, piecewise=True)
for attr_name, _ in self.METRIC_NAMES:
setattr(self, attr_name, metrics[attr_name])
def add_parser(parser):
"""
Add an alignment evaluation sub-parser to an existing parser.
Parameters
----------
    parser : argparse sub-parsers instance
        Existing argparse sub-parsers object, as returned by
        `ArgumentParser.add_subparsers()`.
Returns
-------
sub_parser : argparse sub-parser instance
Alignment evaluation sub-parser.
parser_group : argparse argument group
Alignment evaluation argument group.
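
    Examples
    --------
    A minimal usage sketch; the returned sub-parser and argument group can
    be extended with further arguments:

    >>> import argparse
    >>> parser = argparse.ArgumentParser()
    >>> sub_parser, group = add_parser(parser.add_subparsers())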
"""
import argparse
from . import evaluation_io
p = parser.add_parser(
'alignment', help='alignment evaluation',
formatter_class=argparse.RawDescriptionHelpFormatter,
description='''
This script evaluates pairs of files containing the true and computed
alignments of audio files. Suffixes can be given to filter them from the
list of files.
Each line represents an alignment point and must have the following format
with values being separated by whitespace:
`audio_time score_position`
    Note that this script enforces the alignment to go monotonically forward,
    meaning that if an event 'e' is aligned at time 't_e', a following event
    'f' will be aligned at max(t_e, t_f).
Lines starting with # are treated as comments and are ignored.
''')
p.set_defaults(eval=AlignmentEvaluation, sum_eval=AlignmentSumEvaluation,
mean_eval=AlignmentMeanEvaluation)
# files used for evaluation
_, f = evaluation_io(p, ann_suffix='.alignment',
det_suffix='.alignment.txt')
# evaluation parameters
g = p.add_argument_group('alignment evaluation arguments')
g.add_argument('--window', type=float, default=WINDOW,
help='tolerance window for misaligned notes '
'[seconds, default: %(default)s]')
# add histogram option to formatting group
f.add_argument('--histogram', action='store_true',
help='also output the error histogram')
# return the sub-parser and evaluation argument group
return p, g