# encoding: utf-8
# pylint: disable=no-member
# pylint: disable=invalid-name
# pylint: disable=too-many-arguments
# pylint: disable=wrong-import-position
"""
Evaluation package.
"""
from __future__ import absolute_import, division, print_function
import numpy as np


# evaluation helper functions
def find_closest_matches(detections, annotations):
"""
Find the closest annotation for each detection.
Parameters
----------
detections : list or numpy array
Detected events.
annotations : list or numpy array
Annotated events.
Returns
-------
indices : numpy array
Indices of the closest matches.
Notes
-----
The sequences must be ordered.
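
    Examples
    --------
    A small example with arbitrarily chosen values; `.tolist()` is used only
    to obtain stable doctest output:

    >>> find_closest_matches([0.75, 2.25], [1, 2, 3]).tolist()
    [0, 1]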
"""
# make sure the arrays have the correct types
    detections = np.asarray(detections, dtype=float)
    annotations = np.asarray(annotations, dtype=float)
# TODO: right now, it only works with 1D arrays
if detections.ndim > 1 or annotations.ndim > 1:
raise NotImplementedError('please implement multi-dim support')
# if no detections or annotations are given
if len(detections) == 0 or len(annotations) == 0:
        # return an empty array
        return np.zeros(0, dtype=int)
# if only a single annotation is given
if len(annotations) == 1:
        # all detections are matched to the only annotation, i.e. index 0
        return np.zeros(len(detections), dtype=int)
# solution found at: http://stackoverflow.com/questions/8914491/
    # determine the position where each detection would be inserted
    indices = annotations.searchsorted(detections)
    # clip the indices, so we can always look at the left & right neighbours
    indices = np.clip(indices, 1, len(annotations) - 1)
    left = annotations[indices - 1]
    right = annotations[indices]
    # decrease an index by 1 if the left neighbour is closer
    indices -= detections - left < right - detections
# return the indices of the closest matches
return indices


def calc_errors(detections, annotations, matches=None):
"""
Errors of the detections to the closest annotations.
Parameters
----------
detections : list or numpy array
Detected events.
annotations : list or numpy array
Annotated events.
    matches : list or numpy array, optional
Indices of the closest events.
Returns
-------
errors : numpy array
Errors.
Notes
-----
The sequences must be ordered. To speed up the calculation, a list of
pre-computed indices of the closest matches can be used.
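
    Examples
    --------
    Continuing the example values from `find_closest_matches` (detection 0.75
    is matched to annotation 1, detection 2.25 to annotation 2):

    >>> calc_errors([0.75, 2.25], [1, 2, 3]).tolist()
    [-0.25, 0.25]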
"""
# make sure the arrays have the correct types
    detections = np.asarray(detections, dtype=float)
    annotations = np.asarray(annotations, dtype=float)
    if matches is not None:
        matches = np.asarray(matches, dtype=int)
# TODO: right now, it only works with 1D arrays
if detections.ndim > 1 or annotations.ndim > 1:
raise NotImplementedError('please implement multi-dim support')
# if no detections or annotations are given
if len(detections) == 0 or len(annotations) == 0:
        # return an empty array
        return np.zeros(0, dtype=float)
# determine the closest annotations
if matches is None:
matches = find_closest_matches(detections, annotations)
# calc error relative to those annotations
errors = detections - annotations[matches]
# return the errors
return errors


def calc_absolute_errors(detections, annotations, matches=None):
"""
Absolute errors of the detections to the closest annotations.
Parameters
----------
detections : list or numpy array
Detected events.
annotations : list or numpy array
Annotated events.
    matches : list or numpy array, optional
Indices of the closest events.
Returns
-------
errors : numpy array
Absolute errors.
Notes
-----
The sequences must be ordered. To speed up the calculation, a list of
pre-computed indices of the closest matches can be used.
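
    Examples
    --------
    The same example values as used for `calc_errors`:

    >>> calc_absolute_errors([0.75, 2.25], [1, 2, 3]).tolist()
    [0.25, 0.25]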
"""
# make sure the arrays have the correct types
    detections = np.asarray(detections, dtype=float)
    annotations = np.asarray(annotations, dtype=float)
    if matches is not None:
        matches = np.asarray(matches, dtype=int)
# TODO: right now, it only works with 1D arrays
if detections.ndim > 1 or annotations.ndim > 1:
raise NotImplementedError('please implement multi-dim support')
# return the errors
return np.abs(calc_errors(detections, annotations, matches))


def calc_relative_errors(detections, annotations, matches=None):
"""
Relative errors of the detections to the closest annotations.
Parameters
----------
detections : list or numpy array
Detected events.
annotations : list or numpy array
Annotated events.
    matches : list or numpy array, optional
Indices of the closest events.
Returns
-------
errors : numpy array
Relative errors.
Notes
-----
The sequences must be ordered. To speed up the calculation, a list of
pre-computed indices of the closest matches can be used.
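
    Examples
    --------
    The errors are scaled as ``abs(1 - (error / annotation))``; again the
    same example values as used for `calc_errors`:

    >>> calc_relative_errors([0.75, 2.25], [1, 2, 3]).tolist()
    [1.25, 0.875]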
"""
# make sure the arrays have the correct types
    detections = np.asarray(detections, dtype=float)
    annotations = np.asarray(annotations, dtype=float)
    if matches is not None:
        matches = np.asarray(matches, dtype=int)
# TODO: right now, it only works with 1D arrays
if detections.ndim > 1 or annotations.ndim > 1:
raise NotImplementedError('please implement multi-dim support')
# if no detections or annotations are given
if len(detections) == 0 or len(annotations) == 0:
        # return an empty array
        return np.zeros(0, dtype=float)
# determine the closest annotations
if matches is None:
matches = find_closest_matches(detections, annotations)
    # calculate the (signed) errors
errors = calc_errors(detections, annotations, matches)
# return the relative errors
return np.abs(1 - (errors / annotations[matches]))


# abstract evaluation base class
class EvaluationMixin(object):
"""
Evaluation mixin class.
    This class has a `name` attribute which is used for display purposes and
    defaults to `None`.

    `METRIC_NAMES` is a list of tuples, containing the attribute's name and
    the corresponding label, e.g.:

    ::

        METRIC_NAMES = [
            ('precision', 'Precision'),
            ('recall', 'Recall'),
            ('fmeasure', 'F-measure'),
        ]

    The attributes defined in `METRIC_NAMES` will be provided as an ordered
    dictionary as the `metrics` property unless the subclass overwrites the
    property.

    `FLOAT_FORMAT` is used to format floats.

    """
name = None
METRIC_NAMES = []
FLOAT_FORMAT = '{:.3f}'
@property
def metrics(self):
"""Metrics as a dictionary."""
from collections import OrderedDict
metrics = OrderedDict()
for metric in [m[0] for m in self.METRIC_NAMES]:
metrics[metric] = getattr(self, metric)
return metrics
def __len__(self):
"""Length of the evaluation object."""
raise NotImplementedError('must be implemented by subclass.')

    def tostring(self, **kwargs):
"""
Format the evaluation metrics as a human readable string.
Returns
-------
str
Evaluation metrics formatted as a human readable string.
Notes
-----
        This is a fallback method formatting the `metrics` dictionary in a
        human readable way. Classes inheriting from this mixin class should
        provide a more suitable method.
"""
# pylint: disable=unused-argument
import pprint
return pprint.pformat(dict(self.metrics), indent=4)


# evaluation classes
class SimpleEvaluation(EvaluationMixin):
"""
Simple Precision, Recall, F-measure and Accuracy evaluation based on the
numbers of true/false positive/negative detections.
Parameters
----------
num_tp : int
Number of true positive detections.
num_fp : int
Number of false positive detections.
num_tn : int
Number of true negative detections.
num_fn : int
Number of false negative detections.
name : str
Name to be displayed.
Notes
-----
This class is only suitable for a 1-class evaluation problem.
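
    Examples
    --------
    A minimal example with arbitrarily chosen counts:

    >>> e = SimpleEvaluation(num_tp=3, num_fp=1, num_fn=1)
    >>> e.precision
    0.75
    >>> e.recall
    0.75
    >>> e.metrics['fmeasure']
    0.75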
"""
METRIC_NAMES = [
('num_tp', 'No. of true positives'),
('num_fp', 'No. of false positives'),
('num_tn', 'No. of true negatives'),
('num_fn', 'No. of false negatives'),
('num_annotations', 'No. Annotations'),
('precision', 'Precision'),
('recall', 'Recall'),
('fmeasure', 'F-measure'),
('accuracy', 'Accuracy'),
]
def __init__(self, num_tp=0, num_fp=0, num_tn=0, num_fn=0, name=None,
**kwargs):
# pylint: disable=unused-argument
# hidden variables, to be able to overwrite them in subclasses
self._num_tp = int(num_tp)
self._num_fp = int(num_fp)
self._num_tn = int(num_tn)
self._num_fn = int(num_fn)
# name of the evaluation
self.name = name
@property
def num_tp(self):
"""Number of true positive detections."""
return self._num_tp
@property
def num_fp(self):
"""Number of false positive detections."""
return self._num_fp
@property
def num_tn(self):
"""Number of true negative detections."""
return self._num_tn
@property
def num_fn(self):
"""Number of false negative detections."""
return self._num_fn
@property
def num_annotations(self):
"""Number of annotations."""
return self.num_tp + self.num_fn
def __len__(self):
# the length equals the number of annotations
return self.num_annotations
@property
def precision(self):
"""Precision."""
# correct / retrieved
retrieved = float(self.num_tp + self.num_fp)
# if there are no positive predictions, none of them are wrong
if retrieved == 0:
return 1.
return self.num_tp / retrieved
@property
def recall(self):
"""Recall."""
# correct / relevant
relevant = float(self.num_tp + self.num_fn)
# if there are no positive annotations, we recalled all of them
if relevant == 0:
return 1.
return self.num_tp / relevant
@property
def fmeasure(self):
"""F-measure."""
# 2pr / (p+r)
numerator = 2. * self.precision * self.recall
if numerator == 0:
return 0.
return numerator / (self.precision + self.recall)
@property
def accuracy(self):
"""Accuracy."""
# acc: (TP + TN) / (TP + FP + TN + FN)
denominator = self.num_fp + self.num_fn + self.num_tp + self.num_tn
if denominator == 0:
return 1.
numerator = float(self.num_tp + self.num_tn)
if numerator == 0:
return 0.
return numerator / denominator

    def tostring(self, **kwargs):
"""
Format the evaluation metrics as a human readable string.
Returns
-------
str
Evaluation metrics formatted as a human readable string.
"""
ret = ''
if self.name is not None:
ret += '%s\n ' % self.name
ret += 'Annotations: %5d TP: %5d FP: %5d FN: %5d ' \
'Precision: %.3f Recall: %.3f F-measure: %.3f Acc: %.3f' % \
(self.num_annotations, self.num_tp, self.num_fp, self.num_fn,
self.precision, self.recall, self.fmeasure, self.accuracy)
return ret
def __str__(self):
return self.tostring()


# evaluate Precision, Recall, F-measure and Accuracy with lists or numpy arrays
class Evaluation(SimpleEvaluation):
"""
Evaluation class for measuring Precision, Recall and F-measure based on
numpy arrays or lists with true/false positive/negative detections.
Parameters
----------
tp : list or numpy array
True positive detections.
fp : list or numpy array
False positive detections.
tn : list or numpy array
True negative detections.
fn : list or numpy array
False negative detections.
name : str
Name to be displayed.
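
    Examples
    --------
    Instead of bare counts, the actual events are given; a hypothetical
    example with event times in seconds:

    >>> e = Evaluation(tp=[0.5, 1.5, 2.5], fp=[3.0], fn=[4.0])
    >>> e.num_tp, e.num_fp, e.num_fn
    (3, 1, 1)
    >>> e.precision
    0.75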
"""
METRIC_NAMES = [
('tp', 'True positives'),
('fp', 'False positives'),
('tn', 'True negatives'),
('fn', 'False negatives'),
('num_tp', 'No. of true positives'),
('num_fp', 'No. of false positives'),
('num_tn', 'No. of true negatives'),
('num_fn', 'No. of false negatives'),
('num_annotations', 'No. Annotations'),
('precision', 'Precision'),
('recall', 'Recall'),
('fmeasure', 'F-measure'),
('accuracy', 'Accuracy'),
]
def __init__(self, tp=None, fp=None, tn=None, fn=None, **kwargs):
# set default values
if tp is None:
tp = []
if fp is None:
fp = []
if tn is None:
tn = []
if fn is None:
fn = []
# instantiate a SimpleEvaluation object
super(Evaluation, self).__init__(**kwargs)
# convert everything to numpy arrays and save them
        self.tp = np.asarray(list(tp), dtype=float)
        self.fp = np.asarray(list(fp), dtype=float)
        self.tn = np.asarray(list(tn), dtype=float)
        self.fn = np.asarray(list(fn), dtype=float)
@property
def num_tp(self):
"""Number of true positive detections."""
return len(self.tp)
@property
def num_fp(self):
"""Number of false positive detections."""
return len(self.fp)
@property
def num_tn(self):
"""Number of true negative detections."""
return len(self.tn)
@property
def num_fn(self):
"""Number of false negative detections."""
return len(self.fn)


# class for evaluation of Precision, Recall, F-measure with 2D arrays
class MultiClassEvaluation(Evaluation):
"""
Evaluation class for measuring Precision, Recall and F-measure based on
2D numpy arrays with true/false positive/negative detections.
Parameters
----------
tp : list of tuples or numpy array, shape (num_tp, 2)
True positive detections.
fp : list of tuples or numpy array, shape (num_fp, 2)
False positive detections.
tn : list of tuples or numpy array, shape (num_tn, 2)
True negative detections.
fn : list of tuples or numpy array, shape (num_fn, 2)
False negative detections.
name : str
Name to be displayed.
Notes
-----
    The second item of the tuples or the second column of the arrays denotes
    the class the detection belongs to.
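
    Examples
    --------
    A hypothetical two-class example; the second element of each tuple is the
    class of the event:

    >>> e = MultiClassEvaluation(tp=[(0.5, 1), (1.5, 2)], fp=[(2.0, 1)])
    >>> e.num_tp, e.num_fp
    (2, 1)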
"""
def __init__(self, tp=None, fp=None, tn=None, fn=None, **kwargs):
# set default values
if tp is None:
tp = np.zeros((0, 2))
if fp is None:
fp = np.zeros((0, 2))
if tn is None:
tn = np.zeros((0, 2))
if fn is None:
fn = np.zeros((0, 2))
super(MultiClassEvaluation, self).__init__(**kwargs)
        self.tp = np.asarray(tp, dtype=float)
        self.fp = np.asarray(fp, dtype=float)
        self.tn = np.asarray(tn, dtype=float)
        self.fn = np.asarray(fn, dtype=float)

    def tostring(self, verbose=False, **kwargs):
"""
Format the evaluation metrics as a human readable string.
Parameters
----------
verbose : bool
Add evaluation for individual classes.
Returns
-------
str
Evaluation metrics formatted as a human readable string.
"""
ret = ''
if verbose:
# extract all classes
classes = []
            # Note: test `.size` instead of `.any()`, since the latter is
            #       also False for non-empty, but all-zero arrays
            if self.tp.size:
                classes = np.append(classes, np.unique(self.tp[:, 1]))
            if self.fp.size:
                classes = np.append(classes, np.unique(self.fp[:, 1]))
            if self.tn.size:
                classes = np.append(classes, np.unique(self.tn[:, 1]))
            if self.fn.size:
                classes = np.append(classes, np.unique(self.fn[:, 1]))
for cls in sorted(np.unique(classes)):
# extract the TP, FP, TN and FN of this class
tp = self.tp[self.tp[:, 1] == cls]
fp = self.fp[self.fp[:, 1] == cls]
tn = self.tn[self.tn[:, 1] == cls]
fn = self.fn[self.fn[:, 1] == cls]
# evaluate them
e = Evaluation(tp, fp, tn, fn, name='Class %s' % cls)
# append to the output string
ret += ' %s\n' % e.tostring(verbose=False)
# normal formatting
        ret += 'Annotations: %5d TP: %5d FP: %5d FN: %5d ' \
               'Precision: %.3f Recall: %.3f F-measure: %.3f Acc: %.3f' % \
               (self.num_annotations, self.num_tp, self.num_fp, self.num_fn,
                self.precision, self.recall, self.fmeasure, self.accuracy)
# return
return ret


# class for summing Evaluations
class SumEvaluation(SimpleEvaluation):
"""
Simple class for summing evaluations.
Parameters
----------
eval_objects : list
Evaluation objects.
name : str
Name to be displayed.
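
    Examples
    --------
    Summing adds up the raw counts of the individual evaluations before
    computing the metrics:

    >>> e1 = SimpleEvaluation(num_tp=3, num_fp=1, num_fn=1)
    >>> e2 = SimpleEvaluation(num_tp=1, num_fp=3, num_fn=3)
    >>> s = SumEvaluation([e1, e2])
    >>> s.num_tp, s.num_annotations
    (4, 8)
    >>> s.precision
    0.5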
"""
def __init__(self, eval_objects, name=None):
# pylint: disable=super-init-not-called
        # Note: we want to inherit the evaluation functions/properties, so we
        #       do not call super().__init__(), but must take care of `name`
if not isinstance(eval_objects, list):
# wrap the given eval_object in a list
eval_objects = [eval_objects]
self.eval_objects = eval_objects
self.name = name or 'sum for %d files' % len(self)
def __len__(self):
# just use the length of the evaluation objects
return len(self.eval_objects)
# redefine the counters (number of TP, FP, TN, FN & number of annotations)
@property
def num_tp(self):
"""Number of true positive detections."""
return sum(e.num_tp for e in self.eval_objects)
@property
def num_fp(self):
"""Number of false positive detections."""
return sum(e.num_fp for e in self.eval_objects)
@property
def num_tn(self):
"""Number of true negative detections."""
return sum(e.num_tn for e in self.eval_objects)
@property
def num_fn(self):
"""Number of false negative detections."""
return sum(e.num_fn for e in self.eval_objects)
@property
def num_annotations(self):
"""Number of annotations."""
return sum(e.num_annotations for e in self.eval_objects)


# class for averaging Evaluations
class MeanEvaluation(SumEvaluation):
"""
Simple class for averaging evaluation.
Parameters
----------
eval_objects : list
Evaluation objects.
name : str
Name to be displayed.
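
    Examples
    --------
    In contrast to `SumEvaluation`, the metrics of the individual evaluations
    are averaged (`float()` is used only for stable doctest output):

    >>> e1 = SimpleEvaluation(num_tp=3, num_fp=1, num_fn=1)
    >>> e2 = SimpleEvaluation(num_tp=1, num_fp=3, num_fn=3)
    >>> m = MeanEvaluation([e1, e2])
    >>> float(m.precision)
    0.5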
"""
def __init__(self, eval_objects, name=None, **kwargs):
super(MeanEvaluation, self).__init__(eval_objects, **kwargs)
# handle the 'name' here to be able to set a different default value
self.name = name or 'mean for %d files' % len(self)
# overwrite the properties to calculate the mean instead of the sum
@property
def num_tp(self):
"""Number of true positive detections."""
if not self.eval_objects:
return 0.
return np.nanmean([e.num_tp for e in self.eval_objects])
@property
def num_fp(self):
"""Number of false positive detections."""
if not self.eval_objects:
return 0.
return np.nanmean([e.num_fp for e in self.eval_objects])
@property
def num_tn(self):
"""Number of true negative detections."""
if not self.eval_objects:
return 0.
return np.nanmean([e.num_tn for e in self.eval_objects])
@property
def num_fn(self):
"""Number of false negative detections."""
if not self.eval_objects:
return 0.
return np.nanmean([e.num_fn for e in self.eval_objects])
@property
def num_annotations(self):
"""Number of annotations."""
if not self.eval_objects:
return 0.
return np.nanmean([e.num_annotations for e in self.eval_objects])
@property
def precision(self):
"""Precision."""
return np.nanmean([e.precision for e in self.eval_objects])
@property
def recall(self):
"""Recall."""
return np.nanmean([e.recall for e in self.eval_objects])
@property
def fmeasure(self):
"""F-measure."""
return np.nanmean([e.fmeasure for e in self.eval_objects])
@property
def accuracy(self):
"""Accuracy."""
return np.nanmean([e.accuracy for e in self.eval_objects])

    def tostring(self, **kwargs):
"""
Format the evaluation metrics as a human readable string.
Returns
-------
str
Evaluation metrics formatted as a human readable string.
"""
        ret = ''
        if self.name is not None:
            ret += '%s\n ' % self.name
        # TODO: unify this with SimpleEvaluation.tostring() but add an option
        #       to provide field formatters (e.g. 3d or 5.2f)
        # format with floats instead of integers
        ret += 'Annotations: %5.2f TP: %5.2f FP: %5.2f FN: %5.2f ' \
               'Precision: %.3f Recall: %.3f F-measure: %.3f Acc: %.3f' % \
               (self.num_annotations, self.num_tp, self.num_fp, self.num_fn,
                self.precision, self.recall, self.fmeasure, self.accuracy)
return ret


def tostring(eval_objects, **kwargs):
"""
Format the given evaluation objects as human readable strings.
Parameters
----------
eval_objects : list
Evaluation objects.
Returns
-------
str
Evaluation metrics formatted as a human readable string.
"""
# pylint: disable=unused-argument
return '\n'.join([e.tostring() for e in eval_objects])


def tocsv(eval_objects, metric_names=None, float_format='{:.3f}', **kwargs):
"""
Format the given evaluation objects as a CSV table.
Parameters
----------
eval_objects : list
Evaluation objects.
metric_names : list of tuples, optional
        List of tuples defining the name of the property corresponding to the
        metric and the metric label, e.g. ('fp', 'False Positives').
float_format : str, optional
How to format the metrics.
Returns
-------
str
CSV table representation of the evaluation objects.
Notes
-----
If no `metric_names` are given, they will be extracted from the first
evaluation object.
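
    Examples
    --------
    A small, hypothetical example, selecting only two metrics:

    >>> e = SimpleEvaluation(num_tp=3, num_fp=1, num_fn=1, name='file_1')
    >>> metrics = [('precision', 'Precision'), ('recall', 'Recall')]
    >>> print(tocsv([e], metric_names=metrics))
    Name,Precision,Recall
    file_1,0.750,0.750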
"""
# pylint: disable=unused-argument
if metric_names is None:
# get the evaluation metrics from the first evaluation object
metric_names = eval_objects[0].METRIC_NAMES
metric_names, metric_labels = list(zip(*metric_names))
# add header
lines = ['Name,' + ','.join(metric_labels)]
# TODO: use e.metrics dict?
# add the evaluation objects
for e in eval_objects:
values = [float_format.format(getattr(e, mn)) for mn in metric_names]
lines.append(e.name + ',' + ','.join(values))
# return everything
return '\n'.join(lines)


def totex(eval_objects, metric_names=None, float_format='{:.3f}', **kwargs):
"""
Format the given evaluation objects as a LaTeX table.
Parameters
----------
eval_objects : list
Evaluation objects.
metric_names : list of tuples, optional
        List of tuples defining the name of the property corresponding to the
        metric and the metric label, e.g. ('fp', 'False Positives').
float_format : str, optional
How to format the metrics.
Returns
-------
str
LaTeX table representation of the evaluation objects.
Notes
-----
If no `metric_names` are given, they will be extracted from the first
evaluation object.
"""
# pylint: disable=unused-argument
if metric_names is None:
# get the evaluation metrics from the first evaluation object
metric_names = eval_objects[0].METRIC_NAMES
metric_names, metric_labels = list(zip(*metric_names))
# add header
lines = ['Name & ' + ' & '.join(metric_labels) + '\\\\']
# TODO: use e.metrics dict
# TODO: add a generic totable() function which accepts columns separator,
# newline stuff (e.g. tex \\\\) and others
# add the evaluation objects
for e in eval_objects:
values = [float_format.format(getattr(e, mn)) for mn in metric_names]
lines.append(e.name + ' & ' + ' & '.join(values) + '\\\\')
# return everything
return '\n'.join(lines)


def evaluation_io(parser, ann_suffix, det_suffix, ann_dir=None, det_dir=None):
"""
Add evaluation input/output and formatting related arguments to an existing
parser object.
Parameters
----------
parser : argparse parser instance
Existing argparse parser object.
ann_suffix : str
Suffix of the annotation files.
det_suffix : str
Suffix of the detection files.
ann_dir : str, optional
Use only annotations from this folder (and sub-folders).
det_dir : str, optional
Use only detections from this folder (and sub-folders).
Returns
-------
io_group : argparse argument group
Evaluation input / output argument group.
formatter_group : argparse argument group
Evaluation formatter argument group.
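
    Examples
    --------
    A minimal usage sketch; the suffixes are hypothetical placeholders:

    >>> import argparse
    >>> parser = argparse.ArgumentParser()
    >>> g, f = evaluation_io(parser, ann_suffix='.onsets',
    ...                      det_suffix='.onsets.txt')
    >>> args = parser.parse_args(['--csv'])
    >>> args.output_formatter is tocsv
    True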
"""
import sys
import argparse
# general input output file handling
parser.add_argument('files', nargs='*',
help='files (or folders) to be evaluated')
parser.add_argument('-o', dest='outfile', type=argparse.FileType('w'),
default=sys.stdout,
help='output file [default: STDOUT]')
# file suffixes used for evaluation
g = parser.add_argument_group('file/folder/suffix arguments')
g.add_argument('-a', dest='ann_suffix', action='store', default=ann_suffix,
help='suffix of the annotation files '
'[default: %(default)s]')
g.add_argument('--ann_dir', action='store', default=ann_dir,
help='search only this directory (recursively) for '
'annotation files [default: %(default)s]')
g.add_argument('-d', dest='det_suffix', action='store', default=det_suffix,
help='suffix of the detection files [default: %(default)s]')
g.add_argument('--det_dir', action='store', default=det_dir,
help='search only this directory (recursively) for '
'detection files [default: %(default)s]')
# option to ignore non-existing detections
g.add_argument('-i', '--ignore_non_existing', action='store_true',
help='ignore non-existing detections [default: raise a '
'warning and assume empty detections]')
# verbose
parser.add_argument('-v', '--verbose', action='count', default=0,
help='increase verbosity level')
# option to suppress warnings
parser.add_argument('-q', '--quiet', action='store_true',
help='suppress any warnings')
# output format options
parser.set_defaults(output_formatter=tostring)
f = parser.add_argument_group('formatting arguments')
formats = f.add_mutually_exclusive_group()
formats.add_argument('--tex', dest='output_formatter',
action='store_const', const=totex,
help='format output to be used in .tex files')
formats.add_argument('--csv', dest='output_formatter',
action='store_const', const=tocsv,
help='format output to be used in .csv files')
    # return the argument groups so the caller can add more options
    return g, f


# finally import the submodules
from . import onsets, beats, notes, tempo, alignment