Source code for madmom.ml.crf

# encoding: utf-8
"""
This module contains an implementation of Conditional Random Fields (CRFs)
"""
# pylint: disable=no-member
# pylint: disable=invalid-name
import numpy as np

from ..processors import Processor


[docs]class ConditionalRandomField(Processor):
    """
    Implements a linear-chain Conditional Random Field using a
    matrix-based definition:

    .. math::
        P(Y|X) = exp[E(Y,X)] / Σ_{Y'}[E(Y', X)]

        E(Y,X) = Σ_{i=1}^{N} [y_{n-1}^T  A  y_n + y_n^T c + x_n^T W y_n ] +
                y_0^T π + y_N^T τ,

    where Y is a sequence of labels in one-hot encoding and X are the observed
    features.

    Parameters
    ----------
    initial : numpy array
        Initial potential (π) of the CRF. Also defines the number of states.
    final : numpy array
        Potential (τ) of the last variable of the CRF.
    bias : numpy array
        Label bias potential (c).
    transition : numpy array
        Matrix defining the transition potentials (A), where the rows are the
        'from' dimension, and columns the 'to' dimension.
    observation : numpy array
        Matrix defining the observation potentials (W), where the rows are the
        'observation' dimension, and columns the 'state' dimension.

    Examples
    --------
    Create a CRF that emulates a simple hidden markov model. This means that
    the bias and final potential will be constant and thus have no effect
    on the predictions.

    >>> eta = np.spacing(1)  # for numerical stability
    >>> initial = np.log(np.array([0.7, 0.2, 0.1]) + eta)
    >>> final = np.ones(3)
    >>> bias = np.ones(3)
    >>> transition = np.log(np.array([[0.6, 0.2, 0.2],
    ...                               [0.1, 0.7, 0.2],
    ...                               [0.1, 0.1, 0.8]]) + eta)
    >>> observation = np.log(np.array([[0.9, 0.5, 0.1],
    ...                                [0.1, 0.5, 0.1]]) + eta)
    >>> crf = ConditionalRandomField(initial, final, bias,
    ...                              transition, observation)
    >>> crf  # doctest: +ELLIPSIS
    <madmom.ml.crf.ConditionalRandomField object at 0x...>

    We can now decode the most probable state sequence given an observation
    sequence. Since we are emulating a discrete HMM, the observation sequence
    needs to be observation ids in one-hot encoding.

    The following observation sequence corresponds to "0, 0, 1, 0, 1, 1":

    >>> obs = np.array([[1, 0], [1, 0], [0, 1], [1, 0], [0, 1], [0, 1]])

    Now we can find the most likely state sequence:

    >>> crf.process(obs)
    array([0, 0, 1, 1, 1, 1], dtype=uint32)

    """

    def __init__(self, initial, final, bias, transition, observation):
        self.pi = initial
        self.tau = final
        self.c = bias
        self.A = transition
        self.W = observation

[docs]    def process(self, observations, **kwargs):
        """
        Determine the most probable configuration of Y given the state
        sequence x:

        .. math::
            y^* = argmax_y P(Y=y|X=x)

        Parameters
        ----------
        observations : numpy array
            Observations (x) to decode the most probable state sequence for.

        Returns
        -------
        y_star : numpy array
            Most probable state sequence.
        """
        num_observations = len(observations)
        num_states = len(self.pi)
        bt_pointers = np.empty((num_observations, num_states), dtype=np.uint32)
        viterbi = self.pi.copy()
        y_star = np.empty(num_observations, dtype=np.uint32)

        for i in range(num_observations):
            all_trans = self.A + viterbi[:, np.newaxis]
            best_trans = np.max(all_trans, axis=0)
            bt_pointers[i] = np.argmax(all_trans, axis=0)
            viterbi = self.c + np.dot(observations[i], self.W) + best_trans

        viterbi += self.tau

        y_star[-1] = np.argmax(viterbi)
        for i in range(len(y_star) - 1)[::-1]:
            y_star[i] = bt_pointers[i + 1, y_star[i + 1]]

        return y_star