#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Onset detection
===============
.. autosummary::
    :toctree: generated/
    onset_detect
    onset_backtrack
    onset_strength
    onset_strength_multi
"""
import numpy as np
import scipy
import scipy.ndimage
import scipy.signal

from . import cache
from . import core
from . import util
from .util.exceptions import ParameterError
from .feature.spectral import melspectrogram
__all__ = ['onset_detect',
           'onset_strength',
           'onset_strength_multi',
           'onset_backtrack']
def onset_detect(y=None, sr=22050, onset_envelope=None, hop_length=512,
                 backtrack=False, energy=None,
                 units='frames', **kwargs):
    """Basic onset detector.  Locate note onset events by picking peaks in an
    onset strength envelope.

    The `peak_pick` parameters were chosen by large-scale hyper-parameter
    optimization over the dataset provided by [1]_.

    .. [1] https://github.com/CPJKU/onset_db

    Parameters
    ----------
    y          : np.ndarray [shape=(n,)]
        audio time series

    sr         : number > 0 [scalar]
        sampling rate of `y`

    onset_envelope     : np.ndarray [shape=(m,)]
        (optional) pre-computed onset strength envelope.
        The array is not modified; normalization is applied to a copy.

    hop_length : int > 0 [scalar]
        hop length (in samples)

    units : {'frames', 'samples', 'time'}
        The units to encode detected onset events in.
        By default, 'frames' are used.

    backtrack : bool
        If `True`, detected onset events are backtracked to the nearest
        preceding minimum of `energy`.

        This is primarily useful when using onsets as slice points for
        segmentation.

    energy : np.ndarray [shape=(m,)] (optional)
        An energy function to use for backtracking detected onset events.
        If none is provided, then the (normalized) `onset_envelope` is used.

    kwargs : additional keyword arguments
        Additional parameters for peak picking.

        See `librosa.util.peak_pick` for details.

    Returns
    -------
    onsets : np.ndarray [shape=(n_onsets,)]
        estimated positions of detected onsets, in whichever units
        are specified.  By default, frame indices.

        .. note::
            If no onset strength could be detected (the envelope is
            constant), onset_detect returns an empty integer array.

    Raises
    ------
    ParameterError
        if neither `y` nor `onset_envelope` are provided

        or if `units` is not one of 'frames', 'samples', or 'time'

    See Also
    --------
    onset_strength : compute onset strength per-frame
    onset_backtrack : backtracking onset events
    librosa.util.peak_pick : pick peaks from a time series

    Examples
    --------
    Get onset times from a signal

    >>> y, sr = librosa.load(librosa.util.example_audio_file(),
    ...                      offset=30, duration=2.0)
    >>> onset_frames = librosa.onset.onset_detect(y=y, sr=sr)
    >>> librosa.frames_to_time(onset_frames, sr=sr)
    array([ 0.07 ,  0.395,  0.511,  0.627,  0.766,  0.975,
            1.207,  1.324,  1.44 ,  1.788,  1.881])

    Or use a pre-computed onset envelope

    >>> o_env = librosa.onset.onset_strength(y, sr=sr)
    >>> times = librosa.frames_to_time(np.arange(len(o_env)), sr=sr)
    >>> onset_frames = librosa.onset.onset_detect(onset_envelope=o_env, sr=sr)

    >>> import matplotlib.pyplot as plt
    >>> D = librosa.stft(y)
    >>> plt.figure()
    >>> ax1 = plt.subplot(2, 1, 1)
    >>> librosa.display.specshow(librosa.amplitude_to_db(D, ref=np.max),
    ...                          x_axis='time', y_axis='log')
    >>> plt.title('Power spectrogram')
    >>> plt.subplot(2, 1, 2, sharex=ax1)
    >>> plt.plot(times, o_env, label='Onset strength')
    >>> plt.vlines(times[onset_frames], 0, o_env.max(), color='r', alpha=0.9,
    ...            linestyle='--', label='Onsets')
    >>> plt.axis('tight')
    >>> plt.legend(frameon=True, framealpha=0.75)
    """

    # First, get the frame->onset strength profile if we don't already have one
    if onset_envelope is None:
        if y is None:
            raise ParameterError('y or onset_envelope must be provided')

        onset_envelope = onset_strength(y=y, sr=sr, hop_length=hop_length)

    # Shift onset envelope up to be non-negative
    # (a common normalization step to make the threshold more consistent).
    # Computed out-of-place so a caller-supplied envelope is left untouched.
    onset_envelope = onset_envelope - onset_envelope.min()

    # Do we have any onsets to grab?
    if not onset_envelope.any():
        # The builtin `int` replaces the `np.int` alias removed from NumPy
        return np.array([], dtype=int)

    # Normalize onset strength function to [0, 1] range.
    # Out-of-place true division also works for integer-typed envelopes.
    onset_envelope = onset_envelope / onset_envelope.max()

    # These parameter settings found by large-scale search
    kwargs.setdefault('pre_max', 0.03*sr//hop_length)       # 30ms
    kwargs.setdefault('post_max', 0.00*sr//hop_length + 1)  # 0ms
    kwargs.setdefault('pre_avg', 0.10*sr//hop_length)       # 100ms
    kwargs.setdefault('post_avg', 0.10*sr//hop_length + 1)  # 100ms
    kwargs.setdefault('wait', 0.03*sr//hop_length)          # 30ms
    kwargs.setdefault('delta', 0.07)

    # Peak pick the onset envelope
    onsets = util.peak_pick(onset_envelope, **kwargs)

    # Optionally backtrack the events
    if backtrack:
        if energy is None:
            energy = onset_envelope

        onsets = onset_backtrack(onsets, energy)

    # Convert from frame indices to the requested units
    if units == 'frames':
        pass
    elif units == 'samples':
        onsets = core.frames_to_samples(onsets, hop_length=hop_length)
    elif units == 'time':
        onsets = core.frames_to_time(onsets, hop_length=hop_length, sr=sr)
    else:
        raise ParameterError('Invalid unit type: {}'.format(units))

    return onsets
def onset_strength(y=None, sr=22050, S=None, lag=1, max_size=1,
                   detrend=False, center=True,
                   feature=None, aggregate=None,
                   centering=None,
                   **kwargs):
    """Compute a spectral flux onset strength envelope.

    Onset strength at time `t` is determined by:

    `mean_f max(0, S[f, t] - ref_S[f, t - lag])`

    where `ref_S` is `S` after local max filtering along the frequency
    axis [1]_.

    By default, if a time series `y` is provided, S will be the
    log-power Mel spectrogram.

    .. [1] Böck, Sebastian, and Gerhard Widmer.
           "Maximum filter vibrato suppression for onset detection."
           16th International Conference on Digital Audio Effects,
           Maynooth, Ireland. 2013.

    Parameters
    ----------
    y        : np.ndarray [shape=(n,)]
        audio time-series

    sr       : number > 0 [scalar]
        sampling rate of `y`

    S        : np.ndarray [shape=(d, m)]
        pre-computed (log-power) spectrogram

    lag      : int > 0
        time lag for computing differences

    max_size : int > 0
        size (in frequency bins) of the local max filter.
        set to `1` to disable filtering.

    detrend : bool [scalar]
        Filter the onset strength to remove the DC component

    center : bool [scalar]
        Shift the onset function by `n_fft / (2 * hop_length)` frames

    feature : function
        Function for computing time-series features, eg, scaled spectrograms.
        By default, uses `librosa.feature.melspectrogram` with `fmax=11025.0`

    aggregate : function
        Aggregation function to use when combining onsets
        at different frequency bins.

        Default: `np.mean`

    centering : object
        Accepted but ignored: this function never reads the value and does
        not forward it to `onset_strength_multi`.  Use `center` instead.

    kwargs : additional keyword arguments
        Additional parameters to `feature()`, if `S` is not provided.

    Returns
    -------
    onset_envelope   : np.ndarray [shape=(m,)]
        vector containing the onset strength envelope

    Raises
    ------
    ParameterError
        if neither `(y, sr)` nor `S` are provided

        or if `lag` or `max_size` are not positive integers

    See Also
    --------
    onset_detect
    onset_strength_multi

    Examples
    --------
    First, load some audio and plot the spectrogram

    >>> import matplotlib.pyplot as plt
    >>> y, sr = librosa.load(librosa.util.example_audio_file(),
    ...                      duration=10.0)
    >>> D = librosa.stft(y)
    >>> times = librosa.frames_to_time(np.arange(D.shape[1]))
    >>> plt.figure()
    >>> ax1 = plt.subplot(2, 1, 1)
    >>> librosa.display.specshow(librosa.amplitude_to_db(D, ref=np.max),
    ...                          y_axis='log', x_axis='time')
    >>> plt.title('Power spectrogram')

    Construct a standard onset function

    >>> onset_env = librosa.onset.onset_strength(y=y, sr=sr)
    >>> plt.subplot(2, 1, 2, sharex=ax1)
    >>> plt.plot(times, 2 + onset_env / onset_env.max(), alpha=0.8,
    ...          label='Mean (mel)')

    Median aggregation, and custom mel options

    >>> onset_env = librosa.onset.onset_strength(y=y, sr=sr,
    ...                                          aggregate=np.median,
    ...                                          fmax=8000, n_mels=256)
    >>> plt.plot(times, 1 + onset_env / onset_env.max(), alpha=0.8,
    ...          label='Median (custom mel)')

    Constant-Q spectrogram instead of Mel

    >>> onset_env = librosa.onset.onset_strength(y=y, sr=sr,
    ...                                          feature=librosa.cqt)
    >>> plt.plot(times, onset_env / onset_env.max(), alpha=0.8,
    ...          label='Mean (CQT)')
    >>> plt.legend(frameon=True, framealpha=0.75)
    >>> plt.ylabel('Normalized strength')
    >>> plt.yticks([])
    >>> plt.axis('tight')
    >>> plt.tight_layout()
    """

    # Delegate to the multi-channel implementation with a single channel
    # spanning every frequency bin (`channels=None`).
    odf_all = onset_strength_multi(y=y,
                                   sr=sr,
                                   S=S,
                                   lag=lag,
                                   max_size=max_size,
                                   detrend=detrend,
                                   center=center,
                                   feature=feature,
                                   aggregate=aggregate,
                                   channels=None,
                                   **kwargs)

    # The result has shape (1, m); return the only row as a 1-d envelope
    return odf_all[0]
def onset_backtrack(events, energy):
    '''Backtrack detected onset events to the nearest preceding local
    minimum of an energy function.

    This function can be used to roll back the timing of detected onsets
    from a detected peak amplitude to the preceding minimum.

    This is most useful when using onsets to determine slice points for
    segmentation.

    .. [1] Jehan, Tristan.
           "Creating music by listening"
           Doctoral dissertation
           Massachusetts Institute of Technology, 2005.

    Parameters
    ----------
    events : np.ndarray, dtype=int
        List of onset event frame indices, as computed by `onset_detect`

    energy : np.ndarray, shape=(m,)
        An energy function

    Returns
    -------
    events_backtracked : np.ndarray, shape=events.shape
        The input events matched to nearest preceding minima of `energy`.

    Examples
    --------
    >>> y, sr = librosa.load(librosa.util.example_audio_file(),
    ...                      offset=30, duration=2.0)
    >>> oenv = librosa.onset.onset_strength(y=y, sr=sr)
    >>> # Detect events without backtracking
    >>> onset_raw = librosa.onset.onset_detect(onset_envelope=oenv,
    ...                                        backtrack=False)
    >>> # Backtrack the events using the onset envelope
    >>> onset_bt = librosa.onset.onset_backtrack(onset_raw, oenv)
    >>> # Backtrack the events using the RMS energy
    >>> rmse = librosa.feature.rmse(S=np.abs(librosa.stft(y=y)))
    >>> onset_bt_rmse = librosa.onset.onset_backtrack(onset_raw, rmse[0])

    >>> # Plot the results
    >>> import matplotlib.pyplot as plt
    >>> plt.figure()
    >>> plt.subplot(2,1,1)
    >>> plt.plot(oenv, label='Onset strength')
    >>> plt.vlines(onset_raw, 0, oenv.max(), label='Raw onsets')
    >>> plt.vlines(onset_bt, 0, oenv.max(), label='Backtracked', color='r')
    >>> plt.legend(frameon=True, framealpha=0.75)
    >>> plt.subplot(2,1,2)
    >>> plt.plot(rmse[0], label='RMSE')
    >>> plt.vlines(onset_bt_rmse, 0, rmse.max(), label='Backtracked (RMSE)', color='r')
    >>> plt.legend(frameon=True, framealpha=0.75)
    '''

    # Find interior local minima of the energy curve, i.e. points i with
    #   energy[i] <= energy[i-1]   (not rising into i)
    #   energy[i] <  energy[i+1]   (strictly rising out of i)
    # Indices here are relative to the slice `energy[1:-1]`.
    minima = np.flatnonzero((energy[1:-1] <= energy[:-2]) &
                            (energy[1:-1] < energy[2:]))

    # Shift by one to account for slicing in minima detection.
    # Pad on a 0, just in case we have onsets with no preceding minimum.
    minima = util.fix_frames(1 + minima, x_min=0)

    # Only match going left from the detected events
    return minima[util.match_events(events, minima, right=False)]
@cache(level=30)
def onset_strength_multi(y=None, sr=22050, S=None, lag=1, max_size=1,
                         detrend=False, center=True, feature=None,
                         aggregate=None, channels=None, **kwargs):
    """Compute a spectral flux onset strength envelope across multiple channels.

    Onset strength for channel `i` at time `t` is determined by:

    `mean_{f in channels[i]} max(0, S[f, t] - ref_S[f, t - lag])`

    where `ref_S` is `S` after local max filtering (of width `max_size`)
    along the frequency axis, and `mean` is replaced by `aggregate` if one
    is given.

    Parameters
    ----------
    y        : np.ndarray [shape=(n,)]
        audio time-series

    sr       : number > 0 [scalar]
        sampling rate of `y`

    S        : np.ndarray [shape=(d, m)]
        pre-computed (log-power) spectrogram

    lag      : int > 0
        time lag for computing differences

    max_size : int > 0
        size (in frequency bins) of the local max filter.
        set to `1` to disable filtering.

    detrend : bool [scalar]
        Filter the onset strength to remove the DC component

    center : bool [scalar]
        Shift the onset function by `n_fft / (2 * hop_length)` frames

    feature : function
        Function for computing time-series features, eg, scaled spectrograms.
        By default, uses `librosa.feature.melspectrogram` with `fmax=11025.0`

    aggregate : function
        Aggregation function to use when combining onsets
        at different frequency bins.

        Default: `np.mean`

    channels : list or None
        Array of channel boundaries or slice objects.
        If `None`, then a single channel is generated to span all bands.

    kwargs : additional keyword arguments
        Additional parameters to `feature()`, if `S` is not provided.

        `n_fft` and `hop_length`, if present, are also used to compute the
        centering shift (defaults: 2048 and 512).

    Returns
    -------
    onset_envelope   : np.ndarray [shape=(n_channels, m)]
        array containing the onset strength envelope for each specified channel

    Raises
    ------
    ParameterError
        if neither `(y, sr)` nor `S` are provided

        or if `lag` or `max_size` are not positive integers

    See Also
    --------
    onset_strength

    Notes
    -----
    This function caches at level 30.

    Examples
    --------
    First, load some audio and plot the spectrogram

    >>> import matplotlib.pyplot as plt
    >>> y, sr = librosa.load(librosa.util.example_audio_file(),
    ...                      duration=10.0)
    >>> D = librosa.stft(y)
    >>> plt.figure()
    >>> plt.subplot(2, 1, 1)
    >>> librosa.display.specshow(librosa.amplitude_to_db(D, ref=np.max),
    ...                          y_axis='log')
    >>> plt.title('Power spectrogram')

    Construct a standard onset function over four sub-bands

    >>> onset_subbands = librosa.onset.onset_strength_multi(y=y, sr=sr,
    ...                                                     channels=[0, 32, 64, 96, 128])
    >>> plt.subplot(2, 1, 2)
    >>> librosa.display.specshow(onset_subbands, x_axis='time')
    >>> plt.ylabel('Sub-bands')
    >>> plt.title('Sub-band onset strength')
    """

    if feature is None:
        feature = melspectrogram
        kwargs.setdefault('fmax', 11025.0)

    if aggregate is None:
        aggregate = np.mean

    # Check the type before comparing, so that non-numeric input (None, str)
    # raises ParameterError rather than a TypeError from `<`.
    # NumPy integer scalars are accepted alongside builtin ints.
    if not isinstance(lag, (int, np.integer)) or lag < 1:
        raise ParameterError('lag must be a positive integer')

    if not isinstance(max_size, (int, np.integer)) or max_size < 1:
        raise ParameterError('max_size must be a positive integer')

    # First, compute the feature (by default, mel) spectrogram
    if S is None:
        S = np.abs(feature(y=y, sr=sr, **kwargs))

        # Convert to dBs
        S = core.power_to_db(S)

    # Retrieve the n_fft and hop_length,
    # or default values for onsets if not provided
    n_fft = kwargs.get('n_fft', 2048)
    hop_length = kwargs.get('hop_length', 512)

    # Ensure that S is at least 2-d
    S = np.atleast_2d(S)

    # Compute the reference spectrogram.
    # Efficiency hack: skip filtering step and pass by reference
    # if max_size will produce a no-op.
    if max_size == 1:
        ref_spec = S
    else:
        ref_spec = scipy.ndimage.maximum_filter1d(S, max_size, axis=0)

    # Compute difference to the reference, spaced by lag
    onset_env = S[:, lag:] - ref_spec[:, :-lag]

    # Discard negatives (decreasing amplitude)
    onset_env = np.maximum(0.0, onset_env)

    # Aggregate within channels.
    # Padding is only requested from `util.sync` for the implicit
    # single all-bands channel.
    if channels is None:
        channels = [slice(None)]
        pad = True
    else:
        pad = False

    onset_env = util.sync(onset_env, channels,
                          aggregate=aggregate,
                          pad=pad,
                          axis=0)

    # compensate for lag
    pad_width = lag
    if center:
        # Counter-act framing effects.
        # Shift the onsets by n_fft / (2 * hop_length) frames
        pad_width += n_fft // (2 * hop_length)

    onset_env = np.pad(onset_env, ([0, 0], [int(pad_width), 0]),
                       mode='constant')

    # remove the DC component
    if detrend:
        onset_env = scipy.signal.lfilter([1.0, -1.0], [1.0, -0.99],
                                         onset_env, axis=-1)

    # Trim to match the input duration
    if center:
        onset_env = onset_env[:, :S.shape[1]]

    return onset_env