Source code for minispec.filters

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Filters
=======

Filter bank construction
------------------------
.. autosummary::
    :toctree: generated/

    mel
    chroma
    constant_q
    semitone_filterbank

Window functions
----------------
.. autosummary::
    :toctree: generated/

    window_bandwidth
    get_window


Miscellaneous
-------------
.. autosummary::
    :toctree: generated/

    constant_q_lengths
    cq_to_chroma
    mr_frequencies
    window_sumsquare

Deprecated
----------
.. autosummary::
    :toctree: generated/

    dct
"""
import warnings

import numpy as np
import scipy
import scipy.signal
import six

from . import util
from .util.exceptions import ParameterError

from .core.time_frequency import fft_frequencies, mel_frequencies

__all__ = ['mel',
           'get_window',
           'window_sumsquare']


[docs]def mel(sr, n_fft, n_mels=128, fmin=0.0, fmax=None, htk=False, norm=1): """Create a Filterbank matrix to combine FFT bins into Mel-frequency bins Parameters ---------- sr : number > 0 [scalar] sampling rate of the incoming signal n_fft : int > 0 [scalar] number of FFT components n_mels : int > 0 [scalar] number of Mel bands to generate fmin : float >= 0 [scalar] lowest frequency (in Hz) fmax : float >= 0 [scalar] highest frequency (in Hz). If `None`, use `fmax = sr / 2.0` htk : bool [scalar] use HTK formula instead of Slaney norm : {None, 1, np.inf} [scalar] if 1, divide the triangular mel weights by the width of the mel band (area normalization). Otherwise, leave all the triangles aiming for a peak value of 1.0 Returns ------- M : np.ndarray [shape=(n_mels, 1 + n_fft/2)] Mel transform matrix Examples -------- >>> melfb = minispec.filters.mel(22050, 2048) >>> melfb array([[ 0. , 0.016, ..., 0. , 0. ], [ 0. , 0. , ..., 0. , 0. ], ..., [ 0. , 0. , ..., 0. , 0. ], [ 0. , 0. , ..., 0. , 0. ]]) Clip the maximum frequency to 8KHz >>> minispec.filters.mel(22050, 2048, fmax=8000) array([[ 0. , 0.02, ..., 0. , 0. ], [ 0. , 0. , ..., 0. , 0. ], ..., [ 0. , 0. , ..., 0. , 0. ], [ 0. , 0. , ..., 0. , 0. ]]) """ if fmax is None: fmax = float(sr) / 2 if norm is not None and norm != 1 and norm != np.inf: raise ParameterError('Unsupported norm: {}'.format(repr(norm))) # Initialize the weights n_mels = int(n_mels) weights = np.zeros((n_mels, int(1 + n_fft // 2))) # Center freqs of each FFT bin fftfreqs = fft_frequencies(sr=sr, n_fft=n_fft) # 'Center freqs' of mel bands - uniformly spaced between limits mel_f = mel_frequencies(n_mels + 2, fmin=fmin, fmax=fmax, htk=htk) fdiff = np.diff(mel_f) ramps = np.subtract.outer(mel_f, fftfreqs) for i in range(n_mels): # lower and upper slopes for all bins lower = -ramps[i] / fdiff[i] upper = ramps[i+2] / fdiff[i+1] # .. then intersect them with each other and zero weights[i] = np.maximum(0, np.minimum(lower, upper)) if norm == 1: # Slaney-style mel is scaled to be approx constant energy per channel enorm = 2.0 / (mel_f[2:n_mels+2] - mel_f[:n_mels]) weights *= enorm[:, np.newaxis] # Only check weights if f_mel[0] is positive if not np.all((mel_f[:-2] == 0) | (weights.max(axis=1) > 0)): # This means we have an empty channel somewhere warnings.warn('Empty filters detected in mel frequency basis. ' 'Some channels will produce empty responses. ' 'Try increasing your sampling rate (and fmax) or ' 'reducing n_mels.') return weights
def __float_window(window_spec): '''Decorator function for windows with fractional input. This function guarantees that for fractional `x`, the following hold: 1. `__float_window(window_function)(x)` has length `np.ceil(x)` 2. all values from `np.floor(x)` are set to 0. For integer-valued `x`, there should be no change in behavior. ''' def _wrap(n, *args, **kwargs): '''The wrapped window''' n_min, n_max = int(np.floor(n)), int(np.ceil(n)) window = get_window(window_spec, n_min) if len(window) < n_max: window = np.pad(window, [(0, n_max - len(window))], mode='constant') window[n_min:] = 0.0 return window return _wrap
[docs]def get_window(window, Nx, fftbins=True): '''Compute a window function. This is a wrapper for `scipy.signal.get_window` that additionally supports callable or pre-computed windows. Parameters ---------- window : string, tuple, number, callable, or list-like The window specification: - If string, it's the name of the window function (e.g., `'hann'`) - If tuple, it's the name of the window function and any parameters (e.g., `('kaiser', 4.0)`) - If numeric, it is treated as the beta parameter of the `'kaiser'` window, as in `scipy.signal.get_window`. - If callable, it's a function that accepts one integer argument (the window length) - If list-like, it's a pre-computed window of the correct length `Nx` Nx : int > 0 The length of the window fftbins : bool, optional If True (default), create a periodic window for use with FFT If False, create a symmetric window for filter design applications. Returns ------- get_window : np.ndarray A window of length `Nx` and type `window` See Also -------- scipy.signal.get_window Raises ------ ParameterError If `window` is supplied as a vector of length != `n_fft`, or is otherwise mis-specified. ''' if six.callable(window): return window(Nx) elif (isinstance(window, (six.string_types, tuple)) or np.isscalar(window)): # TODO: if we add custom window functions in minispec, call them here return scipy.signal.get_window(window, Nx, fftbins=fftbins) elif isinstance(window, (np.ndarray, list)): if len(window) == Nx: return np.asarray(window) raise ParameterError('Window size mismatch: ' '{:d} != {:d}'.format(len(window), Nx)) else: raise ParameterError('Invalid window specification: {}'.format(window))
def __window_ss_fill(x, win_sq, n_frames, hop_length): # pragma: no cover '''Helper function for window sum-square calculation.''' n = len(x) n_fft = len(win_sq) for i in range(n_frames): sample = i * hop_length x[sample:min(n, sample + n_fft)] += win_sq[:max(0, min(n_fft, n - sample))]
[docs]def window_sumsquare(window, n_frames, hop_length=512, win_length=None, n_fft=2048, dtype=np.float32, norm=None): ''' Compute the sum-square envelope of a window function at a given hop length. This is used to estimate modulation effects induced by windowing observations in short-time fourier transforms. Parameters ---------- window : string, tuple, number, callable, or list-like Window specification, as in `get_window` n_frames : int > 0 The number of analysis frames hop_length : int > 0 The number of samples to advance between frames win_length : [optional] The length of the window function. By default, this matches `n_fft`. n_fft : int > 0 The length of each analysis frame. dtype : np.dtype The data type of the output Returns ------- wss : np.ndarray, shape=`(n_fft + hop_length * (n_frames - 1))` The sum-squared envelope of the window function Examples -------- For a fixed frame length (2048), compare modulation effects for a Hann window at different hop lengths: >>> n_frames = 50 >>> wss_256 = minispec.filters.window_sumsquare('hann', n_frames, hop_length=256) >>> wss_512 = minispec.filters.window_sumsquare('hann', n_frames, hop_length=512) >>> wss_1024 = minispec.filters.window_sumsquare('hann', n_frames, hop_length=1024) >>> import matplotlib.pyplot as plt >>> plt.figure() >>> plt.subplot(3,1,1) >>> plt.plot(wss_256) >>> plt.title('hop_length=256') >>> plt.subplot(3,1,2) >>> plt.plot(wss_512) >>> plt.title('hop_length=512') >>> plt.subplot(3,1,3) >>> plt.plot(wss_1024) >>> plt.title('hop_length=1024') >>> plt.tight_layout() ''' if win_length is None: win_length = n_fft n = n_fft + hop_length * (n_frames - 1) x = np.zeros(n, dtype=dtype) # Compute the squared window at the desired length win_sq = get_window(window, win_length) win_sq = util.normalize(win_sq, norm=norm)**2 win_sq = util.pad_center(win_sq, n_fft) # Fill the envelope __window_ss_fill(x, win_sq, n_frames, hop_length) return x