Source code for thebeat.stats

# Copyright (C) 2022-2025  Jelle van der Werff
#
# This file is part of thebeat.
#
# thebeat is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# thebeat is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with thebeat.  If not, see <https://www.gnu.org/licenses/>.

from __future__ import annotations

import numbers
from fractions import Fraction

import Levenshtein
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.fft
import scipy.signal
import scipy.stats
from scipy.fft import rfft, rfftfreq

import thebeat.core
import thebeat.helpers


[docs] def acf_df( sequence: thebeat.core.Sequence, resolution, smoothing_window: float | None = None, smoothing_sd: float | None = None, ) -> pd.DataFrame: """ Perform autocorrelation analysis on a :py:class:`Sequence` object, and return a :class:`Pandas.DataFrame` object containing the results. Parameters ---------- sequence A :py:class:`~thebeat.core.Sequence` object. resolution The temporal resolution. If the used Sequence is in seconds, you might want to use 0.001. If the Sequence is in milliseconds, try using 1. Incidentally, the number of lags for the autocorrelation function is calculated as ``n_lags = sequence_duration / resolution``. smoothing_window The window within which a normal probability density function is used for smoothing out the analysis. smoothing_sd The standard deviation of the normal probability density function used for smoothing out the analysis. Returns ------- :class:`pandas.DataFrame` DataFrame containing two columns: the timestamps, and the autocorrelation factor. Notes ----- This function is based on the procedure described in :cite:t:`ravignaniMeasuringRhythmicComplexity2017`. There, one can also find a more detailed description of the smoothing procedure. Examples -------- >>> rng = np.random.default_rng(seed=123) # for reproducability >>> seq = thebeat.core.Sequence.generate_random_uniform(n_events=10,a=400,b=600,rng=rng) >>> df = acf_df(seq, smoothing_window=50, smoothing_sd=20, resolution=10) >>> print(df.head(3)) timestamp correlation 0 0 1.000000 1 10 0.824706 2 20 0.547863 """ correlations = acf_values( sequence=sequence, resolution=resolution, smoothing_window=smoothing_window, smoothing_sd=smoothing_sd, ) correlations = correlations / np.max(correlations) timestamps = np.arange(start=0, stop=len(correlations) * resolution, step=resolution) df = pd.DataFrame({"timestamp": timestamps, "correlation": correlations}) return df
[docs] def acf_plot( sequence: thebeat.core.Sequence, resolution, min_lag: float | None = None, max_lag: float | None = None, smoothing_window: float | None = None, smoothing_sd: float | None = None, title: str = "Autocorrelation", x_axis_label: str = "Lag", y_axis_label: str = "Correlation", figsize: tuple | None = None, dpi: int = 100, ax: plt.Axes | None = None, ) -> tuple[plt.Figure, plt.Axes]: """ This function can be used for plotting an autocorrelation plot from a :class:`~thebeat.core.Sequence`. Parameters ---------- sequence A :py:class:`~thebeat.core.Sequence` object. resolution The temporal resolution. If the used Sequence is in seconds, you might want to use 0.001. If the Sequence is in milliseconds, try using 1. Incidentally, the number of lags for the autocorrelation function is calculated as ``n_lags = sequence_duration_in_ms / resolution``. min_lag The minimum lag to be plotted. Defaults to 0. max_lag The maximum lag to be plotted. Defaults to the sequence duration. smoothing_window The window (in milliseconds) within which a normal probability density function is used for smoothing out the analysis. smoothing_sd The standard deviation of the normal probability density function used for smoothing out the analysis. title If desired, one can provide a title for the plot. This takes precedence over using the :class:`~thebeat.core.Sequence` or :class:`~thebeat.core.SoundSequence` ``name`` attribute as the title of the plot (if the object has one). x_axis_label A label for the x axis. y_axis_label A label for the y axis. figsize A tuple containing the desired output size of the plot in inches, e.g. ``(4, 1)``. This refers to the ``figsize`` parameter in :func:`matplotlib.pyplot.figure`. dpi The desired output resolution of the plot in dots per inch (DPI). This refers to the ``dpi`` parameter in :func:`matplotlib.pyplot.figure`. ax If desired, one can provide an existing :class:`matplotlib.axes.Axes` object to plot the autocorrelation plot on. This is for instance useful if you want to plot multiple autocorrelation plots on the same figure. Notes ----- This function is based on the procedure described in :cite:t:`ravignaniMeasuringRhythmicComplexity2017`. There, one can also find a more detailed description of the smoothing procedure. """ onsets = sequence.onsets correlation = acf_values( sequence=sequence, resolution=resolution, smoothing_window=smoothing_window, smoothing_sd=smoothing_sd, ) x_step = resolution min_lag = int(min_lag // resolution) if min_lag else 0 max_lag = ( int(max_lag // resolution) if max_lag else np.floor(np.max(onsets) / resolution).astype(int) ) # plot try: y = correlation[min_lag:max_lag] y = y / np.max(y) # normalize except ValueError: raise ValueError("We end up with an empty y axis. Try changing the resolution.") # Make x axis x = np.arange(start=min_lag, stop=max_lag * x_step, step=x_step) # Plot if ax is None: fig, ax = plt.subplots(figsize=figsize, tight_layout=True, dpi=dpi) else: fig = ax.get_figure() ax.set_xlabel(x_axis_label) ax.set_ylabel(y_axis_label) ax.set_title(title) ax.plot(x, y) return fig, ax
[docs] def acf_values( sequence: thebeat.core.Sequence, resolution, smoothing_window: float | None = None, smoothing_sd: float | None = None, ) -> np.ndarray: """ Perform autocorrelation. This function takes a :class:`~thebeat.core.Sequence` object, and returns an array with steps of ``resolution`` of unstandardized correlation factors. Parameters ---------- sequence A :class:`~thebeat.core.Sequence` object. resolution The temporal resolution. If the used Sequence is in seconds, you might want to use 0.001. If the Sequence is in milliseconds, try using 1. Incidentally, the number of lags for the autocorrelation function is calculated as ``n_lags = sequence_duration / resolution``. smoothing_window The window within which a normal probability density function is used for smoothing out the analysis. smoothing_sd The standard deviation of the normal probability density function used for smoothing out the analysis. Notes ----- This function is based on the procedure described in :cite:t:`ravignaniMeasuringRhythmicComplexity2017`. There, one can also find a more detailed description of the smoothing procedure. This function uses the :func:`numpy.correlate` to calculate the correlations. """ signal = thebeat.utils.sequence_to_binary(sequence, resolution) # npdf if smoothing_window and smoothing_sd: # Here we make a tiny normal distribution, which has the width of the smoothing window x = np.arange(start=-smoothing_window / 2, stop=smoothing_window / 2, step=resolution) npdf = scipy.stats.norm.pdf(x, 0, smoothing_sd) npdf = npdf / np.max(npdf) # Convolve tiny normal distributions with the original signal signal = np.convolve(signal, npdf, "same") try: correlation = np.correlate(signal, signal, "full") except ValueError as e: raise ValueError( "Error! Hint: Most likely your resolution is too large for the chosen smoothing_window" "and smoothing_sd. Try choosing a smaller resolution." ) from e # Now, we remove the negative lags (which have the length of signal) correlation = correlation[len(signal) - 1:] return correlation
[docs] def ccf_df( test_sequence: thebeat.core.Sequence, reference_sequence: thebeat.core.Sequence, resolution, smoothing_window: float | None = None, smoothing_sd: float | None = None, ) -> pd.DataFrame: """ Perform autocorrelation analysis on a :py:class:`Sequence` object, and return a :class:`Pandas.DataFrame` object containing the results. Parameters ---------- test_sequence The test sequence. reference_sequence The reference sequence. resolution The temporal resolution. If the used Sequence is in seconds, you might want to use 0.001. If the Sequence is in milliseconds, try using 1. Incidentally, the number of lags for the autocorrelation function is calculated as ``n_lags = sequence_duration / resolution``. smoothing_window The window within which a normal probability density function is used for smoothing out the analysis. smoothing_sd The standard deviation of the normal probability density function used for smoothing out the analysis. Returns ------- :class:`pandas.DataFrame` DataFrame containing two columns: the timestamps, and the cross-correlation factor. Notes ----- This function is based on the procedure described in :cite:t:`ravignaniMeasuringRhythmicComplexity2017`. There, one can also find a more detailed description of the smoothing procedure. """ correlations = ccf_values( test_sequence=test_sequence, reference_sequence=reference_sequence, resolution=resolution, smoothing_window=smoothing_window, smoothing_sd=smoothing_sd, ) # normalize correlations = correlations / np.max(correlations) timestamps = np.arange(start=0, stop=len(correlations) * resolution, step=resolution) df = pd.DataFrame({"timestamp": timestamps, "correlation": correlations}) return df
[docs] def ccf_plot( test_sequence: thebeat.core.Sequence, reference_sequence: thebeat.core.Sequence, resolution, smoothing_window: float | None = None, smoothing_sd: float | None = None, title: str = "Cross-correlation", x_axis_label: str = "Lag", y_axis_label: str = "Correlation", figsize: tuple | None = None, dpi: int = 100, ax: plt.Axes | None = None, ) -> tuple[plt.Figure, plt.Axes]: """ Calculate and plot the cross-correlation function (CCF) between two :class:`~thebeat.core.Sequence` objects. The test sequence is compared to the reference sequence. Parameters ---------- test_sequence The test sequence. reference_sequence The reference sequence. resolution The temporal resolution. If the used Sequence is in milliseconds, you probably want 1. If the Sequence is in seconds, try using 0.001. smoothing_window The window within which a normal probability density function is used for smoothing out the analysis. smoothing_sd The standard deviation of the normal probability density function used for smoothing out the analysis. title The title of the plot. x_axis_label The label of the x axis. y_axis_label The label of the y axis. figsize A tuple containing the desired output size of the plot in inches, e.g. ``(4, 1)``. This refers to the ``figsize`` parameter in :func:`matplotlib.pyplot.figure`. dpi The resolution of the plot in dots per inch. This refers to the ``dpi`` parameter in :func:`matplotlib.pyplot.figure`. ax A :class:`matplotlib.axes.Axes` object. If ``None``, a new Figure and Axes is created. Returns ------- fig The :class:`matplotlib.figure.Figure` object. ax The :class:`matplotlib.axes.Axes` object. Notes ----- This function is based on the procedure described in :cite:t:`ravignaniMeasuringRhythmicComplexity2017`. There, one can also find a more detailed description of the smoothing procedure. """ # Get correlation factors correlation = ccf_values( test_sequence=test_sequence, reference_sequence=reference_sequence, resolution=resolution, smoothing_window=smoothing_window, smoothing_sd=smoothing_sd, ) # Make y axis x_step = resolution max_lag = np.floor( np.max(np.concatenate([test_sequence.onsets, reference_sequence.onsets])) / resolution ).astype(int) try: y = correlation[:max_lag] y = y / np.max(y) # normalize except ValueError: raise ValueError("We end up with an empty y axis. Try changing the resolution.") # Make x axis x = np.arange(start=0, stop=len(y) * x_step, step=x_step) # Plot if ax is None: fig, ax = plt.subplots(figsize=figsize, tight_layout=True, dpi=dpi) else: fig = ax.get_figure() ax.set_xlabel(x_axis_label) ax.set_ylabel(y_axis_label) ax.set_title(title) ax.plot(x, y) return fig, ax
[docs] def ccf_values( test_sequence: thebeat.core.Sequence, reference_sequence: thebeat.core.Sequence, resolution: float, smoothing_window: float | None = None, smoothing_sd: float | None = None, ) -> np.ndarray: """ Returns the unstandardized cross-correlation function (CCF) for two :class:`~thebeat.core.Sequence` objects. The test sequence is compared to the reference sequence. Parameters ---------- test_sequence The test sequence. reference_sequence The reference sequence. resolution The temporal resolution. If the used Sequence is in milliseconds, you probably want 1. If the Sequence is in seconds, try using 0.001. smoothing_window The window within which a normal probability density function is used for smoothing out the analysis. smoothing_sd The standard deviation of the normal probability density function used for smoothing out the analysis. Returns ------- correlation The unstandardized cross-correlation function. """ # Make into 0's and 1's test_signal = thebeat.utils.sequence_to_binary(test_sequence, resolution) ref_signal = thebeat.utils.sequence_to_binary(reference_sequence, resolution) # npdf if smoothing_window and smoothing_sd: # Here we make a tiny normal distribution, which has the width of the smoothing window x = np.arange(start=-smoothing_window / 2, stop=smoothing_window / 2, step=resolution) npdf = scipy.stats.norm.pdf(x, 0, smoothing_sd) npdf = npdf / np.max(npdf) # Convolve tiny normal distributions with the original signal test_signal = np.convolve(test_signal, npdf, "same") ref_signal = np.convolve(ref_signal, npdf, "same") # Calculate cross-correlation try: correlation = np.correlate(test_signal, ref_signal, "full") except ValueError as e: raise ValueError( "Error! Hint: Most likely your resolution is too large for the chosen smoothing_window" "and smoothing_sd. Try choosing a smaller resolution." ) from e # Now, remove negative lags correlation = correlation[len(test_signal) - 1:] return correlation
[docs] def edit_distance_rhythm( test_rhythm: thebeat.music.Rhythm, reference_rhythm: thebeat.music.Rhythm, smallest_note_value: Fraction | float = Fraction(1, 16), ) -> float: """ Caculates edit/Levenshtein distance between two rhythms. The ``smallest_note_value`` determines the underlying grid that is used. If e.g. 16, the underlying grid is composed of 1/16th notes. Note ---- Based on the procedure described in :cite:t:`postEditDistanceMeasure2011`. Parameters ---------- test_rhythm The rhythm to be tested. reference_rhythm The rhythm to which ``test_rhythm`` will be compared. smallest_note_value The smallest note value that is used in the underlying grid. 16 means 1/16th notes, 4 means 1/4th notes, etc. Examples -------- >>> from thebeat.music import Rhythm >>> test_rhythm = Rhythm.from_note_values([1/4, 1/4, 1/4, 1/4]) >>> reference_rhythm = Rhythm.from_note_values([1/4, 1/8, 1/8, 1/4, 1/4]) >>> print(edit_distance_rhythm(test_rhythm, reference_rhythm)) 1 """ smallest_note_value = Fraction(smallest_note_value).limit_denominator() if not isinstance(test_rhythm, thebeat.music.Rhythm) or not isinstance( reference_rhythm, thebeat.music.Rhythm ): raise TypeError("test_rhythm and reference_rhythm must be of type Rhythm") test_string = thebeat.utils.rhythm_to_binary( rhythm=test_rhythm, smallest_note_value=smallest_note_value ) reference_string = thebeat.utils.rhythm_to_binary( rhythm=reference_rhythm, smallest_note_value=smallest_note_value ) # calculate edit distance edit_distance = Levenshtein.distance(test_string, reference_string) return edit_distance
[docs] def edit_distance_sequence( test_sequence: thebeat.core.Sequence, reference_sequence: thebeat.core.Sequence, resolution: int ) -> float: """ Calculates the edit/Levenshtein distance between two sequences. Requires for all the IOIs in a Sequence to be multiples of 'resolution'. If needed, quantize the Sequence first, e.g. using Sequence.quantize_iois(). Note ---- The resolution also represents the underlying grid. If, for example, the resolution is 50, that means that a grid will be created with steps of 50. The onsets of the sequence are then placed on the grid for both sequences. The resulting sequences consist of ones and zeros, where ones represent the event onsets. This string for ``test_sequence`` is compared to the string of the ``reference_sequence``. Note that ``test_sequence`` and ``reference_sequence`` can be interchanged without an effect on the results. Parameters ---------- test_sequence The sequence to be tested. reference_sequence The sequence to which ``test_sequence`` will be compared. resolution The used resolution (i.e. bin size). """ if not isinstance(test_sequence, thebeat.core.Sequence) or not isinstance( reference_sequence, thebeat.core.Sequence ): raise TypeError("test_sequence and reference_sequence must be of type Sequence") # Check whether we need to quantize the sequences if np.any(test_sequence.onsets % resolution != 0) or np.any( reference_sequence.onsets % resolution != 0 ): raise ValueError( "test_sequence and reference_sequence must be quantized to multiples of {resolution} first," "for instance using Sequence.quantize_iois()" ) test_string = thebeat.utils.sequence_to_binary(sequence=test_sequence, resolution=resolution) reference_string = thebeat.utils.sequence_to_binary( sequence=reference_sequence, resolution=resolution ) # calculate edit distance edit_distance = Levenshtein.distance(test_string, reference_string) return edit_distance
[docs] def fft_values( sequence: thebeat.core.Sequence, unit_size: float, x_min: float | None = None, x_max: float | None = None, remove_dc: bool = True, ) -> tuple[np.ndarray, np.ndarray]: """ Gets the x and y values for a Fourier transform of a :class:`~thebeat.core.Sequence` object. The ``unit_size`` parameter is required, because Sequence objects are agnostic about the used time unit. You can use 1000 if the Sequence is in milliseconds, and 1 if the Sequence is in seconds. The x values indicate the number of cycles per unit, and y values indicate the absolute power (amplitude squared). The number of cycles per unit can be interpreted as the beat frequency. For instance, 2 cycles for a unit size of 1000 ms means a beat frequency of 2 Hz. Note ---- If a sequence ends with an event, the last event is not included in the calculation of the Fourier transform. This is because the Fourier transform assumes that the sequence can be repeated indefinitely. In other words, we estimate the Fourier transform on the basis of the IOIs, not on the basis of the onsets. Parameters ---------- sequence The sequence. unit_size The size of the unit in which the sequence is measured. If the sequence is in milliseconds, you probably want 1000. If the sequence is in seconds, you probably want 1. x_min The minimum number of cycles per unit to be plotted. x_max The maximum number of cycles per unit to be plotted. remove_dc If True, the DC component is removed from the Fourier transform by subtracting the mean from the signal. The DC component is the first value of the Fourier transform and represents the average of the sequence. Returns ------- xf The x values. yf The y values. Note that absolute values are returned. Examples -------- >>> from thebeat import Sequence >>> from thebeat.stats import fft_values >>> seq = Sequence.generate_random_normal(n_events=100, mu=500, sigma=25) # milliseconds >>> xf, yf = fft_values(seq, unit_size=1000, x_max=10) """ # Calculate step size step_size = unit_size / 1000 # Make a sequence of ones and zeroes (we use IOIs here to account for sequences that end with onsets, or with first onset different than 0) timeseries = thebeat.utils.sequence_to_binary(thebeat.Sequence(sequence.iois, end_with_interval=True), resolution=step_size) # Remove DC component if necessary if remove_dc: timeseries = timeseries - np.mean(timeseries) duration = len(timeseries) * step_size x_length = np.ceil(duration / step_size).astype(int) # Do the fft yf = rfft(timeseries) xf = rfftfreq(x_length, d=step_size) * (step_size / 0.001) # Slice the data and take absolute values min_freq_index = np.min(np.where(xf > x_min)).astype(int) if x_min else None max_freq_index = np.min(np.where(xf > x_max)).astype(int) if x_max else None yf = np.abs(yf[min_freq_index:max_freq_index])**2 xf = xf[min_freq_index:max_freq_index] return xf, yf
[docs] def fft_plot( sequence: thebeat.core.Sequence, unit_size: float, x_min: float | None = None, x_max: float | None = None, remove_dc: bool = True, title: str = "Fourier transform", x_axis_label: str = "Cycles per unit", y_axis_label: str = "Absolute power", figsize: tuple | None = None, dpi: int = 100, ax: plt.Axes | None = None, ) -> tuple[plt.Figure, plt.Axes]: """ Plots the Fourier transform of a :class:`~thebeat.core.Sequence` object. The ``unit_size`` parameter is required, because Sequence objects are agnostic about the used time unit. You can use 1000 if the Sequence is in milliseconds, and 1 if the Sequence is in seconds. On the x axis is plotted the number of cycles per unit, and on the y axis the absolute power. The number of cycles per unit can be interpreted as the beat frequency. For instance, 2 cycles for a unit size of 1000 ms means a beat frequency of 2 Hz. Note ---- In most beat-finding applications you will want to set the ``x_max`` argument to something reasonable. The Fourier transform is plotted for all possible frequencies until the Nyquist frequency (half the value of ``unit_size``). However, in most cases you will not be interested in frequencies that are higher than the beat frequency. For instance, if you have a sequence with a beat frequency of 2 Hz, you will not be interested in frequencies higher than, say, 20 Hz. In that case, you can set ``x_max`` to 20. Parameters ---------- sequence The sequence. unit_size The size of the unit in which the sequence is measured. If the sequence is in milliseconds, you probably want 1000. If the sequence is in seconds, you probably want 1. x_min The minimum number of cycles per unit to be plotted. x_max The maximum number of cycles per unit to be plotted. remove_dc If True, the DC component is removed from the Fourier transform by subtracting the mean from the signal. The DC component is the first value of the Fourier transform and represents the average of the sequence. title The title of the plot. x_axis_label The label of the x axis. y_axis_label The label of the y axis. figsize A tuple containing the desired output size of the plot in inches, e.g. ``(4, 1)``. This refers to the ``figsize`` parameter in :func:`matplotlib.pyplot.figure`. dpi The resolution of the plot in dots per inch. ax A matplotlib Axes object to plot on. If not provided, a new figure and axes will be created. Returns ------- fig The matplotlib Figure object. ax The matplotlib Axes object. Examples -------- >>> from thebeat import Sequence >>> from thebeat.stats import fft_plot >>> seq = Sequence.generate_random_normal(n_events=100, mu=500, sigma=25) # milliseconds >>> fft_plot(seq, unit_size=1000) # doctest: +SKIP (<Figure size 800x550 with 1 Axes>, <Axes: title={'center': 'Fourier transform'}, \ xlabel='Cycles per unit', ylabel='Absolute power'>) >>> seq = Sequence.generate_random_normal(n_events=100, mu=0.5, sigma=0.025) # seconds >>> fft_plot(seq, unit_size=1, x_max=5) # doctest: +SKIP (<Figure size 800x550 with 1 Axes>, <Axes: title={'center': 'Fourier transform'}, \ xlabel='Cycles per unit', ylabel='Absolute power'>) """ # Get values xf, yf = fft_values(sequence=sequence, unit_size=unit_size, x_min=x_min, x_max=x_max, remove_dc=remove_dc) # Plot if ax is None: fig, ax = plt.subplots(figsize=figsize, tight_layout=True, dpi=dpi) else: fig = ax.get_figure() ax.plot(xf, yf) ax.set_xlabel(x_axis_label) ax.set_ylabel(y_axis_label) ax.set_xlim(x_min, None) ax.set_title(title) return fig, ax
[docs] def ks_test( sequence: thebeat.core.Sequence, reference_distribution: str = "normal", alternative: str = "two-sided", ): """ This function returns the `D` statistic and `p` value of a one-sample Kolmogorov-Smirnov test. It calculates how different the distribution of inter-onset intervals (IOIs) is compared to the provided reference distribution. If `p` is significant it means that the IOIs are not distributed according to the provided reference distribution. Parameters ---------- sequence A :class:`~thebeat.core.Sequence` object. reference_distribution Either 'normal' or 'uniform'. The distribution against which the distribution of inter-onset intervals (IOIs) is compared. alternative Either 'two-sided', 'less', or 'greater'. See :func:`scipy.stats.kstest` for more information. Returns ------- :class:`scipy.stats._stats_py.KstestResult` A SciPy named tuple containing the `D` statistic and the `p` value. Notes ----- This function uses :func:`scipy.stats.kstest`. For more information about the use of the Kolmogorov-Smirnov test in rhythm research, see :cite:t:`jadoulSeekingTemporalPredictability2016` and :cite:t:`ravignaniMeasuringRhythmicComplexity2017`. Examples -------- >>> rng = np.random.default_rng(seed=123) >>> seq = thebeat.core.Sequence.generate_random_normal(n_events=100,mu=500,sigma=25,rng=rng) >>> ks_result = ks_test(seq) >>> print(round(ks_result.pvalue, 5)) 0.6608 """ sequence = sequence.iois if reference_distribution == "normal": mean = np.mean(sequence) sd = np.std(sequence) dist = scipy.stats.norm(loc=mean, scale=sd).cdf return scipy.stats.kstest(sequence, dist, alternative=alternative) elif reference_distribution == "uniform": a = min(sequence) b = max(sequence) scale = b - a dist = scipy.stats.uniform(loc=a, scale=scale).cdf return scipy.stats.kstest(sequence, dist, alternative=alternative) else: raise ValueError("Unknown distribution. Choose 'normal' or 'uniform'.")
[docs] def get_interval_ratios_from_dyads(sequence: np.array | thebeat.core.Sequence | list): r""" Return sequential interval ratios, calculated as: :math:`\textrm{ratio}_k = \frac{\textrm{IOI}_k}{\textrm{IOI}_k + \textrm{IOI}_{k+1}}`. Note that for *n* IOIs this property returns *n*-1 ratios. Parameters ---------- sequence The sequence from which to calculate the interval ratios. Can be a Sequence object, or a list or array of IOIs. Notes ----- The used method is based on the methodology from :cite:t:`roeskeCategoricalRhythmsAre2020`. """ if isinstance(sequence, thebeat.core.Sequence): sequence = sequence.iois return sequence[:-1] / (sequence[1:] + sequence[:-1])
[docs] def get_phase_differences( test_events: thebeat.core.Sequence | list[numbers.Real] | np.ndarray | numbers.Real, reference_sequence: thebeat.core.Sequence, reference_ioi: str = "preceding", window_size: int | None = None, unit: str = "degrees", modulo: bool = True ) -> np.ndarray | float: r"""Get the phase differences for ``test_events`` compared to ``reference_sequence``. Phase differences are a (circular) measure of temporal alignment. They are calculated as the difference between the onset of a test event and the onset of the corresponding reference event, divided by the IOI of the reference IOI: :math:`\textrm{phase difference} = \frac{\textrm{test onset} - \textrm{reference onset}}{\textrm{reference IOI}}` The resulting phase differences are expressed in the unit specified by ``unit``, where the default is in degrees. The reference IOI can either be the IOI in the reference sequence that 'contains' an onset in the test sequence (``reference_ioi='containing'``), or the IOI in the reference sequences that precedes this onset (``reference_ioi='preceding'``). The default is for the reference IOI to be the one that precedes the test onset. Let us consider a reference sequence with onsets: :math:`t = 0`, :math:`t = 250`, :math:`t = 1000` If an onset in the test sequence is at :math:`t = 750`, and ``reference_ioi='containing'``, the reference IOI that will be used is the one from :math:`t = 250` to :math:`t = 1000`. If ``reference_ioi='preceding'``, the reference IOI that will be used is the one from :math:`t = 0` to :math:`t = 250`. In addition, one can specify a window size to get a (moving) average of the preceding IOIs. This is only used if ``reference_ioi='preceding'``. The window size determines the number of reference IOIs (up to and including the immediately 'preceding' one) that are used to calculate a mean reference IOI. This can be useful to smoothe out the reference IOI when using irregular (reference) sequences. Finally, one can specify whether modular arithmetic should be used. For degrees, if ``modulo=True``, the phase differences will be expressed in the range :math:`[0, 360)` degrees, :math:`[0, 2\pi)` radians, or :math:`[0, 1)` (as a plain 'fraction'). If ``modulo=False``, the phase differences will be expressed in the range :math:`[0, \infty)`, with the event at the start of the containing interval corresponding to 0 for both the 'containing' and 'preceding' values. Note ---- In cases where it is not possible to calculate a phase difference, ``np.nan`` is returned. This can happen in the following cases: - The test event is before the first onset in the reference sequence. - The test event is after the last onset in the reference sequence, and ``reference_ioi='containing'``. - The test event is in the nth interval of the reference sequence, and ``reference_ioi='preceding'`` and a ``window_size`` is larger than n. Examples -------- >>> from thebeat.core import Sequence >>> reference = Sequence.from_onsets([0, 250, 1000]) >>> test = Sequence.from_onsets([250, 1250]) >>> get_phase_differences(test, reference, reference_ioi='preceding', unit='fraction') array([0. , 0.33333333]) Parameters ---------- test_events A sequence or a single time point to be compared with the reference sequence. Can either be a single event time, or a Sequence, list, or NumPy array containing multiple events. reference_sequence The reference sequence. Can be a Sequence object, a list or array of Sequence objects, or a number. In the latter case, the reference sequence will be an isochronous sequence with a constant IOI of that number and the same length as ``sequence_1``. reference_ioi The IOI in the reference sequence that is used as the reference IOI. Can be either "containing" or "preceding". window_size The window size used for calculating the mean reference IOI. Only used if ``reference_ioi='preceding'``. circular_unit The unit of the circular unit. Can be "degrees" (the default), "radians", or "fraction". modulo Return the phase differences modulo 360 degrees or not. Only has an effect if ``reference_ioi='preceding'``. Returns ------- :class:`numpy.ndarray` An array containing the phase differences if a Sequence, list, or NumPy array was passed. :class:`float` The phase difference if a single event time was passed. """ # Input validation and processing if isinstance(test_events, thebeat.Sequence): test_onsets = test_events.onsets elif isinstance(test_events, (list, np.ndarray)): test_onsets = np.array(test_events) elif isinstance(test_events, numbers.Real): test_onsets = np.array([test_events]) else: raise TypeError("test_events must be a Sequence, list, NumPy array, or float.") if not isinstance(reference_sequence, thebeat.Sequence): raise TypeError("reference_sequence must be a Sequence object.") if reference_ioi not in ("containing", "preceding"): raise ValueError("reference_ioi must be either 'containing' or 'preceding'.") if reference_ioi == "containing" and window_size is not None: raise ValueError("window_size cannot be used with reference_ioi='containing'.") elif reference_ioi == "preceding" and window_size is None: window_size = 1 elif reference_ioi == "preceding" and window_size <= 0: raise ValueError("window_size must be a positive integer.") if unit not in ("degrees", "radians", "fraction"): raise ValueError("unit must be either 'degrees', 'radians' or 'fraction'") reference_onsets = reference_sequence.onsets reference_end = reference_sequence.onsets[0] + reference_sequence.duration reference_iois = reference_sequence.iois phase_diffs = [] for test_onset in test_onsets: if test_onset >= reference_end: containing_ioi_index = len(reference_iois) elif test_onset < reference_onsets[0]: containing_ioi_index = -1 else: containing_ioi_index = np.flatnonzero(test_onset >= reference_onsets)[-1] if reference_ioi == "containing": if not 0 <= containing_ioi_index < len(reference_iois): phase_diff = np.nan else: reference_ioi_ = reference_iois[containing_ioi_index] phase_diff = (test_onset - reference_onsets[containing_ioi_index]) / reference_ioi_ else: assert reference_ioi == "preceding" if containing_ioi_index < window_size: phase_diff = np.nan else: mean_reference_ioi = np.mean(reference_iois[containing_ioi_index - window_size:containing_ioi_index]) reference_onset = reference_onsets[containing_ioi_index] if containing_ioi_index < len(reference_onsets) else reference_end phase_diff = (test_onset - reference_onset) / mean_reference_ioi phase_diffs.append(phase_diff) phase_diffs = np.array(phase_diffs, dtype=np.float64) if modulo: phase_diffs = np.fmod(phase_diffs, 1) if unit == "degrees": return phase_diffs * 360 elif unit == "radians": return phase_diffs * 2 * np.pi if isinstance(test_events, numbers.Real): return float(phase_diffs[0]) else: return phase_diffs
[docs] def get_rhythmic_entropy( sequence: thebeat.core.Sequence | thebeat.music.Rhythm, smallest_unit: float ): """ Calculate Shannon entropy from bins. This is a measure of rhythmic complexity. If many different 'note durations' are present, entropy is high. If only a few are present, entropy is low. A sequence that is completely isochronous has a Shannon entropy of 0. The smallest_unit determines the size of the bins/the underlying grid. In musical terms, this for instance represents a 1/16th note. Bins will then be made such that each bin has a width of one 1/16th note. A 1/4th note will then be contained in one of those bins. Sequence needs to be quantized to multiples of 'smallest_unit'. If needed, quantize the Sequence first, e.g. using 'Sequence.quantize_iois'. Parameters ---------- sequence The :py:class:`thebeat.core.Sequence` object for which Shannon entropy is calculated. smallest_unit The size of the bins/the underlying grid. Example ------- >>> seq = thebeat.Sequence.generate_isochronous(n_events=10, ioi=500) >>> print(get_rhythmic_entropy(seq, smallest_unit=250)) 0.0 """ if not isinstance(sequence, (thebeat.core.Sequence, thebeat.music.Rhythm)): raise TypeError("sequence must be a Sequence or Rhythm object") if np.any(sequence.iois % smallest_unit != 0): raise ValueError( f"Sequence needs to be quantized to multiples of {smallest_unit}." "If needed, quantize the Sequence first e.g. using 'Sequence.quantize_iois'." ) bins = ( np.arange(0, np.max(sequence.iois) + 2 * smallest_unit, smallest_unit) - smallest_unit / 2 ) # shift bins to center bin_counts = np.histogram(sequence.iois, bins=bins)[0] return scipy.stats.entropy(bin_counts)
[docs] def get_cov(sequence: thebeat.core.Sequence) -> np.float64: """ Calculate the coefficient of variantion of the inter-onset intervals (IOIS) in a :py:class:`thebeat.core.Sequence` object. Parameters ---------- sequence A :py:class:`thebeat.core.Sequence` object. Returns ------- float The covariance of the sequence. """ return np.float64(np.std(sequence.iois) / np.mean(sequence.iois))
[docs] def get_npvi(sequence: thebeat.core.Sequence) -> np.float64: """ This function calculates the normalized pairwise variability index (nPVI) for a provided :py:class:`Sequence` or :py:class:`SoundSequence` object, or for an interable of inter-onset intervals (IOIs). Parameters ---------- sequence Either a :py:class:`Sequence` or :py:class:`SoundSequence` object, or an iterable containing inter-onset intervals (IOIs). Returns ------- :class:`numpy.float64` The nPVI for the provided sequence. Notes ----- The normalied pairwise variability index (nPVI) is a measure of the variability of adjacent temporal intervals. The nPVI is zero for sequences that are perfectly isochronous. See :cite:t:`jadoulSeekingTemporalPredictability2016` and :cite:t:`ravignaniMeasuringRhythmicComplexity2017` for more information on its use in rhythm research. Examples -------- >>> seq = thebeat.core.Sequence.generate_isochronous(n_events=10, ioi=500) >>> print(get_npvi(seq)) 0.0 >>> rng = np.random.default_rng(seed=123) >>> seq = thebeat.core.Sequence.generate_random_normal(n_events=10,mu=500,sigma=50,rng=rng) >>> print(get_npvi(seq)) 9.40657936323865 """ if isinstance(sequence, (thebeat.core.Sequence, thebeat.core.SoundSequence)): iois = sequence.iois else: iois = np.array(sequence) npvi_values = [] for i in range(1, len(iois)): diff = iois[i] - iois[i - 1] mean = np.mean([iois[i], iois[i - 1]]) npvi_values.append(np.abs(diff / mean)) npvi = (100 / (len(iois) - 1)) * np.sum(npvi_values) return np.float64(npvi)
[docs] def get_ugof_isochronous( test_sequence: thebeat.core.Sequence, reference_ioi: float, output_statistic: str = "mean" ) -> np.float64: r""" This function calculates the universal goodness of fit (``ugof``) measure. The ``ugof`` statistic quantifies how well a theoretical sequence describes a sequence at hand (the ``test_sequence``). This function can only calculate ``ugof`` using a theoretical sequence that is isochronous. The ``reference_ioi`` is the IOI of an isochronous theoretical sequence. Parameters ---------- test_sequence A :py:class:`~thebeat.core.Sequence` or :py:class:`~thebeat.core.SoundSequence` object that will be compared to ``reference_sequence``. reference_ioi A number (float or int) representing the IOI of an isochronous sequence. output_statistic Either 'mean' (the default) or 'median'. This determines whether for the individual ugof values we take the mean or the median as the output statistic. Returns ------- :class:`numpy.float64` The ugof statistic. Notes ----- This measure is described in :cite:t:`burchardtNovelIdeasFurther2021`. Please also refer to `this Github page <https://github.com/LSBurchardt/R_app_rhythm>`_ for an R implementation of the *ugof* measure. Examples -------- >>> seq = thebeat.core.Sequence.generate_isochronous(n_events=10, ioi=1000) >>> ugof = get_ugof_isochronous(seq, reference_ioi=68.21) >>> print(ugof) 0.4646435 """ # Input validation if not isinstance(test_sequence, thebeat.core.sequence.Sequence): raise TypeError("test_sequence must be a Sequence object") if not isinstance(reference_ioi, numbers.Real): raise TypeError("reference_ioi must be a number (int or float)") # Re-calculate onsets based on the sequence's IOIs, this to make sure both the test # and reference sequences start with an onset at 0 (of which the ugof will be discarded) test_onsets = np.concatenate([[0], np.cumsum(test_sequence.iois)]) reference_onsets = np.arange( start=0, stop=np.max(test_onsets) + reference_ioi + 1, step=reference_ioi ) # For each onset, get the closest theoretical beat and get the absolute difference minimal_deviations = np.min(np.abs(test_onsets[:, None] - reference_onsets), axis=1) maximal_deviation = reference_ioi / 2 # calculate ugofs ugof_values = minimal_deviations / maximal_deviation if output_statistic == "mean": return np.float32( np.mean(ugof_values[1:]) ) # discard the first value because that will always be 0 elif output_statistic == "median": return np.float32( np.median(ugof_values[1:]) ) # discard the first value because that will always be 0 else: raise ValueError("The output statistic can only be 'median' or 'mean'.")