Source code for glhmm.auxiliary

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Auxiliary functions - Gaussian Linear Hidden Markov Model
@author: Diego Vidaurre 2022
"""

import numpy as np
import sys
import scipy.special
import scipy.io
import math

from numba import njit

try:
    import cupy as cp
except:
    pass



[docs]
def slice_matrix(M,indices):
    """Slices rows of input matrix M based on indices array along axis 0.

    Parameters:
    -----------
    M : array-like of shape (n_samples, n_parcels)
        The input matrix.
    indices : array-like of shape (n_sessions, 2)
        The indices that define the sections (i.e., trials/sessions) of the data to be processed.

    Returns:
    --------
    M_sliced : array-like of shape (n_total_samples, n_parcels)
        The sliced matrix.

    """
    N = indices.shape[0]
    T = indices[:,1] - indices[:,0]
    M_sliced = np.empty((np.sum(T),M.shape[1]))
    acc = 0
    for j in range(N):
        ind_1 = np.arange(indices[j,0],indices[j,1])
        ind_2 = np.arange(0,T[j]) + acc
        acc += T[j]
        M_sliced[ind_2,:] = M[ind_1,:]
    return M_sliced




[docs]
def make_indices_from_T(T):
    """Creates indices array from trials/sessions lengths.
    
    Parameters:
    -----------
    T : array-like of shape (n_sessions,)
        Contains the lengths of each trial/session.

    Returns:
    --------
    indices : array-like of shape (n_sessions, 2)
        The start and end indices of each trial/session in the input data.

    """

    if len(T)>1: Ts = np.squeeze(T)
    else: Ts = T
    N = Ts.shape[0]
    indices = np.zeros((N,2),dtype=int)
    acc = 0
    for j in range(N):
        indices[j,0] = acc
        indices[j,1] = acc + Ts[j]
        acc += Ts[j]
    return indices




[docs]
def Gamma_indices_to_Xi_indices(indices):
    """Converts indices from Gamma array to Xi array format.

    Note Xi has 1 sample less than Gamma per trial/session (i.e., n_samples - n_sessions).

    Parameters:
    -----------
    indices : array-like of shape (n_sessions, 2)
        The start and end indices of each trial/session in the input data.

    Returns:
    --------
    indices_Xi : array-like of shape (n_sessions, 2)
        The converted indices in Xi array format.

    """

    indices_Xi = np.copy(indices)
    for j in range(indices.shape[0]):
        indices_Xi[j,0] -= j
        indices_Xi[j,1] -= (j+1)
    return indices_Xi




[docs]
def jls_extract_def():
    return 




[docs]
def approximate_Xi(Gamma,indices):
    """Approximates Xi array based on Gamma and indices.

    Parameters:
    -----------
    Gamma : array-like of shape (n_samples, n_states)
            The state probability time series.
    indices : array-like of shape (n_sessions, 2)
        The start and end indices of each trial/session in the input data.

    Returns:
    --------
    Xi : array-like of shape (n_samples - n_sessions, n_states, n_states)
        The joint probabilities of past and future states conditioned on data.

    """
    jls_extract_def()

    K = Gamma.shape[1]
    N = indices.shape[0]
    T = indices[:,1] - indices[:,0]
    Xi = np.empty((np.sum(T)-N,K,K))
    acc = 0
    for j in range(N):
        ind_1_1 = np.arange(indices[j,0],indices[j,1]-1)
        ind_1_2 = np.arange(indices[j,0]+1,indices[j,1])
        ind_2 = np.arange(0,T[j]-1) + acc
        acc += (T[j]-1)
        xi = Gamma[ind_1_1,:].T @ Gamma[ind_1_2,:]
        xi = xi / np.sum(xi)
        Xi[ind_2,:,:] = xi
    return Xi    




[docs]
@njit
def compute_alpha_beta_serial(L,Pi,P):
    """Computes alpha and beta values and scaling factors.

    Parameters:
    -----------
    L : array-like of shape (n_samples, n_states)
        The L matrix.
    Pi : array-like with shape (n_states,)
        The initial state probabilities.
    P : array-like of shape (n_states, n_states)
        The transition probabilities across states.

    Returns:
    --------
    a : array-like of shape (n_samples, n_states)
        The alpha values.
    b : array-like of shape (n_samples, n_states)
        The beta values.
    sc : array-like of shape (n_samples,)
        The scaling factors.

    """
    T,K = L.shape

    #minreal = sys.float_info.min
    #maxreal = sys.float_info.max

    a = np.zeros((T,K))
    b = np.zeros((T,K))
    sc = np.zeros(T)

    a[0,:] = Pi * L[0,:]
    sc[0] = np.sum(a[0,:])
    a[0,:] = a[0,:] / sc[0]

    for t in range(1,T):
        a[t,:] = (a[t-1,:] @ P) * L[t,:]
        sc[t] = np.sum(a[t,:])
        #if sc[t]<minreal: sc[t] = minreal
        a[t,:] = a[t,:] / sc[t]

    b[T-1,:] = np.ones((1,K)) / sc[T-1]
    for t in range(T-2,-1,-1):
        b[t,:] = ( (b[t+1,:] * L[t+1,:]) @ P.T ) / sc[t]
        #bad = b[t,:]>maxreal
        #if bad.sum()>0: b[t,bad] = maxreal

    return a,b,sc




[docs]
def compute_alpha_beta_parallel(L,Pi,P,indices_individual,gpu_acceleration):
    """Computes alpha and beta values and scaling factors.

    Parameters:
    -----------
    L : array-like of shape (n_samples, n_timeseries, n_states)
        The L matrix.
    Pi : array-like with shape (n_states,)
        The initial state probabilities.
    P : array-like of shape (n_states, n_states)
        The transition probabilities across states.
    indices_individual : array-like of shape (n_timeseries,2)
        The first and last index of the subject-specific time series
    gpu_acceleration : int
        GPU acceleration setting.

    Returns:
    --------
    a : GPU array-like of shape (n_samples, n_states)
        The alpha values.
    b : GPU array-like of shape (n_samples, n_states)
        The beta values.
    sc : GPU array-like of shape (n_samples,)
        The scaling factors.

    """
    gpu = gpu_acceleration > 0
    xp = cp if gpu else np

    T,N,K = L.shape
    #minreal = sys.float_info.min
    #maxreal = sys.float_info.max
    Pi = xp.asarray(Pi)
    if gpu_acceleration == 1:
        L = cp.asarray(L)
        P = cp.asarray(P)

    a = xp.zeros((T,N,K))
    sc = xp.zeros((T,N))

    ## Compute alpha per time point for all subjects simultaneously.
    a[0,:,:] = Pi * L[0,:,:]
    del Pi

    sc[0,:] = xp.sum(a[0,:,:],axis=1)
    a[0,:,:] = a[0,:,:] / xp.expand_dims(sc[0,:],axis=1)

    for t in range(1,T):
        a[t,:,:] = xp.einsum('ij,jk,ik->ik',a[t-1,:,:],P,L[t,:,:])
        sc[t,:] = xp.einsum('ij->i',a[t,:,:])
        #if sc[t]<minreal: sc[t] = minreal
        a[t,:,:] = a[t,:,:] / xp.expand_dims(sc[t,:],axis=1)

    ## bottom-align L and the scaling matrix to estiamte beta.
    L_ = roll_by_vector(L,T-indices_individual[:,1],axis=0,gpu_enabled=gpu)
    del L

    sc_ = roll_by_vector(sc,T-indices_individual[:,1],axis=0,gpu_enabled=gpu)
    b_ = xp.zeros((T,N,K))

    b_[T-1,:,:] = xp.ones((1,N,K)) / xp.expand_dims(sc_[T-1,:],axis=1)
    for t in range(T-2,-1,-1):
        b_[t,:,:] = xp.einsum('ij,ij,kj->ik',b_[t+1,:,:],L_[t+1,:,:],P) / xp.expand_dims(sc_[t,:],axis=1)
        #bad = b[t,:]>maxreal
        #if bad.sum()>0: b[t,bad] = maxreal
    del P, sc_, L_

    ## top-align beta for output.
    b = roll_by_vector(b_,indices_individual[:,1],axis=0,gpu_enabled=gpu)
    del b_

    if gpu:
        sc = cp.asnumpy(sc)
        if gpu_acceleration == 1:
            a = cp.asnumpy(a)
            b = cp.asnumpy(b)

    return a,b,sc




[docs]
def compute_qstar_serial(L,Pi,P):
    """Compute the most probable state sequence.

    Parameters:
    -----------
    L : array-like of shape (n_samples, n_states)
        The L matrix.
    Pi : array-like with shape (n_states,)
        The initial state probabilities.
    P : array-like of shape (n_states, n_states)
        The transition probabilities across states.

    Returns:
    --------
    qstar : array-like of shape (n_samples, n_states)
        The most probable state sequence.

    """
    T,K = L.shape

    delta = np.zeros((T,K))
    psi = np.zeros((T,K)).astype(np.int64)
    qstar = np.zeros((T,K))

    delta[0,:] = Pi * L[0,:]
    delta[0,:] = delta[0,:] / np.sum(delta[0,:])

    for t in range(1,T):
        v = delta[t-1,:] * P.T
        psi[t,:] = np.argmax(v,axis=1)
        delta[t,:] = v[range(K),psi[t,:]] * L[t,:]
        delta[t,:] = delta[t,:] / np.sum(delta[t,:])

    """for t in range(1,T):
        for k in range(K):
            v = delta[t-1,:] * P[:,k]
            mv = np.amax(v)
            delta[t,k] = mv * L[t,k]
            psi[t,k] = np.where(mv==v)[0][0]
        delta[t,:] = delta[t,:] / np.sum(delta[t,:]) """

    id = np.where(delta[-1,:]==np.amax(delta[-1,:]))[0][0]
    qstar[T-1,id] = 1

    for t in range(T-2,-1,-1):
        id0 = np.where(qstar[t+1,:]==1)[0][0]
        id = psi[t+1,id0]
        qstar[t,id] = 1

    return qstar



[docs]
def compute_qstar_parallel(L,Pi,P,indices_individual):
    """Compute the most probable state sequence.

    Parameters:
    -----------
    L : array-like of shape (n_samples, n_states)
        The L matrix.
    Pi : array-like with shape (n_states,)
        The initial state probabilities.
    P : array-like of shape (n_states, n_states)
        The transition probabilities across states.

    Returns:
    --------
    qstar : array-like of shape (n_samples, n_states)
        The most probable state sequence.

    """
    T,N,K = L.shape

    delta = np.zeros((T,N,K))
    psi = np.zeros((T,N,K)).astype(np.int64)
    qstar = np.zeros((T,N,K))

    delta[0,:,:] = Pi * L[0,:,:]
    delta[0,:,:] = delta[0,:,:] / np.expand_dims(np.sum(delta[0,:,:],axis=1),axis=1)

    for t in range(1,T):
        v = np.expand_dims(delta[t-1,:,:],axis=1) * P.T
        psi[t,:,:] = np.argmax(v,axis=2)
        delta[t,:,:] = v[np.expand_dims(range(N),axis=1),np.expand_dims(range(K),axis=0),psi[t,:,:]] * L[t,:,:]
        delta[t,:,:] = delta[t,:,:] / np.expand_dims(np.sum(delta[t,:,:],axis=1),axis=1)

    delta_ = roll_by_vector(delta,T-indices_individual[:,1],axis=0)

    id = np.argmax(delta_[-1,:,:],axis=1)

    qstar[T-1,range(N),id] = 1

    for t in range(T-2,-1,-1):
        id0 = np.argmax(qstar[t+1,:,:]==1,axis=1)
        id = psi[t+1,range(N),id0]
        qstar[t,range(N),id] = 1

    return qstar


## Mathematical functions related to the free energy


[docs]
def gauss1d_kl(mu_q, sigma_q, mu_p, sigma_p):

    """Computes the KL divergence between two univariate Gaussian distributions.

    Parameters:
    -----------
    mu_q : float of shape (n_parcels,)
        The mean of the first Gaussian distribution.
    sigma_q : float of shape (n_parcels, n_parcels)
        The variance of the first Gaussian distribution.
    mu_p : float of shape (n_parcels,)
        The mean of the second Gaussian distribution.
    sigma_p : float of shape (n_parcels, n_parcels)
        The variance of the second Gaussian distribution.

    Returns:
    --------
    D : float
        The KL divergence between the two Gaussian distributions.

    """
    D = 0.5 * math.log(sigma_p/sigma_q) + \
        0.5 * ((mu_q - mu_p)**2) / sigma_p + \
        0.5 * sigma_q / sigma_p
    return D




[docs]
def gauss_kl(mu_q, sigma_q, mu_p, sigma_p):
    """Computes the KL divergence between two multivariate Gaussian distributions.

    Parameters:
    -----------
    mu_q : float of shape (n_parcels,)
        The mean of the first Gaussian distribution.
    sigma_q : float of shape (n_parcels, n_parcels)
        The variance of the first Gaussian distribution.
    mu_p : float of shape (n_parcels,)
        The mean of the second Gaussian distribution.
    sigma_p : float of shape (n_parcels, n_parcels)
        The variance of the second Gaussian distribution.

    Returns:
    --------
    D : float
        The KL divergence between the two Gaussian distributions.

    """

    if len(mu_q) == 1:
        D = gauss1d_kl(mu_q[0], sigma_q[0,0], mu_p[0], sigma_p[0,0])
    else:
        N = mu_q.shape[0]
        isigma_p = np.linalg.inv(sigma_p)
        (sign, logdet) = np.linalg.slogdet(sigma_p)
        t1 = sign * logdet
        (sign, logdet) = np.linalg.slogdet(sigma_q)
        t2 = sign * logdet        
        d = mu_q - mu_p
        D = 0.5 * (t1 - t2 - N + np.trace(isigma_p @ sigma_q) + ((d.T @ isigma_p) @ d))
    return D




[docs]
def gamma_kl(shape_q,rate_q,shape_p,rate_p):
    """Computes the Kullback-Leibler divergence between two Gamma distributions with shape and rate parameters.

    The Kullback-Leibler divergence is a measure of how different two probability distributions are.

    This implementation follows the formula presented here (https://statproofbook.github.io/P/gam-kl) from the book "KL-Divergences of Normal, Gamma, Dirichlet and Wishart densities" by Penny, William D. in 2001.

    Parameters:
    -----------
    shape_q : float or numpy.ndarray
        The shape parameter of the first Gamma distribution.
    rate_q : float or numpy.ndarray
        The rate parameter of the first Gamma distribution.
    shape_p : float or numpy.ndarray
        The shape parameter of the second Gamma distribution.
    rate_p : float or numpy.ndarray
        The rate parameter of the second Gamma distribution.

    Returns:
    --------
    D : float or numpy.ndarray
        The Kullback-Leibler divergence between the two Gamma distributions.

    """

    D = shape_p * np.log(rate_q / rate_p) \
        + scipy.special.gammaln(shape_p) - scipy.special.gammaln(shape_q) \
        + (shape_q - shape_p) * scipy.special.psi(shape_q) \
        - (rate_q - rate_p) * shape_q / rate_q

    return D




[docs]
def wishart_kl(shape_q,C_q,shape_p,C_p):
    """Computes the Kullback-Leibler (KL) divergence between two Wishart distributions.

    Parameters:
    -----------
    shape_q : float
        Shape parameter of the first Wishart distribution.
    C_q : ndarray of shape (n_parcels, n_parcels)
        Scale parameter of the first Wishart distribution.
    shape_p : float
        Shape parameter of the second Wishart distribution.
    C_p : ndarray of shape (n_parcels, n_parcels)
        Scale parameter of the second Wishart distribution.

    Returns:
    --------
    D : float
        KL divergence from the first to the second Wishart distribution.

    """ 

    def L(shape,C):
        N = C.shape[0]
        PsiWish_alphasum = 0
        for j in range(1,N+1): 
            PsiWish_alphasum += scipy.special.psi(0.5 * (shape + 1 - j))
        (s, logdet) = np.linalg.slogdet(C)
        ldetWishB = s * logdet
        return (PsiWish_alphasum - ldetWishB + N * math.log(2))

    def logZ(shape,C):
        N = C.shape[0]
        t1 = shape * N / 2 * math.log(2)
        (s, logdet) = np.linalg.slogdet(C)
        t2 = - (shape/2) * s * logdet
        t3 = (N*(N-1)/4) * math.log(math.pi)
        for j in range(1,N+1):  
            t3 += scipy.special.gammaln(0.5 * (shape + 1 - j))
        return (t1 + t2 + t3)
        
    N = C_q.shape[0]
    iC_q = np.linalg.inv(C_q)

    t1 = ((shape_q - N - 1)/2) * L(shape_q,C_q) - \
            ((shape_p - N - 1)/2) * L(shape_p,C_p)
    t2 = - N * shape_q / 2
    t3 = shape_q * np.trace(C_p @ iC_q) / 2
    t4 = logZ(shape_p,C_p) - logZ(shape_q,C_q)

    return (t1 + t2 + t3 + t4)




[docs]
def dirichlet_kl(alpha_q,alpha_p):
    """Computes the Kullback-Leibler divergence between two Dirichlet distributions with parameters alpha_q and alpha_p.

    Parameters:
    -----------
        alpha_q : Array of shape (n_states,)
            The concentration parameters of the first Dirichlet distribution.
        alpha_p : Array of shape (n_states,)
            The concentration parameters of the second Dirichlet distribution.

    Returns:
    --------
        float: The Kullback-Leibler divergence between the two Dirichlet distributions.

    """

    ind = (alpha_q>0) & (alpha_p>0)
    alpha_q = np.copy(alpha_q[ind])
    alpha_p = np.copy(alpha_p[ind])

    sum_alpha_q = np.sum(alpha_q)
    sum_alpha_p = np.sum(alpha_p)
    t1 = + scipy.special.gammaln(sum_alpha_q) - scipy.special.gammaln(sum_alpha_p) \
        + np.sum(scipy.special.gammaln(alpha_p)) - np.sum(scipy.special.gammaln(alpha_q))
    t2 = np.sum( (alpha_q - alpha_p) * (scipy.special.psi(alpha_q) - scipy.special.psi(sum_alpha_q)) )
    return ( t1 + t2 )




[docs]
def Gamma_entropy(Gamma,Xi,indices):
    """Computes the entropy of a Gamma distribution and a sequence of transition probabilities Xi.

    Parameters:
    -----------
        Gamma : Array-like of shape (n_samples, n_states)
            The posterior probabilities of a hidden variable.
        Xi : Array-like of shape (n_samples - n_sessions, n_states, n_states)
            The joint probability of past and future states conditioned on data.
        indices : Array-like of shape (n_sessions, 2)
            The start and end indices of each trial/session in the input data.

    Returns:
    --------
        float: The entropy of the Gamma distribution and the sequence of transition probabilities.

    """

    minreal = sys.float_info.min
    # initial point
    Gamma_0 = Gamma[indices[:,0]]
    Gamma_0[Gamma_0 < minreal] = minreal
    Entr = -np.sum(Gamma_0 * np.log(Gamma_0))
    # transitions
    Xi[Xi < minreal] = minreal
    Xi_norm = np.zeros(Xi.shape)
    for k in range(Gamma.shape[1]):
        Xi_norm[:,k,:] = Xi[:,k,:] / np.expand_dims(np.sum(Xi[:,k,:],axis=1),axis=1)
    Entr -= np.sum(Xi * np.log(Xi_norm))
    return Entr



[docs]
def get_T(idx_data):
    """
    Returns the timepoints spent for each trial/session based on the given indices.
    We want to get the variable "T" when we are using the function padGamma
                       
    Parameters:
    --------------
        idx_data (numpy.ndarray): The indices that mark the timepoints for when each trial/session starts and ends.
        It should be a 2D array where each row represents the start and end index for a trial.
        Example: idx_data = np.array([[0, 150], [150, 300], [300, 500]])

    Returns:
    --------------
        T (numpy.ndarray): An array containing the timepoints spent for each trial/session.
        For example, given idx_data = np.array([[0, 150], [150, 300], [300, 500]]),
        the function would return T = np.array([150, 150, 200]).
    """
    T = np.diff(idx_data)  # Calculate the difference between consecutive indices to get timepoints spent
    return T



[docs]
def padGamma(Gamma, T, options):
    """
    Adjusts the state time courses to have the same size as the data time series.

    Parameters:
    --------------
        Gamma (numpy.ndarray): The state time courses.
        T (numpy.ndarray): Timepoints spent for each trial/session.
        options (dict): Dictionary containing various options.
        - 'embeddedlags' (list): Array of lagging times if 'embeddedlags' is specified.
        - 'order' (int): Integer value if 'order' is specified.

    Returns:
    --------------
        Gamma (numpy.ndarray): Adjusted state time courses.
    """
    do_chop = 0

    # Check if 'embeddedlags' is in options and has more than one value
    if 'embeddedlags' in options and isinstance(options['embeddedlags'], list) and len(options['embeddedlags']) > 1:
        d = [-min(options['embeddedlags']), max(options['embeddedlags'])]  # Define d based on 'embeddedlags'
        do_chop = 1
    # Check if 'order' is in options and its value is greater than 1
    elif 'order' in options and isinstance(options['order'], int) and options['order'] > 1:
        d = [options['order'], 0]  # Define d based on 'order'
        do_chop = 1

    if not do_chop:
        return Gamma  # If no chopping is needed, return the original Gamma

    if isinstance(T, list):
        if len(T) == 1:
            T = np.transpose(T)  # Transpose T if it is a single-row list
        T = np.array(T)  # Convert T to a numpy array if it is a list

    K = Gamma.shape[1]  # Number of columns in Gamma
    #offset = sum(d)  # Calculate the offset based on d
    offset = len(options['embeddedlags'])-1 # Calculate the offset
    N = len(T)  # Number of trials/sessions
    Tshifted = T - offset  # Shift timepoints based on offset

    if do_chop:
        Gamma_orig = Gamma.copy()  # Create a copy of the original Gamma
        Gamma = np.zeros((int(sum(T)), K))  # Initialize adjusted Gamma with zeros

        # Iterate over trials/sessions
        for j in range(N):
            t1 = np.arange(1, Tshifted[j] + 1) + sum(Tshifted[:j])  # Create indices for the shifted timepoints
            t2 = np.arange(1, T[j] + 1) + sum(T[:j])  # Create indices for the original timepoints
            mg = np.mean(Gamma_orig[t1 - 1, :], axis=0)  # Calculate the mean of Gamma within the shifted indices
            # Concatenate the chopped Gamma for the current trial/session
            Gamma[t2 - 1, :] = np.vstack([np.tile(mg, (d[0], 1)), Gamma_orig[t1 - 1, :], np.tile(mg, (d[1], 1))])

    return Gamma  # Return the adjusted Gamma



[docs]
def roll_by_vector(arr, shifts, axis=1,gpu_enabled=False):
    """Apply an independent roll for each dimensions of a single axis.

    Parameters
    ----------
    arr : np.ndarray
        Array of any shape.

    shifts : np.ndarray
        How many shifting to use for each dimension. Shape: `(arr.shape[axis],)`.

    axis : int
        Axis along which elements are shifted. 

    gpu_enabled : bool
        Whether input arrays are loaded in the GPU or not.
    """
    xp = cp if gpu_enabled else np

    shifts = xp.asarray(shifts)

    arr = xp.swapaxes(arr,axis,-1)
    all_idcs = xp.ogrid[[slice(0,n) for n in arr.shape]]

    # Convert to a positive shift
    all_idcs[-1] = all_idcs[-1] - xp.expand_dims(shifts,axis=1)

    result = arr[tuple(all_idcs)]
    arr = xp.swapaxes(result,-1,axis)
    return arr