logo

pyleida.data_utils

The module 'pyleida.data_utils' provides generic functions to manipulate/handle data, and load the necessary files to run the 'Leida' class

Expand source code
"""The module 'pyleida.data_utils' provides generic
functions to manipulate/handle data, and load the 
neccesary files to run the 'Leida' class"""

from ._data_utils import (
    load_tseries,
    load_classes,
    load_rois_coordinates,
    load_rois_labels,
    load_dictionary,
    save_dictionary,
    load_model,
    array2dict,
    list2txt,
    txt2list,
)

__all__ = [
    "load_tseries",
    "load_classes",
    "load_rois_coordinates",
    "load_rois_labels",
    "load_dictionary",
    "save_dictionary",
    "load_model",
    "array2dict",
    "list2txt",
    "txt2list"
]

Functions

def array2dict(array, subject_ids)

Convert a 3D array with shape (N_ROIs,N_volumes,N_subjects) to a dictionary representation.

Params:

array : ndarray with shape (N_ROIs,N_volumes,N_subjects). Contains the BOLD time series of a group of subjects.

subject_ids : list or array. Contain the subjects' ids (in the same order that they appear in the array with the signals).

Returns:

dict_data : dict. Contains the subjects ids as keys, and the BOLD time series as values.

Expand source code
def array2dict(array,subject_ids):
    """
    Convert a 3D array with shape 
    (N_ROIs,N_volumes,N_subjects)
    to a dictionary representation.

    Params:
    -------
    array : ndarray with shape (N_ROIs,N_volumes,N_subjects). 
        Contains the BOLD time series of a group of
        subjects.

    subject_ids : list or array. 
        Contain the subjects' ids (in the same order
        that they appear in the array with the signals).
    
    Returns:
    --------
    dict_data : dict. 
        Contains the subjects ids as keys, and the
        BOLD time series as values.
    """
    # Each subject's 2D slice (N_ROIs, N_volumes) is keyed by its id;
    # the position in 'subject_ids' selects the slice along the 3rd axis.
    return {
        subject: array[:, :, idx]
        for idx, subject in enumerate(subject_ids)
    }
def list2txt(list_, filename=None)

Save a list as a .txt file. Elements are located line by line.

Params:

list_ : list. The list to be saved on local folder.

filename : str. Specify the name of the file to be saved.

Expand source code
def list2txt(list_,filename=None):
    """
    Save a list as a .txt file.
    Elements are located line by line.

    Params:
    -------
    list_ : list.
        The list to be saved on local folder.

    filename : str.
        Specify the name (without the '.txt'
        extension) of the file to be saved.
        Defaults to 'txt_from_list'.
    """
    # BUGFIX: the filename argument was previously ignored (a garbled
    # placeholder path was used); '.txt' is appended to the given name.
    filename = 'txt_from_list.txt' if filename is None else f'{filename}.txt'
    with open(filename, 'w') as f:
        for line in list_:
            f.write(line)
            f.write('\n')
def load_classes(path)

Load the .csv or .pkl 'metadata' file that contains the label/s specifying the group/condition to which each subject or volume belongs to and return it as a dictionary.

Params:

path : str. Path to local folder where the 'metadata' file is located.

Returns:

classes : dict Dictionary with 'subject_ids' as keys and the labels as values.

Expand source code
def load_classes(path):
    """
    Load the .csv or .pkl 'metadata' file that contains
    the label/s specifying the group/condition to which
    each subject or volume belongs to and return it as a
    dictionary.

    Params:
    -------
    path : str.
        Path to local folder where the
        'metadata' file is located.
    
    Returns:
    --------
    classes : dict
        Dictionary with 'subject_ids' as keys
        and the labels as values.

    Raises:
    -------
    Exception
        If neither 'metadata.pkl' nor a valid
        'metadata.csv' could be loaded from 'path'.
    """ 
    try:
        try:
            # Preferred format: a pickled dict {subject_id: labels}.
            classes = load_dictionary(f'{path}/metadata.pkl')
        except Exception:
            # Fallback: build the dict from 'metadata.csv'.
            metadata = pd.read_csv(f'{path}/metadata.csv',sep=',')
            cols = ['subject_id','condition']
            # BUGFIX: check that every REQUIRED column is present in the
            # file (the old check instead rejected any extra column and
            # let a file missing 'condition' slip through), and make the
            # message a real f-string (the 'f' was inside the quotes).
            if not all(col in metadata.columns for col in cols):
                raise Exception(f"{cols} columns must be present in 'metadata.csv'!")
            classes = {}
            for sub in np.unique(metadata.subject_id):
                classes[sub] = [label for label in metadata[metadata.subject_id==sub].condition]
        return classes
    except Exception as err:
        # Chain the original cause so the real failure is visible.
        raise Exception("The groups/conditions labels coudn't be loaded.") from err
def load_dictionary(filepath)

Load dictionary from pickle (.pkl) file in local folder.

Params:

filepath : str. Specify the path to the pickle file to be loaded.

Returns:

dict_ : dict. Loaded dictionary.

Expand source code
def load_dictionary(filepath):
    """
    Load dictionary from pickle (.pkl)
    file in local folder.
    
    Params:
    --------
    filepath : str.
        Specify the path to the pickle
        file to be loaded.

    Returns:
    --------
    dict_ : dict.
        Loaded dictionary.
    """
    # Binary mode is required by the pickle protocol.
    with open(filepath, 'rb') as fh:
        return pickle.load(fh)
def load_model(k=2, models_path=None)

Load model from .pkl file.

Params:

k : int. Select the model to load.

models_path : str. Path to the folder that contains the saved models for each k partition.

Returns:

model : KMeansLeida object. The fitted model that was used to predict the cluster labels of each observation

Expand source code
def load_model(k=2,models_path=None):
    """
    Load model from .pkl file.
    
    Params:
    -------
    k : int.
        Select the model to load.

    models_path : str.
        Path to the folder that contains the
        saved models for each k partition.
    
    Returns:
    --------
    model : KMeansLeida object.
        The fitted model that was used to
        predict the cluster labels of each
        observation. Returns None (after printing
        a message) if 'models_path' is not provided.
    """
    if models_path is None:
        # Kept as a printed message + implicit None return (not an
        # exception) for backward compatibility with existing callers.
        print("You must provide a path to the models folder.")
    else:
        try:
            model = pd.read_pickle(f'{models_path}/model_k_{k}.pkl')
            return model
        except Exception as err:
            # NOTE: raising Warning (not Exception) is preserved for
            # backward compatibility; the cause is chained for debugging.
            raise Warning("The model couldn't be loaded. Check that the '.pkl' "
                        "file is located in the provided 'path'.") from err
def load_rois_coordinates(path)

Load the .csv file that contains the ROIs coordinates in MNI space and return it as a numpy array (N_ROIs,3)

Params:

path : str Path to the local folder in which the file is located.

Returns:

coords : ndarray with shape (n_rois,3) | None Returns a 2D array with the coordinates of each ROI/parcel if the file was successfully loaded. Otherwise returns None.

Expand source code
def load_rois_coordinates(path):
    """
    Load the .csv file that contains
    the ROIs coordinates in MNI space
    and return it as a numpy array (N_ROIs,3)

    Params:
    -------
    path : str
        Path to the local folder
        in which the file is located.

    Returns:
    --------
    coords : ndarray with shape (n_rois,3) | None
        Returns a 2D array with the coordinates of each
        ROI/parcel if the file was successfully loaded. 
        Otherwise returns None.
    """ 
    try:
        coords = pd.read_csv(f'{path}/rois_coordinates.csv',sep=',').values
    except Exception:
        # Coordinates are optional: a missing/unreadable file fails softly.
        # (Previously a bare 'except:' wrapped the whole body.)
        return None
    if coords.shape[1] != 3:
        # Wrong number of columns: warn and fall back to None.
        print(f"Warning: the provided file with coordinates contain {coords.shape[1]} columns. "
            "Remember that this file must have a shape of (N_rois,3).")
        return None
    return coords
def load_rois_labels(path)

Load the .txt file that contains the labels of each ROI and return it as a list.

Params:

path : str Path to the local folder in which the file is located.

Returns:

labels : list Returns a list with the ROIs labels.

Expand source code
def load_rois_labels(path):
    """
    Load the .txt file that contains
    the labels of each ROI and return
    it as a list.

    Params:
    -------
    path : str
        Path to the local folder
        in which the file is located.

    Returns:
    --------
    labels : list
        Returns a list with the ROIs labels.

    Raises:
    -------
    Exception
        If 'rois_labels.txt' could not be read
        from the provided path.
    """ 
    try:
        # One label per line, in ROI order.
        return txt2list(f'{path}/rois_labels.txt')
    except Exception as err:
        # Narrowed from a bare 'except:'; chain the original cause.
        raise Exception("The ROIs' labels couldn't be loaded "
                        "from the provided path.") from err
def load_tseries(path)

Load the time series of each subject that are contained either in a .pkl file in 'path', or in individual .csv files in '{path}/time_series/{subject_id}/'.

Params:

path : str Path to the local folder in which the file/s is/are located.

Returns:

tseries : dict Returns a dictionary with 'subjects_ids' as keys and BOLD time series as values.

Expand source code
def load_tseries(path):
    """
    Load the time series of each subject that are contained
    either in a .pkl file in 'path', or in individual
    .csv files in '{path}/time_series/{subject_id}/'.

    Params:
    -------
    path : str
        Path to the local folder in which
        the file/s is/are located.

    Returns:
    --------
    tseries : dict
        Returns a dictionary with 'subjects_ids' as keys
        and BOLD time series as values.

    Raises:
    -------
    Exception
        If neither the .pkl file nor the per-subject
        .csv files could be loaded.
    """ 
    try:
        try:
            # Preferred format: a single pickled dict with all subjects.
            tseries = load_dictionary(f'{path}/time_series.pkl')
        except Exception:
            # Fallback: one sub-folder per subject, each holding a .csv
            # file with that subject's signals (no header row).
            signals_path = f'{path}/time_series'
            sub_folders = sorted(
                f for f in os.listdir(signals_path)
                if os.path.isdir(f'{signals_path}/{f}')
            )

            tseries = {}

            for sub in sub_folders:
                for file in os.listdir(f'{signals_path}/{sub}'):
                    if file.endswith('.csv'):
                        # NOTE: if several .csv files exist in a subject's
                        # folder, the last one (in listdir order) wins —
                        # assumes exactly one .csv per subject.
                        tseries[sub] = np.array(
                                            pd.read_csv(
                                                f'{signals_path}/{sub}/{file}',
                                                sep=',',
                                                header=None
                                                )
                                            )
            
        return tseries
    except Exception as err:
        # Narrowed from a bare 'except:'; chain the original cause.
        raise Exception("The time series couldn't be loaded.") from err
def save_dictionary(filename, dictionary)

Save dictionary in local folder as a pickle (.pkl) file.

Params:

filename : str. Specify the name (and optionally the path) of the pickle file to be saved.

Expand source code
def save_dictionary(filename,dictionary):
    """
    Save dictionary in local folder
    as a pickle (.pkl) file.
    
    Params:
    --------
    filename : str.
        Specify the name (and optionally the
        path) of the pickle file to be saved;
        the '.pkl' extension is appended.

    dictionary : dict.
        The dictionary to be pickled.
    """
    # BUGFIX: the filename argument was previously ignored (a garbled
    # placeholder path was used); '.pkl' is appended to the given name.
    with open(f'{filename}.pkl', 'wb') as file:
        pickle.dump(dictionary, file)
def txt2list(path)

Load a .txt file as a list. Note: the .txt file must contain an entry/value per line/row.

Params:

path : str. Full path to the .txt file of interest. E.g.: 'data/rois_labels.txt'

Returns:

list_ : list.

Expand source code
def txt2list(path):
    """
    Load a .txt file as a list.
    Note: the .txt file must contain
    an entry/value per line/row.

    Params:
    -------
    path : str.
        Full path to the .txt file of
        interest. E.g.: 'data/rois_labels.txt'

    Returns:
    --------
    list_ : list.
        One whitespace-stripped string per
        line of the file.
    """
    with open(path,'r') as fh:
        contents = fh.read().splitlines()
    # Strip surrounding whitespace from every entry.
    return [entry.strip() for entry in contents]