Source code for leaspy.io.outputs.result

import copy
import json
import os
import warnings
from collections.abc import Iterable
from typing import Union

import pandas as pd
import torch

from leaspy.exceptions import (
    LeaspyIndividualParamsInputError,
    LeaspyInputError,
    LeaspyTypeError,
)
from leaspy.utils.typing import DictParamsTorch, IDType, ParamType

from ..data import Data, Dataset

__all__ = ["Result"]


class Result:
    """
    Result object class.
    Used as logs by personalize algorithms & simulation algorithm.

    Parameters
    ----------
    data : :class:`.Data`
        Object containing the information of the individuals, in particular the time-points
        :math:`(t_{i,j})` and the observations :math:`(y_{i,j})`.
    individual_parameters : :obj:`dict` [:obj:`str`, :class:`torch.Tensor`]
        Contains the log-acceleration 'xi', the time-shifts 'tau' & the 'sources'.
    noise_std : :obj:`float` or :class:`torch.FloatTensor`, optional (default None)
        Desired noise standard deviation level.

    Attributes
    ----------
    data : :class:`.Data`
        Object containing the information of the individuals, in particular the time-points
        :math:`(t_{i,j})` and the observations :math:`(y_{i,j})`.
    individual_parameters : :obj:`dict` [:obj:`str`, :class:`torch.Tensor`]
        Contains the log-acceleration 'xi', the time-shifts 'tau' & the 'sources'
        (dictionary of `torch.Tensor`).
    ID_to_idx : :obj:`dict`
        The keys are the individual IDs & the values are their respective ordered positions
        in the data file given by the user. This order remains the same during the computation.
        Example - in ``Result.individual_parameters['xi']``, the first element corresponds
        to the first patient in ``ID_to_idx``.
    noise_std : :obj:`float` or :class:`torch.FloatTensor`
        Desired noise standard deviation level.
    """

    # TODO: check consistency and ordering of subjects' IDs between Data and individual parameters I/O

    def __init__(
        self, data: Data, individual_parameters: DictParamsTorch, noise_std=None
    ):
        self.data = data
        self.individual_parameters = individual_parameters
        self.ID_to_idx: dict[IDType, int] = {
            key: i for i, key in enumerate(data.individuals)
        }
        self.noise_std = noise_std
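    # Illustrative sketch (not part of the original API): how `ID_to_idx` lines up with the
    # row order of each tensor in `individual_parameters`, assuming a hypothetical `Result`
    # instance named `results` built from three subjects 'sub-01', 'sub-02', 'sub-03':
    #
    #     >>> results.ID_to_idx
    #     {'sub-01': 0, 'sub-02': 1, 'sub-03': 2}
    #     >>> results.individual_parameters['xi'][results.ID_to_idx['sub-02']]
    #     tensor([-0.6921])  # row 1 of 'xi' belongs to 'sub-02'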
    # TODO: this method is used only once in plotting => delete it?
    def get_torch_individual_parameters(
        self, ID: Union[IDType, list[IDType]] = None
    ) -> DictParamsTorch:
        """
        Getter function for the individual parameters.

        Parameters
        ----------
        ID : :obj:`str` or :obj:`list` [:obj:`str`], optional (default None)
            Contains the identifiers of the wanted subjects.

        Returns
        -------
        :obj:`dict` [:obj:`str`, :class:`torch.Tensor`]
            Contains the individual parameters.
        """
        if ID is not None:
            if not isinstance(ID, list):
                if isinstance(ID, str) or not isinstance(ID, Iterable):
                    # If ID is not an Iterable (e.g. a single int) => convert it into a list
                    # If ID is a str => convert it into a list
                    ID = [ID]
                else:
                    raise LeaspyIndividualParamsInputError(
                        "Input argument 'ID' must be a single identifier or a list of identifiers!"
                    )
            list_idt = [self.ID_to_idx[id_patient] for id_patient in ID]
            ind_parameters = {
                key: value[list_idt]
                for key, value in self.individual_parameters.items()
            }
        else:
            ind_parameters = self.individual_parameters.copy()
        return ind_parameters
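    # Illustrative sketch (assumed data, not from the library's docs): restricting the getter
    # to a subset of subjects returns tensors whose rows follow the order of the `ID` argument:
    #
    #     >>> params = results.get_torch_individual_parameters(['sub-03', 'sub-01'])
    #     >>> params['tau'].shape
    #     torch.Size([2, 1])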
    # TODO: unit test & functional test
    def get_dataframe_individual_parameters(
        self, cofactors: Union[str, list[str]] = None
    ) -> pd.DataFrame:
        """
        Return the dataframe of the individual parameters.

        Each row corresponds to a subject. The columns correspond (in this order) to the subjects' ID,
        the individual parameters (one column per individual parameter) & the cofactors
        (one column per cofactor).

        Parameters
        ----------
        cofactors : :obj:`str` or :obj:`list` [:obj:`str`], optional (default None)
            Contains the cofactor(s) to join to the logs dataframe.

        Returns
        -------
        :class:`pandas.DataFrame`
            Contains for each patient his ID & his individual parameters
            (and optionally his cofactors' states).

        Notes
        -----
        The cofactors must be present in the leaspy Data object stored in the ``.data``
        attribute of the Result instance. See the example.

        Examples
        --------
        Load a longitudinal multivariate dataset & the subjects' cofactors. Compute the individual
        parameters for this dataset & get the corresponding dataframe with the genetic APOE4 cofactor.

        >>> import pandas as pd
        >>> from leaspy.api import Leaspy
        >>> from leaspy.algo import AlgorithmSettings
        >>> from leaspy.io.data import Data
        >>> leaspy_logistic = Leaspy('logistic')
        >>> data = Data.from_csv_file('data/my_leaspy_data.csv')  # replace with your own path!
        >>> genes_cofactors = pd.read_csv('data/genes_cofactors.csv')  # replace with your own path!
        >>> print(genes_cofactors.head())
                   ID  APOE4
        0  sub-HS0102      1
        1  sub-HS0112      0
        2  sub-HS0113      0
        3  sub-HS0114      1
        4  sub-HS0115      0
        >>> data.load_cofactors(genes_cofactors, ['APOE4'])
        >>> model_settings = AlgorithmSettings('mcmc_saem', seed=0)
        >>> personalize_settings = AlgorithmSettings('mode_real', seed=0)
        >>> leaspy_logistic.fit(data, model_settings)
        >>> individual_results = leaspy_logistic.personalize(data, personalize_settings)
        >>> individual_results_df = individual_results.get_dataframe_individual_parameters('APOE4')
        >>> print(individual_results_df.head())
                          tau        xi  sources_0  sources_1  APOE4
        ID
        sub-HS0102  70.329201  0.120465   5.969921  -0.245034      1
        sub-HS0112  95.156624 -0.692099   1.520273   3.477707      0
        sub-HS0113  74.900673 -1.769864  -1.222979   1.665889      0
        sub-HS0114  81.792763 -1.003620   1.021321   2.371716      1
        sub-HS0115  89.724648 -0.820971  -0.480975   0.741601      0
        """
        # Initialize the patient dict with the IDs
        patient_dict = {"ID": list(self.ID_to_idx.keys())}
        # For each individual variable
        for variable_ind in list(self.individual_parameters.keys()):
            # Case tau / xi --> unidimensional
            if self.individual_parameters[variable_ind].shape[1] == 1:
                patient_dict[variable_ind] = (
                    self.individual_parameters[variable_ind].numpy().reshape(-1)
                )
            # Case sources --> multidimensional
            elif self.individual_parameters[variable_ind].shape[1] > 1:
                for dim in range(self.individual_parameters[variable_ind].shape[1]):
                    patient_dict[f"{variable_ind}_{dim}"] = (
                        self.individual_parameters[variable_ind][:, dim]
                        .numpy()
                        .reshape(-1)
                    )
        df_individual_parameters = pd.DataFrame(patient_dict).set_index("ID")
        # If cofactors were requested, join them too
        if cofactors is not None:
            if isinstance(cofactors, str):
                cofactors = [cofactors]
            cofactor_dict = {"ID": list(self.data.individuals.keys())}
            for cofactor in cofactors:
                cofactor_dict[cofactor] = [
                    self.data.individuals[idx].cofactors[cofactor]
                    for idx in cofactor_dict["ID"]
                ]
            df_cofactors = pd.DataFrame(cofactor_dict).set_index("ID")
            df_individual_parameters = df_individual_parameters.join(df_cofactors)
        return df_individual_parameters
    def save_individual_parameters_csv(
        self, path: str, idx: list[IDType] = None, cofactors=None, **args
    ):
        """
        Save the individual parameters in a csv format.

        Parameters
        ----------
        path : :obj:`str`
            The logs' path.
        idx : :obj:`list` [:obj:`str`], optional (default None)
            Contains the IDs of the selected subjects. If ``None``, all the subjects are selected.
        cofactors : :obj:`str` or :obj:`list` [:obj:`str`], optional (default None)
            Contains the cofactor(s) to join to the logs dataframe.
        **args
            Parameters to pass to :meth:`pandas.DataFrame.to_csv`.

        Notes
        -----
        The cofactors must be present in the leaspy Data object stored in the :attr:`.data`
        attribute of the Result instance. See the example.

        Examples
        --------
        Save the individual parameters of the first twenty subjects.

        >>> import pandas as pd
        >>> from leaspy.algo import AlgorithmSettings
        >>> from leaspy.api import Leaspy
        >>> from leaspy.io.data import Data
        >>> leaspy_logistic = Leaspy('logistic')
        >>> data = Data.from_csv_file('data/my_leaspy_data.csv')  # replace with your own path!
        >>> genes_cofactors = pd.read_csv('data/genes_cofactors.csv')  # replace with your own path!
        >>> data.load_cofactors(genes_cofactors, ['GENES'])
        >>> model_settings = AlgorithmSettings('mcmc_saem', seed=0)
        >>> personalize_settings = AlgorithmSettings('mode_real', seed=0)
        >>> leaspy_logistic.fit(data, model_settings)
        >>> individual_results = leaspy_logistic.personalize(data, personalize_settings)
        >>> output_path = 'outputs/logistic_seed0-mode_real_seed0-individual_parameter.csv'
        >>> idx = list(individual_results.data.individuals.keys())[:20]
        >>> individual_results.save_individual_parameters_csv(output_path, idx, cofactors='GENES')
        """
        self._check_folder_existence(path)
        df_individual_parameters = self.get_dataframe_individual_parameters(
            cofactors=cofactors
        )
        if idx:
            if not isinstance(idx, list):
                raise LeaspyIndividualParamsInputError(
                    "Input 'idx' must be a list, even if it contains only one element! "
                    f"You gave idx={idx} which is of type {type(idx)}."
                )
            df_individual_parameters = df_individual_parameters.loc[idx]
        df_individual_parameters.to_csv(path, index=True, **args)
    def save_individual_parameters_json(
        self, path: str, idx: list[IDType] = None, human_readable=None, **args
    ):
        """
        Save the individual parameters in a json format.

        Parameters
        ----------
        path : :obj:`str`
            The logs' path.
        idx : :obj:`list` [:obj:`str`], optional (default None)
            Contains the IDs of the selected subjects. If ``None``, all the subjects are selected.
        human_readable : Any, optional (default None)
            TODO: change this to bool.

            .. deprecated:: 1.0
                * If None (default): save as a json file.
                * If not None: call :meth:`.save_individual_parameters_torch` instead.
        **args
            Arguments to pass to :func:`json.dump`. Defaults to ``dict(indent=2)``.

        Raises
        ------
        :exc:`NotADirectoryError`
            If the parent directory of `path` does not exist.

        Examples
        --------
        Save the individual parameters of the first twenty subjects.

        >>> from leaspy.algo import AlgorithmSettings
        >>> from leaspy.api import Leaspy
        >>> from leaspy.io.data import Data
        >>> leaspy_logistic = Leaspy('logistic')
        >>> data = Data.from_csv_file('data/my_leaspy_data.csv')
        >>> model_settings = AlgorithmSettings('mcmc_saem', seed=0)
        >>> personalize_settings = AlgorithmSettings('mode_real', seed=0)
        >>> leaspy_logistic.fit(data, model_settings)
        >>> individual_results = leaspy_logistic.personalize(data, personalize_settings)
        >>> output_path = 'outputs/logistic_seed0-mode_real_seed0-individual_parameter.json'
        >>> idx = list(individual_results.data.individuals.keys())[:20]
        >>> individual_results.save_individual_parameters_json(output_path, idx)
        """
        self._check_folder_existence(path)
        dump = self._get_dump(idx)
        if human_readable is not None:
            warnings.warn(
                "This parameter is deprecated! To save as a torch file, use the method "
                "'save_individual_parameters_torch'.",
                DeprecationWarning,
                stacklevel=2,
            )
            self.save_individual_parameters_torch(path, idx)
        else:
            # Default json.dump kwargs:
            args = {"indent": 2, **args}
            with open(path, "w") as fp:
                json.dump(dump, fp, **args)
    def save_individual_parameters_torch(
        self, path: str, idx: list[IDType] = None, **args
    ):
        """
        Save the individual parameters in a torch format.

        Parameters
        ----------
        path : :obj:`str`
            The logs' path.
        idx : :obj:`list` [:obj:`str`], optional (default None)
            Contains the IDs of the selected subjects. If ``None``, all the subjects are selected.
        **args
            Arguments to pass to :func:`torch.save`.

        Raises
        ------
        :exc:`NotADirectoryError`
            If the parent directory of `path` does not exist.

        Examples
        --------
        Save the individual parameters of the first twenty subjects.

        >>> from leaspy.algo import AlgorithmSettings
        >>> from leaspy.api import Leaspy
        >>> from leaspy.io.data import Data
        >>> leaspy_logistic = Leaspy('logistic')
        >>> data = Data.from_csv_file('data/my_leaspy_data.csv')
        >>> model_settings = AlgorithmSettings('mcmc_saem', seed=0)
        >>> personalize_settings = AlgorithmSettings('mode_real', seed=0)
        >>> leaspy_logistic.fit(data, model_settings)
        >>> individual_results = leaspy_logistic.personalize(data, personalize_settings)
        >>> output_path = 'outputs/logistic_seed0-mode_real_seed0-individual_parameter.pt'
        >>> idx = list(individual_results.data.individuals.keys())[:20]
        >>> individual_results.save_individual_parameters_torch(output_path, idx)
        """
        self._check_folder_existence(path)
        dump = self._get_dump(idx)
        torch.save(dump, path, **args)
    @staticmethod
    def _check_folder_existence(path: str):
        """
        Check whether the folder in the given file path exists.

        Parameters
        ----------
        path : :obj:`str`
            The file path to check. May include a directory component.

        Raises
        ------
        :exc:`NotADirectoryError`
            If the directory part of the path is non-empty and does not exist.
        """
        # Test the existence of the path's folder (if the path contains a folder)
        dir_path = os.path.dirname(path)
        if not (dir_path == "" or os.path.isdir(dir_path)):
            raise NotADirectoryError(
                f"Cannot save individual parameters at path {path}. The folder does not exist!"
            )

    def _get_dump(self, idx: list[IDType] = None):
        """
        Convert the individual_parameters attribute into a dictionary of lists.

        The values of univariate parameters like 'xi' and 'tau' are squeezed from shape
        (n_subjects, 1) to (n_subjects,). One can select only the wanted subjects by
        specifying their IDs with the 'idx' parameter.

        Parameters
        ----------
        idx : :obj:`list`, optional (default None)
            Contains the IDs of the selected subjects.

        Returns
        -------
        :obj:`dict`
            A dictionary where keys are parameter names and values are lists of parameter values,
            either as flat lists (for univariate parameters) or lists of lists (for multivariate ones).
        """
        dump: dict = copy.deepcopy(self.individual_parameters)
        # Ex: individual_parameters = {'param1': torch.tensor([[1], [2], [3]]), ...}

        # Select only the wanted subjects
        if idx is not None:
            if not isinstance(idx, list):
                raise LeaspyIndividualParamsInputError(
                    "Input 'idx' must be a list, even if it contains only one element! "
                    f"You gave idx={idx} which is of type {type(idx)}."
                )
            selected_id = [self.ID_to_idx[val] for val in idx]
            dump = {key: val[selected_id] for key, val in dump.items()}

        for key in dump.keys():
            if not isinstance(dump[key], list):
                # For multivariate parameters - like sources (any number of dimensions > 1)
                # convert tensor([[1, 2], [2, 3]]) into [[1, 2], [2, 3]]
                if dump[key].shape[1] > 1:
                    dump[key] = dump[key].tolist()
                # For univariate parameters - like xi & tau
                # convert tensor([[1], [2], [3]]) into [1, 2, 3] => squeeze it
                elif dump[key].shape[1] == 1:
                    dump[key] = dump[key].squeeze().tolist()
        return dump
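    # Illustrative sketch of the `_get_dump` conversion (hypothetical values): univariate
    # parameters are squeezed to flat lists, multivariate ones become lists of lists, so the
    # result is directly JSON-serializable:
    #
    #     >>> self.individual_parameters
    #     {'xi': tensor([[0.5000], [0.2500]]), 'sources': tensor([[1., 2.], [3., 4.]])}
    #     >>> self._get_dump()
    #     {'xi': [0.5, 0.25], 'sources': [[1.0, 2.0], [3.0, 4.0]]}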
    @classmethod
    def load_individual_parameters_from_csv(cls, path: str, *, verbose=True, **kwargs):
        """
        Load individual parameters from a csv file.

        Parameters
        ----------
        path : :obj:`str`
            The file's path. The csv file must contain two columns named 'tau' and 'xi'.
            If the individual parameters come from a multivariate model, it must also contain
            the columns 'sources_i' for i in [0, ..., n_sources - 1].
        verbose : :obj:`bool` (default True)
            Whether to have verbose output or not.
        **kwargs
            Parameters to pass to :func:`pandas.read_csv`.

        Returns
        -------
        :obj:`dict` [:obj:`str`, :class:`torch.Tensor`]
            A dictionary of `torch.Tensor` which contains the individual parameters.

        Examples
        --------
        Load an individual parameters dictionary from a saved file.

        >>> from leaspy.io.outputs import Result
        >>> path = 'outputs/logistic_seed0-mode_real_seed0-individual_parameter.csv'
        >>> individual_parameters = Result.load_individual_parameters_from_csv(path)
        """
        df = pd.read_csv(path, **kwargs)
        if verbose:
            print("Load from csv file ... conversion to torch")
        return cls.load_individual_parameters_from_dataframe(df)
    @staticmethod
    def load_individual_parameters_from_dataframe(df: pd.DataFrame):
        """
        Load individual parameters from a :class:`pandas.DataFrame`.

        Parameters
        ----------
        df : :class:`pandas.DataFrame`
            Must contain two columns named 'tau' and 'xi'. If the individual parameters come
            from a multivariate model, it must also contain the columns 'sources_i'
            for i in [0, ..., n_sources - 1].

        Returns
        -------
        :obj:`dict` [:obj:`str`, :class:`torch.Tensor`]
            A dictionary of `torch.Tensor` which contains the individual parameters.
        """
        df.columns = [header.lower() for header in df.columns]
        sources_index = ["sources" in header for header in df.columns]
        ind_param = {
            "tau": torch.tensor(df["tau"].values, dtype=torch.float32).view(-1, 1),
            "xi": torch.tensor(df["xi"].values, dtype=torch.float32).view(-1, 1),
        }
        if any(sources_index):
            ind_param["sources"] = torch.tensor(
                df.iloc[:, sources_index].values, dtype=torch.float32
            )
        return ind_param
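    # Illustrative sketch (assumed column layout): a DataFrame with 'tau', 'xi' and 'sources_*'
    # columns is turned into (n_subjects, 1) tensors for the univariate parameters and an
    # (n_subjects, n_sources) tensor for 'sources':
    #
    #     >>> import pandas as pd
    #     >>> df = pd.DataFrame({'tau': [70.3, 95.1], 'xi': [0.12, -0.69],
    #     ...                    'sources_0': [5.97, 1.52], 'sources_1': [-0.25, 3.48]})
    #     >>> ip = Result.load_individual_parameters_from_dataframe(df)
    #     >>> ip['tau'].shape, ip['sources'].shape
    #     (torch.Size([2, 1]), torch.Size([2, 2]))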
    @staticmethod
    def load_individual_parameters_from_json(path: str, *, verbose=True, **kwargs):
        """
        Load individual parameters from a json file.

        Deprecated: this method also loads torch files (this fallback will be removed).

        Parameters
        ----------
        path : :obj:`str`
            The file's path.
        verbose : :obj:`bool` (default True)
            Whether to have verbose output or not.
        **kwargs
            Parameters to pass to :func:`json.load`.

        Returns
        -------
        :obj:`dict` [:obj:`str`, :class:`torch.Tensor`]
            A dictionary of `torch.Tensor` which contains the individual parameters.

        Examples
        --------
        Load an individual parameters dictionary from a saved file.

        >>> from leaspy.io.outputs import Result
        >>> path = 'outputs/logistic_seed0-mode_real_seed0-individual_parameter.json'
        >>> individual_parameters = Result.load_individual_parameters_from_json(path)
        """
        # Test if the file is a json file
        try:
            with open(path, "r") as f:
                individual_parameters = json.load(f, **kwargs)
            if verbose:
                print("Load from json file ... conversion to torch")
            for key in individual_parameters.keys():
                # Convert every list into a torch.tensor
                individual_parameters[key] = torch.tensor(
                    individual_parameters[key], dtype=torch.float32
                )
                # If the tensor is 1-dimensional tensor([1, 2, 3]) => reshape it into tensor([[1], [2], [3]])
                if individual_parameters[key].dim() == 1:
                    individual_parameters[key] = individual_parameters[key].view(-1, 1)
        # Else, assume it is a torch file
        except UnicodeDecodeError:
            warnings.warn(
                "To load a torch file, use the static method `Result.load_individual_parameters_from_torch`",
                DeprecationWarning,
                stacklevel=2,
            )
            individual_parameters = torch.load(path)  # load function from torch
            if verbose:
                print("Load from torch file")
        return individual_parameters
    @staticmethod
    def load_individual_parameters_from_torch(path: str, *, verbose=True, **kwargs):
        """
        Load individual parameters from a torch file.

        Parameters
        ----------
        path : :obj:`str`
            The file's path.
        verbose : :obj:`bool` (default True)
            Whether to have verbose output or not.
        **kwargs
            Parameters to pass to :func:`torch.load`.

        Returns
        -------
        :obj:`dict` [:obj:`str`, :class:`torch.Tensor`]
            A dictionary of `torch.Tensor` which contains the individual parameters.

        Examples
        --------
        Load an individual parameters dictionary from a saved file.

        >>> from leaspy.io.outputs import Result
        >>> path = 'outputs/logistic_seed0-mode_real_seed0-individual_parameter.pt'
        >>> individual_parameters = Result.load_individual_parameters_from_torch(path)
        """
        if verbose:
            print("Load from torch file")
        individual_parameters = torch.load(path, **kwargs)
        for key, val in individual_parameters.items():
            if not isinstance(val, torch.Tensor):
                individual_parameters[key] = torch.tensor(val, dtype=torch.float32)
            if individual_parameters[key].ndim != 2:
                individual_parameters[key] = individual_parameters[key].unsqueeze(-1)
        return individual_parameters
    @classmethod
    def load_individual_parameters(cls, path_or_df, **kwargs):
        """
        Load individual parameters from a :class:`pandas.DataFrame`, a csv file, a json file
        or a torch file.

        Parameters
        ----------
        path_or_df : :obj:`str` or :class:`pandas.DataFrame`
            The file's path or a DataFrame containing the individual parameters.
        **kwargs
            Keyword arguments to be passed to the corresponding load function.

        Returns
        -------
        :obj:`dict` [:obj:`str`, :class:`torch.Tensor`]
            A dictionary of `torch.Tensor` which contains the individual parameters.

        Raises
        ------
        :exc:`FileNotFoundError`
            If the path is invalid.
        """
        if isinstance(path_or_df, pd.DataFrame):
            return cls.load_individual_parameters_from_dataframe(path_or_df)
        elif isinstance(path_or_df, str):
            file_extension = os.path.splitext(path_or_df)[-1]
            if file_extension == ".csv":
                return cls.load_individual_parameters_from_csv(path_or_df, **kwargs)
            elif file_extension == ".json":
                return cls.load_individual_parameters_from_json(path_or_df, **kwargs)
            else:
                if file_extension not in (".pt", ".p"):
                    warnings.warn(
                        f"File extension not recognized (got '{file_extension}'). "
                        "Trying to load with torch by default.",
                        RuntimeWarning,
                        stacklevel=2,
                    )
                return cls.load_individual_parameters_from_torch(path_or_df, **kwargs)
        else:
            raise LeaspyIndividualParamsInputError(
                "The given input must be a pandas.DataFrame or a string "
                "giving the path of the file containing the individual parameters!"
            )
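    # Illustrative sketch (hypothetical paths): the dispatch above is driven purely by the
    # file extension, with torch loading as the fallback:
    #
    #     >>> Result.load_individual_parameters('ip.csv')      # -> load_individual_parameters_from_csv
    #     >>> Result.load_individual_parameters('ip.json')     # -> load_individual_parameters_from_json
    #     >>> Result.load_individual_parameters('ip.pt')       # -> load_individual_parameters_from_torch
    #     >>> Result.load_individual_parameters('ip.unknown')  # warns, then tries torch.load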
    @classmethod
    def load_result(cls, data, individual_parameters, *, cofactors=None, **kwargs):
        """
        Load a `Result` object from two files - one for the individual data & one for
        the individual parameters.

        Parameters
        ----------
        data : :obj:`str` or :class:`pandas.DataFrame` or :class:`.Data`
            The file's path or a DataFrame containing the features' scores.
        individual_parameters : :obj:`str` or :class:`pandas.DataFrame`
            The file's path or a DataFrame containing the individual parameters.
        cofactors : :obj:`str` or :class:`pandas.DataFrame`, optional (default None)
            The file's path or a DataFrame containing the individual cofactors.
            The ID must be in the index! Thus, the shape is (n_subjects, n_cofactors).
        **kwargs
            Parameters to pass to the `Result.load_individual_parameters` method.

        Returns
        -------
        :class:`Result`
            A Result object which contains the individual parameters and the individual data.

        Examples
        --------
        Launch an individual parameters estimation, save it and reload it.

        >>> from leaspy.algo import AlgorithmSettings
        >>> from leaspy.io.outputs import Result
        >>> from leaspy.api import Leaspy
        >>> from leaspy.io.data import Data
        >>> leaspy_logistic = Leaspy('logistic')
        >>> data = Data.from_csv_file('data/my_leaspy_data.csv')
        >>> model_settings = AlgorithmSettings('mcmc_saem', seed=0)
        >>> personalize_settings = AlgorithmSettings('mode_real', seed=0)
        >>> leaspy_logistic.fit(data, model_settings)
        >>> individual_results = leaspy_logistic.personalize(data, personalize_settings)
        >>> path_data = 'data/my_leaspy_data.csv'
        >>> path_individual_parameters = 'outputs/logistic_seed0-mode_real_seed0-individual_parameter.json'
        >>> individual_results.data.to_dataframe().to_csv(path_data)
        >>> individual_results.save_individual_parameters_json(path_individual_parameters)
        >>> individual_results = Result.load_result(path_data, path_individual_parameters)
        """
        if isinstance(data, Data):
            pass
        elif isinstance(data, str):
            data = Data.from_csv_file(data)
        elif isinstance(data, pd.DataFrame):
            data = Data.from_dataframe(data)
        else:
            raise LeaspyTypeError(
                "The given `data` input must be a Data instance, a pandas.DataFrame "
                "or a string giving the path of the file containing the features' scores! "
                f"You gave an object of type {type(data)}"
            )

        if cofactors is not None:
            if isinstance(cofactors, str):
                cofactors_df = pd.read_csv(cofactors, dtype={"ID": str}).set_index("ID")
            elif isinstance(cofactors, pd.DataFrame):
                cofactors_df = cofactors.copy()
            else:
                raise LeaspyTypeError(
                    "The given `cofactors` input must be a pandas.DataFrame "
                    "or a string giving the path of the file containing the cofactors! "
                    f"You gave an object of type {type(cofactors)}"
                )
            data.load_cofactors(cofactors_df)

        individual_parameters = cls.load_individual_parameters(
            individual_parameters, **kwargs
        )
        return cls(data, individual_parameters)
    def get_error_distribution_dataframe(self, model, cofactors=None):
        """
        Get the signed residual distribution per patient, per sub-score & per visit.

        Each residual is equal to the modeled data minus the observed data.

        Parameters
        ----------
        model : :class:`~.models.abstract_model.AbstractModel`
        cofactors : :obj:`str` or :obj:`list` [:obj:`str`], optional (default None)
            Contains the cofactors' names to be included in the DataFrame.
            By default, no cofactors are returned.
            If ``cofactors == "all"``, all the available cofactors are returned.

        Returns
        -------
        residuals_dataframe : :class:`pandas.DataFrame`
            With index ['ID', 'TIME'].

        Examples
        --------
        Get the mean absolute error per feature:

        >>> from leaspy.algo import AlgorithmSettings
        >>> from leaspy.api import Leaspy
        >>> from leaspy.io.data import Data
        >>> data = Data.from_csv_file("/my/data/path")
        >>> leaspy_logistic = Leaspy('logistic')
        >>> settings = AlgorithmSettings("mcmc_saem", seed=0)
        >>> leaspy_logistic.calibrate(data, settings)
        >>> settings = AlgorithmSettings("mode_real", seed=0)
        >>> results = leaspy_logistic.personalize(data, settings)
        >>> residuals_dataframe = results.get_error_distribution_dataframe(leaspy_logistic.model)
        >>> residuals_dataframe.abs().mean()
        """
        residuals_dataset = Dataset(self.data)
        residuals_dataset.values = (
            model.compute_individual_tensorized(
                residuals_dataset.timepoints, self.individual_parameters
            )
            - residuals_dataset.values
        )
        residuals_dataframe = residuals_dataset.to_pandas()

        if cofactors is not None:
            if isinstance(cofactors, str):
                if cofactors == "all":
                    cofactors_list = self.data.cofactors
                else:
                    cofactors_list = [cofactors]
            elif isinstance(cofactors, list):
                cofactors_list = cofactors
            else:
                raise LeaspyTypeError(
                    "The given `cofactors` input must be a string or a list of strings! "
                    f"You gave an object of type {type(cofactors)}"
                )
            cofactors_df = (
                self.data.to_dataframe(cofactors=cofactors)
                .groupby("ID")
                .first()[cofactors_list]
            )
            residuals_dataframe = residuals_dataframe.join(cofactors_df)

        return residuals_dataframe
    ###############################################################
    # DEPRECATION WARNINGS
    # The following methods will be removed in a future release
    ###############################################################
    @staticmethod
    def get_cofactor_states(cofactors: list) -> list:
        """
        .. deprecated:: 1.0

        Given a list of strings, return the sorted list of unique elements.

        Parameters
        ----------
        cofactors : :obj:`list` [:obj:`str`]
            Distribution list of the cofactors.

        Returns
        -------
        :obj:`list`
            Sorted unique occurrences of the input vector.
        """
        warnings.warn("This method will soon be removed!", DeprecationWarning)
        result = set(cofactors)
        return sorted(result)
    @staticmethod
    def _get_parameter_name_and_dim(param: str):
        """
        Split a parameter string into its base name and optional dimension.

        Parameters
        ----------
        param : :obj:`str`
            The parameter name, possibly including a numeric suffix indicating a dimension.

        Returns
        -------
        :obj:`tuple`
            A tuple `(name, dim)`, where `name` is the base parameter name as a string,
            and `dim` is the parsed integer dimension, or `None` if no valid dimension is found.

        Examples
        --------
        >>> Result._get_parameter_name_and_dim('abc_def_34')
        ('abc_def', 34)
        """
        param_short, *param_dim = param.rsplit("_", maxsplit=1)  # split from the right
        if param_dim:
            # we found a trailing "***_NNN": return this split if and only if NNN can be parsed as an integer
            try:
                return param_short, int(param_dim[0])
            except Exception:
                pass
        return param, None
    def get_parameter_distribution(self, parameter: ParamType, cofactor=None):
        """
        .. deprecated:: 1.0

        Return the wanted parameter distribution (one distribution per covariate state).

        Parameters
        ----------
        parameter : :obj:`str`
            The wanted parameter's name (ex: 'xi', 'tau', ...). It can also be `sources_i`
            to only get the i-th dimension of the multivariate `sources` parameter.
        cofactor : :obj:`str`, optional (default None)
            The wanted cofactor's name.

        Returns
        -------
        :obj:`list` [:obj:`float`] or :obj:`dict` [:obj:`str`, Any]

        Raises
        ------
        :exc:`.LeaspyIndividualParamsInputError`
            If the individual parameters are unsupported.
        :exc:`.LeaspyInputError`
            If the cofactor is unknown.

        Notes
        -----
        If ``cofactor is None``:
            * If the parameter is univariate => return a list of the parameter's distribution: list[float]
            * If the parameter is multivariate => return a dictionary:
              {'parameter1': distribution of parameter variable 1, 'parameter2': ...}

        If ``cofactor is not None``:
            * If the parameter is univariate => return a dictionary:
              {'cofactor1': parameter distribution such that patient.covariate = covariate1, 'cofactor2': ...}
            * If the parameter is multivariate => return a dictionary:
              {'cofactor1': {'parameter1': ..., 'parameter2': ...}, 'cofactor2': {...}, ...}
        """
        warnings.warn("This method will soon be removed!", DeprecationWarning)

        param_short, param_dim = self._get_parameter_name_and_dim(parameter)
        parameter_distribution = self.individual_parameters[param_short]  # torch.Tensor
        # parameter_distribution is of size (N_subjects, N_dimensions_of_parameter)
        if param_dim is not None:
            parameter_distribution = parameter_distribution[:, [param_dim]]

        # Check that the tensor's dimension is <= 2
        p_ndim = parameter_distribution.ndimension()
        if p_ndim > 2:
            raise LeaspyIndividualParamsInputError(
                f"The chosen parameter {parameter} is a tensor "
                f"of dimension {p_ndim}: it should be <= 2!"
            )

        ##############################################
        # If there is no cofactor to take into account
        ##############################################
        if cofactor is None:
            # If the parameter is 1-dimensional
            if parameter_distribution.shape[1] == 1:
                # return a list of length = N_subjects
                parameter_distribution = parameter_distribution.view(-1).tolist()
            # Else transpose it and split it into a dictionary
            else:
                # return {'parameter1': distribution of parameter variable 1, 'parameter2': ...}
                parameter_distribution = {
                    parameter + str(i): val
                    for i, val in enumerate(
                        parameter_distribution.transpose(0, 1).tolist()
                    )
                }
            return parameter_distribution

        #############################################################
        # If the distribution is asked for different cofactor values
        #############################################################
        # Check that the cofactor exists
        all_cofactors = self.data[0].cofactors.keys()
        if cofactor not in all_cofactors:
            raise LeaspyInputError(
                f"The cofactor '{cofactor}' does not exist. "
                f"Here are the available cofactors: {list(all_cofactors)}"
            )

        # Get the possible covariate states
        # cofactors = [_.cofactors[cofactor] for _ in self.data if _.cofactors[cofactor] is not None]
        cofactors = self.get_cofactor_distribution(cofactor)
        cofactor_states = self.get_cofactor_states(cofactors)

        # Initialize the result
        distributions = {}

        # If the parameter is 1-dimensional
        if parameter_distribution.shape[1] == 1:
            parameter_distribution = parameter_distribution.view(-1).tolist()  # ex: [1, 2, 3]
            # Create one entry per cofactor state
            for p in cofactor_states:
                if p not in distributions.keys():
                    distributions[p] = []
                # For each covariate state, get the parameter distribution
                for i, v in enumerate(parameter_distribution):
                    if self.data[i].cofactors[cofactor] == p:
                        distributions[p].append(v)
            # return {'cofactor1': ..., 'cofactor2': ...}
        else:
            # Create one dictionary per cofactor state
            for p in cofactor_states:
                if p not in distributions.keys():
                    # Create one entry per parameter dimension
                    distributions[p] = {
                        parameter + str(i): []
                        for i in range(parameter_distribution.shape[1])
                    }
                # Fill these entries with the corresponding values of the corresponding subjects
                for i, v in enumerate(parameter_distribution.tolist()):
                    if self.data[i].cofactors[cofactor] == p:
                        for j, key in enumerate(distributions[p].keys()):
                            distributions[p][key].append(v[j])
            # return {'cofactor1': {'parameter1': ..., 'parameter2': ...}, 'cofactor2': {...}, ...}
        return distributions
    def get_cofactor_distribution(self, cofactor: str):
        """
        .. deprecated:: 1.0

        Get the list of the cofactor's distribution.

        Parameters
        ----------
        cofactor : :obj:`str`
            The cofactor's name.

        Returns
        -------
        :obj:`list`
            The cofactor's distribution.
        """
        warnings.warn("This method will soon be removed!", DeprecationWarning)
        return [d.cofactors[cofactor] for d in self.data]
    def get_patient_individual_parameters(self, idx: IDType):
        """
        .. deprecated:: 1.0

        Get the dictionary of the wanted patient's individual parameters.

        Parameters
        ----------
        idx : :obj:`str`
            The ID of the wanted patient.

        Returns
        -------
        :obj:`dict` [param_name: :obj:`str`, `torch.Tensor`]
            The patient's individual parameters.
        """
        warnings.warn("This method will soon be removed!", DeprecationWarning)
        # indices = list(self.data.individuals.keys())
        # idx_number = int(np.where(np.array(indices) == idx)[0])
        idx_number = [
            idx_nbr for idx_nbr, idxx in self.data.iter_to_idx.items() if idxx == idx
        ][0]
        patient_dict = dict.fromkeys(self.individual_parameters.keys())
        for variable_ind in list(self.individual_parameters.keys()):
            patient_dict[variable_ind] = self.individual_parameters[variable_ind][
                idx_number
            ]
        return patient_dict
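    # Illustrative sketch (hypothetical subject ID and values): the returned dict holds one
    # tensor row per individual parameter for the requested patient:
    #
    #     >>> results.get_patient_individual_parameters('sub-HS0102')
    #     {'tau': tensor([70.3292]), 'xi': tensor([0.1205]), 'sources': tensor([5.9699, -0.2450])}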