Source code for leaspy.io.logs.visualization.plotting

import os
import warnings

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch

from leaspy.exceptions import (
    LeaspyIndividualParamsInputError,
    LeaspyInputError,
    LeaspyTypeError,
)

from ...outputs import IndividualParameters

__all__ = ["Plotting"]


# TODO: outdated - this class is deprecated in favour of `Plotter` and is scheduled for removal.
class Plotting:
    """
    .. deprecated:: 1.2

    Class defining some plotting tools.

    Parameters
    ----------
    model : :class:`~leaspy.models.BaseModel`
        The used model.
    output_path : :obj:`str`, (optional)
        Folder where plots will be saved.
        If None, default to current working directory.
    palette : :obj:`str` (palette name) or :class:`matplotlib.colors.Colormap` (`ListedColormap` or `LinearSegmentedColormap`)
        The palette to use.
    max_colors : :obj:`int` > 0, optional (default 10)
        Only used if palette is a string.
        If None, the model dimension is used instead.
    """

    def __init__(self, model, output_path=".", palette="tab10", max_colors=10):
        warnings.warn(
            "Plotting will soon be removed from Leaspy, please use Plotter instead.",
            FutureWarning,
        )

        self.model = model

        # ---- Graphical options
        self.color_palette = None
        self.standard_size = (8, 4)
        self.linestyle = {
            "average_model": "-",
            "individual_model": "-",
            "individual_data": "-",
        }
        self.linewidth = {
            "average_model": 5,
            "individual_model": 2,
            "individual_data": 2,
        }
        self.alpha = {"average_model": 0.5, "individual_model": 1, "individual_data": 1}
        self.output_path = output_path

        self.set_palette(palette, max_colors)

    def set_palette(self, palette, max_colors=None):
        """
        Set palette of plots

        Parameters
        ----------
        palette : :obj:`str` (palette name) or :class:`matplotlib.colors.Colormap` (`ListedColormap` or `LinearSegmentedColormap`)
            The palette to use.
        max_colors : :obj:`int` > 0, optional (default, corresponding to model nb of features)
            Only used if palette is a string

        Raises
        ------
        :exc:`.LeaspyInputError`
            If `palette` is a name, `max_colors` is None and the model
            dimension is None as well (no way to size the palette).
        """
        if isinstance(palette, mpl.colors.Colormap):
            # a ready-made colormap is used as is (`max_colors` is ignored)
            self.color_palette = palette
            return

        if max_colors is None:
            # fall back on the model dimension, which must therefore be known
            if self.model.dimension is None:
                raise LeaspyInputError(
                    "Initialize model first please, with a not None dimension"
                )
            max_colors = self.model.dimension

        self.color_palette = mpl.colormaps[palette].resampled(max_colors)

    def colors(self, at=None):
        """
        Wrapper over color_palette iterator to get colors

        Parameters
        ----------
        at : any legit color_palette arg (int, float or iterable of any of these) or None (default)
            if None returns all colors of palette upto model dimension

        Returns
        -------
        colors : single color tuple (RGBA) or np.array of RGBA colors (number of colors x 4)
        """
        if at is None:
            # cycle over the palette when it has fewer colors than the model dimension
            at = [i % self.color_palette.N for i in range(self.model.dimension)]

        return self.color_palette(at)

    def _raise_if_model_not_init(self):
        """Raise a :exc:`.LeaspyInputError` if the model is not initialized."""
        if not self.model.is_initialized:
            raise LeaspyInputError("Please initialize the model before plotting")

    def _handle_kwargs_begin(self, kwargs, all_features_list=None):
        """
        Extract kwargs corresponding to plot information and remove associated keys (in-place).

        Parameters
        ----------
        kwargs : dict
            Keyword arguments of the calling plot method. The keys
            `features`, `color`, `colors`, `labels`, `ax` (and `figsize` when a new
            figure is created) are removed from it.
        all_features_list : list[:obj:`str`], optional
            Reference list of features. If None, the features of the model are used
            (the model must then be initialized).

        Returns
        -------
        tuple
            `(ax, features, features_ix, labels, colors)`

        Raises
        ------
        :exc:`.LeaspyInputError`
            If the model is needed but not initialized, if there are fewer colors
            than features, or if the number of labels differs from the number of features.
        """
        # get features from initialized model if not set
        if all_features_list is None:
            self._raise_if_model_not_init()
            all_features_list = self.model.features

        # ---- Get requested features (may be a subset)
        features = kwargs.pop("features", all_features_list)
        features_ix = list(map(all_features_list.index, features))

        # ---- Colors
        # `colors` is the documented keyword and `color` the historical one: both are
        # consumed here so that none of them leaks to downstream `**kwargs`
        # (`color` keeps precedence for backward-compatibility).
        documented_colors = kwargs.pop("colors", None)
        colors = kwargs.pop("color", None)
        if colors is None:
            colors = documented_colors
        if colors is None:
            colors = self.colors(features_ix)

        if len(colors) < len(features):
            raise LeaspyInputError(
                f"Please choose a palette with at least {len(features)} colors."
            )
        # TODO: reindex default colors if subset of features?

        # ---- Labels
        labels = kwargs.pop("labels", features)
        if len(labels) != len(features):
            raise LeaspyInputError(
                f"Dimensions mismatch between features ({len(features)}) and labels ({len(labels)})."
            )

        # ---- Ax
        ax = kwargs.pop("ax", None)
        if ax is None:
            _, ax = plt.subplots(
                1, 1, figsize=kwargs.pop("figsize", self.standard_size)
            )

        # ---- Handle ylim
        if "logistic" in self.model.name:
            ax.set_ylim(0, 1)

        return ax, features, features_ix, labels, colors

    def _handle_kwargs_end(self, ax, kwargs, colors, labels):
        """
        Finalize a plot: add the legend, the y-label and optionally save the figure.

        Parameters
        ----------
        ax : :class:`matplotlib.axes.Axes`
        kwargs : dict
            Keyword arguments of the calling plot method; only `save_as` is read.
        colors : list
            Colors of the plotted features, in order (at least as many as labels).
        labels : list[:obj:`str`]
            Legend entries, one per plotted feature.
        """
        # ---- Legend (one proxy line per feature, independent of what was drawn)
        custom_lines = [
            mpl.lines.Line2D([0], [0], color=colors[i], lw=4)
            for i in range(len(labels))
        ]
        ax.legend(custom_lines, labels, title="Features")
        ax.set_ylabel("Normalized score")

        # ---- Save (skipped when `save_as` is missing or explicitly None)
        save_as = kwargs.get("save_as")
        if save_as is not None:
            # a None output folder means current working directory
            folder = "." if self.output_path is None else self.output_path
            plt.savefig(os.path.join(folder, save_as))

    def average_trajectory(self, **kwargs):
        """
        Plot the population average trajectories.

        They are parametrized by the population parameters derived during the calibration.

        Parameters
        ----------
        **kwargs
            * alpha: :obj:`float`, default 0.5
                Matplotlib's transparency option. Must be in [0, 1].
            * linestyle: {'-', '--', '-.', ':', '', (offset, on-off-seq), ...}
                Matplotlib's linestyle option.
            * linewidth: :obj:`float`
                Matplotlib's linewidth option.
            * features: list[:obj:`str`]
                Name of features (if set it must be a subset of model features)
                Default: all model features.
            * colors: list[:obj:`str`]
                Contains matplotlib compatible colors.
                At least as many as number of features.
            * labels: list[:obj:`str`]
                Used to rename features in the plot.
                Exactly as many as number of features.
                Default: raw variable name of each feature
            * ax: matplotlib.axes.Axes
                Axes object to modify, instead of creating a new one.
            * figsize: tuple of int
                The figure's size.
            * save_as: :obj:`str`, default None
                Path to save the figure.
            * title: :obj:`str`
                Title of the plot (default: "Average trajectories")
            * n_tpts: :obj:`int`
                Number of timepoints in plot (default: 100)
            * n_std_left, n_std_right: :obj:`float` (default: 3 and 6 resp.)
                Time window around `tau_mean`, expressed as times of max(`tau_std`, 4)

        Returns
        -------
        :class:`matplotlib.axes.Axes`
        """
        # ---- Input manager
        plot_kws = self._plot_kwargs("average", kwargs)
        ax, _, features_ix, labels, colors = self._handle_kwargs_begin(kwargs)

        # ---- Get timepoints
        mean_time = self.model.parameters["tau_mean"].item()
        std_time = max(self.model.parameters["tau_std"].item(), 4)
        timepoints = mean_time + std_time * np.linspace(
            -kwargs.get("n_std_left", 3),
            kwargs.get("n_std_right", 6),
            kwargs.get("n_tpts", 100),
        )
        timepoints = torch.tensor(timepoints, dtype=torch.float32).unsqueeze(0)

        # ---- Compute average trajectory
        mean_trajectory = (
            self.model.compute_mean_traj(timepoints).cpu().detach().numpy()
        )
        ages = timepoints[0, :].cpu().detach().numpy()

        # ---- plot it for each dimension
        for ft_ix, ft_color in zip(features_ix, colors):
            ax.plot(
                ages,
                mean_trajectory[0, :, ft_ix],
                c=ft_color,
                **plot_kws["model"],
            )

        # ---- Title & labels
        title = kwargs.get("title")
        if title is None:
            title = "Average trajectories"
        ax.set_title(title)
        ax.set_xlabel("Age")

        self._handle_kwargs_end(ax, kwargs, colors, labels)

        return ax

    def _plot_kwargs(self, case, kwargs):
        """
        Build the matplotlib keyword arguments of a given kind of plot.

        Parameters
        ----------
        case : {'average', 'obs', 'recons'}
            Kind of plot.
        kwargs : dict
            User keyword arguments (read only, nothing is removed from it).

        Returns
        -------
        dict
            Dictionary with key `'model'` (case 'average'), `'obs'` (case 'obs')
            or both (case 'recons'), each mapping to a dict of plot options.

        Raises
        ------
        :exc:`.LeaspyInputError`
            If `case` is not supported.
        """
        if case == "average":
            return {
                "model": dict(
                    alpha=kwargs.get("alpha", self.alpha["average_model"]),
                    linestyle=kwargs.get("linestyle", self.linestyle["average_model"]),
                    linewidth=kwargs.get("linewidth", self.linewidth["average_model"]),
                )
            }

        if case == "obs":
            return {
                "obs": dict(
                    alpha=kwargs.get("alpha", self.alpha["individual_data"]),
                    linestyle=kwargs.get(
                        "linestyle", self.linestyle["individual_data"]
                    ),
                    linewidth=kwargs.get(
                        "linewidth", self.linewidth["individual_data"]
                    ),
                    marker=kwargs.get("marker", "o"),
                    markersize=kwargs.get("markersize", "3"),
                )
            }

        if case == "recons":
            # both observations & model will be displayed
            p_obs = dict(
                marker=kwargs.get("marker", "o"),  # None not to display obs
                markersize=kwargs.get("markersize", "4"),
                alpha=kwargs.get("obs_alpha", self.alpha["individual_data"]),
                linestyle=kwargs.get("obs_ls", ""),
                linewidth=kwargs.get("obs_lw", self.linewidth["individual_data"]),
            )
            p_model = dict(
                alpha=kwargs.get("alpha", self.alpha["individual_model"]),
                linestyle=kwargs.get("linestyle", self.linestyle["individual_model"]),
                linewidth=kwargs.get("linewidth", self.linewidth["individual_model"]),
            )
            return {"obs": p_obs, "model": p_model}

        raise LeaspyInputError("case must be in {'average', 'obs', 'recons'}")

    @staticmethod
    def _get_ip_df_torch(individual_parameters):
        """
        Convert individual parameters to both their dataframe and PyTorch forms.

        Parameters
        ----------
        individual_parameters : :class:`.IndividualParameters` or :class:`pandas.DataFrame` or tuple
            A tuple is unpacked into `IndividualParameters.from_pytorch`.

        Returns
        -------
        ip_df : :class:`pandas.DataFrame`
            Individual parameters, indexed by `'ID'`.
        ip_torch
            Individual parameters in PyTorch format
            (later unpacked as `(indices, dict)` by `_plot_model_trajectories`).

        Raises
        ------
        :exc:`.LeaspyTypeError`
            If the type of `individual_parameters` is not supported.
        :exc:`.LeaspyIndividualParamsInputError`
            If the index of the dataframe is not `['ID']`.
        """
        if isinstance(individual_parameters, IndividualParameters):
            ip_df = individual_parameters.to_dataframe()
            ip_torch = individual_parameters.to_pytorch()
        elif isinstance(individual_parameters, pd.DataFrame):
            ip_df = individual_parameters
            ip_torch = IndividualParameters.from_dataframe(
                individual_parameters
            ).to_pytorch()
        elif isinstance(individual_parameters, tuple):
            ip_df = IndividualParameters.from_pytorch(
                *individual_parameters
            ).to_dataframe()
            ip_torch = individual_parameters
        else:
            raise LeaspyTypeError(
                "`individual_parameters` should be an IndividualParameters object, "
                "a pandas.DataFrame or a tuple (PyTorch format)."
            )

        if ip_df.index.names != ["ID"]:
            raise LeaspyIndividualParamsInputError(
                "Individual parameters index is not ['ID'] "
                f"as expected but {list(ip_df.index.names)}"
            )

        return ip_df, ip_torch

    def _plot_patients_generic(
        self,
        case,
        data,
        patients_idx="all",
        individual_parameters=None,
        reparametrized_ages=False,
        **kwargs,
    ):
        """
        Shared implementation of the individual plots (observations and/or model values).

        Parameters
        ----------
        case : {'obs', 'recons'}
            Kind of plot, forwarded to `_plot_kwargs`.
        data : :class:`.Data`
        patients_idx : 'all' (default), :obj:`str` or list[:obj:`str`]
            Patients to display (by their ID).
        individual_parameters : optional
            Any input supported by `_get_ip_df_torch`. Needed to plot model values
            and to plot against reparametrized ages.
        reparametrized_ages : :obj:`bool` (default False)
            Whether to use reparametrized ages on the x-axis.
        **kwargs
            Plot options (cf. `_plot_kwargs`, `_handle_kwargs_begin`,
            `_handle_kwargs_end` and `_plot_model_trajectories`).

        Returns
        -------
        :class:`matplotlib.axes.Axes`

        Raises
        ------
        :exc:`.LeaspyInputError`
            If there is nothing to plot, if features of data and model mismatch,
            or if individual parameters are needed but were not provided.
        """
        ip_df, ip_torch = None, None
        if individual_parameters is not None:
            self._raise_if_model_not_init()
            ip_df, ip_torch = self._get_ip_df_torch(individual_parameters)

        # ---- Input manager
        plot_kws = self._plot_kwargs(case, kwargs)

        with_model = "model" in plot_kws  # plot reconstruction of model as well
        with_obs = "obs" in plot_kws and plot_kws["obs"].get("marker") is not None

        if not (with_model or with_obs):  # (or both !)
            raise LeaspyInputError(
                "Nothing to plot... nor model values nor observations."
            )

        # ---- Patients sublist
        if "patient_IDs" in kwargs:
            warnings.warn(
                "Keyword argument <patient_IDs> is deprecated! "
                "Use <patients_idx> instead.",
                DeprecationWarning,
            )
            patients_idx = kwargs.pop("patient_IDs")

        if isinstance(patients_idx, str):
            if patients_idx == "all":
                patients_idx = list(data.iter_to_idx.values())
            else:
                patients_idx = [patients_idx]

        # features check
        if self.model.is_initialized and data.headers != self.model.features:
            raise LeaspyInputError(
                "Features provided mismatch between data and model: "
                f"{data.headers} != {self.model.features}"
            )

        ax, features, features_ix, labels, colors = self._handle_kwargs_begin(
            kwargs, data.headers
        )

        # Data to dataframe (only selected patients)
        df = data.to_dataframe()
        # needed because of IndividualParameters converting ID int -> str
        df["ID"] = df["ID"].astype(str)
        df = df.set_index("ID").loc[patients_idx]

        if reparametrized_ages:
            if ip_df is None:
                raise LeaspyInputError(
                    "You want to plot reparametrized ages (`reparametrized_ages=True`) but you did not provide any individual parameters "
                    "to do so (please use `individual_parameters` argument)."
                )

            t0 = self.model.parameters["tau_mean"].item()
            df = df.join(ip_df)
            # reparametrized ages
            df["TIME_reparam"] = np.exp(df["xi"]) * (df["TIME"] - df["tau"]) + t0

        # ---- Plot
        # plot observations (with reparametrized times or not)
        if with_obs:
            self._plot_observations(
                ax, df, features, colors, reparametrized_ages, plot_kws["obs"]
            )

        # plot reconstruction as well (model values)
        if with_model:
            if ip_torch is None:
                raise LeaspyInputError(
                    "Individual reconstruction need valid individual parameters."
                )

            self._plot_model_trajectories(
                ax,
                df,
                self.model,
                ip_torch,
                features_ix,
                colors,
                reparametrized_ages,
                plot_kws["model"],
                **kwargs,
            )

        # ---- Title & labels
        if with_obs:
            title = "Observations"
            if with_model:
                title += " and individual trajectories"
        else:  # only with_model
            title = "Individual trajectories"

        ax.set_title(title)
        ax.set_xlabel("Reparametrized age" if reparametrized_ages else "Age")

        self._handle_kwargs_end(ax, kwargs, colors, labels)

        return ax

    @staticmethod
    def _plot_observations(ax, df, features, colors, reparametrized_ages, plot_kws):
        """
        Internal routine: plot individual observations

        Parameters
        ----------
        ax : :class:`matplotlib.axes.Axes`
        df : :class:`pandas.DataFrame`
            Data to plot
        features : list[:obj:`str`]
            Which features to plot (subset of model features / data features)
        colors : list
            List of colors (associated to features selected), in order
        reparametrized_ages : bool
            Should we plot trajectories in reparam age or not?
        plot_kws : dict
            Plot kwargs
        """
        time_col = "TIME_reparam" if reparametrized_ages else "TIME"

        # index rows by (ID, T) where T is the chosen time, sorted
        df_with_time = df.set_index(df[time_col].rename("T"), append=True).sort_index()
        # selected features only (visits without any value are dropped)
        df_with_time = df_with_time[features].dropna(how="all")

        for _, ind_df in df_with_time.groupby("ID"):
            for (_, s_ind_ft), ft_color in zip(ind_df.items(), colors):
                s_ind_ft = s_ind_ft.dropna()
                # TODO? use a cycle of markers to better distinguish individuals?
                ax.plot(
                    s_ind_ft.reset_index("T")["T"],
                    s_ind_ft,
                    c=ft_color,  # legend is done afterwards
                    **plot_kws,
                )

    @staticmethod
    def _plot_model_trajectories(
        ax,
        df,
        model,
        individual_parameters,
        features_ix,
        colors,
        reparametrized_ages,
        plot_kws,
        **kwargs,
    ):
        """
        Internal routine: plot individual trajectories estimated by model

        Parameters
        ----------
        ax : :class:`matplotlib.axes.Axes`
        df : :class:`pandas.DataFrame`
            Data (TODO: could be the MultiIndex [ID,TIME] instead...)
        model
            Model used to compute the individual trajectories.
        individual_parameters : tuple[list, dict]
            <!> in pytorch dict format: tuple(indices:list, dict{ip_name: vals})
        features_ix : list[int]
            Which features to plot (order of features from model)
        colors : list
            List of colors (associated to features selected), in order
        reparametrized_ages : bool
            Should we plot trajectories in reparam age or not?
        plot_kws : dict
            Plot kwargs
        **kwargs
            * "factor_past", "factor_future": float (default 0.5)
                past/future padding to plot (as fraction of total follow-up duration of subjects)
            * "n_tpts": int (default 100)
                nb of tpts in trajectory
        """
        ip_indices, ip_torch = individual_parameters

        for ind_id, ind_df in df.groupby("ID"):
            ind_ix = ip_indices.index(ind_id)
            ind_ip = {pn: pv[ind_ix] for pn, pv in ip_torch.items()}  # torch compatible

            # <!> always real patient ages here (to compute)
            timepoints = ind_df["TIME"]
            min_t, max_t = min(timepoints), max(timepoints)
            total_t = max_t - min_t
            timepoints = np.linspace(
                min_t - kwargs.get("factor_past", 0.5) * total_t,
                max_t + kwargs.get("factor_future", 0.5) * total_t,
                kwargs.get("n_tpts", 100),
            )
            t = torch.tensor(timepoints, dtype=torch.float32).unsqueeze(0)
            trajectory = model.compute_individual_trajectory(t, ind_ip).squeeze(0)

            # times to plot if reparametrized ages are wanted
            if reparametrized_ages:
                timepoints = (
                    (
                        model.time_reparametrization(
                            t=t, alpha=ind_ip["xi"].exp(), tau=ind_ip["tau"]
                        )
                        + model.parameters["tau_mean"].item()
                    )
                    .squeeze(0)
                    .cpu()
                    .numpy()
                )

            for ft_ix, ft_color in zip(features_ix, colors):
                ax.plot(
                    timepoints,
                    trajectory[:, ft_ix],
                    c=ft_color,
                    **plot_kws,
                )

    def patient_observations(
        self, data, patients_idx="all", individual_parameters=None, **kwargs
    ):
        """
        Plot patient observations

        Parameters
        ----------
        data : :class:`.Data`
        patients_idx : 'all' (default), :obj:`str` or list[:obj:`str`]
            Patients to display (by their ID).
        individual_parameters : :class:`.IndividualParameters` or :class:`pandas.DataFrame` (as may be output by ip.to_dataframe()) or tuple (Pytorch ip format), optional
            If not None, observations are plotted with respect to reparametrized ages.
        **kwargs
            Plot options, forwarded to the generic plotting routine.

        Returns
        -------
        :class:`matplotlib.axes.Axes`
        """
        return self._plot_patients_generic(
            "obs",
            data,
            patients_idx=patients_idx,
            individual_parameters=individual_parameters,
            reparametrized_ages=individual_parameters is not None,
            **kwargs,
        )

    def patient_observations_reparametrized(
        self, data, individual_parameters, patients_idx="all", **kwargs
    ):
        """
        Plot patient observations (reparametrized ages)

        Parameters
        ----------
        data : :class:`.Data`
        individual_parameters : :class:`.IndividualParameters` or :class:`pandas.DataFrame` (as may be output by ip.to_dataframe()) or tuple (Pytorch ip format)
        patients_idx : 'all' (default), :obj:`str` or list[:obj:`str`]
            Patients to display (by their ID).
        **kwargs
            Plot options, forwarded to the generic plotting routine.

        Returns
        -------
        :class:`matplotlib.axes.Axes`
        """
        return self._plot_patients_generic(
            "obs",
            data,
            patients_idx=patients_idx,
            individual_parameters=individual_parameters,
            reparametrized_ages=True,
            **kwargs,
        )

    def patient_trajectories(
        self,
        data,
        individual_parameters,
        patients_idx="all",
        reparametrized_ages=False,
        **kwargs,
    ):
        """
        Plot patient observations together with model individual reconstruction

        Parameters
        ----------
        data : :class:`.Data`
        individual_parameters : :class:`.IndividualParameters` or :class:`pandas.DataFrame` (as may be output by ip.to_dataframe()) or tuple (Pytorch ip format)
        patients_idx : 'all' (default), :obj:`str` or list[:obj:`str`]
            Patients to display (by their ID).
        reparametrized_ages : :obj:`bool` (default False)
            Should we plot trajectories in reparam age or not? to study source impact essentially
        **kwargs
            cf. :meth:`._plot_model_trajectories`
            In particular, pass marker=None if you don't want observations besides model

        Returns
        -------
        :class:`matplotlib.axes.Axes`
        """
        return self._plot_patients_generic(
            "recons",
            data,
            patients_idx=patients_idx,
            individual_parameters=individual_parameters,
            reparametrized_ages=reparametrized_ages,
            **kwargs,
        )