# Source code for leaspy.algo.personalize.constant_prediction_algo

import warnings
from enum import Enum

import numpy as np

from leaspy.io.data import Dataset
from leaspy.io.outputs.individual_parameters import IndividualParameters
from leaspy.models import ConstantModel
from leaspy.utils.typing import FeatureType

from ..base import AlgorithmName
from ..settings import AlgorithmSettings
from .base import PersonalizeAlgorithm

__all__ = ["ConstantPredictionAlgorithm"]


class PredictionType(str, Enum):
    """Strategy used to reduce an individual's visits to one constant value per feature.

    Members
    -------
    LAST
        Value at the most recent visit, even if it is NaN.
    LAST_KNOWN
        Most recent non-NaN value.
    MAX
        Maximum value over the visits.
    MEAN
        Average of the values over the visits.

    Notes
    -----
    The canonical string values use underscores (``"last_known"``). The
    hyphenated spelling (``"last-known"``) is still accepted on lookup so that
    existing settings keep working.
    """

    LAST = "last"
    LAST_KNOWN = "last_known"
    MAX = "max"
    MEAN = "mean"

    @classmethod
    def _missing_(cls, value):
        """Resolve hyphenated spellings to their underscore-based member.

        Called by :class:`enum.Enum` only when ``value`` matches no member
        exactly. Returning ``None`` lets the standard ``ValueError`` be raised.
        """
        if isinstance(value, str):
            canonical = value.replace("-", "_")
            for member in cls:
                if member.value == canonical:
                    return member
        return None



# [docs]
class ConstantPredictionAlgorithm(
    PersonalizeAlgorithm[ConstantModel, IndividualParameters]
):
    r"""ConstantPredictionAlgorithm is an algorithm that provides constant predictions.

    It is used with the :class:`~leaspy.models.ConstantModel`.

    Parameters
    ----------
    settings : :class:`.AlgorithmSettings`
        The settings of the constant prediction algorithm. Its ``prediction_type``
        parameter must be the string value of a :class:`PredictionType` member:

        * ``LAST``: last value, even if NaN,
        * ``LAST_KNOWN``: last non-NaN value,
        * ``MAX``: maximum (=worst) value,
        * ``MEAN``: average of the non-NaN values.

        Depending on features, the ``LAST_KNOWN`` / ``MAX`` value may correspond to
        different visits. For ``LAST_KNOWN``, ``MAX`` and ``MEAN``, the value of a
        given feature is NaN if and only if all values for this feature are NaN.

    Raises
    ------
    :exc:`.LeaspyAlgoInputError`
        If any invalid setting for the algorithm
    """

    name: AlgorithmName = AlgorithmName.PERSONALIZE_CONSTANT
    deterministic: bool = True

    def __init__(self, settings: AlgorithmSettings):
        super().__init__(settings)
        # Enum lookup raises ValueError on an unknown prediction type, and
        # the dict access raises KeyError if the parameter is missing.
        self.prediction_type: PredictionType = PredictionType(
            settings.parameters["prediction_type"]
        )

    def _compute_individual_parameters(
        self, model: ConstantModel, dataset: Dataset, **kwargs
    ) -> IndividualParameters:
        """Compute the constant value of every feature for every individual.

        Parameters
        ----------
        model : :class:`~leaspy.models.ConstantModel`
            Model to personalize; it is (re)initialized from ``dataset``.
        dataset : :class:`.Dataset`
            Data of the individuals to personalize.
        **kwargs
            Unused.

        Returns
        -------
        :class:`.IndividualParameters`
            One entry per individual, keyed by ``str`` of its dataset index,
            mapping each model feature to its constant value.
        """
        # Always overwrite model features (no fit process for constant model).
        # TODO? we could fit the model before, only to recover model features,
        #  and then check at personalize that is the same (as in others personalize algos...)
        model.initialize(dataset)
        individual_parameters = IndividualParameters()
        for individual in range(dataset.n_individuals):
            idx = dataset.indices[individual]
            times = dataset.get_times_patient(individual)
            values = dataset.get_values_patient(individual).numpy()
            ind_ip = self._get_individual_last_values(
                times, values, features=model.features
            )
            individual_parameters.add_individual_parameters(str(idx), ind_ip)
        return individual_parameters

    def _get_individual_last_values(
        self, times: np.ndarray, values: np.ndarray, *, features: list[FeatureType]
    ) -> dict:
        """Get the constant value of each feature for one individual.

        Parameters
        ----------
        times : :class:`numpy.ndarray` [float]
            shape (n_visits,)

        values : :class:`numpy.ndarray` [float]
            shape (n_visits, n_features)

        features : list[FeatureType]
            Feature names

        Returns
        -------
        dict[ft_name: str, constant_value_to_be_padded]
        """
        # return a dict with parameters names being features names
        return dict(zip(features, self._get_feature_values(times, values)))

    def _get_feature_values(self, times: np.ndarray, values: np.ndarray) -> np.ndarray:
        """Reduce the visits of one individual to one value per feature.

        Parameters
        ----------
        times : :class:`numpy.ndarray` [float]
            shape (n_visits,)

        values : :class:`numpy.ndarray` [float]
            shape (n_visits, n_features)

        Returns
        -------
        :class:`numpy.ndarray`
            shape (n_features,), computed according to ``self.prediction_type``.
        """
        if values.shape[0] == 0:
            # No visit at all: every strategy consistently yields NaN instead of
            # failing with a strategy-dependent IndexError / ValueError.
            return np.full(values.shape[1:], np.nan)

        if self.prediction_type in (PredictionType.MAX, PredictionType.MEAN):
            reducer = (
                np.nanmax if self.prediction_type == PredictionType.MAX else np.nanmean
            )
            # A feature that is NaN at all visits legitimately yields NaN here, so
            # silence only the two numpy warnings announcing that very situation.
            with warnings.catch_warnings():
                warnings.filterwarnings(
                    "ignore", message="All-NaN slice", category=RuntimeWarning
                )
                warnings.filterwarnings(
                    "ignore", message="Mean of empty slice", category=RuntimeWarning
                )
                return reducer(values, axis=0)

        # Visit indices ordered from the most recent to the oldest.
        sorted_indices = sorted(range(len(times)), key=times.__getitem__, reverse=True)
        # Sometimes, last value can be a NaN.
        # If this behavior is intended, then return it anyway
        if self.prediction_type == PredictionType.LAST:
            return values[sorted_indices[0]]

        values_sorted_desc = values[sorted_indices]
        # get first index of values being non nan, with visits ordered by more recent
        # (argmax of an all-False column is 0, i.e. it points at a NaN value)
        last_non_nan_ix_per_ft = (~np.isnan(values_sorted_desc)).argmax(axis=0)
        # 1 feature value will be nan iff feature was nan at all visits
        return values_sorted_desc[last_non_nan_ix_per_ft, range(values.shape[1])]