# Source code for leaspy.algo.personalize.constant_prediction_algo
import warnings
from enum import Enum

import numpy as np

from leaspy.io.data import Dataset
from leaspy.io.outputs.individual_parameters import IndividualParameters
from leaspy.models import ConstantModel
from leaspy.utils.typing import FeatureType

from ..base import AlgorithmName
from ..settings import AlgorithmSettings
from .base import PersonalizeAlgorithm
__all__ = ["ConstantPredictionAlgorithm"]
class PredictionType(str, Enum):
    """Strategies used to derive one constant value per feature from the visits.

    Members are also ``str`` instances (``str`` mixin), so they compare equal to
    their string value.

    Lookup by value accepts hyphens and underscores interchangeably: both
    ``PredictionType("last-known")`` and ``PredictionType("last_known")`` return
    :attr:`LAST_KNOWN`. Any other unknown value raises :exc:`ValueError`, as for a
    regular :class:`enum.Enum`.
    """

    # Value at the most recent visit, even if it is NaN.
    LAST = "last"
    # Most recent non-NaN value.
    LAST_KNOWN = "last-known"
    # Maximum of the non-NaN values.
    MAX = "max"
    # Average of the non-NaN values.
    MEAN = "mean"

    @classmethod
    def _missing_(cls, value):
        """Resolve values that only differ from a member by ``_`` vs ``-``.

        Called by :class:`enum.Enum` when the exact value lookup failed.

        Parameters
        ----------
        value : object
            The value that did not match any member exactly.

        Returns
        -------
        PredictionType or None
            The matching member, or ``None`` so that the enum machinery raises
            the usual :exc:`ValueError`.
        """
        if isinstance(value, str):
            normalized = value.replace("_", "-")
            for member in cls:
                if member.value == normalized:
                    return member
        return None
[docs]
class ConstantPredictionAlgorithm(
    PersonalizeAlgorithm[ConstantModel, IndividualParameters]
):
    r"""ConstantPredictionAlgorithm is an algorithm that provides constant predictions.

    It is used with the :class:`~leaspy.models.ConstantModel`: for every individual,
    one constant value per feature is extracted from the individual's visits and
    stored as the individual's parameters.

    Parameters
    ----------
    settings : :class:`.AlgorithmSettings`
        The settings of constant prediction algorithm. It supports the following
        `prediction_type` values (str):

        * ``'last'``: value at the most recent visit, even if NaN,
        * ``'last-known'``: most recent non NaN value,
        * ``'max'``: maximum (=worst) value,
        * ``'mean'``: average of values.

        Depending on features, the ``'last-known'`` / ``'max'`` value may correspond
        to different visits.
        For ``'last-known'``, ``'max'`` and ``'mean'``, the value of a given feature
        will be NaN if and only if all values for this feature are NaN.

    Raises
    ------
    :exc:`ValueError`
        If `prediction_type` is not one of the supported values.
    :exc:`.LeaspyAlgoInputError`
        If any invalid setting for the algorithm (presumably raised by the base
        class validation -- to be confirmed).
    """

    name: AlgorithmName = AlgorithmName.PERSONALIZE_CONSTANT
    deterministic: bool = True

    def __init__(self, settings: AlgorithmSettings):
        super().__init__(settings)
        # Enum lookup by value: an unsupported `prediction_type` raises ValueError.
        self.prediction_type: PredictionType = PredictionType(
            settings.parameters["prediction_type"]
        )

    def _compute_individual_parameters(
        self, model: ConstantModel, dataset: Dataset, **kwargs
    ) -> IndividualParameters:
        """Compute the constant values of every individual of the dataset.

        Parameters
        ----------
        model : :class:`~leaspy.models.ConstantModel`
            The model to personalize. It is (re-)initialized from `dataset`.
        dataset : :class:`.Dataset`
            The data to extract the constant values from.
        **kwargs
            Not used by this algorithm.

        Returns
        -------
        :class:`.IndividualParameters`
            One entry per individual, keyed by ``str(dataset.indices[i])``, mapping
            each model feature to its constant value.
        """
        # Always overwrite model features (no fit process for the constant model).
        # TODO? we could fit the model before, only to recover model features,
        # and then check at personalize that they are the same
        # (as in other personalize algos...)
        model.initialize(dataset)

        individual_parameters = IndividualParameters()
        for individual in range(dataset.n_individuals):
            idx = dataset.indices[individual]
            times = dataset.get_times_patient(individual)
            values = dataset.get_values_patient(individual).numpy()
            ind_ip = self._get_individual_last_values(
                times, values, features=model.features
            )
            individual_parameters.add_individual_parameters(str(idx), ind_ip)
        return individual_parameters

    def _get_individual_last_values(
        self, times: np.ndarray, values: np.ndarray, *, features: list[FeatureType]
    ) -> dict:
        """Get the constant value of each feature for one individual.

        Parameters
        ----------
        times : :class:`numpy.ndarray` [float]
            shape (n_visits,)
        values : :class:`numpy.ndarray` [float]
            shape (n_visits, n_features)
        features : list[FeatureType]
            Feature names

        Returns
        -------
        dict[ft_name: str, constant_value_to_be_padded]
            Parameters names are the features names.
        """
        return dict(zip(features, self._get_feature_values(times, values)))

    def _get_feature_values(
        self, times: np.ndarray, values: np.ndarray
    ) -> np.ndarray:
        """Reduce the visits of one individual to one value per feature.

        The reduction depends on :attr:`prediction_type`.

        Parameters
        ----------
        times : :class:`numpy.ndarray` [float]
            shape (n_visits,)
        values : :class:`numpy.ndarray` [float]
            shape (n_visits, n_features)

        Returns
        -------
        :class:`numpy.ndarray` [float]
            shape (n_features,)
        """
        if self.prediction_type in (PredictionType.MAX, PredictionType.MEAN):
            if self.prediction_type == PredictionType.MAX:
                nan_reduce = np.nanmax
            else:
                nan_reduce = np.nanmean
            # numpy warns (RuntimeWarning) when a feature is NaN at all visits,
            # but NaN is precisely the expected result in that case.
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", category=RuntimeWarning)
                return nan_reduce(values, axis=0)

        if self.prediction_type == PredictionType.LAST:
            # Sometimes, last value can be a NaN: this is the intended behavior
            # for this prediction type, so return it anyway.
            # (argmax returns the first index in case of tied latest times)
            return values[np.argmax(times)]

        # Last known value: order visits from most recent to oldest
        # (stable sort, so tied times keep their original relative order).
        visits_desc = np.argsort(-np.asarray(times), kind="stable")
        values_sorted_desc = values[visits_desc]
        # get first index of values being non nan, with visits ordered by more recent
        # (index 0 when the feature is NaN at all visits)
        last_non_nan_ix_per_ft = (~np.isnan(values_sorted_desc)).argmax(axis=0)
        # 1 feature value will be nan iff feature was nan at all visits
        return values_sorted_desc[last_non_nan_ix_per_ft, np.arange(values.shape[1])]