"""Specifications of the variables used in Leaspy models (`leaspy.variables.specs`)."""

from __future__ import annotations

from abc import abstractmethod
from collections import UserDict
from dataclasses import dataclass, field
from enum import Enum
from typing import (
    Callable,
    ClassVar,
    Mapping as TMapping,
    MutableMapping as TMutableMapping,
    Optional,
    Union,
)

import torch

from leaspy.exceptions import LeaspyModelInputError
from leaspy.models.utilities import (
    compute_ind_param_std_from_suff_stats,
    compute_ind_param_mean_from_suff_stats_mixture,
    compute_ind_param_std_from_suff_stats_mixture,
    compute_ind_param_std_from_suff_stats_mixture_burn_in,
    compute_probs_from_state,
)
from leaspy.utils.functional import (
    Identity,
    Mean,
    Prod,
    NamedInputFunction,
    Sqr,
    Std,
    Sum,
    SumDim,
    get_named_parameters,
)
from leaspy.utils.typing import KwargsType
from leaspy.utils.weighted_tensor import (
    TensorOrWeightedTensor,
    WeightedTensor,
    expand_left,
    sum_dim,
)

from .distributions import SymbolicDistribution
from .utilities import compute_individual_parameter_std_from_sufficient_statistics

__all__ = [
    "VariableName",
    "VariableValue",
    "VariableNameToValueMapping",
    "VariablesToFrozenSet",
    "VariablesLazyValuesRW",
    "VariablesLazyValuesRO",
    "SuffStatsRO",
    "SuffStatsRW",
    "VariableInterface",
    "IndepVariable",
    "Hyperparameter",
    "Collect",
    "ModelParameter",
    "DataVariable",
    "LatentVariableInitType",
    "LatentVariable",
    "PopulationLatentVariable",
    "IndividualLatentVariable",
    "LinkedVariable",
    "NamedVariables",
]


VariableName = str
VariableValue = TensorOrWeightedTensor[float]
VariableNameToValueMapping = TMapping[VariableName, VariableValue]
VariablesToFrozenSet = TMapping[VariableName, frozenset[VariableValue]]
VariablesLazyValuesRO = TMapping[VariableName, Optional[VariableValue]]
VariablesLazyValuesRW = TMutableMapping[VariableName, Optional[VariableValue]]
SuffStatsRO = TMapping[VariableName, torch.Tensor]  # VarValue
SuffStatsRW = TMutableMapping[VariableName, torch.Tensor]  # VarValue

LVL_IND = 0
LVL_FT = -1


class VariableInterface:
    """Interface for variable specifications."""

    is_settable: ClassVar[bool]
    """Is True if and only if the variable's state is intended to be manually modified by the user."""

    fixed_shape: ClassVar[bool]
    """Is True as soon as we can guarantee that the variable's shape only depends on model hyperparameters, not on data."""

    @abstractmethod
    def compute(self, state: VariableNameToValueMapping) -> Optional[VariableValue]:
        """
        Compute the variable value from a `state` exposing a dict-like interface: var_name -> values.

        If not relevant for the variable type, return None.

        Parameters
        ----------
        state : :class:`~leaspy.variables.specs.VariableNameToValueMapping`
            The state to use in order to perform computations.

        Returns
        -------
        :class:`~leaspy.variables.specs.VariableValue` or None :
            The variable value computed from the state.
        """

    @abstractmethod
    def get_ancestors_names(self) -> frozenset[VariableName]:
        """
        Get the names of the variables that the current variable directly depends on.

        Returns
        -------
        :obj:`frozenset` [:class:`~leaspy.variables.specs.VariableName`] :
            The set of ancestors' variable names.
        """

    # TODO? add a check or validate(value) method? (to be optionally called by State)
    # <!> should some extra context be passed to this method?
    # (e.g. `n_individuals` or `n_timepoints` dimensions are not known during variable
    # definition, but their consistency could/should be checked?)


class IndepVariable(VariableInterface):
    """Base class for a variable that does not depend on any other variable."""

    def get_ancestors_names(self) -> frozenset[VariableName]:
        """
        Get the names of the variables that the current variable directly depends on.

        Returns
        -------
        :obj:`frozenset` [:class:`~leaspy.variables.specs.VariableName`] :
            The set of ancestors' variable names.
        """
        return frozenset()

    def compute(self, state: VariableNameToValueMapping) -> Optional[VariableValue]:
        """
        Compute the variable value from a `state` exposing a dict-like interface: var_name -> values.

        If not relevant for the variable type, return None.

        Parameters
        ----------
        state : :class:`~leaspy.variables.specs.VariableNameToValueMapping`
            The state to use in order to perform computations.

        Returns
        -------
        :class:`~leaspy.variables.specs.VariableValue` or None :
            The variable value computed from the state.
        """
        return None


@dataclass(frozen=True)
class Hyperparameter(IndepVariable):
    """Hyperparameter that cannot be reset."""

    value: VariableValue
    """The hyperparameter value."""

    fixed_shape: ClassVar = True
    """Whether the variable has a fixed shape. For hyperparameters this is True."""

    is_settable: ClassVar = False
    """Whether the variable is mutable. For hyperparameters this is False."""

    def __post_init__(self):
        if not isinstance(self.value, (torch.Tensor, WeightedTensor)):
            object.__setattr__(self, "value", torch.tensor(self.value))

    def to_device(self, device: torch.device) -> None:
        """
        Move the value to the specified device (other variables never hold values, so they do not need this method).

        Parameters
        ----------
        device : :class:`torch.device`
            The device on which to move the variable value.
        """
        return object.__setattr__(self, "value", self.value.to(device=device))

    @property
    def shape(self) -> tuple[int, ...]:
        return self.value.shape
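

# Usage sketch (illustrative only, not part of the module API): raw Python
# values passed to `Hyperparameter` are wrapped into tensors by `__post_init__`:
#
#     hp = Hyperparameter(0.5)
#     hp.value, hp.shape   # -> (tensor(0.5000), torch.Size([]))
#     hp.is_settable       # -> False (cannot be reset in a State)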


@dataclass(frozen=True, init=False)
class Collect:
    """
    A convenient class to produce a function collecting sufficient stats
    that are either existing or dedicated variables (to be automatically created).

    Parameters
    ----------
    existing_variables : :obj:`tuple` of :class:`~leaspy.variables.specs.VariableName`, optional
        Names of existing variables that should be included when collecting statistics.
    dedicated_variables : :obj:`dict` [:class:`~leaspy.variables.specs.VariableName`, :class:`~leaspy.variables.specs.LinkedVariable`], optional
        Custom or derived variables that will be included in the collection process.
    """

    existing_variables: tuple[VariableName, ...] = ()
    dedicated_variables: Optional[TMapping[VariableName, LinkedVariable]] = None

    def __init__(
        self, *existing_variables: VariableName, **dedicated_variables: LinkedVariable
    ):
        # custom init to allow a more convenient variadic form
        object.__setattr__(self, "existing_variables", existing_variables)
        object.__setattr__(self, "dedicated_variables", dedicated_variables or None)

    @property
    def variables(self) -> tuple[VariableName, ...]:
        """
        Get the combined list of all variable names to be collected.

        Returns
        -------
        :obj:`tuple` of :class:`~leaspy.variables.specs.VariableName` :
            Tuple containing both existing and dedicated variable names.
        """
        return self.existing_variables + tuple(self.dedicated_variables or ())

    def __call__(self, state: VariableNameToValueMapping) -> SuffStatsRW:
        """
        Collect sufficient statistics from a given state.

        Parameters
        ----------
        state : :class:`~leaspy.variables.specs.VariableNameToValueMapping`
            A mapping from variable names to their current values.

        Returns
        -------
        stats : :class:`~leaspy.variables.specs.SuffStatsRW`
            A dictionary of variable names and their corresponding values,
            for all variables defined in this collector.
        """
        return {k: state[k] for k in self.variables}
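

# Usage sketch (illustrative only; "xi" is a hypothetical variable name):
# collect an existing variable together with a dedicated squared statistic,
# as done in `ModelParameter.for_ind_std` below:
#
#     collector = Collect("xi", xi_sqr=LinkedVariable(Sqr("xi")))
#     collector.variables    # -> ("xi", "xi_sqr")
#     collector(state)       # -> {"xi": state["xi"], "xi_sqr": state["xi_sqr"]}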


@dataclass(frozen=True)
class ModelParameter(IndepVariable):
    """
    Variable for model parameters with a maximization rule.

    This variable shouldn't be sampled, and it shouldn't be data,
    a hyperparameter or a linked variable.

    Parameters
    ----------
    shape : :obj:`tuple` of :obj:`int`
        Shape of the parameter tensor. It must be fixed and known in advance.
    suff_stats : :class:`~leaspy.variables.specs.Collect`
        A callable object that collects the sufficient statistics required to compute the update.
    update_rule : :obj:`.typing.Callable` [..., :class:`~leaspy.variables.specs.VariableValue`]
        The symbolic update rule for this parameter, used during both the burn-in and the
        standard learning phase unless overridden by `update_rule_burn_in`.
    update_rule_burn_in : :obj:`.typing.Callable` [..., :class:`~leaspy.variables.specs.VariableValue`] or None, optional
        An optional alternative update rule specifically used during the burn-in phase.
        If provided, it overrides `update_rule` during that phase.

    Attributes
    ----------
    _update_rule_parameters : :obj:`frozenset` of :class:`~leaspy.variables.specs.VariableName`
        Internal cache of variable names required by `update_rule`.
    _update_rule_burn_in_parameters : :obj:`frozenset` of :class:`~leaspy.variables.specs.VariableName` or None
        Internal cache of variable names required by `update_rule_burn_in`, if defined.
    fixed_shape : :obj:`bool` (class attribute)
        Indicates that this variable has a fixed shape (True by design).
    is_settable : :obj:`bool` (class attribute)
        Flags this variable as being settable externally (True by design).
    """

    shape: tuple[int, ...]
    suff_stats: Collect  # Callable[[VariablesValuesRO], SuffStatsRW]
    """
    The symbolic update functions will take variadic `suff_stats` values,
    in order to re-use the NamedInputFunction logic: e.g. update_rule=Std('xi').

    <!> ISSUE: for `tau_std` and `xi_std` we also need `state` values in addition to
    `suff_stats` values (only after burn-in). Since we can NOT readily use the variadic
    form for both `state` and `suff_stats` (names would be conflicting!), we send `state`
    as a special kw variable (a bit lazy but valid)
    (and we prevent using this name for a variable as a safety).
    """
    update_rule: Callable[..., VariableValue]
    """Update rule for the normal phase, and for the memory-less (burn-in) phase unless `update_rule_burn_in` is not None."""
    update_rule_burn_in: Optional[Callable[..., VariableValue]] = None
    """Specific rule for burn-in (currently implemented for some variables only, e.g. `xi_std`)."""

    # private attributes (computed in __post_init__)
    _update_rule_parameters: frozenset[VariableName] = field(init=False, repr=False)
    _update_rule_burn_in_parameters: Optional[frozenset[VariableName]] = field(
        default=None, init=False, repr=False
    )

    fixed_shape: ClassVar = True
    is_settable: ClassVar = True

    def __post_init__(self):
        self._check_and_store_update_rule_parameters("update_rule")
        self._check_and_store_update_rule_parameters("update_rule_burn_in")

    def _check_and_store_update_rule_parameters(self, update_method: str) -> None:
        """
        Validate and store the keyword parameters required by the specified update rule.

        Parameters
        ----------
        update_method : :obj:`str`
            The name of the update method attribute to validate
            (either `"update_rule"` or `"update_rule_burn_in"`).

        Raises
        ------
        :exc:`LeaspyModelInputError`
            If the function associated with the `update_method` has:
            - positional arguments,
            - unexpected keyword arguments not matching `suff_stats` variables or `'state'`,
            - or any signature that cannot be parsed or is otherwise invalid.
        """
        method = getattr(self, update_method)
        if method is None:
            return
        allowed_kws = set(self.suff_stats.variables).union({"state"})
        err_msg = (
            f"Function provided in `ModelParameter.{update_method}` should be a function with keyword-only parameters "
            "(using names of this variable sufficient statistics, or the special 'state' keyword): not {}"
        )
        try:
            inferred_params = get_named_parameters(method)
        except ValueError as e:
            raise LeaspyModelInputError(err_msg.format(str(e))) from e
        forbidden_kws = set(inferred_params).difference(allowed_kws)
        if len(forbidden_kws):
            raise LeaspyModelInputError(err_msg.format(forbidden_kws))
        object.__setattr__(
            self, f"_{update_method}_parameters", frozenset(inferred_params)
        )

    def compute_update(
        self,
        *,
        state: VariableNameToValueMapping,
        suff_stats: SuffStatsRO,
        burn_in: bool,
    ) -> VariableValue:
        """
        Compute the updated value for the model parameter using a maximization step.

        Parameters
        ----------
        state : :class:`~leaspy.variables.specs.VariableNameToValueMapping`
            The state to use for computations.
        suff_stats : :class:`~leaspy.variables.specs.SuffStatsRO`
            The sufficient statistics to use.
        burn_in : :obj:`bool`
            If True, use the update rule of the burn-in phase.

        Returns
        -------
        :class:`~leaspy.variables.specs.VariableValue` :
            The computed variable value.
        """
        update_rule, update_rule_params = self.update_rule, self._update_rule_parameters
        if burn_in and self.update_rule_burn_in is not None:
            update_rule, update_rule_params = (
                self.update_rule_burn_in,
                self._update_rule_burn_in_parameters,
            )
        state_kw = dict(state=state) if "state" in update_rule_params else {}
        # <!> it would not be clean to send all suff_stats (unfiltered) for standard kw-only functions...
        return update_rule(
            **state_kw,
            **{k: suff_stats[k] for k in update_rule_params if k != "state"},
        )

    @classmethod
    def for_pop_mean(
        cls, population_variable_name: VariableName, shape: tuple[int, ...]
    ):
        """
        Smart automatic definition of `ModelParameter` when it is the mean of the
        Gaussian prior of a population latent variable.

        Parameters
        ----------
        population_variable_name : :class:`~leaspy.variables.specs.VariableName`
            Name of the population latent variable for which this is the prior mean.
        shape : :obj:`tuple` of :obj:`int`
            The shape of the model parameter (typically matching the variable's dimensionality).

        Returns
        -------
        :class:`~leaspy.variables.specs.ModelParameter`
            A new instance of `ModelParameter` configured as a prior mean.
        """
        return cls(
            shape,
            suff_stats=Collect(population_variable_name),
            update_rule=Identity(population_variable_name),
        )

    @classmethod
    def for_ind_mean(
        cls, individual_variable_name: VariableName, shape: tuple[int, ...]
    ):
        """
        Smart automatic definition of `ModelParameter` when it is the mean of the
        Gaussian prior of an individual latent variable.

        Parameters
        ----------
        individual_variable_name : :class:`~leaspy.variables.specs.VariableName`
            Name of the individual latent variable for which this is the prior mean.
        shape : :obj:`tuple` of :obj:`int`
            The shape of the model parameter (typically matching the variable's dimensionality).

        Returns
        -------
        :class:`~leaspy.variables.specs.ModelParameter`
            A new instance of `ModelParameter` configured as a prior mean.
        """
        return cls(
            shape,
            suff_stats=Collect(individual_variable_name),
            update_rule=Mean(individual_variable_name, dim=LVL_IND),
        )
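
    # Usage sketch (illustrative only; "xi" is a hypothetical variable name):
    # `ModelParameter.for_ind_mean("xi", shape=(1,))` is shorthand for
    #
    #     ModelParameter(
    #         (1,),
    #         suff_stats=Collect("xi"),
    #         update_rule=Mean("xi", dim=LVL_IND),
    #     )
    #
    # i.e. the prior mean is re-estimated as the empirical mean of the individual
    # values, taken over the individuals dimension (`LVL_IND == 0`).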

    @classmethod
    def for_ind_mean_mixture(cls, ind_var_name: VariableName, shape: tuple[int, ...]):
        """
        Smart automatic definition of `ModelParameter` when it is the mean of a mixture
        of Gaussians prior of an individual latent variable.

        Extra handling is needed to keep one mean per cluster.

        Parameters
        ----------
        ind_var_name : :class:`~leaspy.variables.specs.VariableName`
            Name of the individual latent variable for which this is the prior mean.
        shape : :obj:`tuple` of :obj:`int`
            The shape of the model parameter (typically matching the variable's dimensionality).

        Returns
        -------
        :class:`~leaspy.variables.specs.ModelParameter`
            A new instance of `ModelParameter` configured as a prior mean.
        """
        update_rule_mixture = NamedInputFunction(
            compute_ind_param_mean_from_suff_stats_mixture,
            parameters=("state",),
            kws=dict(ip_name=ind_var_name),
        )
        return cls(
            shape,
            suff_stats=Collect(ind_var_name),
            update_rule=update_rule_mixture,
        )

    @classmethod
    def for_ind_std(cls, ind_var_name: VariableName, shape: tuple[int, ...], **tol_kw):
        """
        Smart automatic definition of `ModelParameter` when it is the std-dev of the
        Gaussian prior of an individual latent variable.

        Parameters
        ----------
        ind_var_name : :class:`~leaspy.variables.specs.VariableName`
            Name of the individual latent variable for which this is the prior std-dev.
        shape : :obj:`tuple` of :obj:`int`
            The shape of the model parameter (typically matching the variable's dimensionality).

        Returns
        -------
        :class:`~leaspy.variables.specs.ModelParameter`
            A new instance of `ModelParameter` configured as a prior std-dev.
        """
        ind_var_sqr_name = f"{ind_var_name}_sqr"
        update_rule_normal = NamedInputFunction(
            compute_individual_parameter_std_from_sufficient_statistics,
            parameters=(
                "state",
                ind_var_name,
                ind_var_sqr_name,
            ),
            kws=dict(
                individual_parameter_name=ind_var_name,
                dim=LVL_IND,
                **tol_kw,
            ),
        )
        return cls(
            shape,
            suff_stats=Collect(
                ind_var_name,
                **{ind_var_sqr_name: LinkedVariable(Sqr(ind_var_name))},
            ),
            update_rule_burn_in=Std(ind_var_name, dim=LVL_IND),
            update_rule=update_rule_normal,
        )

    @classmethod
    def for_ind_std_mixture(
        cls, ind_var_name: VariableName, shape: tuple[int, ...], **tol_kw
    ):
        """
        Smart automatic definition of `ModelParameter` when it is the std-dev of a
        mixture of Gaussians prior of an individual latent variable.

        Parameters
        ----------
        ind_var_name : :class:`~leaspy.variables.specs.VariableName`
            Name of the individual latent variable for which this is the prior std-dev.
        shape : :obj:`tuple` of :obj:`int`
            The shape of the model parameter (typically matching the variable's dimensionality).

        Returns
        -------
        :class:`~leaspy.variables.specs.ModelParameter`
            A new instance of `ModelParameter` configured as a prior std-dev.
        """
        ind_var_sqr_name = f"{ind_var_name}_sqr"
        update_rule_mixture = NamedInputFunction(
            compute_ind_param_std_from_suff_stats_mixture,
            parameters=("state", ind_var_name, ind_var_sqr_name),
            kws=dict(ip_name=ind_var_name, dim=LVL_IND, **tol_kw),
        )
        update_rule_mixture_burn_in = NamedInputFunction(
            compute_ind_param_std_from_suff_stats_mixture_burn_in,
            parameters=("state",),
            kws=dict(ip_name=ind_var_name),
        )
        return cls(
            shape,
            suff_stats=Collect(
                ind_var_name,
                **{ind_var_sqr_name: LinkedVariable(Sqr(ind_var_name))},
            ),
            update_rule_burn_in=update_rule_mixture_burn_in,
            update_rule=update_rule_mixture,
        )

    @classmethod
    def for_probs(cls, shape: tuple[int, ...]):
        """
        Smart automatic definition of `ModelParameter` when it is the probabilities of a Gaussian mixture.

        Parameters
        ----------
        shape : :obj:`tuple` of :obj:`int`
            The shape of the model parameter (typically matching the variable's dimensionality).

        Returns
        -------
        :class:`~leaspy.variables.specs.ModelParameter`
            A new instance of `ModelParameter` configured as a probability vector.
        """
        update_rule_probs = NamedInputFunction(
            compute_probs_from_state,
            parameters=("state",),
        )
        return cls(
            shape,
            suff_stats=Collect(),
            update_rule=update_rule_probs,
        )


@dataclass(frozen=True)
class DataVariable(IndepVariable):
    """
    Variable for input data, that may be reset.

    Attributes
    ----------
    fixed_shape : :obj:`bool`
        Indicates whether the shape of the variable is fixed.
        For `DataVariable`, `False` by design, allowing for more flexible data injection.
    is_settable : :obj:`bool`
        Flag indicating whether this variable can be set/reset directly in the state.
        `True` by design, meaning it can be modified externally.
    """

    fixed_shape: ClassVar = False
    is_settable: ClassVar = True


class LatentVariableInitType(str, Enum):
    """
    Type of initialization for latent variables.

    Members
    -------
    PRIOR_MODE : :obj:`str`
        Initialize latent variables using the mode of their prior distribution.
    PRIOR_MEAN : :obj:`str`
        Initialize latent variables using the mean of their prior distribution.
    PRIOR_SAMPLES : :obj:`str`
        Initialize latent variables by sampling from their prior distribution.
    """

    PRIOR_MODE = "mode"
    PRIOR_MEAN = "mean"
    PRIOR_SAMPLES = "samples"
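

# Usage note (illustrative): since `LatentVariableInitType` is a `str` Enum,
# plain strings coerce to members, which `LatentVariable._get_init_func_generic`
# below relies on:
#
#     LatentVariableInitType("mode") is LatentVariableInitType.PRIOR_MODE  # True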


@dataclass(frozen=True)
class LatentVariable(IndepVariable):
    """
    Unobserved variable that will be sampled, with a symbolic prior distribution.

    Attributes
    ----------
    prior : :class:`~leaspy.variables.distributions.SymbolicDistribution`
        The symbolic prior distribution for the latent variable (e.g. `Normal('xi_mean', 'xi_std')`).
    sampling_kws : :obj:`dict`, optional
        Optional keyword arguments to customize the sampling process
        (e.g. number of samples, random seed).

    Class Attributes
    ----------------
    is_settable : :obj:`bool`
        Indicates that this variable can be explicitly set in the model (default: True).
    """

    # TODO/WIP? should the optional mask derive from the optional masks of the prior
    # distribution parameters, or should it be fixed & explicit here?
    prior: SymbolicDistribution
    sampling_kws: Optional[KwargsType] = None

    is_settable: ClassVar = True

    def get_prior_shape(
        self, named_vars: TMapping[VariableName, VariableInterface]
    ) -> tuple[int, ...]:
        """
        Get the shape of the prior distribution (i.e. without any expansion for `IndividualLatentVariable`).

        Parameters
        ----------
        named_vars : :obj:`Mapping` [:class:`~leaspy.variables.specs.VariableName`, :class:`~leaspy.variables.specs.VariableInterface`]
            A mapping from variable names to their corresponding variable interfaces.
            These should include the parameters of the prior distribution.

        Returns
        -------
        :obj:`tuple` of :obj:`int`
            The shape of the prior distribution (without any replication for individual variables).

        Raises
        ------
        :exc:`LeaspyModelInputError`
            If any of the prior distribution's parameter variables do not have a fixed shape.
        """
        bad_params = {
            n for n in self.prior.parameters_names if not named_vars[n].fixed_shape
        }
        if len(bad_params):
            raise LeaspyModelInputError(
                f"Shapes of some prior distribution parameters are not fixed: {bad_params}"
            )
        params_shapes = {n: named_vars[n].shape for n in self.prior.parameters_names}
        res = self.prior.shape(**params_shapes)
        # Mixture-specific adjustment: mixture priors carry an extra cluster dimension
        # in their parameters, but individual latent variables must be sampled with the
        # same shape as in the non-mixture model (i.e. without the extra cluster dimension).
        if "Mixture" in str(self.prior):
            name = str([self.prior.parameters_names[0]])
            if "sources" in name:
                res = torch.Size(res[:1])
            elif "tau" in name or "xi" in name:
                res = torch.Size([1])
        return res

    def _get_init_func_generic(
        self,
        method: Union[str, LatentVariableInitType],
        *,
        sample_shape: tuple[int, ...],
    ) -> NamedInputFunction[torch.Tensor]:
        """
        Return a function that may be used for initialization.

        Parameters
        ----------
        method : :obj:`str` or :class:`~leaspy.variables.specs.LatentVariableInitType`
            Initialization method. Must be one of `'samples'`, `'mode'`, or `'mean'`.
        sample_shape : :obj:`tuple` of :obj:`int`
            The shape to prepend to the initialized tensor (i.e. left expansion).

        Returns
        -------
        :class:`~leaspy.utils.functional._named_input_function.NamedInputFunction` [:class:`torch.Tensor`]
            A symbolic function to compute the initial value tensor.

        Raises
        ------
        :exc:`ValueError`
            If `method` is not one of the allowed values.
        """
        method = LatentVariableInitType(method)
        if method is LatentVariableInitType.PRIOR_SAMPLES:
            return self.prior.get_func_sample(sample_shape)
        if method is LatentVariableInitType.PRIOR_MODE:
            return self.prior.mode.then(expand_left, shape=sample_shape)
        if method is LatentVariableInitType.PRIOR_MEAN:
            return self.prior.mean.then(expand_left, shape=sample_shape)

    @abstractmethod
    def get_regularity_variables(
        self, value_name: VariableName
    ) -> dict[VariableName, LinkedVariable]:
        """Get the extra linked variables needed to compute the regularity term for this latent variable."""
        # return {
        #     # Not really useful... directly sum it to be memory efficient...
        #     f"nll_regul_{value_name}_full": LinkedVariable(
        #         self.prior.get_func_regularization(value_name)
        #     ),
        #     # TODO: jacobian as well...
        # }
        pass


class PopulationLatentVariable(LatentVariable):
    """
    Population latent variable.

    Attributes
    ----------
    fixed_shape : `ClassVar` [:obj:`bool`]
        Indicates that the shape is fixed (True).
    """

    # not so easy to guarantee the fixed shape property in fact...
    # (it requires that the parameters of the prior distribution all have fixed shapes)
    fixed_shape: ClassVar = True

    def get_init_func(
        self,
        method: Union[str, LatentVariableInitType],
    ) -> NamedInputFunction[torch.Tensor]:
        """
        Return a function that may be used for initialization.

        Parameters
        ----------
        method : :class:`~leaspy.variables.specs.LatentVariableInitType` or :obj:`str`
            The method to be used.

        Returns
        -------
        :class:`~leaspy.utils.functional._named_input_function.NamedInputFunction` [:class:`torch.Tensor`] :
            The initialization function.
        """
        return self._get_init_func_generic(method=method, sample_shape=())

    def get_regularity_variables(
        self,
        variable_name: VariableName,
    ) -> dict[VariableName, LinkedVariable]:
        """
        Return the negative log-likelihood regularity variables for the provided variable name.

        Parameters
        ----------
        variable_name : :class:`~leaspy.variables.specs.VariableName`
            The name of the variable for which to retrieve the regularity.

        Returns
        -------
        :obj:`dict` [:class:`~leaspy.variables.specs.VariableName`, :class:`~leaspy.variables.specs.LinkedVariable`] :
            The dictionary holding the :class:`~leaspy.variables.specs.LinkedVariable` for the regularity.
        """
        # d = super().get_regularity_variables(value_name)
        d = {}
        d.update(
            {
                f"nll_regul_{variable_name}": LinkedVariable(
                    # SumDim(f"nll_regul_{value_name}_full")
                    self.prior.get_func_regularization(variable_name).then(sum_dim)
                ),
                # TODO: jacobian as well...
            }
        )
        return d
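

# Usage sketch (illustrative; "g" is a hypothetical population variable name):
# for a population latent variable, a single fully-summed regularity term is
# registered, e.g. `pop_var.get_regularity_variables("g")` returns
#
#     {"nll_regul_g": LinkedVariable(<prior nll of "g", summed over all dims>)}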


class IndividualLatentVariable(LatentVariable):
    """
    Individual latent variable.

    Attributes
    ----------
    fixed_shape : `ClassVar` [:obj:`bool`]
        Indicates that the shape is not fixed (False), since it depends on the number of individuals.
    """

    fixed_shape: ClassVar = False

    def get_init_func(
        self,
        method: Union[str, LatentVariableInitType],
        *,
        n_individuals: int,
    ) -> NamedInputFunction[torch.Tensor]:
        """
        Return a function that may be used for initialization.

        Parameters
        ----------
        method : :class:`~leaspy.variables.specs.LatentVariableInitType` or :obj:`str`
            The method to be used.
        n_individuals : :obj:`int`
            The number of individuals, used to define the shape.

        Returns
        -------
        :class:`~leaspy.utils.functional._named_input_function.NamedInputFunction` [:class:`torch.Tensor`] :
            The initialization function.
        """
        return self._get_init_func_generic(method=method, sample_shape=(n_individuals,))

    def get_regularity_variables(
        self,
        variable_name: VariableName,
    ) -> dict[VariableName, LinkedVariable]:
        """
        Return the negative log-likelihood regularity variables for the provided variable name.

        Parameters
        ----------
        variable_name : :class:`~leaspy.variables.specs.VariableName`
            The name of the variable for which to retrieve the regularity.

        Returns
        -------
        :obj:`dict` [:class:`~leaspy.variables.specs.VariableName`, :class:`~leaspy.variables.specs.LinkedVariable`] :
            The dictionary holding the :class:`~leaspy.variables.specs.LinkedVariable` for the regularity.
        """
        # d = super().get_regularity_variables(value_name)
        d = {}
        if "Mixture" in str(self.prior):
            # Mixture-model specification: we do not want to sum over all dimensions,
            # since we need one regularity term per cluster.
            if variable_name == "sources":
                d.update(
                    {
                        f"nll_regul_{variable_name}_ind": LinkedVariable(
                            self.prior.get_func_regularization(variable_name).then(
                                sum_dim,
                                # sum over the sources dim, but omit the individuals
                                # and cluster dims from the summation
                                but_dim=(LVL_IND, 2),
                            )
                        ),
                        f"nll_regul_{variable_name}": LinkedVariable(
                            SumDim(f"nll_regul_{variable_name}_ind")
                        ),
                    }
                )
            else:
                d.update(
                    {
                        f"nll_regul_{variable_name}_ind": LinkedVariable(
                            # keep it per cluster, do not sum over all dimensions
                            self.prior.get_func_regularization(variable_name)
                        ),
                        f"nll_regul_{variable_name}": LinkedVariable(
                            SumDim(f"nll_regul_{variable_name}_ind")
                        ),
                    }
                )
        else:
            d.update(
                {
                    f"nll_regul_{variable_name}_ind": LinkedVariable(
                        # SumDim(f"nll_regul_{value_name}_full", but_dim=LVL_IND)
                        self.prior.get_func_regularization(variable_name).then(
                            sum_dim, but_dim=LVL_IND
                        )
                    ),
                    f"nll_regul_{variable_name}": LinkedVariable(
                        SumDim(f"nll_regul_{variable_name}_ind")
                    ),
                    # TODO: jacobian as well...
                }
            )
        return d


@dataclass(frozen=True)
class LinkedVariable(VariableInterface):
    """
    Variable which is a deterministic expression of other variables
    (we directly use variable names instead of mappings: kws <-> vars).

    Parameters
    ----------
    f : :obj:`Callable` [..., :class:`~leaspy.variables.specs.VariableValue`]
        A deterministic function that computes this variable's value from its input variables.
        The function should accept keyword arguments matching the variable names in `parameters`.

    Attributes
    ----------
    parameters : :obj:`frozenset` [:class:`~leaspy.variables.specs.VariableName`]
        The set of variable names on which this linked variable depends.
        This is inferred internally from the function `f`.
    is_settable : `ClassVar` [:obj:`bool`]
        Indicates that this variable is not settable directly (`False`).
    fixed_shape : `ClassVar` [:obj:`bool`]
        Indicates whether the shape of the linked variable is fixed. By design it is `False`.
    """

    f: Callable[..., VariableValue]
    parameters: frozenset[VariableName] = field(init=False)
    # expected_shape? (<!> some of the shape dimensions might not be known, like `n_individuals` or `n_timepoints`...)
    # admissible_value? (<!> same issue as before, cf. remark on `IndividualLatentVariable`)

    is_settable: ClassVar = False
    # the shape of a linked variable may be fixed in some cases, but the logic to
    # guarantee it would be complex/boring/useless
    fixed_shape: ClassVar = False

    def __post_init__(self):
        try:
            inferred_params = get_named_parameters(self.f)
        except ValueError as e:
            raise LeaspyModelInputError(
                "Function provided in `LinkedVariable` should be a function with "
                "keyword-only parameters (using variables names)."
            ) from e
        object.__setattr__(self, "parameters", frozenset(inferred_params))

    def get_ancestors_names(self) -> frozenset[VariableName]:
        """
        Return the set of variable names that this linked variable depends on.

        Returns
        -------
        :obj:`frozenset` [:class:`~leaspy.variables.specs.VariableName`]
            The names of ancestor variables used as inputs by this linked variable.
        """
        return self.parameters

    def compute(self, state: VariableNameToValueMapping) -> VariableValue:
        """
        Compute the variable value from a given state.

        Parameters
        ----------
        state : :class:`~leaspy.variables.specs.VariableNameToValueMapping`
            The state to use for computations.

        Returns
        -------
        :class:`~leaspy.variables.specs.VariableValue` :
            The value of the variable.
        """
        return self.f(**{k: state[k] for k in self.parameters})
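

# Usage sketch (illustrative; "xi" is a hypothetical variable name):
# dependencies are inferred from the keyword-only parameters of `f`, so
#
#     v = LinkedVariable(Sqr("xi"))
#     v.get_ancestors_names()                  # -> frozenset({"xi"})
#     v.compute({"xi": torch.tensor([2.0])})   # -> tensor([4.])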


class NamedVariables(UserDict):
    """
    Convenient dictionary for named variables specifications.

    In particular, it:
    1. forbids collisions in variable names when assigning/updating the collection,
    2. forbids the usage of some reserved names like 'state' or 'suff_stats',
    3. automatically adds implicit variables when variables of a certain kind are added
       (e.g. dedicated variables for sufficient stats of `ModelParameter`),
    4. automatically adds summary variables depending on all contained variables
       (e.g. `nll_regul_ind_sum` that depends on all contained individual latent variables).

    <!> For now, you should NOT update a `NamedVariables` with another one,
    only update with a regular mapping.
    """

    FORBIDDEN_NAMES: ClassVar = frozenset(
        {
            "all",
            "pop",
            "ind",
            "sum",
            "tot",
            "full",
            "nll",
            "attach",
            "regul",
            "state",
            "suff_stats",
        }
    )

    AUTOMATIC_VARS: ClassVar = (
        # TODO? jacobians as well
        "nll_regul_ind_sum_ind",
        "nll_regul_ind_sum",
        # "nll_regul_pop_sum" & "nll_regul_all_sum" are not really relevant so far
        # (because priors for our population variables are NOT true bayesian priors)
        # "nll_regul_pop_sum",
        # "nll_regul_all_sum",
    )

    def __init__(self, *args, **kws):
        self._latent_pop_vars = set()
        self._latent_ind_vars = set()
        super().__init__(*args, **kws)

    def __len__(self):
        return super().__len__() + len(self.AUTOMATIC_VARS)

    def __iter__(self):
        return iter(tuple(self.data) + self.AUTOMATIC_VARS)

    def __setitem__(self, name: VariableName, var: VariableInterface) -> None:
        if name in self.FORBIDDEN_NAMES or name in self.AUTOMATIC_VARS:
            raise ValueError(f"Can not use the reserved name '{name}'")
        if name in self.data:
            raise ValueError(f"Can not reset the variable '{name}'")
        super().__setitem__(name, var)
        if isinstance(var, ModelParameter):
            self.update(var.suff_stats.dedicated_variables or {})
        if isinstance(var, LatentVariable):
            self.update(var.get_regularity_variables(name))
            if isinstance(var, PopulationLatentVariable):
                self._latent_pop_vars.add(name)
            else:
                self._latent_ind_vars.add(name)

    def __getitem__(self, name: VariableName) -> VariableInterface:
        if name in self.AUTOMATIC_VARS:
            return self._auto_vars[name]
        return super().__getitem__(name)

    @property
    def _auto_vars(self) -> dict[VariableName, LinkedVariable]:
        # TODO? add jacobian as well?
        d = dict(
            # nll_regul_pop_sum=LinkedVariable(
            #     Sum(
            #         *(
            #             f"nll_regul_{pop_var_name}"
            #             for pop_var_name in self._latent_pop_vars
            #         )
            #     )
            # ),
            nll_regul_ind_sum_ind=LinkedVariable(
                Sum(
                    *(
                        f"nll_regul_{ind_var_name}_ind"
                        for ind_var_name in self._latent_ind_vars
                    )
                )
            ),
            nll_regul_ind_sum=LinkedVariable(SumDim("nll_regul_ind_sum_ind")),
            # nll_regul_all_sum=LinkedVariable(
            #     Sum("nll_regul_pop_sum", "nll_regul_ind_sum")
            # ),
        )
        assert d.keys() == set(self.AUTOMATIC_VARS)
        return d
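

# Usage sketch (illustrative only; variable names and the `Normal` prior are
# hypothetical stand-ins for an actual `SymbolicDistribution`):
#
#     nv = NamedVariables()
#     nv["xi_mean"] = ModelParameter.for_ind_mean("xi", shape=(1,))
#     nv["xi"] = IndividualLatentVariable(Normal("xi_mean", "xi_std"))
#     # registering "xi" auto-added "nll_regul_xi_ind" and "nll_regul_xi";
#     # the summary vars "nll_regul_ind_sum_ind" / "nll_regul_ind_sum" are
#     # always exposed, and re-registering "xi" would raise a ValueError.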