Source code for xsdba.measures

"""
Measures Submodule
==================
Measures compare adjusted simulations to a reference

SDBA diagnostic tests are made up of properties and measures. Measures compare adjusted simulations to a reference,
through statistical properties or directly. This framework for the diagnostic tests was inspired by the
`VALUE <http://www.value-cost.eu/>`_ project.

This module depends on `xclim`. Run `pip install xsdba['extras']` to install it.
"""

from __future__ import annotations
from collections.abc import Sequence

import numpy as np
import xarray as xr
from xclim.core.indicator import Indicator, base_registry

from xsdba.base import Grouper
from xsdba.typing import InputKind
from xsdba.units import convert_units_to, pint2cfattrs, units2pint
from xsdba.utils import _pairwise_spearman


[docs] class StatisticalMeasure(Indicator): """ Base indicator class for statistical measures used when validating bias-adjusted outputs. Statistical measures use input data where the time dimension was reduced, usually by the computation of a :py:class:`xsdba.properties.StatisticalProperty` instance. They usually take two arrays as input: "sim" and "ref", "sim" being measured against "ref". The two arrays must have identical coordinates on their common dimensions. Statistical measures are generally unit-generic. If the inputs have different units, "sim" is converted to match "ref". """ realm = "generic" @classmethod def _ensure_correct_parameters(cls, parameters): inputs = {k for k, p in parameters.items() if p.kind == InputKind.VARIABLE} if not inputs.issuperset({"sim", "ref"}): raise ValueError(f"{cls.__name__} requires 'sim' and 'ref' as inputs. Got {inputs}.") return super()._ensure_correct_parameters(parameters) def _preprocess_and_checks(self, das, params): """Perform parent's checks and also check convert units so that sim matches ref.""" das, params = super()._preprocess_and_checks(das, params) # Convert grouping and check if allowed: das["sim"] = convert_units_to(das["sim"], das["ref"]) ref = das["ref"] sim = das["sim"] # Check if common coordinates are identical. newsim, newref = xr.broadcast(sim, ref) for dim in set(sim.dims).union(ref.dims): if [sim[dim].size, ref[dim].size] != [newsim[dim].size, newref[dim].size]: raise ValueError(f"Common dimension {dim} has different coordinates between ref and sim.") return das, params
[docs] class StatisticalPropertyMeasure(Indicator): """ Base indicator class for statistical properties that include the comparison measure, used when validating bias-adjusted outputs. StatisticalPropertyMeasure objects combine the functionalities of :py:class:`xsdba.properties.StatisticalProperty` and :py:class:`xsdba.properties.StatisticalMeasure`. Statistical properties usually reduce the time dimension and sometimes more dimensions (for example in spatial properties), sometimes adding a grouping dimension according to the passed value of `group` (e.g.: group='time.month' means the loss of the time dimension and the addition of a month one). Statistical measures usually take two arrays as input: "sim" and "ref", "sim" being measured against "ref". Statistical property-measures are generally unit-generic. If the inputs have different units, "sim" is converted to match "ref". """ aspect = None """The aspect the statistical property studies: marginal, temporal, multivariate or spatial.""" allowed_groups = None """A list of allowed groupings. A subset of dayofyear, week, month, season or group. The latter stands for no temporal grouping.""" realm = "generic" @classmethod def _ensure_correct_parameters(cls, parameters): inputs = {k for k, p in parameters.items() if p.kind == InputKind.VARIABLE} if not inputs.issuperset({"sim", "ref"}): raise ValueError(f"{cls.__name__} requires 'sim' and 'ref' as inputs. Got {inputs}.") if "group" not in parameters: raise ValueError( f"{cls.__name__} require a 'group' argument, use the base Indicator class if your computation doesn't perform any regrouping." ) return super()._ensure_correct_parameters(parameters) def _preprocess_and_checks(self, das, params): """Perform parent's checks and also check convert units so that sim matches ref.""" das, params = super()._preprocess_and_checks(das, params) das["sim"] = convert_units_to(das["sim"], das["ref"]) # Convert grouping and check if allowed: if isinstance(params["group"], str): params["group"] = Grouper(params["group"]) if self.allowed_groups is not None: if params["group"].prop not in self.allowed_groups: raise ValueError( f"Grouping period {params['group'].prop_name} is not allowed for property " f"{self.identifier} (needs something in " f"{list(map(lambda g: '<dim>.' + g.replace('group', ''), self.allowed_groups))})." ) return das, params def _postprocess(self, outs, das, params): """Squeeze `group` dim if needed.""" outs = super()._postprocess(outs, das, params) for ii, out in enumerate(outs): if "group" in out.dims: outs[ii] = out.squeeze("group", drop=True) return outs
base_registry["StatisticalMeasure"] = StatisticalMeasure base_registry["StatisticalPropertyMeasure"] = StatisticalPropertyMeasure def _bias(sim: xr.DataArray, ref: xr.DataArray) -> xr.DataArray: """ Bias. The bias is the simulation minus the reference. Parameters ---------- sim : xr.DataArray data from the simulation (one value for each grid-point). ref : xr.DataArray data from the reference (observations) (one value for each grid-point). Returns ------- xr.DataArray, [same as ref] Absolute bias. """ out = sim - ref out.attrs.update(pint2cfattrs(units2pint(ref.attrs["units"]), is_difference=True)) return out bias = StatisticalMeasure(identifier="bias", compute=_bias) def _relative_bias(sim: xr.DataArray, ref: xr.DataArray) -> xr.DataArray: """ Relative Bias. The relative bias is the simulation minus reference, divided by the reference. Parameters ---------- sim : xr.DataArray data from the simulation (one value for each grid-point). ref : xr.DataArray data from the reference (observations) (one value for each grid-point). Returns ------- xr.DataArray, [dimensionless] Relative bias. """ out = (sim - ref) / ref return out.assign_attrs(units="") relative_bias = StatisticalMeasure(identifier="relative_bias", compute=_relative_bias, units="") def _circular_bias(sim: xr.DataArray, ref: xr.DataArray) -> xr.DataArray: """ Circular bias. Bias considering circular time series. E.g. The bias between doy 365 and doy 1 is 364, but the circular bias is -1. Parameters ---------- sim : xr.DataArray data from the simulation (one value for each grid-point). ref : xr.DataArray data from the reference (observations) (one value for each grid-point). Returns ------- xr.DataArray, [days] Circular bias. """ out = (sim - ref) % 365 out = out.where(out <= 365 / 2, 365 - out) # when condition false, replace by 2nd arg out = out.where(ref >= sim, out * -1) # when condition false, replace by 2nd arg return out.assign_attrs(units="days") circular_bias = StatisticalMeasure(identifier="circular_bias", compute=_circular_bias, units="days") def _ratio(sim: xr.DataArray, ref: xr.DataArray) -> xr.DataArray: """ Ratio. The ratio is the quotient of the simulation over the reference. Parameters ---------- sim : xr.DataArray data from the simulation (one value for each grid-point). ref : xr.DataArray data from the reference (observations) (one value for each grid-point). Returns ------- xr.DataArray, [dimensionless] Ratio. """ out = sim / ref out.attrs["units"] = "" return out ratio = StatisticalMeasure(identifier="ratio", compute=_ratio, units="") def _rmse(sim: xr.DataArray, ref: xr.DataArray, group: str | Grouper = "time") -> xr.DataArray: """ Root mean square error. The root mean square error on the time dimension between the simulation and the reference. Parameters ---------- sim : xr.DataArray Data from the simulation (a time-series for each grid-point). ref : xr.DataArray Data from the reference (observations) (a time-series for each grid-point). group: str Compute the property and measure for each temporal groups individually. Currently not implemented. Returns ------- xr.DataArray, [same as ref] Root mean square error. """ def _rmse_internal(_sim: xr.DataArray, _ref: xr.DataArray) -> xr.DataArray: _f: xr.DataArray = np.sqrt(np.nanmean((_sim - _ref) ** 2, axis=-1)) return _f out = xr.apply_ufunc( _rmse_internal, sim, ref, input_core_dims=[["time"], ["time"]], dask="parallelized", ) out = out.assign_attrs(pint2cfattrs(units2pint(ref.units), is_difference=True)) return out rmse = StatisticalPropertyMeasure( identifier="rmse", aspect="temporal", compute=_rmse, allowed_groups=["group"], cell_methods="time: mean", ) def _mae(sim: xr.DataArray, ref: xr.DataArray, group: str | Grouper = "time") -> xr.DataArray: """ Mean absolute error. The mean absolute error on the time dimension between the simulation and the reference. Parameters ---------- sim : xr.DataArray data from the simulation (a time-series for each grid-point). ref : xr.DataArray data from the reference (observations) (a time-series for each grid-point). group : str Compute the property and measure for each temporal groups individually. Currently not implemented. Returns ------- xr.DataArray, [same as ref] Mean absolute error. """ def _mae_internal(_sim: xr.DataArray, _ref: xr.DataArray) -> xr.DataArray: _f: xr.DataArray = np.nanmean(np.abs(_sim - _ref), axis=-1) return _f out = xr.apply_ufunc( _mae_internal, sim, ref, input_core_dims=[["time"], ["time"]], dask="parallelized", ) out = out.assign_attrs(pint2cfattrs(units2pint(ref.units), is_difference=True)) return out mae = StatisticalPropertyMeasure( identifier="mae", aspect="temporal", compute=_mae, allowed_groups=["group"], cell_methods="time: mean", ) def _annual_cycle_correlation( sim: xr.DataArray, ref: xr.DataArray, window: int = 15, group: str | Grouper = "time", ) -> xr.DataArray: """ Annual cycle correlation. Pearson correlation coefficient between the smooth day-of-year averaged annual cycles of the simulation and the reference. In the smooth day-of-year averaged annual cycles, each day-of-year is averaged over all years and over a window of days around that day. Parameters ---------- sim : xr.DataArray data from the simulation (a time-series for each grid-point). ref : xr.DataArray data from the reference (observations) (a time-series for each grid-point). window : int Size of window around each day of year around which to take the mean. E.g. If window=31, Jan 1st is averaged over from December 17th to January 16th. group : str Compute the property and measure for each temporal groups individually. Currently not implemented. Returns ------- xr.DataArray, [dimensionless] Annual cycle correlation. """ # group by day-of-year and window around each doy grouper_test = Grouper("time.dayofyear", window=window) # for each day, mean over X day window and over all years to create a smooth avg annual cycle sim_annual_cycle = grouper_test.apply("mean", sim) ref_annual_cycle = grouper_test.apply("mean", ref) out = xr.corr(ref_annual_cycle, sim_annual_cycle, dim="dayofyear") return out.assign_attrs(units="") annual_cycle_correlation = StatisticalPropertyMeasure( identifier="annual_cycle_correlation", aspect="temporal", compute=_annual_cycle_correlation, allowed_groups=["group"], ) def _scorr( sim: xr.DataArray, ref: xr.DataArray, *, dims: Sequence | None = None, group: str | Grouper = "time", ): """ Spatial correllogram. Compute the inter-site correlations of each array, compute the difference in correlations and sum. Taken from Vrac (2018). The spatial and temporal dimensions are reduced. Parameters ---------- sim : xr.DataArray data from the simulation (a time-series for each grid-point). ref : xr.DataArray data from the reference (observations) (a time-series for each grid-point). dims : sequence of strings, optional Name of the spatial dimensions. If None (default), all dimensions except 'time' are used. group : str Compute the property and measure for each temporal groups individually. Currently not implemented. Returns ------- xr.DataArray, [dimensionless] Sum of the inter-site correlation differences. """ if dims is None: dims = [d for d in sim.dims if d != "time"] refcorr = _pairwise_spearman(ref, dims) simcorr = _pairwise_spearman(sim, dims) S_corr = (simcorr - refcorr).sum(["_spatial", "_spatial2"]) return S_corr.assign_attrs(units="") scorr = StatisticalPropertyMeasure(identifier="Scorr", aspect="spatial", compute=_scorr, allowed_groups=["group"]) def _taylordiagram( sim: xr.DataArray, ref: xr.DataArray, dim: str = "time", group: str | Grouper = "time", normalize: bool = False, ) -> xr.DataArray: """ Taylor diagram. Compute the respective standard deviations of a simulation and a reference array, as well as the Pearson correlation coefficient between both, all necessary parameters to plot points on a Taylor diagram. Parameters ---------- sim : xr.DataArray data from the simulation (a time-series for each grid-point). ref : xr.DataArray data from the reference (observations) (a time-series for each grid-point). dim : str Dimension across which the correlation and standard deviation should be computed. group : str Compute the property and measure for each temporal groups individually. Currently not implemented. normalize : bool If `True`, divide the standard deviations by the standard deviation of the reference. Default is `False`. Returns ------- xr.DataArray, [same as ref] Standard deviations of sim, ref and correlation coefficient between both. """ corr = xr.corr(sim, ref, dim=dim) ref_std = ref.std(dim=dim, skipna=True, keep_attrs=True) sim_std = sim.std(dim=dim, skipna=True, keep_attrs=True) new_dim = xr.DataArray(["ref_std", "sim_std", "corr"], dims=("taylor_param",), name="taylor_param") out = xr.concat( [ref_std, sim_std, corr], new_dim, coords="minimal", compat="override", # Take common coords from `ref_std`. ).assign_attrs( { "correlation_type": "Pearson correlation coefficient", "units": ref.units, } ) # Normalize the standard deviations byt the standard deviation of the reference. if normalize: if (out[{"taylor_param": 0}] == 0).any(): raise ValueError("`ref_std =0` (homogeneous field) obtained, normalization is not possible.") with xr.set_options(keep_attrs=True): out[{"taylor_param": [0, 1]}] = out[{"taylor_param": [0, 1]}] / out[{"taylor_param": 0}] out.attrs["normalized"] = True out.attrs["units"] = "" return out taylordiagram = StatisticalPropertyMeasure( identifier="taylordiagram", aspect="temporal", compute=_taylordiagram, allowed_groups=["group"], )