[1]:
from __future__ import annotations

import time

import numpy as np

# install fastnanquantile before importing xsdba

! pip install fastnanquantile
import xsdba
from xclim.testing import open_dataset

ds = open_dataset("sdba/CanESM2_1950-2100.nc")
tx = ds.sel(time=slice("1950", "1980")).tasmax
kws = {"dim": "time", "q": np.linspace(0, 1, 50)}
Requirement already satisfied: fastnanquantile in /home/docs/checkouts/readthedocs.org/user_builds/xsdba/conda/latest/lib/python3.12/site-packages (0.0.2)
Requirement already satisfied: numba in /home/docs/checkouts/readthedocs.org/user_builds/xsdba/conda/latest/lib/python3.12/site-packages (from fastnanquantile) (0.61.2)
Requirement already satisfied: numpy in /home/docs/checkouts/readthedocs.org/user_builds/xsdba/conda/latest/lib/python3.12/site-packages (from fastnanquantile) (2.2.6)
Requirement already satisfied: llvmlite<0.45,>=0.44.0dev0 in /home/docs/checkouts/readthedocs.org/user_builds/xsdba/conda/latest/lib/python3.12/site-packages (from numba->fastnanquantile) (0.44.0)
/home/docs/checkouts/readthedocs.org/user_builds/xsdba/conda/latest/lib/python3.12/site-packages/xclim/sdba/__init__.py:22: FutureWarning: The SDBA submodule is in the process of being split from `xclim` in order to facilitate development and effective maintenance of the SDBA utilities. The `xclim.sdba` functionality will change in the future. For more information, please visit https://xsdba.readthedocs.io/en/latest/.
  warnings.warn(

Tests with %%timeit (full 30 years)

Here, fastnanquantile is the best-performing algorithm among:

  • xr.DataArray.quantile

  • nbutils.quantile, using:

    • xclim.core.utils.nan_quantile

    • fastnanquantile

[2]:
%%timeit
# Baseline: xarray's built-in DataArray.quantile over the full 30-year series.
tx.quantile(**kws).compute()
3.38 ms ± 3.22 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)
[3]:
%%timeit
# With fastnanquantile disabled, nbutils.quantile uses
# xclim.core.utils.nan_quantile (per the list above).
xsdba.nbutils.USE_FASTNANQUANTILE = False
xsdba.nbutils.quantile(tx, **kws).compute()
2.34 ms ± 236 μs per loop (mean ± std. dev. of 7 runs, 1 loop each)
[4]:
%%timeit
# With fastnanquantile enabled, nbutils.quantile uses the fastnanquantile backend.
xsdba.nbutils.USE_FASTNANQUANTILE = True
xsdba.nbutils.quantile(tx, **kws).compute()
2.61 ms ± 292 μs per loop (mean ± std. dev. of 7 runs, 1 loop each)

Test computation time as a function of number of points

For a smaller number of time steps (≤ 2000), `_sortquantile` is generally the best algorithm.

[5]:
import matplotlib.pyplot as plt
import xarray as xr

# Number of repetitions per array size; the plotted time is the per-call average.
num_tests = 500
timed = {}

for use_fnq in [True, False]:
    timed[use_fnq] = []
    xsdba.nbutils.USE_FASTNANQUANTILE = use_fnq
    # Warm up the JIT with a tiny call so compilation time is excluded
    # from the measurements below.
    xsdba.nbutils.quantile(
        xr.DataArray(np.array([0, 1.5])), dim="dim_0", q=np.array([0.5])
    )
    for size in np.arange(250, 2000 + 250, 250):
        da = tx.isel(time=slice(0, size))
        t0 = time.time()
        for _i in range(num_tests):
            xsdba.nbutils.quantile(da, **kws).compute()
        timed[use_fnq].append([size, time.time() - t0])

# Label each curve by the backend that actually produced it:
# USE_FASTNANQUANTILE=True -> fastnanquantile,
# USE_FASTNANQUANTILE=False -> xclim.core.utils.nan_quantile.
# (The original labels were swapped.)
for use_fnq, lab in zip(
    [True, False], ["fastnanquantile", "xclim.core.utils.nan_quantile"], strict=False
):
    arr = np.array(timed[use_fnq])
    plt.plot(arr[:, 0], arr[:, 1] / num_tests, label=lab)
plt.legend()
plt.title("Quantile computation, average time vs array size, for 50 quantiles")
plt.xlabel("Number of time steps in the distribution")
plt.ylabel("Computation time (s)")
[5]:
Text(0, 0.5, 'Computation time (s)')
../../_images/notebooks_benchmarks_quantiles_6_1.png