[1]:
from __future__ import annotations
import time
import numpy as np
# install fastnanquantile before importing xsdba
! pip install fastnanquantile
import xsdba
from xclim.testing import open_dataset
ds = open_dataset("sdba/CanESM2_1950-2100.nc")
# Daily maximum temperature over 1950-1980 (the "full 30 years" used below)
tx = ds.sel(time=slice("1950", "1980")).tasmax
# Compute 50 quantiles along the time dimension
kws = {"dim": "time", "q": np.linspace(0, 1, 50)}
Tests with %%timeit (full 30 years)
Here, fastnanquantile is the best algorithm out of:
- xr.DataArray.quantile
- nbutils.quantile, using either xclim.core.utils.nan_quantile or fastnanquantile
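Outside of a notebook, the same comparison can be scripted with the standard-library timeit module. The following is a minimal sketch (not part of the original notebook) that assumes the tx, kws and xsdba objects from the first cell are in scope:

import timeit

def mean_runtime(stmt: str, number: int = 10, repeat: int = 7) -> float:
    # Average seconds per call, taken from the best of `repeat` rounds of `number` calls.
    times = timeit.repeat(stmt, globals=globals(), number=number, repeat=repeat)
    return min(times) / number

xsdba.nbutils.USE_FASTNANQUANTILE = False
print("xr.DataArray.quantile     :", mean_runtime("tx.quantile(**kws).compute()"))
print("nbutils (nan_quantile)    :", mean_runtime("xsdba.nbutils.quantile(tx, **kws).compute()"))
xsdba.nbutils.USE_FASTNANQUANTILE = True
print("nbutils (fastnanquantile) :", mean_runtime("xsdba.nbutils.quantile(tx, **kws).compute()"))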
[2]:
%%timeit
tx.quantile(**kws).compute()
3.38 ms ± 3.22 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)
[3]:
%%timeit
xsdba.nbutils.USE_FASTNANQUANTILE = False
xsdba.nbutils.quantile(tx, **kws).compute()
2.34 ms ± 236 μs per loop (mean ± std. dev. of 7 runs, 1 loop each)
[4]:
%%timeit
xsdba.nbutils.USE_FASTNANQUANTILE = True
xsdba.nbutils.quantile(tx, **kws).compute()
2.61 ms ± 292 μs per loop (mean ± std. dev. of 7 runs, 1 loop each)
Test computation time as a function of number of points
For smaller numbers of time steps (<= 2000), _sortquantile is generally the best algorithm.
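The intuition is that a sort-based method pays one O(n log n) sort and then reads every requested quantile directly from the sorted values, which is cheap when the series is short and many quantiles are asked for. Below is a minimal, illustrative sketch of a sort-based nan-aware quantile (an assumption for illustration, not the actual _sortquantile implementation in xsdba):

import numpy as np

def sortquantile_sketch(arr, q):
    # One sort of the valid data serves all requested quantiles.
    arr = np.asarray(arr, dtype=float)
    valid = np.sort(arr[~np.isnan(arr)])
    if valid.size == 0:
        return np.full(np.shape(q), np.nan)
    # Linear interpolation between order statistics (numpy's default method).
    pos = np.asarray(q, dtype=float) * (valid.size - 1)
    lo = np.floor(pos).astype(int)
    hi = np.ceil(pos).astype(int)
    frac = pos - lo
    return valid[lo] * (1 - frac) + valid[hi] * frac

# 50 quantiles from a 1000-point sample with some NaNs, checked against numpy
rng = np.random.default_rng(0)
sample = rng.normal(size=1000)
sample[::100] = np.nan
qs = np.linspace(0, 1, 50)
np.testing.assert_allclose(sortquantile_sketch(sample, qs), np.nanquantile(sample, qs))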
[5]:
import matplotlib.pyplot as plt
import xarray as xr
num_tests = 500
timed = {}
# USE_FASTNANQUANTILE selects the backend used by nbutils.quantile:
# True -> fastnanquantile, False -> xclim.core.utils.nan_quantile
for use_fnq in [True, False]:
    timed[use_fnq] = []
    xsdba.nbutils.USE_FASTNANQUANTILE = use_fnq
    # Warm up the numba JIT so compilation time is not measured
    xsdba.nbutils.quantile(
        xr.DataArray(np.array([0, 1.5])), dim="dim_0", q=np.array([0.5])
    )
    for size in np.arange(250, 2000 + 250, 250):
        da = tx.isel(time=slice(0, size))
        t0 = time.time()
        for _i in range(num_tests):
            xsdba.nbutils.quantile(da, **kws).compute()
        timed[use_fnq].append([size, time.time() - t0])

for k, lab in zip(
    [True, False], ["fastnanquantile", "xclim.core.utils.nan_quantile"], strict=False
):
    arr = np.array(timed[k])
    plt.plot(arr[:, 0], arr[:, 1] / num_tests, label=lab)
plt.legend()
plt.title("Quantile computation, average time vs array size, for 50 quantiles")
plt.xlabel("Number of time steps in the distribution")
plt.ylabel("Computation time (s)")
[5]:
Text(0, 0.5, 'Computation time (s)')
[Figure: quantile computation, average time vs array size, for 50 quantiles]