python-xarray-2026.01.0/0000775000175000017500000000000015136607172015043 5ustar alastairalastairpython-xarray-2026.01.0/xarray/0000775000175000017500000000000015136607163016351 5ustar alastairalastairpython-xarray-2026.01.0/xarray/convert.py0000664000175000017500000001476615136607163020421 0ustar alastairalastair"""Functions for converting to and from xarray objects""" from collections import Counter import numpy as np from xarray.coders import CFDatetimeCoder from xarray.coding.times import CFTimedeltaCoder from xarray.conventions import decode_cf from xarray.core import duck_array_ops from xarray.core.dataarray import DataArray from xarray.core.dtypes import get_fill_value from xarray.namedarray.pycompat import array_type iris_forbidden_keys = { "standard_name", "long_name", "units", "bounds", "axis", "calendar", "leap_month", "leap_year", "month_lengths", "coordinates", "grid_mapping", "climatology", "cell_methods", "formula_terms", "compress", "missing_value", "add_offset", "scale_factor", "valid_max", "valid_min", "valid_range", "_FillValue", } cell_methods_strings = { "point", "sum", "maximum", "median", "mid_range", "minimum", "mean", "mode", "standard_deviation", "variance", } def encode(var): return CFTimedeltaCoder().encode(CFDatetimeCoder().encode(var.variable)) def _filter_attrs(attrs, ignored_attrs): """Return attrs that are not in ignored_attrs""" return {k: v for k, v in attrs.items() if k not in ignored_attrs} def _pick_attrs(attrs, keys): """Return attrs with keys in keys list""" return {k: v for k, v in attrs.items() if k in keys} def _get_iris_args(attrs): """Converts the xarray attrs into args that can be passed into Iris""" # iris.unit is deprecated in Iris v1.9 import cf_units args = {"attributes": _filter_attrs(attrs, iris_forbidden_keys)} args.update(_pick_attrs(attrs, ("standard_name", "long_name"))) unit_args = _pick_attrs(attrs, ("calendar",)) if "units" in attrs: args["units"] = cf_units.Unit(attrs["units"], **unit_args) return args # TODO: Add converting bounds from xarray to Iris and back def to_iris(dataarray): """Convert a DataArray into an Iris Cube""" # Iris not a hard dependency import iris from iris.fileformats.netcdf import parse_cell_methods dim_coords = [] aux_coords = [] for coord_name in dataarray.coords: coord = encode(dataarray.coords[coord_name]) coord_args = _get_iris_args(coord.attrs) coord_args["var_name"] = coord_name axis = None if coord.dims: axis = dataarray.get_axis_num(coord.dims) if coord_name in dataarray.dims: try: iris_coord = iris.coords.DimCoord(coord.values, **coord_args) dim_coords.append((iris_coord, axis)) except ValueError: iris_coord = iris.coords.AuxCoord(coord.values, **coord_args) aux_coords.append((iris_coord, axis)) else: iris_coord = iris.coords.AuxCoord(coord.values, **coord_args) aux_coords.append((iris_coord, axis)) args = _get_iris_args(dataarray.attrs) args["var_name"] = dataarray.name args["dim_coords_and_dims"] = dim_coords args["aux_coords_and_dims"] = aux_coords if "cell_methods" in dataarray.attrs: args["cell_methods"] = parse_cell_methods(dataarray.attrs["cell_methods"]) masked_data = duck_array_ops.masked_invalid(dataarray.data) cube = iris.cube.Cube(masked_data, **args) return cube def _iris_obj_to_attrs(obj): """Return a dictionary of attrs when given an Iris object""" attrs = {"standard_name": obj.standard_name, "long_name": obj.long_name} if obj.units.calendar: attrs["calendar"] = obj.units.calendar if obj.units.origin != "1" and not obj.units.is_unknown(): attrs["units"] = obj.units.origin 
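    # Merge in the remaining Iris attributes; None-valued entries are filtered out on return.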
attrs.update(obj.attributes) return {k: v for k, v in attrs.items() if v is not None} def _iris_cell_methods_to_str(cell_methods_obj): """Converts an Iris cell methods into a string""" cell_methods = [] for cell_method in cell_methods_obj: names = "".join(f"{n}: " for n in cell_method.coord_names) intervals = " ".join( f"interval: {interval}" for interval in cell_method.intervals ) comments = " ".join(f"comment: {comment}" for comment in cell_method.comments) extra = f"{intervals} {comments}".strip() if extra: extra = f" ({extra})" cell_methods.append(names + cell_method.method + extra) return " ".join(cell_methods) def _name(iris_obj, default="unknown"): """Mimics `iris_obj.name()` but with different name resolution order. Similar to iris_obj.name() method, but using iris_obj.var_name first to enable roundtripping. """ return iris_obj.var_name or iris_obj.standard_name or iris_obj.long_name or default def from_iris(cube): """Convert an Iris cube into a DataArray""" import iris.exceptions name = _name(cube) if name == "unknown": name = None dims = [] for i in range(cube.ndim): try: dim_coord = cube.coord(dim_coords=True, dimensions=(i,)) dims.append(_name(dim_coord)) except iris.exceptions.CoordinateNotFoundError: dims.append(f"dim_{i}") if len(set(dims)) != len(dims): duplicates = [k for k, v in Counter(dims).items() if v > 1] raise ValueError(f"Duplicate coordinate name {duplicates}.") coords = {} for coord in cube.coords(): coord_attrs = _iris_obj_to_attrs(coord) coord_dims = [dims[i] for i in cube.coord_dims(coord)] if coord_dims: coords[_name(coord)] = (coord_dims, coord.points, coord_attrs) else: coords[_name(coord)] = ((), coord.points.item(), coord_attrs) array_attrs = _iris_obj_to_attrs(cube) cell_methods = _iris_cell_methods_to_str(cube.cell_methods) if cell_methods: array_attrs["cell_methods"] = cell_methods # Deal with iris 1.* and 2.* cube_data = cube.core_data() if hasattr(cube, "core_data") else cube.data # Deal with dask and numpy masked arrays dask_array_type = array_type("dask") if isinstance(cube_data, dask_array_type): from dask.array import ma as dask_ma filled_data = dask_ma.filled(cube_data, get_fill_value(cube.dtype)) elif isinstance(cube_data, np.ma.MaskedArray): filled_data = np.ma.filled(cube_data, get_fill_value(cube.dtype)) else: filled_data = cube_data dataarray = DataArray( filled_data, coords=coords, name=name, attrs=array_attrs, dims=dims ) decoded_ds = decode_cf(dataarray._to_temp_dataset()) return dataarray._from_temp_dataset(decoded_ds) python-xarray-2026.01.0/xarray/py.typed0000664000175000017500000000000015136607163020036 0ustar alastairalastairpython-xarray-2026.01.0/xarray/namedarray/0000775000175000017500000000000015136607163020474 5ustar alastairalastairpython-xarray-2026.01.0/xarray/namedarray/dtypes.py0000664000175000017500000001272415136607163022364 0ustar alastairalastairfrom __future__ import annotations import functools from typing import Any, Literal, TypeGuard import numpy as np from xarray.namedarray import utils # Use as a sentinel value to indicate a dtype appropriate NA value. 
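# (A concrete, dtype-appropriate missing value can be obtained from maybe_promote()/get_fill_value() below.)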
NA = utils.ReprObject("") @functools.total_ordering class AlwaysGreaterThan: def __gt__(self, other: object) -> Literal[True]: return True def __eq__(self, other: object) -> bool: return isinstance(other, type(self)) @functools.total_ordering class AlwaysLessThan: def __lt__(self, other: object) -> Literal[True]: return True def __eq__(self, other: object) -> bool: return isinstance(other, type(self)) # Equivalence to np.inf (-np.inf) for object-type INF = AlwaysGreaterThan() NINF = AlwaysLessThan() # Pairs of types that, if both found, should be promoted to object dtype # instead of following NumPy's own type-promotion rules. These type promotion # rules match pandas instead. For reference, see the NumPy type hierarchy: # https://numpy.org/doc/stable/reference/arrays.scalars.html PROMOTE_TO_OBJECT: tuple[tuple[type[np.generic], type[np.generic]], ...] = ( (np.number, np.character), # numpy promotes to character (np.bool_, np.character), # numpy promotes to character (np.bytes_, np.str_), # numpy promotes to unicode ) def maybe_promote(dtype: np.dtype[np.generic]) -> tuple[np.dtype[np.generic], Any]: """Simpler equivalent of pandas.core.common._maybe_promote Parameters ---------- dtype : np.dtype Returns ------- dtype : Promoted dtype that can hold missing values. fill_value : Valid missing value for the promoted dtype. """ # N.B. these casting rules should match pandas dtype_: np.typing.DTypeLike fill_value: Any if np.issubdtype(dtype, np.floating): dtype_ = dtype fill_value = np.nan elif np.issubdtype(dtype, np.timedelta64): # See https://github.com/numpy/numpy/issues/10685 # np.timedelta64 is a subclass of np.integer # Check np.timedelta64 before np.integer fill_value = np.timedelta64("NaT") dtype_ = dtype elif np.issubdtype(dtype, np.integer): dtype_ = np.float32 if dtype.itemsize <= 2 else np.float64 fill_value = np.nan elif np.issubdtype(dtype, np.complexfloating): dtype_ = dtype fill_value = np.nan + np.nan * 1j elif np.issubdtype(dtype, np.datetime64): dtype_ = dtype fill_value = np.datetime64("NaT") else: dtype_ = np.object_ fill_value = np.nan dtype_out = np.dtype(dtype_) fill_value = dtype_out.type(fill_value) return dtype_out, fill_value NAT_TYPES = {np.datetime64("NaT").dtype, np.timedelta64("NaT").dtype} def get_fill_value(dtype: np.dtype[np.generic]) -> Any: """Return an appropriate fill value for this dtype. Parameters ---------- dtype : np.dtype Returns ------- fill_value : Missing value corresponding to this dtype. """ _, fill_value = maybe_promote(dtype) return fill_value def get_pos_infinity( dtype: np.dtype[np.generic], max_for_int: bool = False ) -> float | complex | AlwaysGreaterThan: """Return an appropriate positive infinity for this dtype. Parameters ---------- dtype : np.dtype max_for_int : bool Return np.iinfo(dtype).max instead of np.inf Returns ------- fill_value : positive infinity value corresponding to this dtype. """ if issubclass(dtype.type, np.floating): return np.inf if issubclass(dtype.type, np.integer): return np.iinfo(dtype.type).max if max_for_int else np.inf if issubclass(dtype.type, np.complexfloating): return np.inf + 1j * np.inf return INF def get_neg_infinity( dtype: np.dtype[np.generic], min_for_int: bool = False ) -> float | complex | AlwaysLessThan: """Return an appropriate positive infinity for this dtype. Parameters ---------- dtype : np.dtype min_for_int : bool Return np.iinfo(dtype).min instead of -np.inf Returns ------- fill_value : positive infinity value corresponding to this dtype. 
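
    Examples
    --------
    >>> get_neg_infinity(np.dtype("float64"))
    -inf
    >>> get_neg_infinity(np.dtype("int64"), min_for_int=True)
    -9223372036854775808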
""" if issubclass(dtype.type, np.floating): return -np.inf if issubclass(dtype.type, np.integer): return np.iinfo(dtype.type).min if min_for_int else -np.inf if issubclass(dtype.type, np.complexfloating): return -np.inf - 1j * np.inf return NINF def is_datetime_like( dtype: np.dtype[np.generic], ) -> TypeGuard[np.datetime64 | np.timedelta64]: """Check if a dtype is a subclass of the numpy datetime types""" return np.issubdtype(dtype, np.datetime64) or np.issubdtype(dtype, np.timedelta64) def result_type( *arrays_and_dtypes: np.typing.ArrayLike | np.typing.DTypeLike | None, ) -> np.dtype[np.generic]: """Like np.result_type, but with type promotion rules matching pandas. Examples of changed behavior: number + string -> object (not string) bytes + unicode -> object (not unicode) Parameters ---------- *arrays_and_dtypes : list of arrays and dtypes The dtype is extracted from both numpy and dask arrays. Returns ------- numpy.dtype for the result. """ types = {np.result_type(t).type for t in arrays_and_dtypes} for left, right in PROMOTE_TO_OBJECT: if any(issubclass(t, left) for t in types) and any( issubclass(t, right) for t in types ): return np.dtype(object) return np.result_type(*arrays_and_dtypes) python-xarray-2026.01.0/xarray/namedarray/_aggregations.py0000664000175000017500000007277315136607163023677 0ustar alastairalastair"""Mixin classes with reduction operations.""" # This file was generated using xarray.util.generate_aggregations. Do not edit manually. from __future__ import annotations from collections.abc import Callable, Sequence from typing import Any from xarray.core import duck_array_ops from xarray.core.types import Dims, Self class NamedArrayAggregations: __slots__ = () def reduce( self, func: Callable[..., Any], dim: Dims = None, *, axis: int | Sequence[int] | None = None, keepdims: bool = False, **kwargs: Any, ) -> Self: raise NotImplementedError() def count( self, dim: Dims = None, **kwargs: Any, ) -> Self: """ Reduce this NamedArray's data by applying ``count`` along some dimension(s). Parameters ---------- dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. **kwargs : Any Additional keyword arguments passed on to the appropriate array function for calculating ``count`` on this object's data. These could include dask-specific kwargs like ``split_every``. Returns ------- reduced : NamedArray New NamedArray with ``count`` applied to its data and the indicated dimension(s) removed See Also -------- pandas.DataFrame.count dask.dataframe.DataFrame.count Dataset.count DataArray.count :ref:`agg` User guide on reduction or aggregation operations. Examples -------- >>> from xarray.namedarray.core import NamedArray >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) >>> na Size: 48B array([ 1., 2., 3., 0., 2., nan]) >>> na.count() Size: 8B array(5) """ return self.reduce( duck_array_ops.count, dim=dim, **kwargs, ) def all( self, dim: Dims = None, **kwargs: Any, ) -> Self: """ Reduce this NamedArray's data by applying ``all`` along some dimension(s). Parameters ---------- dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. **kwargs : Any Additional keyword arguments passed on to the appropriate array function for calculating ``all`` on this object's data. 
These could include dask-specific kwargs like ``split_every``. Returns ------- reduced : NamedArray New NamedArray with ``all`` applied to its data and the indicated dimension(s) removed See Also -------- numpy.all dask.array.all Dataset.all DataArray.all :ref:`agg` User guide on reduction or aggregation operations. Examples -------- >>> from xarray.namedarray.core import NamedArray >>> na = NamedArray( ... "x", np.array([True, True, True, True, True, False], dtype=bool) ... ) >>> na Size: 6B array([ True, True, True, True, True, False]) >>> na.all() Size: 1B array(False) """ return self.reduce( duck_array_ops.array_all, dim=dim, **kwargs, ) def any( self, dim: Dims = None, **kwargs: Any, ) -> Self: """ Reduce this NamedArray's data by applying ``any`` along some dimension(s). Parameters ---------- dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. **kwargs : Any Additional keyword arguments passed on to the appropriate array function for calculating ``any`` on this object's data. These could include dask-specific kwargs like ``split_every``. Returns ------- reduced : NamedArray New NamedArray with ``any`` applied to its data and the indicated dimension(s) removed See Also -------- numpy.any dask.array.any Dataset.any DataArray.any :ref:`agg` User guide on reduction or aggregation operations. Examples -------- >>> from xarray.namedarray.core import NamedArray >>> na = NamedArray( ... "x", np.array([True, True, True, True, True, False], dtype=bool) ... ) >>> na Size: 6B array([ True, True, True, True, True, False]) >>> na.any() Size: 1B array(True) """ return self.reduce( duck_array_ops.array_any, dim=dim, **kwargs, ) def max( self, dim: Dims = None, *, skipna: bool | None = None, **kwargs: Any, ) -> Self: """ Reduce this NamedArray's data by applying ``max`` along some dimension(s). Parameters ---------- dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). **kwargs : Any Additional keyword arguments passed on to the appropriate array function for calculating ``max`` on this object's data. These could include dask-specific kwargs like ``split_every``. Returns ------- reduced : NamedArray New NamedArray with ``max`` applied to its data and the indicated dimension(s) removed See Also -------- numpy.max dask.array.max Dataset.max DataArray.max :ref:`agg` User guide on reduction or aggregation operations. Examples -------- >>> from xarray.namedarray.core import NamedArray >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) >>> na Size: 48B array([ 1., 2., 3., 0., 2., nan]) >>> na.max() Size: 8B array(3.) Use ``skipna`` to control whether NaNs are ignored. >>> na.max(skipna=False) Size: 8B array(nan) """ return self.reduce( duck_array_ops.max, dim=dim, skipna=skipna, **kwargs, ) def min( self, dim: Dims = None, *, skipna: bool | None = None, **kwargs: Any, ) -> Self: """ Reduce this NamedArray's data by applying ``min`` along some dimension(s). Parameters ---------- dim : str, Iterable of Hashable, "..." 
or None, default: None Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). **kwargs : Any Additional keyword arguments passed on to the appropriate array function for calculating ``min`` on this object's data. These could include dask-specific kwargs like ``split_every``. Returns ------- reduced : NamedArray New NamedArray with ``min`` applied to its data and the indicated dimension(s) removed See Also -------- numpy.min dask.array.min Dataset.min DataArray.min :ref:`agg` User guide on reduction or aggregation operations. Examples -------- >>> from xarray.namedarray.core import NamedArray >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) >>> na Size: 48B array([ 1., 2., 3., 0., 2., nan]) >>> na.min() Size: 8B array(0.) Use ``skipna`` to control whether NaNs are ignored. >>> na.min(skipna=False) Size: 8B array(nan) """ return self.reduce( duck_array_ops.min, dim=dim, skipna=skipna, **kwargs, ) def mean( self, dim: Dims = None, *, skipna: bool | None = None, **kwargs: Any, ) -> Self: """ Reduce this NamedArray's data by applying ``mean`` along some dimension(s). Parameters ---------- dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). **kwargs : Any Additional keyword arguments passed on to the appropriate array function for calculating ``mean`` on this object's data. These could include dask-specific kwargs like ``split_every``. Returns ------- reduced : NamedArray New NamedArray with ``mean`` applied to its data and the indicated dimension(s) removed See Also -------- numpy.mean dask.array.mean Dataset.mean DataArray.mean :ref:`agg` User guide on reduction or aggregation operations. Examples -------- >>> from xarray.namedarray.core import NamedArray >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) >>> na Size: 48B array([ 1., 2., 3., 0., 2., nan]) >>> na.mean() Size: 8B array(1.6) Use ``skipna`` to control whether NaNs are ignored. >>> na.mean(skipna=False) Size: 8B array(nan) """ return self.reduce( duck_array_ops.mean, dim=dim, skipna=skipna, **kwargs, ) def prod( self, dim: Dims = None, *, skipna: bool | None = None, min_count: int | None = None, **kwargs: Any, ) -> Self: """ Reduce this NamedArray's data by applying ``prod`` along some dimension(s). Parameters ---------- dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). 
min_count : int or None, optional The required number of valid values to perform the operation. If fewer than min_count non-NA values are present the result will be NA. Only used if skipna is set to True or defaults to True for the array's dtype. Changed in version 0.17.0: if specified on an integer array and skipna=True, the result will be a float array. **kwargs : Any Additional keyword arguments passed on to the appropriate array function for calculating ``prod`` on this object's data. These could include dask-specific kwargs like ``split_every``. Returns ------- reduced : NamedArray New NamedArray with ``prod`` applied to its data and the indicated dimension(s) removed See Also -------- numpy.prod dask.array.prod Dataset.prod DataArray.prod :ref:`agg` User guide on reduction or aggregation operations. Notes ----- Non-numeric variables will be removed prior to reducing. Examples -------- >>> from xarray.namedarray.core import NamedArray >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) >>> na Size: 48B array([ 1., 2., 3., 0., 2., nan]) >>> na.prod() Size: 8B array(0.) Use ``skipna`` to control whether NaNs are ignored. >>> na.prod(skipna=False) Size: 8B array(nan) Specify ``min_count`` for finer control over when NaNs are ignored. >>> na.prod(skipna=True, min_count=2) Size: 8B array(0.) """ return self.reduce( duck_array_ops.prod, dim=dim, skipna=skipna, min_count=min_count, **kwargs, ) def sum( self, dim: Dims = None, *, skipna: bool | None = None, min_count: int | None = None, **kwargs: Any, ) -> Self: """ Reduce this NamedArray's data by applying ``sum`` along some dimension(s). Parameters ---------- dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). min_count : int or None, optional The required number of valid values to perform the operation. If fewer than min_count non-NA values are present the result will be NA. Only used if skipna is set to True or defaults to True for the array's dtype. Changed in version 0.17.0: if specified on an integer array and skipna=True, the result will be a float array. **kwargs : Any Additional keyword arguments passed on to the appropriate array function for calculating ``sum`` on this object's data. These could include dask-specific kwargs like ``split_every``. Returns ------- reduced : NamedArray New NamedArray with ``sum`` applied to its data and the indicated dimension(s) removed See Also -------- numpy.sum dask.array.sum Dataset.sum DataArray.sum :ref:`agg` User guide on reduction or aggregation operations. Notes ----- Non-numeric variables will be removed prior to reducing. Examples -------- >>> from xarray.namedarray.core import NamedArray >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) >>> na Size: 48B array([ 1., 2., 3., 0., 2., nan]) >>> na.sum() Size: 8B array(8.) Use ``skipna`` to control whether NaNs are ignored. >>> na.sum(skipna=False) Size: 8B array(nan) Specify ``min_count`` for finer control over when NaNs are ignored. >>> na.sum(skipna=True, min_count=2) Size: 8B array(8.) 
""" return self.reduce( duck_array_ops.sum, dim=dim, skipna=skipna, min_count=min_count, **kwargs, ) def std( self, dim: Dims = None, *, skipna: bool | None = None, ddof: int = 0, **kwargs: Any, ) -> Self: """ Reduce this NamedArray's data by applying ``std`` along some dimension(s). Parameters ---------- dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). ddof : int, default: 0 “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, where ``N`` represents the number of elements. **kwargs : Any Additional keyword arguments passed on to the appropriate array function for calculating ``std`` on this object's data. These could include dask-specific kwargs like ``split_every``. Returns ------- reduced : NamedArray New NamedArray with ``std`` applied to its data and the indicated dimension(s) removed See Also -------- numpy.std dask.array.std Dataset.std DataArray.std :ref:`agg` User guide on reduction or aggregation operations. Notes ----- Non-numeric variables will be removed prior to reducing. Examples -------- >>> from xarray.namedarray.core import NamedArray >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) >>> na Size: 48B array([ 1., 2., 3., 0., 2., nan]) >>> na.std() Size: 8B array(1.0198039) Use ``skipna`` to control whether NaNs are ignored. >>> na.std(skipna=False) Size: 8B array(nan) Specify ``ddof=1`` for an unbiased estimate. >>> na.std(skipna=True, ddof=1) Size: 8B array(1.14017543) """ return self.reduce( duck_array_ops.std, dim=dim, skipna=skipna, ddof=ddof, **kwargs, ) def var( self, dim: Dims = None, *, skipna: bool | None = None, ddof: int = 0, **kwargs: Any, ) -> Self: """ Reduce this NamedArray's data by applying ``var`` along some dimension(s). Parameters ---------- dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). ddof : int, default: 0 “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, where ``N`` represents the number of elements. **kwargs : Any Additional keyword arguments passed on to the appropriate array function for calculating ``var`` on this object's data. These could include dask-specific kwargs like ``split_every``. Returns ------- reduced : NamedArray New NamedArray with ``var`` applied to its data and the indicated dimension(s) removed See Also -------- numpy.var dask.array.var Dataset.var DataArray.var :ref:`agg` User guide on reduction or aggregation operations. Notes ----- Non-numeric variables will be removed prior to reducing. 
Examples -------- >>> from xarray.namedarray.core import NamedArray >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) >>> na Size: 48B array([ 1., 2., 3., 0., 2., nan]) >>> na.var() Size: 8B array(1.04) Use ``skipna`` to control whether NaNs are ignored. >>> na.var(skipna=False) Size: 8B array(nan) Specify ``ddof=1`` for an unbiased estimate. >>> na.var(skipna=True, ddof=1) Size: 8B array(1.3) """ return self.reduce( duck_array_ops.var, dim=dim, skipna=skipna, ddof=ddof, **kwargs, ) def median( self, dim: Dims = None, *, skipna: bool | None = None, **kwargs: Any, ) -> Self: """ Reduce this NamedArray's data by applying ``median`` along some dimension(s). Parameters ---------- dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). **kwargs : Any Additional keyword arguments passed on to the appropriate array function for calculating ``median`` on this object's data. These could include dask-specific kwargs like ``split_every``. Returns ------- reduced : NamedArray New NamedArray with ``median`` applied to its data and the indicated dimension(s) removed See Also -------- numpy.median dask.array.median Dataset.median DataArray.median :ref:`agg` User guide on reduction or aggregation operations. Notes ----- Non-numeric variables will be removed prior to reducing. Examples -------- >>> from xarray.namedarray.core import NamedArray >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) >>> na Size: 48B array([ 1., 2., 3., 0., 2., nan]) >>> na.median() Size: 8B array(2.) Use ``skipna`` to control whether NaNs are ignored. >>> na.median(skipna=False) Size: 8B array(nan) """ return self.reduce( duck_array_ops.median, dim=dim, skipna=skipna, **kwargs, ) def cumsum( self, dim: Dims = None, *, skipna: bool | None = None, **kwargs: Any, ) -> Self: """ Reduce this NamedArray's data by applying ``cumsum`` along some dimension(s). Parameters ---------- dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``cumsum``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). **kwargs : Any Additional keyword arguments passed on to the appropriate array function for calculating ``cumsum`` on this object's data. These could include dask-specific kwargs like ``split_every``. Returns ------- reduced : NamedArray New NamedArray with ``cumsum`` applied to its data and the indicated dimension(s) removed See Also -------- numpy.cumsum dask.array.cumsum Dataset.cumsum DataArray.cumsum NamedArray.cumulative :ref:`agg` User guide on reduction or aggregation operations. Notes ----- Non-numeric variables will be removed prior to reducing. Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) and better supported. ``cumsum`` and ``cumprod`` may be deprecated in the future. 
Examples -------- >>> from xarray.namedarray.core import NamedArray >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) >>> na Size: 48B array([ 1., 2., 3., 0., 2., nan]) >>> na.cumsum() Size: 48B array([1., 3., 6., 6., 8., 8.]) Use ``skipna`` to control whether NaNs are ignored. >>> na.cumsum(skipna=False) Size: 48B array([ 1., 3., 6., 6., 8., nan]) """ return self.reduce( duck_array_ops.cumsum, dim=dim, skipna=skipna, **kwargs, ) def cumprod( self, dim: Dims = None, *, skipna: bool | None = None, **kwargs: Any, ) -> Self: """ Reduce this NamedArray's data by applying ``cumprod`` along some dimension(s). Parameters ---------- dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``cumprod``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). **kwargs : Any Additional keyword arguments passed on to the appropriate array function for calculating ``cumprod`` on this object's data. These could include dask-specific kwargs like ``split_every``. Returns ------- reduced : NamedArray New NamedArray with ``cumprod`` applied to its data and the indicated dimension(s) removed See Also -------- numpy.cumprod dask.array.cumprod Dataset.cumprod DataArray.cumprod NamedArray.cumulative :ref:`agg` User guide on reduction or aggregation operations. Notes ----- Non-numeric variables will be removed prior to reducing. Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) and better supported. ``cumsum`` and ``cumprod`` may be deprecated in the future. Examples -------- >>> from xarray.namedarray.core import NamedArray >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) >>> na Size: 48B array([ 1., 2., 3., 0., 2., nan]) >>> na.cumprod() Size: 48B array([1., 2., 6., 0., 0., 0.]) Use ``skipna`` to control whether NaNs are ignored. >>> na.cumprod(skipna=False) Size: 48B array([ 1., 2., 6., 0., 0., nan]) """ return self.reduce( duck_array_ops.cumprod, dim=dim, skipna=skipna, **kwargs, ) python-xarray-2026.01.0/xarray/namedarray/parallelcompat.py0000664000175000017500000006672715136607163024070 0ustar alastairalastair""" The code in this module is an experiment in going from N=1 to N=2 parallel computing frameworks in xarray. It could later be used as the basis for a public interface allowing any N frameworks to interoperate with xarray, but for now it is just a private experiment. """ from __future__ import annotations import functools from abc import ABC, abstractmethod from collections.abc import Callable, Iterable, Sequence from importlib.metadata import EntryPoint, entry_points from typing import TYPE_CHECKING, Any, Generic, Protocol, TypeVar import numpy as np from xarray.core.options import OPTIONS from xarray.core.utils import emit_user_level_warning from xarray.namedarray.pycompat import is_chunked_array if TYPE_CHECKING: from xarray.namedarray._typing import ( T_Chunks, _Chunks, _DType, _DType_co, _NormalizedChunks, _ShapeType, duckarray, ) class ChunkedArrayMixinProtocol(Protocol): def rechunk(self, chunks: Any, **kwargs: Any) -> Any: ... @property def dtype(self) -> np.dtype[Any]: ... @property def chunks(self) -> _NormalizedChunks: ... 
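    # compute() is expected to return eager numpy arrays; see ChunkManagerEntrypoint.compute below for the full contract.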
def compute( self, *data: Any, **kwargs: Any ) -> tuple[np.ndarray[Any, _DType_co], ...]: ... T_ChunkedArray = TypeVar("T_ChunkedArray", bound=ChunkedArrayMixinProtocol) KNOWN_CHUNKMANAGERS = { "dask": "dask", "cubed": "cubed-xarray", "arkouda": "arkouda-xarray", } @functools.lru_cache(maxsize=1) def list_chunkmanagers() -> dict[str, ChunkManagerEntrypoint[Any]]: """ Return a dictionary of available chunk managers and their ChunkManagerEntrypoint subclass objects. Returns ------- chunkmanagers : dict Dictionary whose values are registered ChunkManagerEntrypoint subclass instances, and whose values are the strings under which they are registered. """ entrypoints = entry_points(group="xarray.chunkmanagers") return load_chunkmanagers(entrypoints) def load_chunkmanagers( entrypoints: Sequence[EntryPoint], ) -> dict[str, ChunkManagerEntrypoint[Any]]: """Load entrypoints and instantiate chunkmanagers only once.""" loaded_entrypoints = {} for entrypoint in entrypoints: try: loaded_entrypoints[entrypoint.name] = entrypoint.load() except ModuleNotFoundError as e: emit_user_level_warning( f"Failed to load chunk manager entrypoint {entrypoint.name} due to {e}. Skipping.", ) available_chunkmanagers = { name: chunkmanager() for name, chunkmanager in loaded_entrypoints.items() if chunkmanager.available } return available_chunkmanagers def guess_chunkmanager( manager: str | ChunkManagerEntrypoint[Any] | None, ) -> ChunkManagerEntrypoint[Any]: """ Get namespace of chunk-handling methods, guessing from what's available. If the name of a specific ChunkManager is given (e.g. "dask"), then use that. Else use whatever is installed, defaulting to dask if there are multiple options. """ available_chunkmanagers = list_chunkmanagers() if manager is None: if len(available_chunkmanagers) == 1: # use the only option available manager = next(iter(available_chunkmanagers.keys())) else: # use the one in options (default dask) manager = OPTIONS["chunk_manager"] if isinstance(manager, str): if manager not in available_chunkmanagers and manager in KNOWN_CHUNKMANAGERS: raise ImportError( f"chunk manager {manager!r} is not available." f" Please make sure {KNOWN_CHUNKMANAGERS[manager]!r} is installed" " and importable." ) elif len(available_chunkmanagers) == 0: raise ImportError( "no chunk managers available. Try installing `dask` or another package" " that provides a chunk manager." ) elif manager not in available_chunkmanagers: raise ValueError( f"unrecognized chunk manager {manager!r} - must be one of the installed" f" chunk managers: {list(available_chunkmanagers)}" ) return available_chunkmanagers[manager] elif isinstance(manager, ChunkManagerEntrypoint): # already a valid ChunkManager so just pass through return manager else: raise TypeError( "manager must be a string or instance of ChunkManagerEntrypoint," f" but received type {type(manager)}" ) def get_chunked_array_type(*args: Any) -> ChunkManagerEntrypoint[Any]: """ Detects which parallel backend should be used for given set of arrays. Also checks that all arrays are of same chunking type (i.e. not a mix of cubed and dask). """ # TODO this list is probably redundant with something inside xarray.apply_ufunc ALLOWED_NON_CHUNKED_TYPES = {int, float, np.ndarray} chunked_arrays = [ a for a in args if is_chunked_array(a) and type(a) not in ALLOWED_NON_CHUNKED_TYPES ] # Asserts all arrays are the same type (or numpy etc.) 
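    # Mixing chunked array types (e.g. dask and cubed) in a single operation is not supported.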
chunked_array_types = {type(a) for a in chunked_arrays} if len(chunked_array_types) > 1: raise TypeError( f"Mixing chunked array types is not supported, but received multiple types: {chunked_array_types}" ) elif len(chunked_array_types) == 0: raise TypeError("Expected a chunked array but none were found") # iterate over defined chunk managers, seeing if each recognises this array type chunked_arr = chunked_arrays[0] chunkmanagers = list_chunkmanagers() selected = [ chunkmanager for chunkmanager in chunkmanagers.values() if chunkmanager.is_chunked_array(chunked_arr) ] if not selected: raise TypeError( f"Could not find a Chunk Manager which recognises type {type(chunked_arr)}" ) elif len(selected) >= 2: raise TypeError(f"Multiple ChunkManagers recognise type {type(chunked_arr)}") else: return selected[0] class ChunkManagerEntrypoint(ABC, Generic[T_ChunkedArray]): """ Interface between a particular parallel computing framework and xarray. This abstract base class must be subclassed by libraries implementing chunked array types, and registered via the ``chunkmanagers`` entrypoint. Abstract methods on this class must be implemented, whereas non-abstract methods are only required in order to enable a subset of xarray functionality, and by default will raise a ``NotImplementedError`` if called. Attributes ---------- array_cls Type of the array class this parallel computing framework provides. Parallel frameworks need to provide an array class that supports the array API standard. This attribute is used for array instance type checking at runtime. """ array_cls: type[T_ChunkedArray] available: bool = True @abstractmethod def __init__(self) -> None: """Used to set the array_cls attribute at import time.""" raise NotImplementedError() def is_chunked_array(self, data: duckarray[Any, Any]) -> bool: """ Check if the given object is an instance of this type of chunked array. Compares against the type stored in the array_cls attribute by default. Parameters ---------- data : Any Returns ------- is_chunked : bool See Also -------- dask.is_dask_collection """ return isinstance(data, self.array_cls) @abstractmethod def chunks(self, data: T_ChunkedArray) -> _NormalizedChunks: """ Return the current chunks of the given array. Returns chunks explicitly as a tuple of tuple of ints. Used internally by xarray objects' .chunks and .chunksizes properties. Parameters ---------- data : chunked array Returns ------- chunks : tuple[tuple[int, ...], ...] See Also -------- dask.array.Array.chunks cubed.Array.chunks """ raise NotImplementedError() @abstractmethod def normalize_chunks( self, chunks: _Chunks | _NormalizedChunks, shape: _ShapeType | None = None, limit: int | None = None, dtype: _DType | None = None, previous_chunks: _NormalizedChunks | None = None, ) -> _NormalizedChunks: """ Normalize given chunking pattern into an explicit tuple of tuples representation. Exposed primarily because different chunking backends may want to make different decisions about how to automatically chunk along dimensions not given explicitly in the input chunks. Called internally by xarray.open_dataset. Parameters ---------- chunks : tuple, int, dict, or string The chunks to be normalized. shape : Tuple[int] The shape of the array limit : int (optional) The maximum block size to target in bytes, if freedom is given to choose dtype : np.dtype previous_chunks : Tuple[Tuple[int]], optional Chunks from a previous array that we should use for inspiration when rechunking dimensions automatically. 
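
        Returns
        -------
        normalized_chunks : tuple[tuple[int, ...], ...]
            Chunks expressed explicitly as one tuple of block sizes per dimension.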
See Also -------- dask.array.core.normalize_chunks """ raise NotImplementedError() @abstractmethod def from_array( self, data: duckarray[Any, Any], chunks: _Chunks, **kwargs: Any ) -> T_ChunkedArray: """ Create a chunked array from a non-chunked numpy-like array. Generally input should have a ``.shape``, ``.ndim``, ``.dtype`` and support numpy-style slicing. Called when the .chunk method is called on an xarray object that is not already chunked. Also called within open_dataset (when chunks is not None) to create a chunked array from an xarray lazily indexed array. Parameters ---------- data : array_like chunks : int, tuple How to chunk the array. See Also -------- dask.array.from_array cubed.from_array """ raise NotImplementedError() def rechunk( self, data: T_ChunkedArray, chunks: _NormalizedChunks | tuple[int, ...] | _Chunks, **kwargs: Any, ) -> Any: """ Changes the chunking pattern of the given array. Called when the .chunk method is called on an xarray object that is already chunked. Parameters ---------- data : dask array Array to be rechunked. chunks : int, tuple, dict or str, optional The new block dimensions to create. -1 indicates the full size of the corresponding dimension. Default is "auto" which automatically determines chunk sizes. Returns ------- chunked array See Also -------- dask.array.Array.rechunk cubed.Array.rechunk """ from xarray.core.common import _contains_cftime_datetimes from xarray.namedarray.utils import _get_chunk if _contains_cftime_datetimes(data): chunks2 = _get_chunk(data, chunks, self, preferred_chunks={}) # type: ignore[arg-type] else: chunks2 = chunks # type: ignore[assignment] return data.rechunk(chunks2, **kwargs) @abstractmethod def compute( self, *data: T_ChunkedArray | Any, **kwargs: Any ) -> tuple[np.ndarray[Any, _DType_co], ...]: """ Computes one or more chunked arrays, returning them as eager numpy arrays. Called anytime something needs to computed, including multiple arrays at once. Used by `.compute`, `.persist`, `.values`. Parameters ---------- *data : object Any number of objects. If an object is an instance of the chunked array type, it is computed and the in-memory result returned as a numpy array. All other types should be passed through unchanged. Returns ------- objs The input, but with all chunked arrays now computed. See Also -------- dask.compute cubed.compute """ raise NotImplementedError() def shuffle( self, x: T_ChunkedArray, indexer: list[list[int]], axis: int, chunks: T_Chunks ) -> T_ChunkedArray: raise NotImplementedError() def persist( self, *data: T_ChunkedArray | Any, **kwargs: Any ) -> tuple[T_ChunkedArray | Any, ...]: """ Persist one or more chunked arrays in memory. Parameters ---------- *data : object Any number of objects. If an object is an instance of the chunked array type, it is persisted as a chunked array in memory. All other types should be passed through unchanged. Returns ------- objs The input, but with all chunked arrays now persisted in memory. See Also -------- dask.persist """ raise NotImplementedError() @property def array_api(self) -> Any: """ Return the array_api namespace following the python array API standard. See https://data-apis.org/array-api/latest/ . Currently used to access the array API function ``full_like``, which is called within the xarray constructors ``xarray.full_like``, ``xarray.ones_like``, ``xarray.zeros_like``, etc. 
See Also -------- dask.array cubed.array_api """ raise NotImplementedError() def reduction( self, arr: T_ChunkedArray, func: Callable[..., Any], combine_func: Callable[..., Any] | None = None, aggregate_func: Callable[..., Any] | None = None, axis: int | Sequence[int] | None = None, dtype: _DType_co | None = None, keepdims: bool = False, ) -> T_ChunkedArray: """ A general version of array reductions along one or more axes. Used inside some reductions like nanfirst, which is used by ``groupby.first``. Parameters ---------- arr : chunked array Data to be reduced along one or more axes. func : Callable(x_chunk, axis, keepdims) First function to be executed when resolving the dask graph. This function is applied in parallel to all original chunks of x. See below for function parameters. combine_func : Callable(x_chunk, axis, keepdims), optional Function used for intermediate recursive aggregation (see split_every below). If omitted, it defaults to aggregate_func. aggregate_func : Callable(x_chunk, axis, keepdims) Last function to be executed, producing the final output. It is always invoked, even when the reduced Array counts a single chunk along the reduced axes. axis : int or sequence of ints, optional Axis or axes to aggregate upon. If omitted, aggregate along all axes. dtype : np.dtype data type of output. This argument was previously optional, but leaving as ``None`` will now raise an exception. keepdims : boolean, optional Whether the reduction function should preserve the reduced axes, leaving them at size ``output_size``, or remove them. Returns ------- chunked array See Also -------- dask.array.reduction cubed.core.reduction """ raise NotImplementedError() def scan( self, func: Callable[..., Any], binop: Callable[..., Any], ident: float, arr: T_ChunkedArray, axis: int | None = None, dtype: _DType_co | None = None, **kwargs: Any, ) -> T_ChunkedArray: """ General version of a 1D scan, also known as a cumulative array reduction. Used in ``ffill`` and ``bfill`` in xarray. Parameters ---------- func: callable Cumulative function like np.cumsum or np.cumprod binop: callable Associated binary operator like ``np.cumsum->add`` or ``np.cumprod->mul`` ident: Number Associated identity like ``np.cumsum->0`` or ``np.cumprod->1`` arr: dask Array axis: int, optional dtype: dtype Returns ------- Chunked array See also -------- dask.array.cumreduction """ raise NotImplementedError() @abstractmethod def apply_gufunc( self, func: Callable[..., Any], signature: str, *args: Any, axes: Sequence[tuple[int, ...]] | None = None, keepdims: bool = False, output_dtypes: Sequence[_DType_co] | None = None, vectorize: bool | None = None, **kwargs: Any, ) -> Any: """ Apply a generalized ufunc or similar python function to arrays. ``signature`` determines if the function consumes or produces core dimensions. The remaining dimensions in given input arrays (``*args``) are considered loop dimensions and are required to broadcast naturally against each other. In other terms, this function is like ``np.vectorize``, but for the blocks of chunked arrays. If the function itself shall also be vectorized use ``vectorize=True`` for convenience. Called inside ``xarray.apply_ufunc``, which is called internally for most xarray operations. Therefore this method must be implemented for the vast majority of xarray computations to be supported. Parameters ---------- func : callable Function to call like ``func(*args, **kwargs)`` on input arrays (``*args``) that returns an array or tuple of arrays. 
If multiple arguments with non-matching dimensions are supplied, this function is expected to vectorize (broadcast) over axes of positional arguments in the style of NumPy universal functions [1]_ (if this is not the case, set ``vectorize=True``). If this function returns multiple outputs, ``output_core_dims`` has to be set as well. signature: string Specifies what core dimensions are consumed and produced by ``func``. According to the specification of numpy.gufunc signature [2]_ *args : numeric Input arrays or scalars to the callable function. axes: List of tuples, optional, keyword only A list of tuples with indices of axes a generalized ufunc should operate on. For instance, for a signature of ``"(i,j),(j,k)->(i,k)"`` appropriate for matrix multiplication, the base elements are two-dimensional matrices and these are taken to be stored in the two last axes of each argument. The corresponding axes keyword would be ``[(-2, -1), (-2, -1), (-2, -1)]``. For simplicity, for generalized ufuncs that operate on 1-dimensional arrays (vectors), a single integer is accepted instead of a single-element tuple, and for generalized ufuncs for which all outputs are scalars, the output tuples can be omitted. keepdims: bool, optional, keyword only If this is set to True, axes which are reduced over will be left in the result as a dimension with size one, so that the result will broadcast correctly against the inputs. This option can only be used for generalized ufuncs that operate on inputs that all have the same number of core dimensions and with outputs that have no core dimensions , i.e., with signatures like ``"(i),(i)->()"`` or ``"(m,m)->()"``. If used, the location of the dimensions in the output can be controlled with axes and axis. output_dtypes : Optional, dtype or list of dtypes, keyword only Valid numpy dtype specification or list thereof. If not given, a call of ``func`` with a small set of data is performed in order to try to automatically determine the output dtypes. vectorize: bool, keyword only If set to ``True``, ``np.vectorize`` is applied to ``func`` for convenience. Defaults to ``False``. **kwargs : dict Extra keyword arguments to pass to `func` Returns ------- Single chunked array or tuple of chunked arrays See Also -------- dask.array.gufunc.apply_gufunc cubed.apply_gufunc References ---------- .. [1] https://docs.scipy.org/doc/numpy/reference/ufuncs.html .. [2] https://docs.scipy.org/doc/numpy/reference/c-api/generalized-ufuncs.html """ raise NotImplementedError() def map_blocks( self, func: Callable[..., Any], *args: Any, dtype: _DType_co | None = None, chunks: tuple[int, ...] | None = None, drop_axis: int | Sequence[int] | None = None, new_axis: int | Sequence[int] | None = None, **kwargs: Any, ) -> Any: """ Map a function across all blocks of a chunked array. Called in elementwise operations, but notably not (currently) called within xarray.map_blocks. Parameters ---------- func : callable Function to apply to every block in the array. If ``func`` accepts ``block_info=`` or ``block_id=`` as keyword arguments, these will be passed dictionaries containing information about input and output chunks/arrays during computation. See examples for details. args : dask arrays or other objects dtype : np.dtype, optional The ``dtype`` of the output array. It is recommended to provide this. If not provided, will be inferred by applying the function to a small set of fake data. chunks : tuple, optional Chunk shape of resulting blocks if the function does not preserve shape. 
If not provided, the resulting array is assumed to have the same block structure as the first input array. drop_axis : number or iterable, optional Dimensions lost by the function. new_axis : number or iterable, optional New dimensions created by the function. Note that these are applied after ``drop_axis`` (if present). **kwargs : Other keyword arguments to pass to function. Values must be constants (not dask.arrays) See Also -------- dask.array.map_blocks cubed.map_blocks """ raise NotImplementedError() def blockwise( self, func: Callable[..., Any], out_ind: Iterable[Any], *args: Any, # can't type this as mypy assumes args are all same type, but dask blockwise args alternate types adjust_chunks: dict[Any, Callable[..., Any]] | None = None, new_axes: dict[Any, int] | None = None, align_arrays: bool = True, **kwargs: Any, ) -> Any: """ Tensor operation: Generalized inner and outer products. A broad class of blocked algorithms and patterns can be specified with a concise multi-index notation. The ``blockwise`` function applies an in-memory function across multiple blocks of multiple inputs in a variety of ways. Many chunked array operations are special cases of blockwise including elementwise, broadcasting, reductions, tensordot, and transpose. Currently only called explicitly in xarray when performing multidimensional interpolation. Parameters ---------- func : callable Function to apply to individual tuples of blocks out_ind : iterable Block pattern of the output, something like 'ijk' or (1, 2, 3) *args : sequence of Array, index pairs You may also pass literal arguments, accompanied by None index e.g. (x, 'ij', y, 'jk', z, 'i', some_literal, None) **kwargs : dict Extra keyword arguments to pass to function adjust_chunks : dict Dictionary mapping index to function to be applied to chunk sizes new_axes : dict, keyword only New indexes and their dimension lengths align_arrays: bool Whether or not to align chunks along equally sized dimensions when multiple arrays are provided. This allows for larger chunks in some arrays to be broken into smaller ones that match chunk sizes in other arrays such that they are compatible for block function mapping. If this is false, then an error will be thrown if arrays do not already have the same number of blocks in each dimension. See Also -------- dask.array.blockwise cubed.core.blockwise """ raise NotImplementedError() def unify_chunks( self, *args: Any, # can't type this as mypy assumes args are all same type, but dask unify_chunks args alternate types **kwargs: Any, ) -> tuple[dict[str, _NormalizedChunks], list[T_ChunkedArray]]: """ Unify chunks across a sequence of arrays. Called by xarray.unify_chunks. Parameters ---------- *args: sequence of Array, index pairs Sequence like (x, 'ij', y, 'jk', z, 'i') See Also -------- dask.array.core.unify_chunks cubed.core.unify_chunks """ raise NotImplementedError() def store( self, sources: T_ChunkedArray | Sequence[T_ChunkedArray], targets: Any, **kwargs: dict[str, Any], ) -> Any: """ Store chunked arrays in array-like objects, overwriting data in target. This stores chunked arrays into object that supports numpy-style setitem indexing (e.g. a Zarr Store). Allows storing values chunk by chunk so that it does not have to fill up memory. For best performance you likely want to align the block size of the storage target with the block size of your array. Used when writing to any registered xarray I/O backend. 
Parameters ---------- sources: Array or collection of Arrays targets: array-like or collection of array-likes These should support setitem syntax ``target[10:20] = ...``. If sources is a single item, targets must be a single item; if sources is a collection of arrays, targets must be a matching collection. kwargs: Parameters passed to compute/persist (only used if compute=True) See Also -------- dask.array.store cubed.store """ raise NotImplementedError() def get_auto_chunk_size( self, ) -> int: """ Get the default chunk size for a variable. This is used to determine the chunk size when opening a dataset with ``chunks="auto"`` or when rechunking an array with ``chunks="auto"``. Parameters ---------- target_chunksize : int, optional The target chunk size in bytes. If not provided, a default value is used. Returns ------- chunk_size : int The chunk size in bytes. """ raise NotImplementedError( "For 'auto' rechunking of cftime arrays, get_auto_chunk_size must be implemented by the chunk manager" ) python-xarray-2026.01.0/xarray/namedarray/_array_api.py0000664000175000017500000001352315136607163023160 0ustar alastairalastairfrom __future__ import annotations from types import ModuleType from typing import Any import numpy as np from xarray.namedarray._typing import ( Default, _arrayapi, _Axes, _Axis, _default, _Dim, _DType, _ScalarType, _ShapeType, _SupportsImag, _SupportsReal, ) from xarray.namedarray.core import NamedArray def _get_data_namespace(x: NamedArray[Any, Any]) -> ModuleType: if isinstance(x._data, _arrayapi): return x._data.__array_namespace__() return np # %% Creation Functions def astype( x: NamedArray[_ShapeType, Any], dtype: _DType, /, *, copy: bool = True ) -> NamedArray[_ShapeType, _DType]: """ Copies an array to a specified data type irrespective of Type Promotion Rules rules. Parameters ---------- x : NamedArray Array to cast. dtype : _DType Desired data type. copy : bool, optional Specifies whether to copy an array when the specified dtype matches the data type of the input array x. If True, a newly allocated array must always be returned. If False and the specified dtype matches the data type of the input array, the input array must be returned; otherwise, a newly allocated array must be returned. Default: True. Returns ------- out : NamedArray An array having the specified data type. The returned array must have the same shape as x. Examples -------- >>> narr = NamedArray(("x",), np.asarray([1.5, 2.5])) >>> narr Size: 16B array([1.5, 2.5]) >>> astype(narr, np.dtype(np.int32)) Size: 8B array([1, 2], dtype=int32) """ if isinstance(x._data, _arrayapi): xp = x._data.__array_namespace__() return x._new(data=xp.astype(x._data, dtype, copy=copy)) # np.astype doesn't exist yet: return x._new(data=x._data.astype(dtype, copy=copy)) # type: ignore[attr-defined] # %% Elementwise Functions def imag( x: NamedArray[_ShapeType, np.dtype[_SupportsImag[_ScalarType]]], # type: ignore[type-var] /, ) -> NamedArray[_ShapeType, np.dtype[_ScalarType]]: """ Returns the imaginary component of a complex number for each element x_i of the input array x. Parameters ---------- x : NamedArray Input array. Should have a complex floating-point data type. Returns ------- out : NamedArray An array containing the element-wise results. The returned array must have a floating-point data type with the same floating-point precision as x (e.g., if x is complex64, the returned array must have the floating-point data type float32). 
Examples -------- >>> narr = NamedArray(("x",), np.asarray([1.0 + 2j, 2 + 4j])) >>> imag(narr) Size: 16B array([2., 4.]) """ xp = _get_data_namespace(x) out = x._new(data=xp.imag(x._data)) return out def real( x: NamedArray[_ShapeType, np.dtype[_SupportsReal[_ScalarType]]], # type: ignore[type-var] /, ) -> NamedArray[_ShapeType, np.dtype[_ScalarType]]: """ Returns the real component of a complex number for each element x_i of the input array x. Parameters ---------- x : NamedArray Input array. Should have a complex floating-point data type. Returns ------- out : NamedArray An array containing the element-wise results. The returned array must have a floating-point data type with the same floating-point precision as x (e.g., if x is complex64, the returned array must have the floating-point data type float32). Examples -------- >>> narr = NamedArray(("x",), np.asarray([1.0 + 2j, 2 + 4j])) >>> real(narr) Size: 16B array([1., 2.]) """ xp = _get_data_namespace(x) out = x._new(data=xp.real(x._data)) return out # %% Manipulation functions def expand_dims( x: NamedArray[Any, _DType], /, *, dim: _Dim | Default = _default, axis: _Axis = 0, ) -> NamedArray[Any, _DType]: """ Expands the shape of an array by inserting a new dimension of size one at the position specified by dims. Parameters ---------- x : Array to expand. dim : Dimension name. New dimension will be stored in the axis position. axis : (Not recommended) Axis position (zero-based). Default is 0. Returns ------- out : An expanded output array having the same data type as x. Examples -------- >>> x = NamedArray(("x", "y"), np.asarray([[1.0, 2.0], [3.0, 4.0]])) >>> expand_dims(x) Size: 32B array([[[1., 2.], [3., 4.]]]) >>> expand_dims(x, dim="z") Size: 32B array([[[1., 2.], [3., 4.]]]) """ xp = _get_data_namespace(x) dims = x.dims if dim is _default: dim = f"dim_{len(dims)}" d = list(dims) d.insert(axis, dim) out = x._new(dims=tuple(d), data=xp.expand_dims(x._data, axis=axis)) return out def permute_dims(x: NamedArray[Any, _DType], axes: _Axes) -> NamedArray[Any, _DType]: """ Permutes the dimensions of an array. Parameters ---------- x : Array to permute. axes : Permutation of the dimensions of x. Returns ------- out : An array with permuted dimensions. The returned array must have the same data type as x. """ dims = x.dims new_dims = tuple(dims[i] for i in axes) if isinstance(x._data, _arrayapi): xp = _get_data_namespace(x) out = x._new(dims=new_dims, data=xp.permute_dims(x._data, axes)) else: out = x._new(dims=new_dims, data=x._data.transpose(axes)) # type: ignore[attr-defined] return out python-xarray-2026.01.0/xarray/namedarray/pycompat.py0000664000175000017500000001263415136607163022710 0ustar alastairalastairfrom __future__ import annotations from importlib import import_module from types import ModuleType from typing import TYPE_CHECKING, Any, Literal import numpy as np from packaging.version import Version from xarray.core.utils import is_scalar from xarray.namedarray.utils import is_duck_array, is_duck_dask_array integer_types = (int, np.integer) if TYPE_CHECKING: ModType = Literal["dask", "pint", "cupy", "sparse", "cubed", "numbagg"] DuckArrayTypes = tuple[type[Any], ...] # TODO: improve this? maybe Generic from xarray.namedarray._typing import _DType, _ShapeType, duckarray class DuckArrayModule: """ Solely for internal isinstance and version checks. 
Motivated by having to only import pint when required (as pint currently imports xarray) https://github.com/pydata/xarray/pull/5561#discussion_r664815718 """ module: ModuleType | None version: Version type: DuckArrayTypes available: bool def __init__(self, mod: ModType) -> None: duck_array_module: ModuleType | None duck_array_version: Version duck_array_type: DuckArrayTypes try: duck_array_module = import_module(mod) duck_array_version = Version(duck_array_module.__version__) if mod == "dask": duck_array_type = (import_module("dask.array").Array,) elif mod == "pint": duck_array_type = (duck_array_module.Quantity,) elif mod == "cupy": duck_array_type = (duck_array_module.ndarray,) elif mod == "sparse": duck_array_type = (duck_array_module.SparseArray,) elif mod == "cubed": duck_array_type = (duck_array_module.Array,) # Not a duck array module, but using this system regardless, to get lazy imports elif mod == "numbagg": duck_array_type = () else: raise NotImplementedError except (ImportError, AttributeError): # pragma: no cover duck_array_module = None duck_array_version = Version("0.0.0") duck_array_type = () self.module = duck_array_module self.version = duck_array_version self.type = duck_array_type self.available = duck_array_module is not None _cached_duck_array_modules: dict[ModType, DuckArrayModule] = {} def _get_cached_duck_array_module(mod: ModType) -> DuckArrayModule: if mod not in _cached_duck_array_modules: duckmod = DuckArrayModule(mod) _cached_duck_array_modules[mod] = duckmod return duckmod else: return _cached_duck_array_modules[mod] def array_type(mod: ModType) -> DuckArrayTypes: """Quick wrapper to get the array class of the module.""" return _get_cached_duck_array_module(mod).type def mod_version(mod: ModType) -> Version: """Quick wrapper to get the version of the module.""" return _get_cached_duck_array_module(mod).version def is_chunked_array(x: duckarray[Any, Any]) -> bool: return is_duck_dask_array(x) or (is_duck_array(x) and hasattr(x, "chunks")) def is_0d_dask_array(x: duckarray[Any, Any]) -> bool: return is_duck_dask_array(x) and is_scalar(x) def to_numpy( data: duckarray[Any, Any], **kwargs: dict[str, Any] ) -> np.ndarray[Any, np.dtype[Any]]: from xarray.core.indexing import ExplicitlyIndexed from xarray.namedarray.parallelcompat import get_chunked_array_type try: # for tests only at the moment return data.to_numpy() # type: ignore[no-any-return,union-attr] except AttributeError: pass if isinstance(data, ExplicitlyIndexed): data = data.get_duck_array() # type: ignore[no-untyped-call] # TODO first attempt to call .to_numpy() once some libraries implement it if is_chunked_array(data): chunkmanager = get_chunked_array_type(data) data, *_ = chunkmanager.compute(data, **kwargs) if isinstance(data, array_type("cupy")): data = data.get() # pint has to be imported dynamically as pint imports xarray if isinstance(data, array_type("pint")): data = data.magnitude if isinstance(data, array_type("sparse")): data = data.todense() data = np.asarray(data) return data def to_duck_array(data: Any, **kwargs: dict[str, Any]) -> duckarray[_ShapeType, _DType]: from xarray.core.indexing import ( ExplicitlyIndexed, ImplicitToExplicitIndexingAdapter, ) from xarray.namedarray.parallelcompat import get_chunked_array_type if is_chunked_array(data): chunkmanager = get_chunked_array_type(data) loaded_data, *_ = chunkmanager.compute(data, **kwargs) # type: ignore[var-annotated] return loaded_data if isinstance(data, ExplicitlyIndexed | ImplicitToExplicitIndexingAdapter): return 
data.get_duck_array() # type: ignore[no-untyped-call, no-any-return] elif is_duck_array(data): return data else: return np.asarray(data) # type: ignore[return-value] async def async_to_duck_array( data: Any, **kwargs: dict[str, Any] ) -> duckarray[_ShapeType, _DType]: from xarray.core.indexing import ( ExplicitlyIndexed, ImplicitToExplicitIndexingAdapter, ) if isinstance(data, ExplicitlyIndexed | ImplicitToExplicitIndexingAdapter): return await data.async_get_duck_array() # type: ignore[union-attr, no-any-return] else: return to_duck_array(data, **kwargs) python-xarray-2026.01.0/xarray/namedarray/utils.py0000664000175000017500000002436515136607163022220 0ustar alastairalastairfrom __future__ import annotations import importlib import itertools import sys import warnings from collections.abc import Hashable, Iterable, Iterator, Mapping from functools import lru_cache from numbers import Number from typing import TYPE_CHECKING, Any, TypeVar, cast import numpy as np from packaging.version import Version from xarray.namedarray._typing import ErrorOptionsWithWarn, _DimsLike if TYPE_CHECKING: from typing import TypeGuard from numpy.typing import NDArray try: from dask.array.core import Array as DaskArray from dask.typing import DaskCollection except ImportError: DaskArray = NDArray # type: ignore[assignment, misc] DaskCollection: Any = NDArray # type: ignore[no-redef] from xarray.core.types import T_ChunkDim from xarray.namedarray._typing import DuckArray, _Dim, duckarray from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint K = TypeVar("K") V = TypeVar("V") T = TypeVar("T") @lru_cache def module_available(module: str, minversion: str | None = None) -> bool: """Checks whether a module is installed without importing it. Use this for a lightweight check and lazy imports. Parameters ---------- module : str Name of the module. minversion : str, optional Minimum version of the module Returns ------- available : bool Whether the module is installed. 
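    Examples
    --------
    A minimal check against NumPy, which is a hard dependency of xarray and
    therefore importable wherever this function runs:

    >>> module_available("numpy")
    True
    >>> module_available("numpy", minversion="1.0")
    True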
""" if importlib.util.find_spec(module) is None: return False if minversion is not None: version = importlib.metadata.version(module) return Version(version) >= Version(minversion) return True def is_dask_collection(x: object) -> TypeGuard[DaskCollection]: if module_available("dask"): from dask.base import is_dask_collection # use is_dask_collection function instead of dask.typing.DaskCollection # see https://github.com/pydata/xarray/pull/8241#discussion_r1476276023 return is_dask_collection(x) return False def is_duck_array(value: Any) -> TypeGuard[duckarray[Any, Any]]: # TODO: replace is_duck_array with runtime checks via _arrayfunction_or_api protocol on # python 3.12 and higher (see https://github.com/pydata/xarray/issues/8696#issuecomment-1924588981) if isinstance(value, np.ndarray): return True return ( hasattr(value, "ndim") and hasattr(value, "shape") and hasattr(value, "dtype") and ( (hasattr(value, "__array_function__") and hasattr(value, "__array_ufunc__")) or hasattr(value, "__array_namespace__") ) ) def is_duck_dask_array(x: duckarray[Any, Any]) -> TypeGuard[DaskArray]: return is_duck_array(x) and is_dask_collection(x) def to_0d_object_array( value: object, ) -> NDArray[np.object_]: """Given a value, wrap it in a 0-D numpy.ndarray with dtype=object.""" result = np.empty((), dtype=object) result[()] = value return result def is_dict_like(value: Any) -> TypeGuard[Mapping[Any, Any]]: return hasattr(value, "keys") and hasattr(value, "__getitem__") def drop_missing_dims( supplied_dims: Iterable[_Dim], dims: Iterable[_Dim], missing_dims: ErrorOptionsWithWarn, ) -> _DimsLike: """Depending on the setting of missing_dims, drop any dimensions from supplied_dims that are not present in dims. Parameters ---------- supplied_dims : Iterable of Hashable dims : Iterable of Hashable missing_dims : {"raise", "warn", "ignore"} """ if missing_dims == "raise": supplied_dims_set = {val for val in supplied_dims if val is not ...} if invalid := supplied_dims_set - set(dims): raise ValueError( f"Dimensions {invalid} do not exist. Expected one or more of {dims}" ) return supplied_dims elif missing_dims == "warn": if invalid := set(supplied_dims) - set(dims): warnings.warn( f"Dimensions {invalid} do not exist. Expected one or more of {dims}", stacklevel=2, ) return [val for val in supplied_dims if val in dims or val is ...] elif missing_dims == "ignore": return [val for val in supplied_dims if val in dims or val is ...] else: raise ValueError( f"Unrecognised option {missing_dims} for missing_dims argument" ) def infix_dims( dims_supplied: Iterable[_Dim], dims_all: Iterable[_Dim], missing_dims: ErrorOptionsWithWarn = "raise", ) -> Iterator[_Dim]: """ Resolves a supplied list containing an ellipsis representing other items, to a generator with the 'realized' list of all items """ if ... in dims_supplied: dims_all_list = list(dims_all) if len(set(dims_all)) != len(dims_all_list): raise ValueError("Cannot use ellipsis with repeated dims") if list(dims_supplied).count(...) 
> 1: raise ValueError("More than one ellipsis supplied") other_dims = [d for d in dims_all if d not in dims_supplied] existing_dims = drop_missing_dims(dims_supplied, dims_all, missing_dims) for d in existing_dims: if d is ...: yield from other_dims else: yield d else: existing_dims = drop_missing_dims(dims_supplied, dims_all, missing_dims) if set(existing_dims) ^ set(dims_all): raise ValueError( f"{dims_supplied} must be a permuted list of {dims_all}, unless `...` is included" ) yield from existing_dims def either_dict_or_kwargs( pos_kwargs: Mapping[Any, T] | None, kw_kwargs: Mapping[str, T], func_name: str, ) -> Mapping[Hashable, T]: if pos_kwargs is None or pos_kwargs == {}: # Need an explicit cast to appease mypy due to invariance; see # https://github.com/python/mypy/issues/6228 return cast(Mapping[Hashable, T], kw_kwargs) if not is_dict_like(pos_kwargs): raise ValueError(f"the first argument to .{func_name} must be a dictionary") if kw_kwargs: raise ValueError( f"cannot specify both keyword and positional arguments to .{func_name}" ) return pos_kwargs def _get_chunk( # type: ignore[no-untyped-def] data: DuckArray[Any], chunks, chunkmanager: ChunkManagerEntrypoint[Any], *, preferred_chunks, dims=None, ) -> Mapping[Any, T_ChunkDim]: """ Return map from each dim to chunk sizes, accounting for backend's preferred chunks. """ from xarray.core.common import _contains_cftime_datetimes from xarray.core.utils import emit_user_level_warning from xarray.structure.chunks import _get_breaks_cached dims = chunks.keys() if dims is None else dims shape = data.shape # Determine the explicit requested chunks. preferred_chunk_shape = tuple( itertools.starmap(preferred_chunks.get, zip(dims, shape, strict=True)) ) if isinstance(chunks, Number) or (chunks == "auto"): chunks = dict.fromkeys(dims, chunks) chunk_shape = tuple( chunks.get(dim, None) or preferred_chunk_sizes for dim, preferred_chunk_sizes in zip(dims, preferred_chunk_shape, strict=True) ) limit: int | None if _contains_cftime_datetimes(data): limit, dtype = fake_target_chunksize(data, chunkmanager.get_auto_chunk_size()) else: limit = None dtype = data.dtype chunk_shape = chunkmanager.normalize_chunks( chunk_shape, shape=shape, dtype=dtype, limit=limit, previous_chunks=preferred_chunk_shape, ) # Warn where requested chunks break preferred chunks, provided that the variable # contains data. if data.size: # type: ignore[unused-ignore,attr-defined] # DuckArray protocol doesn't include 'size' - should it? for dim, size, chunk_sizes in zip(dims, shape, chunk_shape, strict=True): if preferred_chunk_sizes := preferred_chunks.get(dim): disagreement = _get_breaks_cached( size=size, chunk_sizes=chunk_sizes, preferred_chunk_sizes=preferred_chunk_sizes, ) if disagreement: emit_user_level_warning( "The specified chunks separate the stored chunks along " f'dimension "{dim}" starting at index {disagreement}. This could ' "degrade performance. Instead, consider rechunking after loading.", ) return dict(zip(dims, chunk_shape, strict=True)) def fake_target_chunksize( data: DuckArray[Any], limit: int, ) -> tuple[int, np.dtype[Any]]: """ The `normalize_chunks` algorithm takes a size `limit` in bytes, but will not work for object dtypes. So we rescale the `limit` to an appropriate one based on `float64` dtype, and pass that to `normalize_chunks`. Arguments --------- data : Variable or ChunkedArray The data for which we want to determine chunk sizes. limit : int The target chunk size in bytes. Passed to the chunk manager's `normalize_chunks` method. 
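    Returns
    -------
    limit : int
        The rescaled chunk-size limit in bytes.
    dtype : numpy.dtype
        The dtype to use for chunk normalization: ``float64`` when ``data``
        holds cftime objects, otherwise the original ``data.dtype``.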
""" # Short circuit for non-object dtypes from xarray.core.common import _contains_cftime_datetimes if not _contains_cftime_datetimes(data): return limit, data.dtype from xarray.core.formatting import first_n_items output_dtype = np.dtype(np.float64) nbytes_approx: int = sys.getsizeof(first_n_items(data, 1)) # type: ignore[no-untyped-call] f64_nbytes = output_dtype.itemsize limit = int(limit * (f64_nbytes / nbytes_approx)) return limit, output_dtype class ReprObject: """Object that prints as the given value, for use with sentinel values.""" __slots__ = ("_value",) _value: str def __init__(self, value: str): self._value = value def __repr__(self) -> str: return self._value def __eq__(self, other: ReprObject | Any) -> bool: # TODO: What type can other be? ArrayLike? return self._value == other._value if isinstance(other, ReprObject) else False def __hash__(self) -> int: return hash((type(self), self._value)) def __dask_tokenize__(self) -> object: from dask.base import normalize_token return normalize_token((type(self), self._value)) python-xarray-2026.01.0/xarray/namedarray/_typing.py0000664000175000017500000001761315136607163022527 0ustar alastairalastairfrom __future__ import annotations from collections.abc import Callable, Hashable, Iterable, Mapping, Sequence from enum import Enum from types import EllipsisType, ModuleType from typing import ( TYPE_CHECKING, Any, Final, Literal, Protocol, SupportsIndex, TypeVar, Union, overload, runtime_checkable, ) import numpy as np try: from typing import TypeAlias except ImportError: if TYPE_CHECKING: raise else: Self: Any = None # Singleton type, as per https://github.com/python/typing/pull/240 class Default(Enum): token: Final = 0 _default = Default.token # https://stackoverflow.com/questions/74633074/how-to-type-hint-a-generic-numpy-array _T_co = TypeVar("_T_co", covariant=True) _dtype = np.dtype _DType = TypeVar("_DType", bound=np.dtype[Any]) _DType_co = TypeVar("_DType_co", covariant=True, bound=np.dtype[Any]) # A subset of `npt.DTypeLike` that can be parametrized w.r.t. `np.generic` _ScalarType = TypeVar("_ScalarType", bound=np.generic) _ScalarType_co = TypeVar("_ScalarType_co", bound=np.generic, covariant=True) # A protocol for anything with the dtype attribute @runtime_checkable class _SupportsDType(Protocol[_DType_co]): @property def dtype(self) -> _DType_co: ... _DTypeLike = Union[ np.dtype[_ScalarType], type[_ScalarType], _SupportsDType[np.dtype[_ScalarType]], ] # For unknown shapes Dask uses np.nan, array_api uses None: _IntOrUnknown = int _Shape = tuple[_IntOrUnknown, ...] _ShapeLike = Union[SupportsIndex, Sequence[SupportsIndex]] _ShapeType = TypeVar("_ShapeType", bound=Any) _ShapeType_co = TypeVar("_ShapeType_co", bound=Any, covariant=True) _Axis = int _Axes = tuple[_Axis, ...] _AxisLike = Union[_Axis, _Axes] _Chunks = tuple[_Shape, ...] _NormalizedChunks = tuple[tuple[int, ...], ...] # FYI in some cases we don't allow `None`, which this doesn't take account of. # # FYI the `str` is for a size string, e.g. "16MB", supported by dask. T_ChunkDim: TypeAlias = str | int | Literal["auto"] | tuple[int, ...] | None # noqa: PYI051 # We allow the tuple form of this (though arguably we could transition to named dims only) T_Chunks: TypeAlias = T_ChunkDim | Mapping[Any, T_ChunkDim] _Dim = Hashable _Dims = tuple[_Dim, ...] _DimsLike = Union[str, Iterable[_Dim]] # https://data-apis.org/array-api/latest/API_specification/indexing.html # TODO: np.array_api was bugged and didn't allow (None,), but should! 
# https://github.com/numpy/numpy/pull/25022 # https://github.com/data-apis/array-api/pull/674 _IndexKey = Union[int, slice, EllipsisType] _IndexKeys = tuple[_IndexKey, ...] # tuple[Union[_IndexKey, None], ...] _IndexKeyLike = Union[_IndexKey, _IndexKeys] _AttrsLike = Union[Mapping[Any, Any], None] class _SupportsReal(Protocol[_T_co]): @property def real(self) -> _T_co: ... class _SupportsImag(Protocol[_T_co]): @property def imag(self) -> _T_co: ... @runtime_checkable class _array(Protocol[_ShapeType_co, _DType_co]): """ Minimal duck array named array uses. Corresponds to np.ndarray. """ @property def shape(self) -> _Shape: ... @property def dtype(self) -> _DType_co: ... @runtime_checkable class _arrayfunction( _array[_ShapeType_co, _DType_co], Protocol[_ShapeType_co, _DType_co] ): """ Duck array supporting NEP 18. Corresponds to np.ndarray. """ @overload def __getitem__( self, key: _arrayfunction[Any, Any] | tuple[_arrayfunction[Any, Any], ...], / ) -> _arrayfunction[Any, _DType_co]: ... @overload def __getitem__(self, key: _IndexKeyLike, /) -> Any: ... def __getitem__( self, key: ( _IndexKeyLike | _arrayfunction[Any, Any] | tuple[_arrayfunction[Any, Any], ...] ), /, ) -> _arrayfunction[Any, _DType_co] | Any: ... @overload def __array__( self, dtype: None = ..., /, *, copy: bool | None = ... ) -> np.ndarray[Any, _DType_co]: ... @overload def __array__( self, dtype: _DType, /, *, copy: bool | None = ... ) -> np.ndarray[Any, _DType]: ... def __array__( self, dtype: _DType | None = ..., /, *, copy: bool | None = ... ) -> np.ndarray[Any, _DType] | np.ndarray[Any, _DType_co]: ... # TODO: Should return the same subclass but with a new dtype generic. # https://github.com/python/typing/issues/548 def __array_ufunc__( self, ufunc: Any, method: Any, *inputs: Any, **kwargs: Any, ) -> Any: ... # TODO: Should return the same subclass but with a new dtype generic. # https://github.com/python/typing/issues/548 def __array_function__( self, func: Callable[..., Any], types: Iterable[type], args: Iterable[Any], kwargs: Mapping[str, Any], ) -> Any: ... @property def imag(self) -> _arrayfunction[_ShapeType_co, Any]: ... @property def real(self) -> _arrayfunction[_ShapeType_co, Any]: ... @runtime_checkable class _arrayapi(_array[_ShapeType_co, _DType_co], Protocol[_ShapeType_co, _DType_co]): """ Duck array supporting NEP 47. Corresponds to np.ndarray. """ def __getitem__( self, key: ( _IndexKeyLike | Any ), # TODO: Any should be _arrayapi[Any, _dtype[np.integer]] /, ) -> _arrayapi[Any, Any]: ... def __array_namespace__(self) -> ModuleType: ... # NamedArray can most likely use both __array_function__ and __array_namespace__: _arrayfunction_or_api = (_arrayfunction, _arrayapi) duckarray = Union[ _arrayfunction[_ShapeType_co, _DType_co], _arrayapi[_ShapeType_co, _DType_co] ] # Corresponds to np.typing.NDArray: DuckArray = _arrayfunction[Any, np.dtype[_ScalarType_co]] @runtime_checkable class _chunkedarray( _array[_ShapeType_co, _DType_co], Protocol[_ShapeType_co, _DType_co] ): """ Minimal chunked duck array. Corresponds to np.ndarray. """ @property def chunks(self) -> _Chunks: ... @runtime_checkable class _chunkedarrayfunction( _arrayfunction[_ShapeType_co, _DType_co], Protocol[_ShapeType_co, _DType_co] ): """ Chunked duck array supporting NEP 18. Corresponds to np.ndarray. """ @property def chunks(self) -> _Chunks: ... @runtime_checkable class _chunkedarrayapi( _arrayapi[_ShapeType_co, _DType_co], Protocol[_ShapeType_co, _DType_co] ): """ Chunked duck array supporting NEP 47. Corresponds to np.ndarray. 
""" @property def chunks(self) -> _Chunks: ... # NamedArray can most likely use both __array_function__ and __array_namespace__: _chunkedarrayfunction_or_api = (_chunkedarrayfunction, _chunkedarrayapi) chunkedduckarray = Union[ _chunkedarrayfunction[_ShapeType_co, _DType_co], _chunkedarrayapi[_ShapeType_co, _DType_co], ] @runtime_checkable class _sparsearray( _array[_ShapeType_co, _DType_co], Protocol[_ShapeType_co, _DType_co] ): """ Minimal sparse duck array. Corresponds to np.ndarray. """ def todense(self) -> np.ndarray[Any, _DType_co]: ... @runtime_checkable class _sparsearrayfunction( _arrayfunction[_ShapeType_co, _DType_co], Protocol[_ShapeType_co, _DType_co] ): """ Sparse duck array supporting NEP 18. Corresponds to np.ndarray. """ def todense(self) -> np.ndarray[Any, _DType_co]: ... @runtime_checkable class _sparsearrayapi( _arrayapi[_ShapeType_co, _DType_co], Protocol[_ShapeType_co, _DType_co] ): """ Sparse duck array supporting NEP 47. Corresponds to np.ndarray. """ def todense(self) -> np.ndarray[Any, _DType_co]: ... # NamedArray can most likely use both __array_function__ and __array_namespace__: _sparsearrayfunction_or_api = (_sparsearrayfunction, _sparsearrayapi) sparseduckarray = Union[ _sparsearrayfunction[_ShapeType_co, _DType_co], _sparsearrayapi[_ShapeType_co, _DType_co], ] ErrorOptions = Literal["raise", "ignore"] ErrorOptionsWithWarn = Literal["raise", "warn", "ignore"] python-xarray-2026.01.0/xarray/namedarray/core.py0000664000175000017500000011612515136607163022004 0ustar alastairalastairfrom __future__ import annotations import copy import math import warnings from collections.abc import Callable, Hashable, Iterable, Mapping, Sequence from itertools import starmap from types import EllipsisType from typing import ( TYPE_CHECKING, Any, Generic, Literal, TypeVar, cast, overload, ) import numpy as np # TODO: get rid of this after migrating this class to array API from xarray.core import dtypes, formatting, formatting_html from xarray.core.indexing import ( ExplicitlyIndexed, ImplicitToExplicitIndexingAdapter, OuterIndexer, ) from xarray.namedarray._aggregations import NamedArrayAggregations from xarray.namedarray._typing import ( ErrorOptionsWithWarn, _arrayapi, _arrayfunction_or_api, _chunkedarray, _default, _dtype, _DType_co, _ScalarType_co, _ShapeType_co, _sparsearrayfunction_or_api, _SupportsImag, _SupportsReal, ) from xarray.namedarray.parallelcompat import guess_chunkmanager from xarray.namedarray.pycompat import to_numpy from xarray.namedarray.utils import ( either_dict_or_kwargs, infix_dims, is_dict_like, is_duck_dask_array, to_0d_object_array, ) if TYPE_CHECKING: from numpy.typing import ArrayLike, NDArray from xarray.core.types import Dims, T_Chunks from xarray.namedarray._typing import ( Default, _AttrsLike, _Chunks, _Dim, _Dims, _DimsLike, _DType, _IntOrUnknown, _ScalarType, _Shape, _ShapeType, duckarray, ) from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint try: from dask.typing import ( Graph, NestedKeys, PostComputeCallable, PostPersistCallable, SchedulerGetCallable, ) except ImportError: Graph: Any # type: ignore[no-redef] NestedKeys: Any # type: ignore[no-redef] SchedulerGetCallable: Any # type: ignore[no-redef] PostComputeCallable: Any # type: ignore[no-redef] PostPersistCallable: Any # type: ignore[no-redef] from typing import Self T_NamedArray = TypeVar("T_NamedArray", bound="_NamedArray[Any]") T_NamedArrayInteger = TypeVar( "T_NamedArrayInteger", bound="_NamedArray[np.integer[Any]]" ) @overload def _new( x: NamedArray[Any, 
_DType_co], dims: _DimsLike | Default = ..., data: duckarray[_ShapeType, _DType] = ..., attrs: _AttrsLike | Default = ..., ) -> NamedArray[_ShapeType, _DType]: ... @overload def _new( x: NamedArray[_ShapeType_co, _DType_co], dims: _DimsLike | Default = ..., data: Default = ..., attrs: _AttrsLike | Default = ..., ) -> NamedArray[_ShapeType_co, _DType_co]: ... def _new( x: NamedArray[Any, _DType_co], dims: _DimsLike | Default = _default, data: duckarray[_ShapeType, _DType] | Default = _default, attrs: _AttrsLike | Default = _default, ) -> NamedArray[_ShapeType, _DType] | NamedArray[Any, _DType_co]: """ Create a new array with new typing information. Parameters ---------- x : NamedArray Array to create a new array from dims : Iterable of Hashable, optional Name(s) of the dimension(s). Will copy the dims from x by default. data : duckarray, optional The actual data that populates the array. Should match the shape specified by `dims`. Will copy the data from x by default. attrs : dict, optional A dictionary containing any additional information or attributes you want to store with the array. Will copy the attrs from x by default. """ dims_ = copy.copy(x._dims) if dims is _default else dims attrs_: Mapping[Any, Any] | None if attrs is _default: attrs_ = None if x._attrs is None else x._attrs.copy() else: attrs_ = attrs if data is _default: return type(x)(dims_, copy.copy(x._data), attrs_) else: cls_ = cast("type[NamedArray[_ShapeType, _DType]]", type(x)) return cls_(dims_, data, attrs_) @overload def from_array( dims: _DimsLike, data: duckarray[_ShapeType, _DType], attrs: _AttrsLike = ..., ) -> NamedArray[_ShapeType, _DType]: ... @overload def from_array( dims: _DimsLike, data: ArrayLike, attrs: _AttrsLike = ..., ) -> NamedArray[Any, Any]: ... def from_array( dims: _DimsLike, data: duckarray[_ShapeType, _DType] | ArrayLike, attrs: _AttrsLike = None, ) -> NamedArray[_ShapeType, _DType] | NamedArray[Any, Any]: """ Create a Named array from an array-like object. Parameters ---------- dims : str or iterable of str Name(s) of the dimension(s). data : T_DuckArray or ArrayLike The actual data that populates the array. Should match the shape specified by `dims`. attrs : dict, optional A dictionary containing any additional information or attributes you want to store with the array. Default is None, meaning no attributes will be stored. """ if isinstance(data, NamedArray): raise TypeError( "Array is already a Named array. Use 'data.data' to retrieve the data array" ) # TODO: dask.array.ma.MaskedArray also exists, better way? if isinstance(data, np.ma.MaskedArray): mask = np.ma.getmaskarray(data) # type: ignore[no-untyped-call] if mask.any(): # TODO: requires refactoring/vendoring xarray.core.dtypes and # xarray.core.duck_array_ops raise NotImplementedError("MaskedArray is not supported yet") return NamedArray(dims, data, attrs) if isinstance(data, _arrayfunction_or_api) and not isinstance(data, np.generic): return NamedArray(dims, data, attrs) if isinstance(data, tuple): return NamedArray(dims, to_0d_object_array(data), attrs) # validate whether the data is valid data types. return NamedArray(dims, np.asarray(data), attrs) class NamedArray(NamedArrayAggregations, Generic[_ShapeType_co, _DType_co]): """ A wrapper around duck arrays with named dimensions and attributes which describe a single Array. Numeric operations on this object implement array broadcasting and dimension alignment based on dimension names, rather than axis order. 
Parameters ---------- dims : str or iterable of hashable Name(s) of the dimension(s). data : array-like or duck-array The actual data that populates the array. Should match the shape specified by `dims`. attrs : dict, optional A dictionary containing any additional information or attributes you want to store with the array. Default is None, meaning no attributes will be stored. Raises ------ ValueError If the `dims` length does not match the number of data dimensions (ndim). Examples -------- >>> data = np.array([1.5, 2, 3], dtype=float) >>> narr = NamedArray(("x",), data, {"units": "m"}) # TODO: Better name than narr? """ __slots__ = ("_attrs", "_data", "_dims") _data: duckarray[Any, _DType_co] _dims: _Dims _attrs: dict[Any, Any] | None def __init__( self, dims: _DimsLike, data: duckarray[Any, _DType_co], attrs: _AttrsLike = None, ): self._data = data self._dims = self._parse_dimensions(dims) self._attrs = dict(attrs) if attrs else None def __init_subclass__(cls, **kwargs: Any) -> None: if NamedArray in cls.__bases__ and (cls._new == NamedArray._new): # Type hinting does not work for subclasses unless _new is # overridden with the correct class. raise TypeError( "Subclasses of `NamedArray` must override the `_new` method." ) super().__init_subclass__(**kwargs) @overload def _new( self, dims: _DimsLike | Default = ..., data: duckarray[_ShapeType, _DType] = ..., attrs: _AttrsLike | Default = ..., ) -> NamedArray[_ShapeType, _DType]: ... @overload def _new( self, dims: _DimsLike | Default = ..., data: Default = ..., attrs: _AttrsLike | Default = ..., ) -> NamedArray[_ShapeType_co, _DType_co]: ... def _new( self, dims: _DimsLike | Default = _default, data: duckarray[Any, _DType] | Default = _default, attrs: _AttrsLike | Default = _default, ) -> NamedArray[_ShapeType, _DType] | NamedArray[_ShapeType_co, _DType_co]: """ Create a new array with new typing information. _new has to be reimplemented each time NamedArray is subclassed, otherwise type hints will not be correct. The same is likely true for methods that relied on _new. Parameters ---------- dims : Iterable of Hashable, optional Name(s) of the dimension(s). Will copy the dims from x by default. data : duckarray, optional The actual data that populates the array. Should match the shape specified by `dims`. Will copy the data from x by default. attrs : dict, optional A dictionary containing any additional information or attributes you want to store with the array. Will copy the attrs from x by default. """ return _new(self, dims, data, attrs) def _replace( self, dims: _DimsLike | Default = _default, data: duckarray[_ShapeType_co, _DType_co] | Default = _default, attrs: _AttrsLike | Default = _default, ) -> Self: """ Create a new array with the same typing information. The types for each argument cannot change, use self._new if that is a risk. Parameters ---------- dims : Iterable of Hashable, optional Name(s) of the dimension(s). Will copy the dims from x by default. data : duckarray, optional The actual data that populates the array. Should match the shape specified by `dims`. Will copy the data from x by default. attrs : dict, optional A dictionary containing any additional information or attributes you want to store with the array. Will copy the attrs from x by default. 
""" return cast("Self", self._new(dims, data, attrs)) def _copy( self, deep: bool = True, data: duckarray[_ShapeType_co, _DType_co] | None = None, memo: dict[int, Any] | None = None, ) -> Self: if data is None: ndata = self._data if deep: ndata = copy.deepcopy(ndata, memo=memo) else: ndata = data self._check_shape(ndata) attrs = ( copy.deepcopy(self._attrs, memo=memo) if deep else copy.copy(self._attrs) ) return self._replace(data=ndata, attrs=attrs) def __copy__(self) -> Self: return self._copy(deep=False) def __deepcopy__(self, memo: dict[int, Any] | None = None) -> Self: return self._copy(deep=True, memo=memo) def copy( self, deep: bool = True, data: duckarray[_ShapeType_co, _DType_co] | None = None, ) -> Self: """Returns a copy of this object. If `deep=True`, the data array is loaded into memory and copied onto the new object. Dimensions, attributes and encodings are always copied. Use `data` to create a new object with the same structure as original but entirely new data. Parameters ---------- deep : bool, default: True Whether the data array is loaded into memory and copied onto the new object. Default is True. data : array_like, optional Data to use in the new object. Must have same shape as original. When `data` is used, `deep` is ignored. Returns ------- object : NamedArray New object with dimensions, attributes, and optionally data copied from original. """ return self._copy(deep=deep, data=data) @property def ndim(self) -> int: """ Number of array dimensions. See Also -------- numpy.ndarray.ndim """ return len(self.shape) @property def size(self) -> _IntOrUnknown: """ Number of elements in the array. Equal to ``np.prod(a.shape)``, i.e., the product of the array’s dimensions. See Also -------- numpy.ndarray.size """ return math.prod(self.shape) def __len__(self) -> _IntOrUnknown: try: return self.shape[0] except Exception as exc: raise TypeError("len() of unsized object") from exc @property def dtype(self) -> _DType_co: """ Data-type of the array’s elements. See Also -------- ndarray.dtype numpy.dtype """ return self._data.dtype @property def shape(self) -> _Shape: """ Get the shape of the array. Returns ------- shape : tuple of ints Tuple of array dimensions. See Also -------- numpy.ndarray.shape """ return self._data.shape @property def nbytes(self) -> _IntOrUnknown: """ Total bytes consumed by the elements of the data array. If the underlying data array does not include ``nbytes``, estimates the bytes consumed based on the ``size`` and ``dtype``. 
""" from xarray.namedarray._array_api import _get_data_namespace if hasattr(self._data, "nbytes"): return self._data.nbytes # type: ignore[no-any-return] if hasattr(self.dtype, "itemsize"): itemsize = self.dtype.itemsize elif isinstance(self._data, _arrayapi): xp = _get_data_namespace(self) if xp.isdtype(self.dtype, "bool"): itemsize = 1 elif xp.isdtype(self.dtype, "integral"): itemsize = xp.iinfo(self.dtype).bits // 8 else: itemsize = xp.finfo(self.dtype).bits // 8 else: raise TypeError( "cannot compute the number of bytes (no array API nor nbytes / itemsize)" ) return self.size * itemsize @property def dims(self) -> _Dims: """Tuple of dimension names with which this NamedArray is associated.""" return self._dims @dims.setter def dims(self, value: _DimsLike) -> None: self._dims = self._parse_dimensions(value) def _parse_dimensions(self, dims: _DimsLike) -> _Dims: dims = (dims,) if isinstance(dims, str) else tuple(dims) if len(dims) != self.ndim: raise ValueError( f"dimensions {dims} must have the same length as the " f"number of data dimensions, ndim={self.ndim}" ) if len(set(dims)) < len(dims): repeated_dims = {d for d in dims if dims.count(d) > 1} warnings.warn( f"Duplicate dimension names present: dimensions {repeated_dims} appear more than once in dims={dims}. " "We do not yet support duplicate dimension names, but we do allow initial construction of the object. " "We recommend you rename the dims immediately to become distinct, as most xarray functionality is likely to fail silently if you do not. " "To rename the dimensions you will need to set the ``.dims`` attribute of each variable, ``e.g. var.dims=('x0', 'x1')``.", UserWarning, stacklevel=2, ) return dims @property def attrs(self) -> dict[Any, Any]: """Dictionary of local attributes on this NamedArray.""" if self._attrs is None: self._attrs = {} return self._attrs @attrs.setter def attrs(self, value: Mapping[Any, Any]) -> None: self._attrs = dict(value) if value else None def _check_shape(self, new_data: duckarray[Any, _DType_co]) -> None: if new_data.shape != self.shape: raise ValueError( f"replacement data must match the {self.__class__.__name__}'s shape. " f"replacement data has shape {new_data.shape}; {self.__class__.__name__} has shape {self.shape}" ) @property def data(self) -> duckarray[Any, _DType_co]: """ The NamedArray's data as an array. The underlying array type (e.g. dask, sparse, pint) is preserved. """ return self._data @data.setter def data(self, data: duckarray[Any, _DType_co]) -> None: self._check_shape(data) self._data = data @property def imag( self: NamedArray[_ShapeType, np.dtype[_SupportsImag[_ScalarType]]], # type: ignore[type-var] ) -> NamedArray[_ShapeType, _dtype[_ScalarType]]: """ The imaginary part of the array. See Also -------- numpy.ndarray.imag """ if isinstance(self._data, _arrayapi): from xarray.namedarray._array_api import imag return imag(self) return self._new(data=self._data.imag) @property def real( self: NamedArray[_ShapeType, np.dtype[_SupportsReal[_ScalarType]]], # type: ignore[type-var] ) -> NamedArray[_ShapeType, _dtype[_ScalarType]]: """ The real part of the array. 
See Also -------- numpy.ndarray.real """ if isinstance(self._data, _arrayapi): from xarray.namedarray._array_api import real return real(self) return self._new(data=self._data.real) def __dask_tokenize__(self) -> object: # Use v.data, instead of v._data, in order to cope with the wrappers # around NetCDF and the like from dask.base import normalize_token return normalize_token((type(self), self._dims, self.data, self._attrs or None)) def __dask_graph__(self) -> Graph | None: if is_duck_dask_array(self._data): return self._data.__dask_graph__() else: # TODO: Should this method just raise instead? # raise NotImplementedError("Method requires self.data to be a dask array") return None def __dask_keys__(self) -> NestedKeys: if is_duck_dask_array(self._data): return self._data.__dask_keys__() else: raise AttributeError("Method requires self.data to be a dask array.") def __dask_layers__(self) -> Sequence[str]: if is_duck_dask_array(self._data): return self._data.__dask_layers__() else: raise AttributeError("Method requires self.data to be a dask array.") @property def __dask_optimize__( self, ) -> Callable[..., dict[Any, Any]]: if is_duck_dask_array(self._data): return self._data.__dask_optimize__ # type: ignore[no-any-return] else: raise AttributeError("Method requires self.data to be a dask array.") @property def __dask_scheduler__(self) -> SchedulerGetCallable: if is_duck_dask_array(self._data): return self._data.__dask_scheduler__ else: raise AttributeError("Method requires self.data to be a dask array.") def __dask_postcompute__( self, ) -> tuple[PostComputeCallable, tuple[Any, ...]]: if is_duck_dask_array(self._data): array_func, array_args = self._data.__dask_postcompute__() # type: ignore[no-untyped-call] return self._dask_finalize, (array_func,) + array_args else: raise AttributeError("Method requires self.data to be a dask array.") def __dask_postpersist__( self, ) -> tuple[ Callable[ [Graph, PostPersistCallable[Any], Any, Any], Self, ], tuple[Any, ...], ]: if is_duck_dask_array(self._data): a: tuple[PostPersistCallable[Any], tuple[Any, ...]] a = self._data.__dask_postpersist__() # type: ignore[no-untyped-call] array_func, array_args = a return self._dask_finalize, (array_func,) + array_args else: raise AttributeError("Method requires self.data to be a dask array.") def _dask_finalize( self, results: Graph, array_func: PostPersistCallable[Any], *args: Any, **kwargs: Any, ) -> Self: data = array_func(results, *args, **kwargs) return type(self)(self._dims, data, attrs=self._attrs) @overload def get_axis_num(self, dim: str) -> int: ... # type: ignore [overload-overlap] @overload def get_axis_num(self, dim: Iterable[Hashable]) -> tuple[int, ...]: ... @overload def get_axis_num(self, dim: Hashable) -> int: ... def get_axis_num(self, dim: Hashable | Iterable[Hashable]) -> int | tuple[int, ...]: """Return axis number(s) corresponding to dimension(s) in this array. Parameters ---------- dim : str or iterable of str Dimension name(s) for which to lookup axes. Returns ------- int or tuple of int Axis number or numbers corresponding to the given dimensions. 
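        Examples
        --------
        Axis numbers follow the order of ``dims``:

        >>> narr = NamedArray(("x", "y"), np.zeros((2, 3)))
        >>> narr.get_axis_num("y")
        1
        >>> narr.get_axis_num(("y", "x"))
        (1, 0)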
""" if not isinstance(dim, str) and isinstance(dim, Iterable): return tuple(self._get_axis_num(d) for d in dim) else: return self._get_axis_num(dim) def _get_axis_num(self: Any, dim: Hashable) -> int: _raise_if_any_duplicate_dimensions(self.dims) try: return self.dims.index(dim) # type: ignore[no-any-return] except ValueError as err: raise ValueError( f"{dim!r} not found in array dimensions {self.dims!r}" ) from err @property def chunks(self) -> _Chunks | None: """ Tuple of block lengths for this NamedArray's data, in order of dimensions, or None if the underlying data is not a dask array. See Also -------- NamedArray.chunk NamedArray.chunksizes xarray.unify_chunks """ data = self._data if isinstance(data, _chunkedarray): return data.chunks else: return None @property def chunksizes( self, ) -> Mapping[_Dim, _Shape]: """ Mapping from dimension names to block lengths for this NamedArray's data. If this NamedArray does not contain chunked arrays, the mapping will be empty. Cannot be modified directly, but can be modified by calling .chunk(). Differs from NamedArray.chunks because it returns a mapping of dimensions to chunk shapes instead of a tuple of chunk shapes. See Also -------- NamedArray.chunk NamedArray.chunks xarray.unify_chunks """ data = self._data if isinstance(data, _chunkedarray): return dict(zip(self.dims, data.chunks, strict=True)) else: return {} @property def sizes(self) -> dict[_Dim, _IntOrUnknown]: """Ordered mapping from dimension names to lengths.""" return dict(zip(self.dims, self.shape, strict=True)) def chunk( self, chunks: T_Chunks = {}, # noqa: B006 # even though it's unsafe, it is being used intentionally here (#4667) chunked_array_type: str | ChunkManagerEntrypoint[Any] | None = None, from_array_kwargs: Any = None, **chunks_kwargs: Any, ) -> Self: """Coerce this array's data into a dask array with the given chunks. If this variable is a non-dask array, it will be converted to dask array. If it's a dask array, it will be rechunked to the given chunk sizes. If neither chunks is not provided for one or more dimensions, chunk sizes along that dimension will not be updated; non-dask arrays will be converted into dask arrays with a single block. Parameters ---------- chunks : int, tuple or dict, optional Chunk sizes along each dimension, e.g., ``5``, ``(5, 5)`` or ``{'x': 5, 'y': 5}``. chunked_array_type: str, optional Which chunked array type to coerce this datasets' arrays to. Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEntrypoint` system. Experimental API that should not be relied upon. from_array_kwargs: dict, optional Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create chunked arrays, via whichever chunk manager is specified through the `chunked_array_type` kwarg. For example, with dask as the default chunked array type, this method would pass additional kwargs to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon. **chunks_kwargs : {dim: chunks, ...}, optional The keyword arguments form of ``chunks``. One of chunks or chunks_kwargs must be provided. Returns ------- chunked : xarray.Variable See Also -------- Variable.chunks Variable.chunksizes xarray.unify_chunks dask.array.from_array """ if from_array_kwargs is None: from_array_kwargs = {} if chunks is None: warnings.warn( "None value for 'chunks' is deprecated. " "It will raise an error in the future. 
Use instead '{}'", category=FutureWarning, stacklevel=2, ) chunks = {} if isinstance(chunks, float | str | int | tuple | list): # TODO we shouldn't assume here that other chunkmanagers can handle these types # TODO should we call normalize_chunks here? pass # dask.array.from_array can handle these directly else: chunks = either_dict_or_kwargs(chunks, chunks_kwargs, "chunk") if is_dict_like(chunks): # This method of iteration allows for duplicated dimension names, GH8579 chunks = { dim_number: chunks[dim] for dim_number, dim in enumerate(self.dims) if dim in chunks } chunkmanager = guess_chunkmanager(chunked_array_type) data_old = self._data if chunkmanager.is_chunked_array(data_old): data_chunked = chunkmanager.rechunk(data_old, chunks) # type: ignore[arg-type] else: ndata: duckarray[Any, Any] if not isinstance(data_old, ExplicitlyIndexed): ndata = data_old else: # Unambiguously handle array storage backends (like NetCDF4 and h5py) # that can't handle general array indexing. For example, in netCDF4 you # can do "outer" indexing along two dimensions independent, which works # differently from how NumPy handles it. # da.from_array works by using lazy indexing with a tuple of slices. # Using OuterIndexer is a pragmatic choice: dask does not yet handle # different indexing types in an explicit way: # https://github.com/dask/dask/issues/2883 ndata = ImplicitToExplicitIndexingAdapter(data_old, OuterIndexer) # type: ignore[assignment] if is_dict_like(chunks): chunks = tuple(starmap(chunks.get, enumerate(ndata.shape))) data_chunked = chunkmanager.from_array(ndata, chunks, **from_array_kwargs) # type: ignore[arg-type] return self._replace(data=data_chunked) def to_numpy(self) -> np.ndarray[Any, Any]: """Coerces wrapped data to numpy and returns a numpy.ndarray""" # TODO an entrypoint so array libraries can choose coercion method? return to_numpy(self._data) def as_numpy(self) -> Self: """Coerces wrapped data into a numpy array, returning a Variable.""" return self._replace(data=self.to_numpy()) def reduce( self, func: Callable[..., Any], dim: Dims = None, axis: int | Sequence[int] | None = None, keepdims: bool = False, **kwargs: Any, ) -> NamedArray[Any, Any]: """Reduce this array by applying `func` along some dimension(s). Parameters ---------- func : callable Function which can be called in the form `func(x, axis=axis, **kwargs)` to return the result of reducing an np.ndarray over an integer valued axis. dim : "...", str, Iterable of Hashable or None, optional Dimension(s) over which to apply `func`. By default `func` is applied over all dimensions. axis : int or Sequence of int, optional Axis(es) over which to apply `func`. Only one of the 'dim' and 'axis' arguments can be supplied. If neither are supplied, then the reduction is calculated over the flattened array (by calling `func(x)` without an axis argument). keepdims : bool, default: False If True, the dimensions which are reduced are left in the result as dimensions of size one **kwargs : dict Additional keyword arguments passed on to `func`. Returns ------- reduced : Array Array with summarized data and the indicated dimension(s) removed. 
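        Examples
        --------
        Reducing with a NumPy function over a named dimension drops that
        dimension from the result:

        >>> narr = NamedArray(("x", "y"), np.ones((2, 3)))
        >>> summed = narr.reduce(np.sum, dim="y")
        >>> summed.dims
        ('x',)
        >>> summed.data
        array([3., 3.])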
""" if dim == ...: dim = None if dim is not None and axis is not None: raise ValueError("cannot supply both 'axis' and 'dim' arguments") if dim is not None: axis = self.get_axis_num(dim) with warnings.catch_warnings(): warnings.filterwarnings( "ignore", r"Mean of empty slice", category=RuntimeWarning ) if axis is not None: if isinstance(axis, tuple) and len(axis) == 1: # unpack axis for the benefit of functions # like np.argmin which can't handle tuple arguments axis = axis[0] data = func(self.data, axis=axis, **kwargs) else: data = func(self.data, **kwargs) if getattr(data, "shape", ()) == self.shape: dims = self.dims else: removed_axes: Iterable[int] if axis is None: removed_axes = range(self.ndim) else: removed_axes = np.atleast_1d(axis) % self.ndim if keepdims: # Insert np.newaxis for removed dims slices = tuple( np.newaxis if i in removed_axes else slice(None, None) for i in range(self.ndim) ) if getattr(data, "shape", None) is None: # Reduce has produced a scalar value, not an array-like data = np.asanyarray(data)[slices] else: data = data[slices] dims = self.dims else: dims = tuple( adim for n, adim in enumerate(self.dims) if n not in removed_axes ) # Return NamedArray to handle IndexVariable when data is nD return from_array(dims, data, attrs=self._attrs) def _nonzero(self: T_NamedArrayInteger) -> tuple[T_NamedArrayInteger, ...]: """Equivalent numpy's nonzero but returns a tuple of NamedArrays.""" # TODO: we should replace dask's native nonzero # after https://github.com/dask/dask/issues/1076 is implemented. # TODO: cast to ndarray and back to T_DuckArray is a workaround nonzeros = np.nonzero(cast("NDArray[np.integer[Any]]", self.data)) _attrs = self.attrs return tuple( cast("T_NamedArrayInteger", self._new((dim,), nz, _attrs)) for nz, dim in zip(nonzeros, self.dims, strict=True) ) def __repr__(self) -> str: return formatting.array_repr(self) def _repr_html_(self) -> str: return formatting_html.array_repr(self) def _as_sparse( self, sparse_format: Literal["coo"] | Default = _default, fill_value: ArrayLike | Default = _default, ) -> NamedArray[Any, _DType_co]: """ Use sparse-array as backend. """ import sparse from xarray.namedarray._array_api import astype # TODO: what to do if dask-backended? if fill_value is _default: dtype, fill_value = dtypes.maybe_promote(self.dtype) else: dtype = dtypes.result_type(self.dtype, fill_value) if sparse_format is _default: sparse_format = "coo" try: as_sparse = getattr(sparse, f"as_{sparse_format.lower()}") except AttributeError as exc: raise ValueError(f"{sparse_format} is not a valid sparse format") from exc data = as_sparse(astype(self, dtype).data, fill_value=fill_value) return self._new(data=data) def _to_dense(self) -> NamedArray[Any, _DType_co]: """ Change backend from sparse to np.array. """ if isinstance(self._data, _sparsearrayfunction_or_api): data_dense: np.ndarray[Any, _DType_co] = self._data.todense() return self._new(data=data_dense) else: raise TypeError("self.data is not a sparse array") def permute_dims( self, *dim: Iterable[_Dim] | EllipsisType, missing_dims: ErrorOptionsWithWarn = "raise", ) -> NamedArray[Any, _DType_co]: """Return a new object with transposed dimensions. Parameters ---------- *dim : Hashable, optional By default, reverse the order of the dimensions. Otherwise, reorder the dimensions to this order. 
missing_dims : {"raise", "warn", "ignore"}, default: "raise" What to do if dimensions that should be selected from are not present in the NamedArray: - "raise": raise an exception - "warn": raise a warning, and ignore the missing dimensions - "ignore": ignore the missing dimensions Returns ------- NamedArray The returned NamedArray has permuted dimensions and data with the same attributes as the original. See Also -------- numpy.transpose """ from xarray.namedarray._array_api import permute_dims if not dim: dims = self.dims[::-1] else: dims = tuple(infix_dims(dim, self.dims, missing_dims)) # type: ignore[arg-type] if len(dims) < 2 or dims == self.dims: # no need to transpose if only one dimension # or dims are in same order return self.copy(deep=False) axes = self.get_axis_num(dims) assert isinstance(axes, tuple) return permute_dims(self, axes) @property def T(self) -> NamedArray[Any, _DType_co]: """Return a new object with transposed dimensions.""" if self.ndim != 2: raise ValueError( f"x.T requires x to have 2 dimensions, got {self.ndim}. Use x.permute_dims() to permute dimensions." ) return self.permute_dims() def broadcast_to( self, dim: Mapping[_Dim, int] | None = None, **dim_kwargs: Any ) -> NamedArray[Any, _DType_co]: """ Broadcast the NamedArray to a new shape. New dimensions are not allowed. This method allows for the expansion of the array's dimensions to a specified shape. It handles both positional and keyword arguments for specifying the dimensions to broadcast. An error is raised if new dimensions are attempted to be added. Parameters ---------- dim : dict, str, sequence of str, optional Dimensions to broadcast the array to. If a dict, keys are dimension names and values are the new sizes. If a string or sequence of strings, existing dimensions are matched with a size of 1. **dim_kwargs : Any Additional dimensions specified as keyword arguments. Each keyword argument specifies the name of an existing dimension and its size. Returns ------- NamedArray A new NamedArray with the broadcasted dimensions. Examples -------- >>> data = np.asarray([[1.0, 2.0], [3.0, 4.0]]) >>> array = xr.NamedArray(("x", "y"), data) >>> array.sizes {'x': 2, 'y': 2} >>> broadcasted = array.broadcast_to(x=2, y=2) >>> broadcasted.sizes {'x': 2, 'y': 2} """ from xarray.core import duck_array_ops combined_dims = either_dict_or_kwargs(dim, dim_kwargs, "broadcast_to") # Check that no new dimensions are added if new_dims := set(combined_dims) - set(self.dims): raise ValueError( f"Cannot add new dimensions: {new_dims}. Only existing dimensions are allowed. " "Use `expand_dims` method to add new dimensions." ) # Create a dictionary of the current dimensions and their sizes current_shape = self.sizes # Update the current shape with the new dimensions, keeping the order of the original dimensions broadcast_shape = {d: current_shape.get(d, 1) for d in self.dims} broadcast_shape |= combined_dims # Ensure the dimensions are in the correct order ordered_dims = list(broadcast_shape.keys()) ordered_shape = tuple(broadcast_shape[d] for d in ordered_dims) data = duck_array_ops.broadcast_to(self._data, ordered_shape) # type: ignore[no-untyped-call] # TODO: use array-api-compat function return self._new(data=data, dims=ordered_dims) def expand_dims( self, dim: _Dim | Default = _default, ) -> NamedArray[Any, _DType_co]: """ Expand the dimensions of the NamedArray. This method adds new dimensions to the object. The new dimensions are added at the beginning of the array. 
Parameters ---------- dim : Hashable, optional Dimension name to expand the array to. This dimension will be added at the beginning of the array. Returns ------- NamedArray A new NamedArray with expanded dimensions. Examples -------- >>> data = np.asarray([[1.0, 2.0], [3.0, 4.0]]) >>> array = xr.NamedArray(("x", "y"), data) # expand dimensions by specifying a new dimension name >>> expanded = array.expand_dims(dim="z") >>> expanded.dims ('z', 'x', 'y') """ from xarray.namedarray._array_api import expand_dims return expand_dims(self, dim=dim) _NamedArray = NamedArray[Any, np.dtype[_ScalarType_co]] def _raise_if_any_duplicate_dimensions( dims: _Dims, err_context: str = "This function" ) -> None: if len(set(dims)) < len(dims): repeated_dims = {d for d in dims if dims.count(d) > 1} raise ValueError( f"{err_context} cannot handle duplicate dimensions, but dimensions {repeated_dims} appear more than once on this object's dims: {dims}" ) python-xarray-2026.01.0/xarray/namedarray/daskmanager.py0000664000175000017500000001777315136607163023342 0ustar alastairalastairfrom __future__ import annotations from collections.abc import Callable, Iterable, Sequence from typing import TYPE_CHECKING, Any import numpy as np from xarray.core.indexing import ImplicitToExplicitIndexingAdapter from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint, T_ChunkedArray from xarray.namedarray.utils import is_duck_dask_array, module_available if TYPE_CHECKING: from xarray.namedarray._typing import ( T_Chunks, _DType_co, _NormalizedChunks, duckarray, ) try: from dask.array import Array as DaskArray except ImportError: DaskArray = np.ndarray[Any, Any] dask_available = module_available("dask") class DaskManager(ChunkManagerEntrypoint["DaskArray"]): array_cls: type[DaskArray] available: bool = dask_available def __init__(self) -> None: # TODO can we replace this with a class attribute instead? from dask.array import Array self.array_cls = Array def is_chunked_array(self, data: duckarray[Any, Any]) -> bool: return is_duck_dask_array(data) def chunks(self, data: Any) -> _NormalizedChunks: return data.chunks # type: ignore[no-any-return] def normalize_chunks( self, chunks: T_Chunks | _NormalizedChunks, shape: tuple[int, ...] | None = None, limit: int | None = None, dtype: _DType_co | None = None, previous_chunks: _NormalizedChunks | None = None, ) -> Any: """Called by open_dataset""" from dask.array.core import normalize_chunks return normalize_chunks( chunks, shape=shape, limit=limit, dtype=dtype, previous_chunks=previous_chunks, ) # type: ignore[no-untyped-call] def from_array( self, data: Any, chunks: T_Chunks | _NormalizedChunks, **kwargs: Any ) -> DaskArray | Any: import dask.array as da if isinstance(data, ImplicitToExplicitIndexingAdapter): # lazily loaded backend array classes should use NumPy array operations. 
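            # Passing ``meta`` explicitly also keeps dask.array.from_array from
            # slicing the wrapped lazy array just to infer the block type (see
            # the ``meta`` argument of dask.array.from_array).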
kwargs["meta"] = np.ndarray return da.from_array( data, chunks, **kwargs, ) # type: ignore[no-untyped-call] def compute( self, *data: Any, **kwargs: Any ) -> tuple[np.ndarray[Any, _DType_co], ...]: from dask.array import compute return compute(*data, **kwargs) # type: ignore[no-untyped-call, no-any-return] def persist(self, *data: Any, **kwargs: Any) -> tuple[DaskArray | Any, ...]: from dask import persist return persist(*data, **kwargs) # type: ignore[no-untyped-call, no-any-return] @property def array_api(self) -> Any: from dask import array as da return da def reduction( self, arr: T_ChunkedArray, func: Callable[..., Any], combine_func: Callable[..., Any] | None = None, aggregate_func: Callable[..., Any] | None = None, axis: int | Sequence[int] | None = None, dtype: _DType_co | None = None, keepdims: bool = False, ) -> DaskArray | Any: from dask.array import reduction return reduction( arr, chunk=func, combine=combine_func, aggregate=aggregate_func, axis=axis, dtype=dtype, keepdims=keepdims, ) # type: ignore[no-untyped-call] def scan( self, func: Callable[..., Any], binop: Callable[..., Any], ident: float, arr: T_ChunkedArray, axis: int | None = None, dtype: _DType_co | None = None, **kwargs: Any, ) -> DaskArray | Any: from dask.array.reductions import cumreduction return cumreduction( func, binop, ident, arr, axis=axis, dtype=dtype, **kwargs, ) # type: ignore[no-untyped-call] def apply_gufunc( self, func: Callable[..., Any], signature: str, *args: Any, axes: Sequence[tuple[int, ...]] | None = None, axis: int | None = None, keepdims: bool = False, output_dtypes: Sequence[_DType_co] | None = None, output_sizes: dict[str, int] | None = None, vectorize: bool | None = None, allow_rechunk: bool = False, meta: tuple[np.ndarray[Any, _DType_co], ...] | None = None, **kwargs: Any, ) -> Any: from dask.array.gufunc import apply_gufunc return apply_gufunc( func, signature, *args, axes=axes, axis=axis, keepdims=keepdims, output_dtypes=output_dtypes, output_sizes=output_sizes, vectorize=vectorize, allow_rechunk=allow_rechunk, meta=meta, **kwargs, ) # type: ignore[no-untyped-call] def map_blocks( self, func: Callable[..., Any], *args: Any, dtype: _DType_co | None = None, chunks: tuple[int, ...] | None = None, drop_axis: int | Sequence[int] | None = None, new_axis: int | Sequence[int] | None = None, **kwargs: Any, ) -> Any: from dask.array import map_blocks # pass through name, meta, token as kwargs return map_blocks( func, *args, dtype=dtype, chunks=chunks, drop_axis=drop_axis, new_axis=new_axis, **kwargs, ) # type: ignore[no-untyped-call] def blockwise( self, func: Callable[..., Any], out_ind: Iterable[Any], *args: Any, # can't type this as mypy assumes args are all same type, but dask blockwise args alternate types name: str | None = None, token: Any | None = None, dtype: _DType_co | None = None, adjust_chunks: dict[Any, Callable[..., Any]] | None = None, new_axes: dict[Any, int] | None = None, align_arrays: bool = True, concatenate: bool | None = None, meta: tuple[np.ndarray[Any, _DType_co], ...] 
| None = None, **kwargs: Any, ) -> DaskArray | Any: from dask.array import blockwise return blockwise( func, out_ind, *args, name=name, token=token, dtype=dtype, adjust_chunks=adjust_chunks, new_axes=new_axes, align_arrays=align_arrays, concatenate=concatenate, meta=meta, **kwargs, ) # type: ignore[no-untyped-call] def unify_chunks( self, *args: Any, # can't type this as mypy assumes args are all same type, but dask unify_chunks args alternate types **kwargs: Any, ) -> tuple[dict[str, _NormalizedChunks], list[DaskArray]]: from dask.array.core import unify_chunks return unify_chunks(*args, **kwargs) # type: ignore[no-any-return, no-untyped-call] def store( self, sources: Any | Sequence[Any], targets: Any, **kwargs: Any, ) -> Any: from dask.array import store return store( sources=sources, targets=targets, **kwargs, ) def shuffle( self, x: DaskArray, indexer: list[list[int]], axis: int, chunks: T_Chunks ) -> DaskArray: import dask.array if not module_available("dask", minversion="2024.08.1"): raise ValueError( "This method is very inefficient on dask<2024.08.1. Please upgrade." ) if chunks is None: chunks = "auto" if chunks != "auto": raise NotImplementedError("Only chunks='auto' is supported at present.") return dask.array.shuffle(x, indexer, axis, chunks="auto") def get_auto_chunk_size(self) -> int: from dask import config as dask_config from dask.utils import parse_bytes return parse_bytes(dask_config.get("array.chunk-size")) python-xarray-2026.01.0/xarray/namedarray/__init__.py0000664000175000017500000000000015136607163022573 0ustar alastairalastairpython-xarray-2026.01.0/xarray/conventions.py0000664000175000017500000007625515136607163021307 0ustar alastairalastairfrom __future__ import annotations import itertools import warnings from collections import defaultdict from collections.abc import Hashable, Iterable, Mapping, MutableMapping from typing import TYPE_CHECKING, Any, Literal, TypeVar, Union, cast import numpy as np from xarray.coders import CFDatetimeCoder, CFTimedeltaCoder from xarray.coding import strings, variables from xarray.coding.variables import SerializationWarning, pop_to from xarray.core import indexing from xarray.core.common import ( _contains_datetime_like_objects, contains_cftime_datetimes, ) from xarray.core.utils import emit_user_level_warning from xarray.core.variable import IndexVariable, Variable from xarray.namedarray.utils import is_duck_array CF_RELATED_DATA = ( "bounds", "grid_mapping", "climatology", "geometry", "node_coordinates", "node_count", "part_node_count", "interior_ring", "cell_measures", "formula_terms", ) CF_RELATED_DATA_NEEDS_PARSING = ( "grid_mapping", "cell_measures", "formula_terms", ) if TYPE_CHECKING: from xarray.backends.common import AbstractDataStore from xarray.core.dataset import Dataset T_VarTuple = tuple[tuple[Hashable, ...], Any, dict, dict] T_Name = Union[Hashable, None] T_Variables = Mapping[Any, Variable] T_Attrs = MutableMapping[Any, Any] T_DropVariables = Union[str, Iterable[Hashable], None] T_DatasetOrAbstractstore = Union[Dataset, AbstractDataStore] def ensure_not_multiindex(var: Variable, name: T_Name = None) -> None: # only the pandas multi-index dimension coordinate cannot be serialized (tuple values) if isinstance(var._data, indexing.PandasMultiIndexingAdapter): if name is None and isinstance(var, IndexVariable): name = var.name if var.dims == (name,): raise NotImplementedError( f"variable {name!r} is a MultiIndex, which cannot yet be " "serialized. 
Instead, either use reset_index() " "to convert MultiIndex levels into coordinate variables instead " "or use https://cf-xarray.readthedocs.io/en/latest/coding.html." ) def encode_cf_variable( var: Variable, needs_copy: bool = True, name: T_Name = None ) -> Variable: """ Converts a Variable into a Variable which follows some of the CF conventions: - Nans are masked using _FillValue (or the deprecated missing_value) - Rescaling via: scale_factor and add_offset - datetimes are converted to the CF 'units since time' format - dtype encodings are enforced. Parameters ---------- var : Variable A variable holding un-encoded data. Returns ------- out : Variable A variable which has been encoded as described above. """ ensure_not_multiindex(var, name=name) for coder in [ CFDatetimeCoder(), CFTimedeltaCoder(), variables.CFScaleOffsetCoder(), variables.CFMaskCoder(), variables.NativeEnumCoder(), variables.NonStringCoder(), variables.DefaultFillvalueCoder(), variables.BooleanCoder(), ]: var = coder.encode(var, name=name) for attr_name in CF_RELATED_DATA: pop_to(var.encoding, var.attrs, attr_name) return var def decode_cf_variable( name: Hashable, var: Variable, concat_characters: bool = True, mask_and_scale: bool = True, decode_times: bool | CFDatetimeCoder = True, decode_endianness: bool = True, stack_char_dim: bool = True, use_cftime: bool | None = None, decode_timedelta: bool | CFTimedeltaCoder | None = None, ) -> Variable: """ Decodes a variable which may hold CF encoded information. This includes variables that have been masked and scaled, which hold CF style time variables (this is almost always the case if the dataset has been serialized) and which have strings encoded as character arrays. Parameters ---------- name : str Name of the variable. Used for better error messages. var : Variable A variable holding potentially CF encoded information. concat_characters : bool Should character arrays be concatenated to strings, for example: ["h", "e", "l", "l", "o"] -> "hello" mask_and_scale : bool Lazily scale (using scale_factor and add_offset) and mask (using _FillValue). If the _Unsigned attribute is present treat integer arrays as unsigned. decode_times : bool or CFDatetimeCoder Decode cf times ("hours since 2000-01-01") to np.datetime64. decode_endianness : bool Decode arrays from non-native to native endianness. stack_char_dim : bool Whether to stack characters into bytes along the last dimension of this array. Passed as an argument because we need to look at the full dataset to figure out if this is appropriate. use_cftime : bool, optional Only relevant if encoded dates come from a standard calendar (e.g. "gregorian", "proleptic_gregorian", "standard", or not specified). If None (default), attempt to decode times to ``np.datetime64[ns]`` objects; if this is not possible, decode times to ``cftime.datetime`` objects. If True, always decode times to ``cftime.datetime`` objects, regardless of whether or not they can be represented using ``np.datetime64[ns]`` objects. If False, always decode times to ``np.datetime64[ns]`` objects; if this is not possible raise an error. .. deprecated:: 2025.01.1 Please pass a :py:class:`coders.CFDatetimeCoder` instance initialized with ``use_cftime`` to the ``decode_times`` kwarg instead. decode_timedelta : None, bool, or CFTimedeltaCoder Decode cf timedeltas ("hours") to np.timedelta64. Returns ------- out : Variable A variable holding the decoded equivalent of var. 
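    Examples
    --------
    A minimal sketch of decoding a CF-encoded time variable (the data and
    attribute values are made up for illustration):

    >>> var = Variable(("time",), [0, 1, 2], {"units": "days since 2000-01-01"})
    >>> decoded = decode_cf_variable("time", var)
    >>> decoded.dims
    ('time',)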
""" # Ensure datetime-like Variables are passed through unmodified (GH 6453) if _contains_datetime_like_objects(var): return var original_dtype = var.dtype decode_timedelta_was_none = decode_timedelta is None if decode_timedelta is None: if isinstance(decode_times, CFDatetimeCoder): decode_timedelta = CFTimedeltaCoder(time_unit=decode_times.time_unit) else: decode_timedelta = bool(decode_times) if concat_characters: if stack_char_dim: var = strings.CharacterArrayCoder().decode(var, name=name) var = strings.EncodedStringCoder().decode(var) if original_dtype.kind == "O": var = variables.ObjectVLenStringCoder().decode(var) original_dtype = var.dtype if original_dtype.kind == "T": var = variables.Numpy2StringDTypeCoder().decode(var) if mask_and_scale: for coder in [ variables.CFMaskCoder( decode_times=decode_times, decode_timedelta=decode_timedelta ), variables.CFScaleOffsetCoder( decode_times=decode_times, decode_timedelta=decode_timedelta ), ]: var = coder.decode(var, name=name) if decode_timedelta: if isinstance(decode_timedelta, bool): decode_timedelta = CFTimedeltaCoder( decode_via_units=decode_timedelta, decode_via_dtype=decode_timedelta ) decode_timedelta._emit_decode_timedelta_future_warning = ( decode_timedelta_was_none ) var = decode_timedelta.decode(var, name=name) if decode_times: # remove checks after end of deprecation cycle if not isinstance(decode_times, CFDatetimeCoder): if use_cftime is not None: emit_user_level_warning( "Usage of 'use_cftime' as a kwarg is deprecated. " "Please pass a 'CFDatetimeCoder' instance initialized " "with 'use_cftime' to the 'decode_times' kwarg instead.\n" "Example usage:\n" " time_coder = xr.coders.CFDatetimeCoder(use_cftime=True)\n" " ds = xr.open_dataset(decode_times=time_coder)\n", DeprecationWarning, ) decode_times = CFDatetimeCoder(use_cftime=use_cftime) elif use_cftime is not None: raise TypeError( "Usage of 'use_cftime' as a kwarg is not allowed " "if a 'CFDatetimeCoder' instance is passed to " "'decode_times'. Please set 'use_cftime' " "when initializing 'CFDatetimeCoder' instead.\n" "Example usage:\n" " time_coder = xr.coders.CFDatetimeCoder(use_cftime=True)\n" " ds = xr.open_dataset(decode_times=time_coder)\n", ) var = decode_times.decode(var, name=name) if decode_endianness and not var.dtype.isnative: var = variables.EndianCoder().decode(var) original_dtype = var.dtype var = variables.BooleanCoder().decode(var) dimensions, data, attributes, encoding = variables.unpack_for_decoding(var) encoding.setdefault("dtype", original_dtype) if ( # we don't need to lazily index duck arrays not is_duck_array(data) # These arrays already support lazy indexing # OR for IndexingAdapters, it makes no sense to wrap them and not isinstance(data, indexing.ExplicitlyIndexedNDArrayMixin) ): # this path applies to bare BackendArray objects. # It is not hit for any internal Xarray backend data = indexing.LazilyIndexedArray(data) return Variable(dimensions, data, attributes, encoding=encoding, fastpath=True) def _update_bounds_attributes(variables: T_Variables) -> None: """Adds time attributes to time bounds variables. Variables handling time bounds ("Cell boundaries" in the CF conventions) do not necessarily carry the necessary attributes to be decoded. This copies the attributes from the time variable to the associated boundaries. 
See Also: http://cfconventions.org/Data/cf-conventions/cf-conventions-1.7/ cf-conventions.html#cell-boundaries https://github.com/pydata/xarray/issues/2565 """ # For all time variables with bounds for v in variables.values(): attrs = v.attrs units = attrs.get("units") has_date_units = isinstance(units, str) and "since" in units if has_date_units and "bounds" in attrs and attrs["bounds"] in variables: bounds_attrs = variables[attrs["bounds"]].attrs bounds_attrs.setdefault("units", attrs["units"]) if "calendar" in attrs: bounds_attrs.setdefault("calendar", attrs["calendar"]) def _update_bounds_encoding(variables: T_Variables) -> None: """Adds time encoding to time bounds variables. Variables handling time bounds ("Cell boundaries" in the CF conventions) do not necessarily carry the necessary attributes to be decoded. This copies the encoding from the time variable to the associated bounds variable so that we write CF-compliant files. See Also: http://cfconventions.org/Data/cf-conventions/cf-conventions-1.7/ cf-conventions.html#cell-boundaries https://github.com/pydata/xarray/issues/2565 """ # For all time variables with bounds for name, v in variables.items(): attrs = v.attrs encoding = v.encoding has_date_units = "units" in encoding and "since" in encoding["units"] is_datetime_type = np.issubdtype( v.dtype, np.datetime64 ) or contains_cftime_datetimes(v) if ( is_datetime_type and not has_date_units and "bounds" in attrs and attrs["bounds"] in variables ): emit_user_level_warning( f"Variable {name} has datetime type and a " f"bounds variable but {name}.encoding does not have " f"units specified. The units encodings for {name} " f"and {attrs['bounds']} will be determined independently " "and may not be equal, counter to CF-conventions. " "If this is a concern, specify a units encoding for " f"{name} before writing to a file.", ) if has_date_units and "bounds" in attrs and attrs["bounds"] in variables: bounds_encoding = variables[attrs["bounds"]].encoding bounds_encoding.setdefault("units", encoding["units"]) if "calendar" in encoding: bounds_encoding.setdefault("calendar", encoding["calendar"]) T = TypeVar("T") U = TypeVar("U") def _item_or_default(obj: Mapping[Any, T | U] | T, key: Hashable, default: T) -> T | U: """ Return item by key if obj is mapping and key is present, else return default value. """ return obj.get(key, default) if isinstance(obj, Mapping) else obj def decode_cf_variables( variables: T_Variables, attributes: T_Attrs, concat_characters: bool | Mapping[str, bool] = True, mask_and_scale: bool | Mapping[str, bool] = True, decode_times: bool | CFDatetimeCoder | Mapping[str, bool | CFDatetimeCoder] = True, decode_coords: bool | Literal["coordinates", "all"] = True, drop_variables: T_DropVariables = None, use_cftime: bool | Mapping[str, bool] | None = None, decode_timedelta: bool | CFTimedeltaCoder | Mapping[str, bool | CFTimedeltaCoder] | None = None, ) -> tuple[T_Variables, T_Attrs, set[Hashable]]: """ Decode several CF encoded variables. See: decode_cf_variable """ # Only emit one instance of the decode_timedelta default change # FutureWarning. This can be removed once this change is made. 
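    # Note: ``concat_characters``, ``mask_and_scale``, ``decode_times`` and
    # ``decode_timedelta`` may each also be given as a mapping from variable
    # name to a per-variable setting (e.g. ``decode_times={"time": True}``);
    # the per-variable values are resolved with ``_item_or_default`` in the
    # loop below.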
warnings.filterwarnings("once", "decode_timedelta", FutureWarning) dimensions_used_by = defaultdict(list) for v in variables.values(): for d in v.dims: dimensions_used_by[d].append(v) def stackable(dim: Hashable) -> bool: # figure out if a dimension can be concatenated over if dim in variables: return False for v in dimensions_used_by[dim]: if v.dtype.kind != "S" or dim != v.dims[-1]: return False return True coord_names = set() if isinstance(drop_variables, str): drop_variables = [drop_variables] elif drop_variables is None: drop_variables = [] drop_variables = set(drop_variables) # Time bounds coordinates might miss the decoding attributes if decode_times: _update_bounds_attributes(variables) new_vars = {} for k, v in variables.items(): if k in drop_variables: continue stack_char_dim = ( _item_or_default(concat_characters, k, True) and v.dtype == "S1" and v.ndim > 0 and stackable(v.dims[-1]) ) try: new_vars[k] = decode_cf_variable( k, v, concat_characters=_item_or_default(concat_characters, k, True), mask_and_scale=_item_or_default(mask_and_scale, k, True), decode_times=cast( bool | CFDatetimeCoder, _item_or_default(decode_times, k, True) ), stack_char_dim=stack_char_dim, use_cftime=_item_or_default(use_cftime, k, None), decode_timedelta=_item_or_default(decode_timedelta, k, None), ) except Exception as e: e.add_note(f"Raised while decoding variable {k!r} with value {v!r}") raise if decode_coords in [True, "coordinates", "all"]: var_attrs = new_vars[k].attrs if "coordinates" in var_attrs: var_coord_names = [ c for c in var_attrs["coordinates"].split() if c in variables ] # propagate as is new_vars[k].encoding["coordinates"] = var_attrs["coordinates"] del var_attrs["coordinates"] # but only use as coordinate if existing if var_coord_names: coord_names.update(var_coord_names) if decode_coords == "all": for attr_name in CF_RELATED_DATA: if attr_name in var_attrs: # fixes stray colon attr_val = var_attrs[attr_name].replace(" :", ":") var_names = attr_val.split() # if grid_mapping is a single string, do not enter here if ( attr_name in CF_RELATED_DATA_NEEDS_PARSING and len(var_names) > 1 ): # map the keys to list of strings # "A: b c d E: f g" returns # {"A": ["b", "c", "d"], "E": ["f", "g"]} roles_and_names = defaultdict(list) key = None for vname in var_names: if ":" in vname: key = vname.strip(":") else: if key is None: raise ValueError( f"First element {vname!r} of [{attr_val!r}] misses ':', " f"cannot decode {attr_name!r}." ) roles_and_names[key].append(vname) # for grid_mapping keys are var_names if attr_name == "grid_mapping": var_names = list(roles_and_names.keys()) else: # for cell_measures and formula_terms values are var names var_names = list(itertools.chain(*roles_and_names.values())) # consistency check (one element per key) if len(var_names) != len(roles_and_names.keys()): emit_user_level_warning( f"Attribute {attr_name!r} has malformed content [{attr_val!r}], " f"decoding {var_names!r} to coordinates." 
) if all(var_name in variables for var_name in var_names): new_vars[k].encoding[attr_name] = attr_val coord_names.update(var_names) else: referenced_vars_not_in_variables = [ proj_name for proj_name in var_names if proj_name not in variables ] emit_user_level_warning( f"Variable(s) referenced in {attr_name} not in variables: {referenced_vars_not_in_variables}", ) del var_attrs[attr_name] if decode_coords and isinstance(attributes.get("coordinates", None), str): attributes = dict(attributes) crds = attributes.pop("coordinates") coord_names.update(crds.split()) return new_vars, attributes, coord_names def decode_cf( obj: T_DatasetOrAbstractstore, concat_characters: bool = True, mask_and_scale: bool = True, decode_times: bool | CFDatetimeCoder | Mapping[str, bool | CFDatetimeCoder] = True, decode_coords: bool | Literal["coordinates", "all"] = True, drop_variables: T_DropVariables = None, use_cftime: bool | None = None, decode_timedelta: bool | CFTimedeltaCoder | Mapping[str, bool | CFTimedeltaCoder] | None = None, ) -> Dataset: """Decode the given Dataset or Datastore according to CF conventions into a new Dataset. Parameters ---------- obj : Dataset or DataStore Object to decode. concat_characters : bool, optional Should character arrays be concatenated to strings, for example: ["h", "e", "l", "l", "o"] -> "hello" mask_and_scale : bool, optional Lazily scale (using scale_factor and add_offset) and mask (using _FillValue). decode_times : bool | CFDatetimeCoder | Mapping[str, bool | CFDatetimeCoder], optional Decode cf times (e.g., integers since "hours since 2000-01-01") to np.datetime64. decode_coords : bool or {"coordinates", "all"}, optional Controls which variables are set as coordinate variables: - "coordinates" or True: Set variables referred to in the ``'coordinates'`` attribute of the datasets or individual variables as coordinate variables. - "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and other attributes as coordinate variables. drop_variables : str or iterable, optional A variable or list of variables to exclude from being parsed from the dataset. This may be useful to drop variables with problems or inconsistent values. use_cftime : bool, optional Only relevant if encoded dates come from a standard calendar (e.g. "gregorian", "proleptic_gregorian", "standard", or not specified). If None (default), attempt to decode times to ``np.datetime64[ns]`` objects; if this is not possible, decode times to ``cftime.datetime`` objects. If True, always decode times to ``cftime.datetime`` objects, regardless of whether or not they can be represented using ``np.datetime64[ns]`` objects. If False, always decode times to ``np.datetime64[ns]`` objects; if this is not possible raise an error. .. deprecated:: 2025.01.1 Please pass a :py:class:`coders.CFDatetimeCoder` instance initialized with ``use_cftime`` to the ``decode_times`` kwarg instead. decode_timedelta : bool | CFTimedeltaCoder | Mapping[str, bool | CFTimedeltaCoder], optional If True or :py:class:`CFTimedeltaCoder`, decode variables and coordinates with time units in {"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"} into timedelta objects. If False, leave them encoded as numbers. If None (default), assume the same behavior as decode_times. The resolution of the decoded timedeltas can be configured with the ``time_unit`` argument in the :py:class:`CFTimedeltaCoder` passed. 
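        For example (a sketch), ``decode_timedelta=CFTimedeltaCoder(time_unit="s")``
        requests that timedeltas be decoded with second resolution.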
Returns ------- decoded : Dataset """ from xarray.backends.common import AbstractDataStore from xarray.core.dataset import Dataset vars: T_Variables attrs: T_Attrs if isinstance(obj, Dataset): vars = obj._variables attrs = obj.attrs extra_coords = set(obj.coords) close = obj._close encoding = obj.encoding elif isinstance(obj, AbstractDataStore): vars, attrs = obj.load() extra_coords = set() close = obj.close encoding = obj.get_encoding() else: raise TypeError("can only decode Dataset or DataStore objects") vars, attrs, coord_names = decode_cf_variables( vars, attrs, concat_characters, mask_and_scale, decode_times, decode_coords, drop_variables=drop_variables, use_cftime=use_cftime, decode_timedelta=decode_timedelta, ) ds = Dataset(vars, attrs=attrs) ds = ds.set_coords(coord_names.union(extra_coords).intersection(vars)) ds.set_close(close) ds.encoding = encoding return ds def cf_decoder( variables: T_Variables, attributes: T_Attrs, concat_characters: bool = True, mask_and_scale: bool = True, decode_times: bool | CFDatetimeCoder | Mapping[str, bool | CFDatetimeCoder] = True, ) -> tuple[T_Variables, T_Attrs]: """ Decode a set of CF encoded variables and attributes. Parameters ---------- variables : dict A dictionary mapping from variable name to xarray.Variable attributes : dict A dictionary mapping from attribute name to value concat_characters : bool Should character arrays be concatenated to strings, for example: ["h", "e", "l", "l", "o"] -> "hello" mask_and_scale : bool Lazily scale (using scale_factor and add_offset) and mask (using _FillValue). decode_times : bool | CFDatetimeCoder | Mapping[str, bool | CFDatetimeCoder] Decode cf times ("hours since 2000-01-01") to np.datetime64. Returns ------- decoded_variables : dict A dictionary mapping from variable name to xarray.Variable objects. decoded_attributes : dict A dictionary mapping from attribute name to values. See Also -------- decode_cf_variable """ variables, attributes, _ = decode_cf_variables( variables, attributes, concat_characters, mask_and_scale, decode_times, ) return variables, attributes def _encode_coordinates( variables: T_Variables, attributes: T_Attrs, non_dim_coord_names ): # calculate global and variable specific coordinates non_dim_coord_names = set(non_dim_coord_names) for name in list(non_dim_coord_names): if isinstance(name, str) and " " in name: emit_user_level_warning( f"coordinate {name!r} has a space in its name, which means it " "cannot be marked as a coordinate on disk and will be " "saved as a data variable instead", category=SerializationWarning, ) non_dim_coord_names.discard(name) global_coordinates = non_dim_coord_names.copy() variable_coordinates = defaultdict(set) not_technically_coordinates = set() for coord_name in non_dim_coord_names: target_dims = variables[coord_name].dims for k, v in variables.items(): if ( k not in non_dim_coord_names and k not in v.dims and set(target_dims) <= set(v.dims) ): variable_coordinates[k].add(coord_name) if any( coord_name in v.encoding.get(attr_name, tuple()) for attr_name in CF_RELATED_DATA ): not_technically_coordinates.add(coord_name) global_coordinates.discard(coord_name) variables = {k: v.copy(deep=False) for k, v in variables.items()} # keep track of variable names written to file under the "coordinates" attributes written_coords = set() for name, var in variables.items(): encoding = var.encoding attrs = var.attrs if "coordinates" in attrs and "coordinates" in encoding: raise ValueError( f"'coordinates' found in both attrs and encoding for variable {name!r}." 
) # if coordinates set to None, don't write coordinates attribute if ("coordinates" in attrs and attrs.get("coordinates") is None) or ( "coordinates" in encoding and encoding.get("coordinates") is None ): # make sure "coordinates" is removed from attrs/encoding attrs.pop("coordinates", None) encoding.pop("coordinates", None) continue # this will copy coordinates from encoding to attrs if "coordinates" in attrs # after the next line, "coordinates" is never in encoding # we get support for attrs["coordinates"] for free. coords_str = pop_to(encoding, attrs, "coordinates") or attrs.get("coordinates") if not coords_str and variable_coordinates[name]: coordinates_text = " ".join( str(coord_name) for coord_name in sorted(variable_coordinates[name]) if coord_name not in not_technically_coordinates ) if coordinates_text: attrs["coordinates"] = coordinates_text if "coordinates" in attrs: written_coords.update(attrs["coordinates"].split()) # These coordinates are not associated with any particular variables, so we # save them under a global 'coordinates' attribute so xarray can roundtrip # the dataset faithfully. Because this serialization goes beyond CF # conventions, only do it if necessary. # Reference discussion: # https://cfconventions.org/mailing-list-archive/Data/7400.html global_coordinates.difference_update(written_coords) if global_coordinates: attributes = dict(attributes) if "coordinates" in attributes: emit_user_level_warning( f"cannot serialize global coordinates {global_coordinates!r} because the global " f"attribute 'coordinates' already exists. This may prevent faithful roundtripping" f"of xarray datasets", category=SerializationWarning, ) else: attributes["coordinates"] = " ".join(sorted(map(str, global_coordinates))) return variables, attributes def encode_dataset_coordinates(dataset: Dataset): """Encode coordinates on the given dataset object into variable specific and global attributes. When possible, this is done according to CF conventions. Parameters ---------- dataset : Dataset Object to encode. Returns ------- variables : dict attrs : dict """ non_dim_coord_names = set(dataset.coords) - set(dataset.dims) return _encode_coordinates( dataset._variables, dataset.attrs, non_dim_coord_names=non_dim_coord_names ) def cf_encoder(variables: T_Variables, attributes: T_Attrs): """ Encode a set of CF encoded variables and attributes. Takes a dicts of variables and attributes and encodes them to conform to CF conventions as much as possible. This includes masking, scaling, character array handling, and CF-time encoding. Parameters ---------- variables : dict A dictionary mapping from variable name to xarray.Variable attributes : dict A dictionary mapping from attribute name to value Returns ------- encoded_variables : dict A dictionary mapping from variable name to xarray.Variable, encoded_attributes : dict A dictionary mapping from attribute name to value See Also -------- decode_cf_variable, encode_cf_variable """ # add encoding for time bounds variables if present. 
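    # (this copies time ``units``/``calendar`` from a datetime variable's
    # encoding to its ``bounds`` variable so both are written with consistent
    # units, per CF)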
_update_bounds_encoding(variables) new_vars = {} for k, v in variables.items(): try: new_vars[k] = encode_cf_variable(v, name=k) except Exception as e: e.add_note(f"Raised while encoding variable {k!r} with value {v!r}") raise # Remove attrs from bounds variables (issue #2921) for var in new_vars.values(): bounds = var.attrs.get("bounds") if bounds and bounds in new_vars: # see http://cfconventions.org/cf-conventions/cf-conventions.html#cell-boundaries for attr in [ "units", "standard_name", "axis", "positive", "calendar", "long_name", "leap_month", "leap_year", "month_lengths", ]: if ( attr in new_vars[bounds].attrs and attr in var.attrs and new_vars[bounds].attrs[attr] == var.attrs[attr] ): new_vars[bounds].attrs.pop(attr) return new_vars, attributes python-xarray-2026.01.0/xarray/testing/0000775000175000017500000000000015136607163020026 5ustar alastairalastairpython-xarray-2026.01.0/xarray/testing/strategies.py0000664000175000017500000005747715136607163022576 0ustar alastairalastairimport datetime import warnings from collections.abc import Hashable, Iterable, Mapping, Sequence from itertools import compress from typing import TYPE_CHECKING, Any, Protocol, overload import hypothesis.extra.numpy as npst import numpy as np from hypothesis.errors import InvalidArgument import xarray as xr from xarray.core.types import T_DuckArray from xarray.core.utils import attempt_import, module_available if TYPE_CHECKING: from xarray.core.types import _DTypeLikeNested, _ShapeLike if TYPE_CHECKING: import hypothesis.strategies as st else: st = attempt_import("hypothesis.strategies") __all__ = [ "attrs", "basic_indexers", "cftime_datetimes", "datetimes", "dimension_names", "dimension_sizes", "names", "outer_array_indexers", "pandas_index_dtypes", "supported_dtypes", "unique_subset_of", "variables", "vectorized_indexers", ] class ArrayStrategyFn(Protocol[T_DuckArray]): def __call__( self, *, shape: "_ShapeLike", dtype: "_DTypeLikeNested", ) -> st.SearchStrategy[T_DuckArray]: ... def supported_dtypes() -> st.SearchStrategy[np.dtype]: """ Generates only those numpy dtypes which xarray can handle. Use instead of hypothesis.extra.numpy.scalar_dtypes in order to exclude weirder dtypes such as unicode, byte_string, array, or nested dtypes. Also excludes datetimes, which dodges bugs with pandas non-nanosecond datetime overflows. Checks only native endianness. Requires the hypothesis package to be installed. See Also -------- :ref:`testing.hypothesis`_ """ # TODO should this be exposed publicly? # We should at least decide what the set of numpy dtypes that xarray officially supports is. return ( npst.integer_dtypes(endianness="=") | npst.unsigned_integer_dtypes(endianness="=") | npst.floating_dtypes(endianness="=") | npst.complex_number_dtypes(endianness="=") # | npst.datetime64_dtypes() # | npst.timedelta64_dtypes() # | npst.unicode_string_dtypes() ) def pandas_index_dtypes() -> st.SearchStrategy[np.dtype]: """ Dtypes supported by pandas indexes. Restrict datetime64 and timedelta64 to ns frequency till Xarray relaxes that. 
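    A typical use (sketch) is to parametrize coordinate data in
    property-based tests, e.g.
    ``hypothesis.extra.numpy.arrays(dtype=pandas_index_dtypes(), shape=5)``.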
""" return ( npst.integer_dtypes(endianness="=", sizes=(32, 64)) | npst.unsigned_integer_dtypes(endianness="=", sizes=(32, 64)) | npst.floating_dtypes(endianness="=", sizes=(32, 64)) # TODO: unset max_period | npst.datetime64_dtypes(endianness="=", max_period="ns") # TODO: set max_period="D" | npst.timedelta64_dtypes(endianness="=", max_period="ns") | npst.unicode_string_dtypes(endianness="=") ) def datetimes() -> st.SearchStrategy: """ Generates datetime objects including both standard library datetimes and cftime datetimes. Returns standard library datetime.datetime objects, and if cftime is available, also includes cftime datetime objects from various calendars. Requires the hypothesis package to be installed. See Also -------- :ref:`testing.hypothesis`_ """ strategy = st.datetimes() if module_available("cftime"): strategy = strategy | cftime_datetimes() return strategy # TODO Generalize to all valid unicode characters once formatting bugs in xarray's reprs are fixed + docs can handle it. _readable_characters = st.characters( categories=["L", "N"], max_codepoint=0x017F ) # only use characters within the "Latin Extended-A" subset of unicode def names() -> st.SearchStrategy[str]: """ Generates arbitrary string names for dimensions / variables. Requires the hypothesis package to be installed. See Also -------- :ref:`testing.hypothesis`_ """ return st.text( _readable_characters, min_size=1, max_size=5, ) def dimension_names( *, name_strategy=None, min_dims: int = 0, max_dims: int = 3, ) -> st.SearchStrategy[list[Hashable]]: """ Generates an arbitrary list of valid dimension names. Requires the hypothesis package to be installed. Parameters ---------- name_strategy Strategy for making names. Useful if we need to share this. min_dims Minimum number of dimensions in generated list. max_dims Maximum number of dimensions in generated list. """ if name_strategy is None: name_strategy = names() return st.lists( elements=name_strategy, min_size=min_dims, max_size=max_dims, unique=True, ) def dimension_sizes( *, dim_names: st.SearchStrategy[Hashable] = names(), # noqa: B008 min_dims: int = 0, max_dims: int = 3, min_side: int = 1, max_side: int | None = None, ) -> st.SearchStrategy[Mapping[Hashable, int]]: """ Generates an arbitrary mapping from dimension names to lengths. Requires the hypothesis package to be installed. Parameters ---------- dim_names: strategy generating strings, optional Strategy for generating dimension names. Defaults to the `names` strategy. min_dims: int, optional Minimum number of dimensions in generated list. Default is 1. max_dims: int, optional Maximum number of dimensions in generated list. Default is 3. min_side: int, optional Minimum size of a dimension. Default is 1. max_side: int, optional Minimum size of a dimension. Default is `min_length` + 5. 
See Also -------- :ref:`testing.hypothesis`_ """ if max_side is None: max_side = min_side + 3 return st.dictionaries( keys=dim_names, values=st.integers(min_value=min_side, max_value=max_side), min_size=min_dims, max_size=max_dims, ) _readable_strings = st.text( _readable_characters, max_size=5, ) _attr_keys = _readable_strings _small_arrays = npst.arrays( shape=npst.array_shapes( max_side=2, max_dims=2, ), dtype=npst.scalar_dtypes() | npst.byte_string_dtypes() | npst.unicode_string_dtypes(), ) _attr_values = st.none() | st.booleans() | _readable_strings | _small_arrays simple_attrs = st.dictionaries(_attr_keys, _attr_values) def attrs() -> st.SearchStrategy[Mapping[Hashable, Any]]: """ Generates arbitrary valid attributes dictionaries for xarray objects. The generated dictionaries can potentially be recursive. Requires the hypothesis package to be installed. See Also -------- :ref:`testing.hypothesis`_ """ return st.recursive( st.dictionaries(_attr_keys, _attr_values), lambda children: st.dictionaries(_attr_keys, children), max_leaves=3, ) ATTRS = attrs() @st.composite def variables( draw: st.DrawFn, *, array_strategy_fn: ArrayStrategyFn | None = None, dims: st.SearchStrategy[Sequence[Hashable] | Mapping[Hashable, int]] | None = None, dtype: st.SearchStrategy[np.dtype] | None = None, attrs: st.SearchStrategy[Mapping] = ATTRS, ) -> xr.Variable: """ Generates arbitrary xarray.Variable objects. Follows the basic signature of the xarray.Variable constructor, but allows passing alternative strategies to generate either numpy-like array data or dimensions. Also allows specifying the shape or dtype of the wrapped array up front. Passing nothing will generate a completely arbitrary Variable (containing a numpy array). Requires the hypothesis package to be installed. Parameters ---------- array_strategy_fn: Callable which returns a strategy generating array-likes, optional Callable must only accept shape and dtype kwargs, and must generate results consistent with its input. If not passed the default is to generate a small numpy array with one of the supported_dtypes. dims: Strategy for generating the dimensions, optional Can either be a strategy for generating a sequence of string dimension names, or a strategy for generating a mapping of string dimension names to integer lengths along each dimension. If provided as a mapping the array shape will be passed to array_strategy_fn. Default is to generate arbitrary dimension names for each axis in data. dtype: Strategy which generates np.dtype objects, optional Will be passed in to array_strategy_fn. Default is to generate any scalar dtype using supported_dtypes. Be aware that this default set of dtypes includes some not strictly allowed by the array API standard. attrs: Strategy which generates dicts, optional Default is to generate a nested attributes dictionary containing arbitrary strings, booleans, integers, Nones, and numpy arrays. Returns ------- variable_strategy Strategy for generating xarray.Variable objects. Raises ------ ValueError If a custom array_strategy_fn returns a strategy which generates an example array inconsistent with the shape & dtype input passed to it. 
Examples -------- Generate completely arbitrary Variable objects backed by a numpy array: >>> variables().example() # doctest: +SKIP array([43506, -16, -151], dtype=int32) >>> variables().example() # doctest: +SKIP array([[[-10000000., -10000000.], [-10000000., -10000000.]], [[-10000000., -10000000.], [ 0., -10000000.]], [[ 0., -10000000.], [-10000000., inf]], [[ -0., -10000000.], [-10000000., -0.]]], dtype=float32) Attributes: śřĴ: {'ĉ': {'iĥf': array([-30117, -1740], dtype=int16)}} Generate only Variable objects with certain dimension names: >>> variables(dims=st.just(["a", "b"])).example() # doctest: +SKIP array([[ 248, 4294967295, 4294967295], [2412855555, 3514117556, 4294967295], [ 111, 4294967295, 4294967295], [4294967295, 1084434988, 51688], [ 47714, 252, 11207]], dtype=uint32) Generate only Variable objects with certain dimension names and lengths: >>> variables(dims=st.just({"a": 2, "b": 1})).example() # doctest: +SKIP array([[-1.00000000e+007+3.40282347e+038j], [-2.75034266e-225+2.22507386e-311j]]) See Also -------- :ref:`testing.hypothesis`_ """ if dtype is None: dtype = supported_dtypes() if not isinstance(dims, st.SearchStrategy) and dims is not None: raise InvalidArgument( f"dims must be provided as a hypothesis.strategies.SearchStrategy object (or None), but got type {type(dims)}. " "To specify fixed contents, use hypothesis.strategies.just()." ) if not isinstance(dtype, st.SearchStrategy) and dtype is not None: raise InvalidArgument( f"dtype must be provided as a hypothesis.strategies.SearchStrategy object (or None), but got type {type(dtype)}. " "To specify fixed contents, use hypothesis.strategies.just()." ) if not isinstance(attrs, st.SearchStrategy) and attrs is not None: raise InvalidArgument( f"attrs must be provided as a hypothesis.strategies.SearchStrategy object (or None), but got type {type(attrs)}. " "To specify fixed contents, use hypothesis.strategies.just()." ) _array_strategy_fn: ArrayStrategyFn if array_strategy_fn is None: # For some reason if I move the default value to the function signature definition mypy incorrectly says the ignore is no longer necessary, making it impossible to satisfy mypy _array_strategy_fn = npst.arrays # type: ignore[assignment] # npst.arrays has extra kwargs that we aren't using later elif not callable(array_strategy_fn): raise InvalidArgument( "array_strategy_fn must be a Callable that accepts the kwargs dtype and shape and returns a hypothesis " "strategy which generates corresponding array-like objects." 
) else: _array_strategy_fn = ( array_strategy_fn # satisfy mypy that this new variable cannot be None ) _dtype = draw(dtype) if dims is not None: # generate dims first then draw data to match _dims = draw(dims) if isinstance(_dims, Sequence): dim_names = list(_dims) valid_shapes = npst.array_shapes(min_dims=len(_dims), max_dims=len(_dims)) _shape = draw(valid_shapes) array_strategy = _array_strategy_fn(shape=_shape, dtype=_dtype) elif isinstance(_dims, Mapping | dict): # should be a mapping of form {dim_names: lengths} dim_names, _shape = list(_dims.keys()), tuple(_dims.values()) array_strategy = _array_strategy_fn(shape=_shape, dtype=_dtype) else: raise InvalidArgument( f"Invalid type returned by dims strategy - drew an object of type {type(dims)}" ) else: # nothing provided, so generate everything consistently # We still generate the shape first here just so that we always pass shape to array_strategy_fn _shape = draw(npst.array_shapes()) array_strategy = _array_strategy_fn(shape=_shape, dtype=_dtype) dim_names = draw(dimension_names(min_dims=len(_shape), max_dims=len(_shape))) _data = draw(array_strategy) if _data.shape != _shape: raise ValueError( "array_strategy_fn returned an array object with a different shape than it was passed." f"Passed {_shape}, but returned {_data.shape}." "Please either specify a consistent shape via the dims kwarg or ensure the array_strategy_fn callable " "obeys the shape argument passed to it." ) if _data.dtype != _dtype: raise ValueError( "array_strategy_fn returned an array object with a different dtype than it was passed." f"Passed {_dtype}, but returned {_data.dtype}" "Please either specify a consistent dtype via the dtype kwarg or ensure the array_strategy_fn callable " "obeys the dtype argument passed to it." ) return xr.Variable(dims=dim_names, data=_data, attrs=draw(attrs)) @overload def unique_subset_of( objs: Sequence[Hashable], *, min_size: int = 0, max_size: int | None = None, ) -> st.SearchStrategy[Sequence[Hashable]]: ... @overload def unique_subset_of( objs: Mapping[Hashable, Any], *, min_size: int = 0, max_size: int | None = None, ) -> st.SearchStrategy[Mapping[Hashable, Any]]: ... @st.composite def unique_subset_of( draw: st.DrawFn, objs: Sequence[Hashable] | Mapping[Hashable, Any], *, min_size: int = 0, max_size: int | None = None, ) -> Sequence[Hashable] | Mapping[Hashable, Any]: """ Return a strategy which generates a unique subset of the given objects. Each entry in the output subset will be unique (if input was a sequence) or have a unique key (if it was a mapping). Requires the hypothesis package to be installed. Parameters ---------- objs: Union[Sequence[Hashable], Mapping[Hashable, Any]] Objects from which to sample to produce the subset. min_size: int, optional Minimum size of the returned subset. Default is 0. max_size: int, optional Maximum size of the returned subset. Default is the full length of the input. If set to 0 the result will be an empty mapping. Returns ------- unique_subset_strategy Strategy generating subset of the input. 
Examples -------- >>> unique_subset_of({"x": 2, "y": 3}).example() # doctest: +SKIP {'y': 3} >>> unique_subset_of(["x", "y"]).example() # doctest: +SKIP ['x'] See Also -------- :ref:`testing.hypothesis`_ """ if not isinstance(objs, Iterable): raise TypeError( f"Object to sample from must be an Iterable or a Mapping, but received type {type(objs)}" ) if len(objs) == 0: raise ValueError("Can't sample from a length-zero object.") keys = list(objs.keys()) if isinstance(objs, Mapping) else objs subset_keys = draw( st.lists( st.sampled_from(keys), unique=True, min_size=min_size, max_size=max_size, ) ) return ( {k: objs[k] for k in subset_keys} if isinstance(objs, Mapping) else subset_keys ) @st.composite def cftime_datetimes(draw: st.DrawFn): """ Generates cftime datetime objects across various calendars. This strategy generates cftime datetime objects from all available cftime calendars with dates ranging from year -99999 to 99999. Requires both the hypothesis and cftime packages to be installed. Returns ------- cftime_datetime_strategy Strategy for generating cftime datetime objects. See Also -------- :ref:`testing.hypothesis`_ """ from xarray.tests import _all_cftime_date_types date_types = _all_cftime_date_types() calendars = list(date_types) calendar = draw(st.sampled_from(calendars)) date_type = date_types[calendar] with warnings.catch_warnings(): warnings.filterwarnings("ignore", message=".*date/calendar/year zero.*") daysinmonth = date_type(99999, 12, 1).daysinmonth min_value = date_type(-99999, 1, 1) max_value = date_type(99999, 12, daysinmonth, 23, 59, 59, 999999) unit_microsecond = datetime.timedelta(microseconds=1) timespan_microseconds = (max_value - min_value) // unit_microsecond microseconds_offset = draw(st.integers(0, timespan_microseconds)) return min_value + datetime.timedelta(microseconds=microseconds_offset) @st.composite def basic_indexers( draw, /, *, sizes: dict[Hashable, int], min_dims: int = 1, max_dims: int | None = None, ) -> dict[Hashable, int | slice]: """Generate basic indexers using ``hypothesis.extra.numpy.basic_indices``. Parameters ---------- draw : callable sizes : dict[Hashable, int] Dictionary mapping dimension names to their sizes. min_dims : int, optional Minimum number of dimensions to index. max_dims : int or None, optional Maximum number of dimensions to index. Returns ------- sizes : mapping of hashable to int or slice Indexers as a dict with keys randomly selected from ``sizes.keys()``. See Also -------- hypothesis.strategies.slices """ selected_dims = draw(unique_subset_of(sizes, min_size=min_dims, max_size=max_dims)) # Generate one basic index (int or slice) per selected dimension idxr = { dim: draw( st.one_of( st.integers(min_value=-size, max_value=size - 1), st.slices(size), ) ) for dim, size in selected_dims.items() } return idxr @st.composite def outer_array_indexers( draw, /, *, sizes: dict[Hashable, int], min_dims: int = 0, max_dims: int | None = None, max_size: int = 10, ) -> dict[Hashable, np.ndarray]: """Generate outer array indexers (vectorized/orthogonal indexing). Parameters ---------- draw : callable The Hypothesis draw function (automatically provided by @st.composite). sizes : dict[Hashable, int] Dictionary mapping dimension names to their sizes. min_dims : int, optional Minimum number of dimensions to index max_dims : int or None, optional Maximum number of dimensions to index Returns ------- sizes : mapping of hashable to np.ndarray Indexers as a dict with keys randomly selected from ``sizes.keys()``. 
Values are 1D numpy arrays of integer indices for each dimension. See Also -------- hypothesis.extra.numpy.arrays """ selected_dims = draw(unique_subset_of(sizes, min_size=min_dims, max_size=max_dims)) idxr = { dim: draw( npst.arrays( dtype=np.int64, shape=st.integers(min_value=1, max_value=min(size, max_size)), elements=st.integers(min_value=-size, max_value=size - 1), ) ) for dim, size in selected_dims.items() } return idxr @st.composite def vectorized_indexers( draw, /, *, sizes: dict[Hashable, int], min_dims: int = 2, max_dims: int | None = None, min_ndim: int = 1, max_ndim: int = 3, min_size: int = 1, max_size: int = 5, ) -> dict[Hashable, xr.DataArray]: """Generate vectorized (fancy) indexers where all arrays are broadcastable. In vectorized indexing, all array indexers must have compatible shapes that can be broadcast together, and the result shape is determined by broadcasting the indexer arrays. Parameters ---------- draw : callable The Hypothesis draw function (automatically provided by @st.composite). sizes : dict[Hashable, int] Dictionary mapping dimension names to their sizes. min_dims : int, optional Minimum number of dimensions to index. Default is 2, so that we always have a "trajectory". Use ``outer_array_indexers`` for the ``min_dims==1`` case. max_dims : int or None, optional Maximum number of dimensions to index. min_ndim : int, optional Minimum number of dimensions for the result arrays. max_ndim : int, optional Maximum number of dimensions for the result arrays. min_size : int, optional Minimum size for each dimension in the result arrays. max_size : int, optional Maximum size for each dimension in the result arrays. Returns ------- sizes : mapping of hashable to DataArray or Variable Indexers as a dict with keys randomly selected from sizes.keys(). Values are DataArrays of integer indices that are all broadcastable to a common shape. 
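    Examples
    --------
    Draw one set of broadcastable indexers for an array with known dimension
    sizes (the drawn values and shapes are arbitrary; output is illustrative):

    >>> vectorized_indexers(sizes={"x": 5, "y": 4}).example()  # doctest: +SKIP
    {'x': <xarray.DataArray (vec_0: 2)> ..., 'y': <xarray.DataArray (vec_0: 2)> ...}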
See Also -------- hypothesis.extra.numpy.arrays """ selected_dims = draw(unique_subset_of(sizes, min_size=min_dims, max_size=max_dims)) # Generate a common broadcast shape for all arrays # Use min_ndim to max_ndim dimensions for the result shape result_shape = draw( st.lists( st.integers(min_value=min_size, max_value=max_size), min_size=min_ndim, max_size=max_ndim, ) ) result_ndim = len(result_shape) # Create dimension names for the vectorized result vec_dims = tuple(f"vec_{i}" for i in range(result_ndim)) # Generate array indexers for each selected dimension # All arrays must be broadcastable to the same result_shape idxr = {} for dim, size in selected_dims.items(): array_shape = draw( npst.broadcastable_shapes( shape=tuple(result_shape), min_dims=min_ndim, max_dims=result_ndim, ) ) # For xarray broadcasting, drop dimensions where size differs from result_shape # (numpy broadcasts size-1, but xarray requires matching sizes or missing dims) # Right-align array_shape with result_shape for comparison aligned_dims = vec_dims[-len(array_shape) :] if array_shape else () aligned_result = result_shape[-len(array_shape) :] if array_shape else [] keep_mask = [s == r for s, r in zip(array_shape, aligned_result, strict=True)] filtered_shape = tuple(compress(array_shape, keep_mask)) filtered_dims = tuple(compress(aligned_dims, keep_mask)) # Generate array of valid indices for this dimension indices = draw( npst.arrays( dtype=np.int64, shape=filtered_shape, elements=st.integers(min_value=-size, max_value=size - 1), ) ) idxr[dim] = xr.DataArray(indices, dims=filtered_dims) return idxr python-xarray-2026.01.0/xarray/testing/assertions.py0000664000175000017500000004360415136607163022601 0ustar alastairalastair"""Testing functions exposed to the user API""" import functools import warnings from collections.abc import Hashable from typing import Any import numpy as np import pandas as pd from xarray.core import duck_array_ops, formatting, utils from xarray.core.coordinates import Coordinates from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset from xarray.core.datatree import DataTree from xarray.core.datatree_mapping import map_over_datasets from xarray.core.formatting import diff_datatree_repr from xarray.core.indexes import Index, PandasIndex, PandasMultiIndex, default_indexes from xarray.core.variable import IndexVariable, Variable def ensure_warnings(func): # sometimes tests elevate warnings to errors # -> make sure that does not happen in the assert_* functions @functools.wraps(func) def wrapper(*args, **kwargs): __tracebackhide__ = True with warnings.catch_warnings(): # only remove filters that would "error" warnings.filters = [f for f in warnings.filters if f[0] != "error"] return func(*args, **kwargs) return wrapper def _decode_string_data(data): if data.dtype.kind == "S": return np.char.decode(data, "utf-8", "replace") return data def _data_allclose_or_equiv(arr1, arr2, rtol=1e-05, atol=1e-08, decode_bytes=True): if any(arr.dtype.kind == "S" for arr in [arr1, arr2]) and decode_bytes: arr1 = _decode_string_data(arr1) arr2 = _decode_string_data(arr2) exact_dtypes = ["M", "m", "O", "S", "U"] if any(arr.dtype.kind in exact_dtypes for arr in [arr1, arr2]): return duck_array_ops.array_equiv(arr1, arr2) else: return duck_array_ops.allclose_or_equiv(arr1, arr2, rtol=rtol, atol=atol) @ensure_warnings def assert_isomorphic(a: DataTree, b: DataTree): """ Two DataTrees are considered isomorphic if the set of paths to their descendent nodes are the same. 
Nothing about the data or attrs in each node is checked. Isomorphism is a necessary condition for two trees to be used in a nodewise binary operation, such as tree1 + tree2. Parameters ---------- a : DataTree The first object to compare. b : DataTree The second object to compare. See Also -------- DataTree.isomorphic assert_equal assert_identical """ __tracebackhide__ = True assert isinstance(a, type(b)) if isinstance(a, DataTree): assert a.isomorphic(b), diff_datatree_repr(a, b, "isomorphic") else: raise TypeError(f"{type(a)} not of type DataTree") def maybe_transpose_dims(a, b, check_dim_order: bool): """Helper for assert_equal/allclose/identical""" __tracebackhide__ = True def _maybe_transpose_dims(a, b): if not isinstance(a, Variable | DataArray | Dataset): return b if set(a.dims) == set(b.dims): # Ensure transpose won't fail if a dimension is missing # If this is the case, the difference will be caught by the caller return b.transpose(*a.dims) return b if check_dim_order: return b if isinstance(a, DataTree): return map_over_datasets(_maybe_transpose_dims, a, b) return _maybe_transpose_dims(a, b) @ensure_warnings def assert_equal(a, b, check_dim_order: bool = True): """Like :py:func:`numpy.testing.assert_array_equal`, but for xarray objects. Raises an AssertionError if two objects are not equal. This will match data values, dimensions and coordinates, but not names or attributes (except for Dataset objects for which the variable names must match). Arrays with NaN in the same location are considered equal. For DataTree objects, assert_equal is mapped over all Datasets on each node, with the DataTrees being equal if both are isomorphic and the corresponding Datasets at each node are themselves equal. Parameters ---------- a : xarray.Dataset, xarray.DataArray, xarray.Variable, xarray.Coordinates or xarray.core.datatree.DataTree. The first object to compare. b : xarray.Dataset, xarray.DataArray, xarray.Variable, xarray.Coordinates or xarray.core.datatree.DataTree. The second object to compare. check_dim_order : bool, optional, default is True Whether dimensions must be in the same order. See Also -------- assert_identical, assert_allclose, Dataset.equals, DataArray.equals numpy.testing.assert_array_equal """ __tracebackhide__ = True assert type(a) is type(b) or ( isinstance(a, Coordinates) and isinstance(b, Coordinates) ) b = maybe_transpose_dims(a, b, check_dim_order) if isinstance(a, Variable | DataArray): assert a.equals(b), formatting.diff_array_repr(a, b, "equals") elif isinstance(a, Dataset): assert a.equals(b), formatting.diff_dataset_repr(a, b, "equals") elif isinstance(a, Coordinates): assert a.equals(b), formatting.diff_coords_repr(a, b, "equals") elif isinstance(a, DataTree): assert a.equals(b), diff_datatree_repr(a, b, "equals") else: raise TypeError(f"{type(a)} not supported by assertion comparison") @ensure_warnings def assert_identical(a, b): """Like :py:func:`xarray.testing.assert_equal`, but also matches the objects' names and attributes. Raises an AssertionError if two objects are not identical. For DataTree objects, assert_identical is mapped over all Datasets on each node, with the DataTrees being identical if both are isomorphic and the corresponding Datasets at each node are themselves identical. Parameters ---------- a : xarray.Dataset, xarray.DataArray, xarray.Variable or xarray.Coordinates The first object to compare. b : xarray.Dataset, xarray.DataArray, xarray.Variable or xarray.Coordinates The second object to compare. 
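    Examples
    --------
    Unlike ``assert_equal``, names and attributes must also match (a small
    sketch; the two arrays hold identical values):

    >>> a = xr.DataArray([1, 2], dims="x", name="a")  # doctest: +SKIP
    >>> assert_identical(a, a.rename("b"))  # doctest: +SKIP
    Traceback (most recent call last):
    ...
    AssertionError: DataArray names are different. L: a, R: b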
See Also -------- assert_equal, assert_allclose, Dataset.equals, DataArray.equals """ __tracebackhide__ = True assert type(a) is type(b) or ( isinstance(a, Coordinates) and isinstance(b, Coordinates) ) if isinstance(a, Variable): assert a.identical(b), formatting.diff_array_repr(a, b, "identical") elif isinstance(a, DataArray): assert a.name == b.name, ( f"DataArray names are different. L: {a.name}, R: {b.name}" ) assert a.identical(b), formatting.diff_array_repr(a, b, "identical") elif isinstance(a, Dataset | Variable): assert a.identical(b), formatting.diff_dataset_repr(a, b, "identical") elif isinstance(a, Coordinates): assert a.identical(b), formatting.diff_coords_repr(a, b, "identical") elif isinstance(a, DataTree): assert a.identical(b), diff_datatree_repr(a, b, "identical") else: raise TypeError(f"{type(a)} not supported by assertion comparison") @ensure_warnings def assert_allclose( a, b, rtol=1e-05, atol=1e-08, decode_bytes=True, check_dim_order: bool = True ): """Like :py:func:`numpy.testing.assert_allclose`, but for xarray objects. Raises an AssertionError if two objects are not equal up to desired tolerance. Parameters ---------- a : xarray.Dataset, xarray.DataArray or xarray.Variable The first object to compare. b : xarray.Dataset, xarray.DataArray or xarray.Variable The second object to compare. rtol : float, optional Relative tolerance. atol : float, optional Absolute tolerance. decode_bytes : bool, optional Whether byte dtypes should be decoded to strings as UTF-8 or not. This is useful for testing serialization methods on Python 3 that return saved strings as bytes. check_dim_order : bool, optional, default is True Whether dimensions must be in the same order. See Also -------- assert_identical, assert_equal, numpy.testing.assert_allclose """ __tracebackhide__ = True assert type(a) is type(b) b = maybe_transpose_dims(a, b, check_dim_order) equiv = functools.partial( _data_allclose_or_equiv, rtol=rtol, atol=atol, decode_bytes=decode_bytes ) equiv.__name__ = "allclose" # type: ignore[attr-defined] def compat_variable(a, b): a = getattr(a, "variable", a) b = getattr(b, "variable", b) return a.dims == b.dims and (a._data is b._data or equiv(a.data, b.data)) def compat_node(a, b): return a.ds._coord_names == b.ds._coord_names and utils.dict_equiv( a.variables, b.variables, compat=compat_variable ) if isinstance(a, Variable): allclose = compat_variable(a, b) assert allclose, formatting.diff_array_repr(a, b, compat=equiv) elif isinstance(a, DataArray): allclose = utils.dict_equiv( a.coords, b.coords, compat=compat_variable ) and compat_variable(a.variable, b.variable) assert allclose, formatting.diff_array_repr(a, b, compat=equiv) elif isinstance(a, Dataset): allclose = a._coord_names == b._coord_names and utils.dict_equiv( a.variables, b.variables, compat=compat_variable ) assert allclose, formatting.diff_dataset_repr(a, b, compat=equiv) elif isinstance(a, Coordinates): allclose = utils.dict_equiv(a.variables, b.variables, compat=compat_variable) assert allclose, formatting.diff_coords_repr(a, b, compat=equiv) elif isinstance(a, DataTree): allclose = utils.dict_equiv( dict(a.subtree_with_keys), dict(b.subtree_with_keys), compat=compat_node ) assert allclose, formatting.diff_datatree_repr(a, b, compat=equiv) else: raise TypeError(f"{type(a)} not supported by assertion comparison") def _format_message(x, y, err_msg, verbose): diff = x - y abs_diff = max(abs(diff)) rel_diff = "not implemented" n_diff = np.count_nonzero(diff) n_total = diff.size fraction = f"{n_diff} / {n_total}" 
percentage = float(n_diff / n_total * 100) parts = [ "Arrays are not equal", err_msg, f"Mismatched elements: {fraction} ({percentage:.0f}%)", f"Max absolute difference: {abs_diff}", f"Max relative difference: {rel_diff}", ] if verbose: parts += [ f" x: {x!r}", f" y: {y!r}", ] return "\n".join(parts) @ensure_warnings def assert_duckarray_allclose( actual, desired, rtol=1e-07, atol=0, err_msg="", verbose=True ): """Like `np.testing.assert_allclose`, but for duckarrays.""" __tracebackhide__ = True allclose = duck_array_ops.allclose_or_equiv(actual, desired, rtol=rtol, atol=atol) assert allclose, _format_message(actual, desired, err_msg=err_msg, verbose=verbose) @ensure_warnings def assert_duckarray_equal(x, y, err_msg="", verbose=True): """Like `np.testing.assert_array_equal`, but for duckarrays""" __tracebackhide__ = True if not utils.is_duck_array(x) and not utils.is_scalar(x): x = np.asarray(x) if not utils.is_duck_array(y) and not utils.is_scalar(y): y = np.asarray(y) if (utils.is_duck_array(x) and utils.is_scalar(y)) or ( utils.is_scalar(x) and utils.is_duck_array(y) ): equiv = duck_array_ops.array_all(x == y) else: equiv = duck_array_ops.array_equiv(x, y) assert equiv, _format_message(x, y, err_msg=err_msg, verbose=verbose) def assert_chunks_equal(a, b): """ Assert that chunksizes along chunked dimensions are equal. Parameters ---------- a : xarray.Dataset or xarray.DataArray The first object to compare. b : xarray.Dataset or xarray.DataArray The second object to compare. """ if isinstance(a, DataArray) != isinstance(b, DataArray): raise TypeError("a and b have mismatched types") left = a.unify_chunks() right = b.unify_chunks() assert left.chunks == right.chunks def _assert_indexes_invariants_checks( indexes, possible_coord_variables, dims, check_default=True ): assert isinstance(indexes, dict), indexes assert all(isinstance(v, Index) for v in indexes.values()), { k: type(v) for k, v in indexes.items() } if check_default: index_vars = { k for k, v in possible_coord_variables.items() if isinstance(v, IndexVariable) } assert indexes.keys() <= index_vars, (set(indexes), index_vars) assert all( k in index_vars for k, v in possible_coord_variables.items() if v.dims == (k,) ), {k: type(v) for k, v in possible_coord_variables.items()} assert not any( isinstance(v, IndexVariable) for k, v in possible_coord_variables.items() if k not in indexes.keys() ), {k: type(v) for k, v in possible_coord_variables.items()} # check pandas index wrappers vs. coordinate data adapters for k, index in indexes.items(): if isinstance(index, PandasIndex): pd_index = index.index var = possible_coord_variables[k] assert (index.dim,) == var.dims, (pd_index, var) if k == index.dim: # skip multi-index levels here (checked below) assert index.coord_dtype == var.dtype, (index.coord_dtype, var.dtype) assert isinstance(var._data.array, pd.Index), var._data.array # TODO: check identity instead of equality? 
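                # the pandas object backing the coordinate variable must hold
                # the same values as the pandas index wrapped by the xarray index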
assert pd_index.equals(var._data.array), (pd_index, var) if isinstance(index, PandasMultiIndex): pd_index = index.index for name in index.index.names: assert name in possible_coord_variables, (pd_index, index_vars) var = possible_coord_variables[name] assert (index.dim,) == var.dims, (pd_index, var) assert index.level_coords_dtype[name] == var.dtype, ( index.level_coords_dtype[name], var.dtype, ) assert isinstance(var._data.array, pd.MultiIndex), var._data.array assert pd_index.equals(var._data.array), (pd_index, var) # check all all levels are in `indexes` assert name in indexes, (name, set(indexes)) # index identity is used to find unique indexes in `indexes` assert index is indexes[name], (pd_index, indexes[name].index) if check_default: defaults = default_indexes(possible_coord_variables, dims) assert indexes.keys() == defaults.keys(), (set(indexes), set(defaults)) assert all(v.equals(defaults[k]) for k, v in indexes.items()), ( indexes, defaults, ) def _assert_variable_invariants( var: Variable | Any, name: Hashable = None, ) -> None: if name is None: name_or_empty: tuple = () else: name_or_empty = (name,) assert isinstance(var, Variable), {name: type(var)} assert isinstance(var._dims, tuple), name_or_empty + (var._dims,) assert len(var._dims) == len(var._data.shape), name_or_empty + ( var._dims, var._data.shape, ) assert isinstance(var._encoding, type(None) | dict), name_or_empty + ( var._encoding, ) assert isinstance(var._attrs, type(None) | dict), name_or_empty + (var._attrs,) def _assert_dataarray_invariants(da: DataArray, check_default_indexes: bool): _assert_variable_invariants(da._variable) assert isinstance(da._coords, dict), da._coords if check_default_indexes: assert all(set(v.dims) <= set(da.dims) for v in da._coords.values()), ( da.dims, {k: v.dims for k, v in da._coords.items()}, ) for k, v in da._coords.items(): _assert_variable_invariants(v, k) assert da._indexes is not None _assert_indexes_invariants_checks( da._indexes, da._coords, da.dims, check_default=check_default_indexes ) def _assert_dataset_invariants(ds: Dataset, check_default_indexes: bool): assert isinstance(ds._variables, dict), type(ds._variables) for k, v in ds._variables.items(): _assert_variable_invariants(v, k) assert isinstance(ds._coord_names, set), ds._coord_names assert ds._coord_names <= ds._variables.keys(), ( ds._coord_names, set(ds._variables), ) assert type(ds._dims) is dict, ds._dims assert all(isinstance(v, int) for v in ds._dims.values()), ds._dims var_dims: set[Hashable] = set() for v in ds._variables.values(): var_dims.update(v.dims) assert ds._dims.keys() == var_dims, (set(ds._dims), var_dims) assert all( ds._dims[k] == v.sizes[k] for v in ds._variables.values() for k in v.sizes ), (ds._dims, {k: v.sizes for k, v in ds._variables.items()}) assert ds._indexes is not None _assert_indexes_invariants_checks( ds._indexes, ds._variables, ds._dims, check_default=check_default_indexes ) assert isinstance(ds._encoding, type(None) | dict) assert isinstance(ds._attrs, type(None) | dict) def _assert_internal_invariants( xarray_obj: DataArray | Dataset | Variable, check_default_indexes: bool ): """Validate that an xarray object satisfies its own internal invariants. This exists for the benefit of xarray's own test suite, but may be useful in external projects if they (ill-advisedly) create objects using xarray's private APIs. 
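
    For example (hypothetical usage), such a project could call
    ``_assert_internal_invariants(obj, check_default_indexes=True)`` right
    after constructing ``obj`` through those private APIs.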
""" if isinstance(xarray_obj, Variable): _assert_variable_invariants(xarray_obj) elif isinstance(xarray_obj, DataArray): _assert_dataarray_invariants( xarray_obj, check_default_indexes=check_default_indexes ) elif isinstance(xarray_obj, Dataset): _assert_dataset_invariants( xarray_obj, check_default_indexes=check_default_indexes ) elif isinstance(xarray_obj, Coordinates): _assert_dataset_invariants( xarray_obj.to_dataset(), check_default_indexes=check_default_indexes ) else: raise TypeError( f"{type(xarray_obj)} is not a supported type for xarray invariant checks" ) python-xarray-2026.01.0/xarray/testing/__init__.py0000664000175000017500000000115515136607163022141 0ustar alastairalastairfrom xarray.testing.assertions import ( # noqa: F401 _assert_dataarray_invariants, _assert_dataset_invariants, _assert_indexes_invariants_checks, _assert_internal_invariants, _assert_variable_invariants, _data_allclose_or_equiv, assert_allclose, assert_chunks_equal, assert_duckarray_allclose, assert_duckarray_equal, assert_equal, assert_identical, assert_isomorphic, ) __all__ = [ "assert_allclose", "assert_chunks_equal", "assert_duckarray_allclose", "assert_duckarray_equal", "assert_equal", "assert_identical", "assert_isomorphic", ] python-xarray-2026.01.0/xarray/structure/0000775000175000017500000000000015136607163020411 5ustar alastairalastairpython-xarray-2026.01.0/xarray/structure/chunks.py0000664000175000017500000001421115136607163022255 0ustar alastairalastair""" Functions for handling chunked arrays. """ from __future__ import annotations import itertools from collections.abc import Hashable, Mapping from functools import lru_cache from typing import TYPE_CHECKING, Any, Literal, TypeVar, Union, overload from xarray.core import utils from xarray.core.variable import Variable from xarray.namedarray.parallelcompat import ( ChunkManagerEntrypoint, get_chunked_array_type, guess_chunkmanager, ) if TYPE_CHECKING: from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset from xarray.core.types import T_ChunkDim from xarray.core.variable import Variable MissingCoreDimOptions = Literal["raise", "copy", "drop"] @lru_cache(maxsize=512) def _get_breaks_cached( *, size: int, chunk_sizes: tuple[int, ...], preferred_chunk_sizes: int | tuple[int, ...], ) -> int | None: if isinstance(preferred_chunk_sizes, int) and preferred_chunk_sizes == 1: # short-circuit for the trivial case return None # Determine the stop indices of the preferred chunks, but omit the last stop # (equal to the dim size). In particular, assume that when a sequence # expresses the preferred chunks, the sequence sums to the size. preferred_stops = ( range(preferred_chunk_sizes, size, preferred_chunk_sizes) if isinstance(preferred_chunk_sizes, int) else set(itertools.accumulate(preferred_chunk_sizes[:-1])) ) # Gather any stop indices of the specified chunks that are not a stop index # of a preferred chunk. Again, omit the last stop, assuming that it equals # the dim size. 
actual_stops = itertools.accumulate(chunk_sizes[:-1]) # This copy is required for parallel iteration actual_stops_2 = itertools.accumulate(chunk_sizes[:-1]) disagrees = itertools.compress( actual_stops_2, (a not in preferred_stops for a in actual_stops) ) try: return next(disagrees) except StopIteration: return None def _maybe_chunk( name: Hashable, var: Variable, chunks: Mapping[Any, T_ChunkDim] | None, token=None, lock=None, name_prefix: str = "xarray-", overwrite_encoded_chunks: bool = False, inline_array: bool = False, chunked_array_type: str | ChunkManagerEntrypoint | None = None, from_array_kwargs=None, ) -> Variable: from xarray.namedarray.daskmanager import DaskManager if chunks is not None: chunks = {dim: chunks[dim] for dim in var.dims if dim in chunks} if var.ndim: chunked_array_type = guess_chunkmanager( chunked_array_type ) # coerce string to ChunkManagerEntrypoint type if isinstance(chunked_array_type, DaskManager): from dask.base import tokenize # when rechunking by different amounts, make sure dask names change # by providing chunks as an input to tokenize. # subtle bugs result otherwise. see GH3350 # we use str() for speed, and use the name for the final array name on the next line token2 = tokenize(token or var._data, str(chunks)) name2 = f"{name_prefix}{name}-{token2}" from_array_kwargs = utils.consolidate_dask_from_array_kwargs( from_array_kwargs, name=name2, lock=lock, inline_array=inline_array, ) var = var.chunk( chunks, chunked_array_type=chunked_array_type, from_array_kwargs=from_array_kwargs, ) if overwrite_encoded_chunks and var.chunks is not None: var.encoding["chunks"] = tuple(x[0] for x in var.chunks) return var else: return var _T = TypeVar("_T", bound=Union["Dataset", "DataArray"]) _U = TypeVar("_U", bound=Union["Dataset", "DataArray"]) _V = TypeVar("_V", bound=Union["Dataset", "DataArray"]) @overload def unify_chunks(obj: _T, /) -> tuple[_T]: ... @overload def unify_chunks(obj1: _T, obj2: _U, /) -> tuple[_T, _U]: ... @overload def unify_chunks(obj1: _T, obj2: _U, obj3: _V, /) -> tuple[_T, _U, _V]: ... @overload def unify_chunks(*objects: Dataset | DataArray) -> tuple[Dataset | DataArray, ...]: ... def unify_chunks(*objects: Dataset | DataArray) -> tuple[Dataset | DataArray, ...]: """ Given any number of Dataset and/or DataArray objects, returns new objects with unified chunk size along all chunked dimensions. 
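
    Parameters
    ----------
    *objects : Dataset or DataArray
        Any number of objects; variables backed by chunked (for example dask)
        arrays have their chunk sizes unified across all inputs, while
        non-chunked variables are passed through unchanged.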
Returns ------- unified (DataArray or Dataset) – Tuple of objects with the same type as *objects with consistent chunk sizes for all dask-array variables See Also -------- dask.array.core.unify_chunks """ from xarray.core.dataarray import DataArray # Convert all objects to datasets datasets = [ obj._to_temp_dataset() if isinstance(obj, DataArray) else obj.copy() for obj in objects ] # Get arguments to pass into dask.array.core.unify_chunks unify_chunks_args = [] sizes: dict[Hashable, int] = {} for ds in datasets: for v in ds._variables.values(): if v.chunks is not None: # Check that sizes match across different datasets for dim, size in v.sizes.items(): try: if sizes[dim] != size: raise ValueError( f"Dimension {dim!r} size mismatch: {sizes[dim]} != {size}" ) except KeyError: sizes[dim] = size unify_chunks_args += [v._data, v._dims] # No dask arrays: Return inputs if not unify_chunks_args: return objects chunkmanager = get_chunked_array_type(*list(unify_chunks_args)) _, chunked_data = chunkmanager.unify_chunks(*unify_chunks_args) chunked_data_iter = iter(chunked_data) out: list[Dataset | DataArray] = [] for obj, ds in zip(objects, datasets, strict=True): for k, v in ds._variables.items(): if v.chunks is not None: ds._variables[k] = v.copy(data=next(chunked_data_iter)) out.append(obj._from_temp_dataset(ds) if isinstance(obj, DataArray) else ds) return tuple(out) python-xarray-2026.01.0/xarray/structure/combine.py0000664000175000017500000012614415136607163022407 0ustar alastairalastairfrom __future__ import annotations from collections import Counter, defaultdict from collections.abc import Callable, Hashable, Iterable, Iterator, Sequence from typing import TYPE_CHECKING, Literal, TypeAlias, TypeVar, cast, overload import pandas as pd from xarray.core import dtypes from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset from xarray.core.datatree import DataTree from xarray.core.utils import iterate_nested from xarray.structure.alignment import AlignmentError from xarray.structure.concat import concat from xarray.structure.merge import merge from xarray.util.deprecation_helpers import ( _COMPAT_DEFAULT, _COORDS_DEFAULT, _DATA_VARS_DEFAULT, _JOIN_DEFAULT, CombineKwargDefault, ) if TYPE_CHECKING: from xarray.core.types import ( CombineAttrsOptions, CompatOptions, JoinOptions, NestedSequence, ) T = TypeVar("T") def _infer_concat_order_from_positions( datasets: NestedSequence[T], ) -> dict[tuple[int, ...], T]: return dict(_infer_tile_ids_from_nested_list(datasets, ())) def _infer_tile_ids_from_nested_list( entry: NestedSequence[T], current_pos: tuple[int, ...] ) -> Iterator[tuple[tuple[int, ...], T]]: """ Given a list of lists (of lists...) of objects, returns an iterator which returns a tuple containing the index of each object in the nested list structure as the key, and the object. This can then be called by the dict constructor to create a dictionary of the objects organised by their position in the original nested list. Recursively traverses the given structure, while keeping track of the current position. Should work for any type of object which isn't a list. Parameters ---------- entry : list[list[obj, obj, ...], ...] List of lists of arbitrary depth, containing objects in the order they are to be concatenated. 
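    current_pos : tuple of int
        Position of ``entry`` within the outer nested structure, accumulated
        during recursion; the top-level call passes ``()``.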
Returns ------- combined_tile_ids : dict[tuple(int, ...), obj] """ if not isinstance(entry, str) and isinstance(entry, Sequence): for i, item in enumerate(entry): yield from _infer_tile_ids_from_nested_list(item, current_pos + (i,)) else: yield current_pos, cast(T, entry) def _ensure_same_types(series, dim): if series.dtype == object: types = set(series.map(type)) if len(types) > 1: try: import cftime cftimes = any(issubclass(t, cftime.datetime) for t in types) except ImportError: cftimes = False types = ", ".join(t.__name__ for t in types) error_msg = ( f"Cannot combine along dimension '{dim}' with mixed types." f" Found: {types}." ) if cftimes: error_msg = ( f"{error_msg} If importing data directly from a file then " f"setting `use_cftime=True` may fix this issue." ) raise TypeError(error_msg) def _infer_concat_order_from_coords(datasets: list[Dataset] | list[DataTree]): concat_dims = [] tile_ids: list[tuple[int, ...]] = [() for ds in datasets] # All datasets have same variables because they've been grouped as such ds0 = datasets[0] for dim in ds0.dims: # Check if dim is a coordinate dimension if dim in ds0: # Need to read coordinate values to do ordering indexes: list[pd.Index] = [] for ds in datasets: index = ds._indexes.get(dim) if index is None: error_msg = ( f"Every dimension requires a corresponding 1D coordinate " f"and index for inferring concatenation order but the " f"coordinate '{dim}' has no corresponding index" ) raise ValueError(error_msg) # TODO (benbovy, flexible indexes): support flexible indexes? indexes.append(index.to_pandas_index()) # If dimension coordinate values are same on every dataset then # should be leaving this dimension alone (it's just a "bystander") if not all(index.equals(indexes[0]) for index in indexes[1:]): # Infer order datasets should be arranged in along this dim concat_dims.append(dim) if all(index.is_monotonic_increasing for index in indexes): ascending = True elif all(index.is_monotonic_decreasing for index in indexes): ascending = False else: raise ValueError( f"Coordinate variable {dim} is neither " "monotonically increasing nor " "monotonically decreasing on all datasets" ) # Assume that any two datasets whose coord along dim starts # with the same value have the same coord values throughout. if any(index.size == 0 for index in indexes): raise ValueError("Cannot handle size zero dimensions") first_items = pd.Index([index[0] for index in indexes]) series = first_items.to_series() # ensure series does not contain mixed types, e.g. 
cftime calendars _ensure_same_types(series, dim) # Sort datasets along dim # We want rank but with identical elements given identical # position indices - they should be concatenated along another # dimension, not along this one rank = series.rank( method="dense", ascending=ascending, numeric_only=False ) order = (rank.astype(int).values - 1).tolist() # Append positions along extra dimension to structure which # encodes the multi-dimensional concatenation order tile_ids = [ tile_id + (position,) for tile_id, position in zip(tile_ids, order, strict=True) ] if len(datasets) > 1 and not concat_dims: if any(isinstance(data, DataTree) for data in datasets): raise ValueError( "Did not find any dimension coordinates at root nodes " "to order the DataTree objects for concatenation" ) else: raise ValueError( "Could not find any dimension coordinates to use to " "order the Dataset objects for concatenation" ) combined_ids = dict(zip(tile_ids, datasets, strict=True)) return combined_ids, concat_dims def _check_dimension_depth_tile_ids(combined_tile_ids): """ Check all tuples are the same length, i.e. check that all lists are nested to the same depth. """ tile_ids = combined_tile_ids.keys() nesting_depths = [len(tile_id) for tile_id in tile_ids] if not nesting_depths: nesting_depths = [0] if set(nesting_depths) != {nesting_depths[0]}: raise ValueError( "The supplied objects do not form a hypercube because" " sub-lists do not have consistent depths" ) # return these just to be reused in _check_shape_tile_ids return tile_ids, nesting_depths def _check_shape_tile_ids(combined_tile_ids): """Check all lists along one dimension are same length.""" tile_ids, nesting_depths = _check_dimension_depth_tile_ids(combined_tile_ids) for dim in range(nesting_depths[0]): indices_along_dim = [tile_id[dim] for tile_id in tile_ids] occurrences = Counter(indices_along_dim) if len(set(occurrences.values())) != 1: raise ValueError( "The supplied objects do not form a hypercube " "because sub-lists do not have consistent " f"lengths along dimension {dim}" ) def _combine_nd( combined_ids, concat_dims, data_vars, coords, compat: CompatOptions | CombineKwargDefault, fill_value, join: JoinOptions | CombineKwargDefault, combine_attrs: CombineAttrsOptions, ): """ Combines an N-dimensional structure of datasets into one by applying a series of either concat and merge operations along each dimension. No checks are performed on the consistency of the datasets, concat_dims or tile_IDs, because it is assumed that this has already been done. Parameters ---------- combined_ids : Dict[Tuple[int, ...]], xarray.Dataset | xarray.DataTree] Structure containing all datasets to be concatenated with "tile_IDs" as keys, which specify position within the desired final combined result. concat_dims : sequence of str The dimensions along which the datasets should be concatenated. Must be in order, and the length must match the length of the tuples used as keys in combined_ids. If the string is a dimension name then concat along that dimension, if it is None then merge. Returns ------- combined_ds : xarray.Dataset | xarray.DataTree """ example_tile_id = next(iter(combined_ids.keys())) n_dims = len(example_tile_id) if len(concat_dims) != n_dims: raise ValueError( f"concat_dims has length {len(concat_dims)} but the datasets " f"passed are nested in a {n_dims}-dimensional structure" ) # Each iteration of this loop reduces the length of the tile_ids tuples # by one. 
It always combines along the first dimension, removing the first # element of the tuple for concat_dim in concat_dims: combined_ids = _combine_all_along_first_dim( combined_ids, dim=concat_dim, data_vars=data_vars, coords=coords, compat=compat, fill_value=fill_value, join=join, combine_attrs=combine_attrs, ) (combined_ds,) = combined_ids.values() return combined_ds def _combine_all_along_first_dim( combined_ids, dim, data_vars, coords, compat: CompatOptions | CombineKwargDefault, fill_value, join: JoinOptions | CombineKwargDefault, combine_attrs: CombineAttrsOptions, ): # Group into lines of datasets which must be combined along dim grouped = groupby_defaultdict(list(combined_ids.items()), key=_new_tile_id) # Combine all of these datasets along dim new_combined_ids = {} for new_id, group in grouped: combined_ids = dict(sorted(group)) datasets = combined_ids.values() new_combined_ids[new_id] = _combine_1d( datasets, concat_dim=dim, compat=compat, data_vars=data_vars, coords=coords, fill_value=fill_value, join=join, combine_attrs=combine_attrs, ) return new_combined_ids def _combine_1d( datasets, concat_dim, compat: CompatOptions | CombineKwargDefault, data_vars, coords, fill_value, join: JoinOptions | CombineKwargDefault, combine_attrs: CombineAttrsOptions, ): """ Applies either concat or merge to 1D list of datasets depending on value of concat_dim """ if concat_dim is not None: try: combined = concat( datasets, dim=concat_dim, data_vars=data_vars, coords=coords, compat=compat, fill_value=fill_value, join=join, combine_attrs=combine_attrs, ) except ValueError as err: if "encountered unexpected variable" in str(err): raise ValueError( "These objects cannot be combined using only " "xarray.combine_nested, instead either use " "xarray.combine_by_coords, or do it manually " "with xarray.concat, xarray.merge and " "xarray.align" ) from err else: raise else: try: combined = merge( datasets, compat=compat, fill_value=fill_value, join=join, combine_attrs=combine_attrs, ) except AlignmentError as e: e.add_note( "If you are intending to concatenate datasets, please specify the concatenation dimension explicitly. " "Using merge to concatenate is quite inefficient." ) raise e return combined def _new_tile_id(single_id_ds_pair): tile_id, _ds = single_id_ds_pair return tile_id[1:] def _nested_combine( datasets, concat_dims, compat, data_vars, coords, ids, fill_value, join: JoinOptions | CombineKwargDefault, combine_attrs: CombineAttrsOptions, ): if len(datasets) == 0: return Dataset() # Arrange datasets for concatenation # Use information from the shape of the user input if not ids: # Determine tile_IDs by structure of input in N-D # (i.e. ordering in list-of-lists) combined_ids = _infer_concat_order_from_positions(datasets) else: # Already sorted so just use the ids already passed combined_ids = dict(zip(ids, datasets, strict=True)) # Check that the inferred shape is combinable _check_shape_tile_ids(combined_ids) # Apply series of concatenate or merge operations along each dimension combined = _combine_nd( combined_ids, concat_dims=concat_dims, compat=compat, data_vars=data_vars, coords=coords, fill_value=fill_value, join=join, combine_attrs=combine_attrs, ) return combined # Define types for arbitrarily-nested list of lists. # Mypy doesn't seem to handle overloads properly with recursive types, so we # explicitly expand the first handful of levels of recursion. 
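# For example (names purely illustrative), a doubly-nested input such as
# [[ds_00, ds_01], [ds_10, ds_11]] is covered by Sequence[Sequence[DatasetLike]] below.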
DatasetLike: TypeAlias = DataArray | Dataset DatasetHyperCube: TypeAlias = ( DatasetLike | Sequence[DatasetLike] | Sequence[Sequence[DatasetLike]] | Sequence[Sequence[Sequence[DatasetLike]]] | Sequence[Sequence[Sequence[Sequence[DatasetLike]]]] ) DataTreeHyperCube: TypeAlias = ( DataTree | Sequence[DataTree] | Sequence[Sequence[DataTree]] | Sequence[Sequence[Sequence[DataTree]]] | Sequence[Sequence[Sequence[Sequence[DataTree]]]] ) @overload def combine_nested( datasets: DatasetHyperCube, concat_dim: str | DataArray | list[str] | Sequence[str | DataArray | pd.Index | None] | None, compat: str | CombineKwargDefault = ..., data_vars: str | CombineKwargDefault = ..., coords: str | CombineKwargDefault = ..., fill_value: object = ..., join: JoinOptions | CombineKwargDefault = ..., combine_attrs: CombineAttrsOptions = ..., ) -> Dataset: ... @overload def combine_nested( datasets: DataTreeHyperCube, concat_dim: str | DataArray | list[str] | Sequence[str | DataArray | pd.Index | None] | None, compat: str | CombineKwargDefault = ..., data_vars: str | CombineKwargDefault = ..., coords: str | CombineKwargDefault = ..., fill_value: object = ..., join: JoinOptions | CombineKwargDefault = ..., combine_attrs: CombineAttrsOptions = ..., ) -> DataTree: ... def combine_nested( datasets: DatasetHyperCube | DataTreeHyperCube, concat_dim: str | DataArray | list[str] | Sequence[str | DataArray | pd.Index | None] | None, compat: str | CombineKwargDefault = _COMPAT_DEFAULT, data_vars: str | CombineKwargDefault = _DATA_VARS_DEFAULT, coords: str | CombineKwargDefault = _COORDS_DEFAULT, fill_value: object = dtypes.NA, join: JoinOptions | CombineKwargDefault = _JOIN_DEFAULT, combine_attrs: CombineAttrsOptions = "drop", ) -> Dataset | DataTree: """ Explicitly combine an N-dimensional grid of datasets into one by using a succession of concat and merge operations along each dimension of the grid. Does not sort the supplied datasets under any circumstances, so the datasets must be passed in the order you wish them to be concatenated. It does align coordinates, but different variables on datasets can cause it to fail under some scenarios. In complex cases, you may need to clean up your data and use concat/merge explicitly. To concatenate along multiple dimensions the datasets must be passed as a nested list-of-lists, with a depth equal to the length of ``concat_dims``. ``combine_nested`` will concatenate along the top-level list first. Useful for combining datasets from a set of nested directories, or for collecting the output of a simulation parallelized along multiple dimensions. Parameters ---------- datasets : list or nested list of Dataset, DataArray or DataTree Dataset objects to combine. If concatenation or merging along more than one dimension is desired, then datasets must be supplied in a nested list-of-lists. concat_dim : str, or list of str, DataArray, Index or None Dimensions along which to concatenate variables, as used by :py:func:`xarray.concat`. Set ``concat_dim=[..., None, ...]`` explicitly to disable concatenation and merge instead along a particular dimension. The position of ``None`` in the list specifies the dimension of the nested-list input along which to merge. Must be the same length as the depth of the list passed to ``datasets``. 
compat : {"identical", "equals", "broadcast_equals", \ "no_conflicts", "override"}, default: "no_conflicts" String indicating how to compare variables of the same name for potential merge conflicts: - "broadcast_equals": all values must be equal when variables are broadcast against each other to ensure common dimensions. - "equals": all values and dimensions must be the same. - "identical": all values, dimensions and attributes must be the same. - "no_conflicts": only values which are not null in both datasets must be equal. The returned dataset then contains the combination of all non-null values. - "override": skip comparing and pick variable from first dataset data_vars : {"minimal", "different", "all", None} or list of str, default: "all" These data variables will be concatenated together: * "minimal": Only data variables in which the dimension already appears are included. * "different": Data variables which are not equal (ignoring attributes) across all datasets are also concatenated (as well as all for which dimension already appears). Beware: this option may load the data payload of data variables into memory if they are not already loaded. * "all": All data variables will be concatenated. * None: Means ``"all"`` if ``concat_dim`` is not present in any of the ``objs``, and ``"minimal"`` if ``concat_dim`` is present in any of ``objs``. * list of str: The listed data variables will be concatenated, in addition to the "minimal" data variables. coords : {"minimal", "different", "all"} or list of str, default: "different" These coordinate variables will be concatenated together: * "minimal": Only coordinates in which the dimension already appears are included. If concatenating over a dimension _not_ present in any of the objects, then all data variables will be concatenated along that new dimension. * "different": Coordinates which are not equal (ignoring attributes) across all datasets are also concatenated (as well as all for which dimension already appears). Beware: this option may load the data payload of coordinate variables into memory if they are not already loaded. * "all": All coordinate variables will be concatenated, except those corresponding to other dimensions. * list of str: The listed coordinate variables will be concatenated, in addition to the "minimal" coordinates. fill_value : scalar or dict-like, optional Value to use for newly missing values. If a dict-like, maps variable names to fill values. Use a data array's name to refer to its values. join : {"outer", "inner", "left", "right", "exact"}, default: "outer" String indicating how to combine differing indexes (excluding concat_dim) in objects - "outer": use the union of object indexes - "inner": use the intersection of object indexes - "left": use indexes from the first object with each dimension - "right": use indexes from the last object with each dimension - "exact": instead of aligning, raise `ValueError` when indexes to be aligned are not equal - "override": if indexes are of same size, rewrite indexes to be those of the first object with that dimension. Indexes for the same dimension must have the same size in all objects. combine_attrs : {"drop", "identical", "no_conflicts", "drop_conflicts", \ "override"} or callable, default: "drop" A callable or a string indicating how to combine attrs of the objects being merged: - "drop": empty attrs on returned Dataset. - "identical": all attrs must be the same on every object. 
- "no_conflicts": attrs from all objects are combined, any that have the same name must also have the same value. - "drop_conflicts": attrs from all objects are combined, any that have the same name but different values are dropped. - "override": skip comparing and copy attrs from the first dataset to the result. If a callable, it must expect a sequence of ``attrs`` dicts and a context object as its only parameters. Returns ------- combined : xarray.Dataset or xarray.DataTree Examples -------- A common task is collecting data from a parallelized simulation in which each process wrote out to a separate file. A domain which was decomposed into 4 parts, 2 each along both the x and y axes, requires organising the datasets into a doubly-nested list, e.g: >>> x1y1 = xr.Dataset( ... { ... "temperature": (("x", "y"), np.random.randn(2, 2)), ... "precipitation": (("x", "y"), np.random.randn(2, 2)), ... } ... ) >>> x1y1 Size: 64B Dimensions: (x: 2, y: 2) Dimensions without coordinates: x, y Data variables: temperature (x, y) float64 32B 1.764 0.4002 0.9787 2.241 precipitation (x, y) float64 32B 1.868 -0.9773 0.9501 -0.1514 >>> x1y2 = xr.Dataset( ... { ... "temperature": (("x", "y"), np.random.randn(2, 2)), ... "precipitation": (("x", "y"), np.random.randn(2, 2)), ... } ... ) >>> x2y1 = xr.Dataset( ... { ... "temperature": (("x", "y"), np.random.randn(2, 2)), ... "precipitation": (("x", "y"), np.random.randn(2, 2)), ... } ... ) >>> x2y2 = xr.Dataset( ... { ... "temperature": (("x", "y"), np.random.randn(2, 2)), ... "precipitation": (("x", "y"), np.random.randn(2, 2)), ... } ... ) >>> ds_grid = [[x1y1, x1y2], [x2y1, x2y2]] >>> combined = xr.combine_nested(ds_grid, concat_dim=["x", "y"]) >>> combined Size: 256B Dimensions: (x: 4, y: 4) Dimensions without coordinates: x, y Data variables: temperature (x, y) float64 128B 1.764 0.4002 -0.1032 ... 0.04576 -0.1872 precipitation (x, y) float64 128B 1.868 -0.9773 0.761 ... 0.1549 0.3782 ``combine_nested`` can also be used to explicitly merge datasets with different variables. For example if we have 4 datasets, which are divided along two times, and contain two different variables, we can pass ``None`` to ``concat_dim`` to specify the dimension of the nested list over which we wish to use ``merge`` instead of ``concat``: >>> t1temp = xr.Dataset({"temperature": ("t", np.random.randn(5))}) >>> t1temp Size: 40B Dimensions: (t: 5) Dimensions without coordinates: t Data variables: temperature (t) float64 40B -0.8878 -1.981 -0.3479 0.1563 1.23 >>> t1precip = xr.Dataset({"precipitation": ("t", np.random.randn(5))}) >>> t1precip Size: 40B Dimensions: (t: 5) Dimensions without coordinates: t Data variables: precipitation (t) float64 40B 1.202 -0.3873 -0.3023 -1.049 -1.42 >>> t2temp = xr.Dataset({"temperature": ("t", np.random.randn(5))}) >>> t2precip = xr.Dataset({"precipitation": ("t", np.random.randn(5))}) >>> ds_grid = [[t1temp, t1precip], [t2temp, t2precip]] >>> combined = xr.combine_nested(ds_grid, concat_dim=["t", None]) >>> combined Size: 160B Dimensions: (t: 10) Dimensions without coordinates: t Data variables: temperature (t) float64 80B -0.8878 -1.981 -0.3479 ... -0.4381 -1.253 precipitation (t) float64 80B 1.202 -0.3873 -0.3023 ... 
-0.8955 0.3869 See also -------- concat merge """ any_datasets = any(isinstance(obj, Dataset) for obj in iterate_nested(datasets)) any_unnamed_arrays = any( isinstance(obj, DataArray) and obj.name is None for obj in iterate_nested(datasets) ) if any_datasets and any_unnamed_arrays: raise ValueError("Can't combine datasets with unnamed arrays.") any_datatrees = any(isinstance(obj, DataTree) for obj in iterate_nested(datasets)) all_datatrees = all(isinstance(obj, DataTree) for obj in iterate_nested(datasets)) if any_datatrees and not all_datatrees: raise ValueError("Can't combine a mix of DataTree and non-DataTree objects.") concat_dims = ( [concat_dim] if isinstance(concat_dim, str | DataArray) or concat_dim is None else concat_dim ) # The IDs argument tells _nested_combine that datasets aren't yet sorted return _nested_combine( datasets, concat_dims=concat_dims, compat=compat, data_vars=data_vars, coords=coords, ids=False, fill_value=fill_value, join=join, combine_attrs=combine_attrs, ) def vars_as_keys(ds): return tuple(sorted(ds)) K = TypeVar("K", bound=Hashable) def groupby_defaultdict( iter: list[T], key: Callable[[T], K], ) -> Iterator[tuple[K, Iterator[T]]]: """replacement for itertools.groupby""" idx = defaultdict(list) for i, obj in enumerate(iter): idx[key(obj)].append(i) for k, ix in idx.items(): yield k, (iter[i] for i in ix) def _combine_single_variable_hypercube( datasets, fill_value, data_vars, coords, compat: CompatOptions | CombineKwargDefault, join: JoinOptions | CombineKwargDefault, combine_attrs: CombineAttrsOptions, ): """ Attempt to combine a list of Datasets into a hypercube using their coordinates. All provided Datasets must belong to a single variable, ie. must be assigned the same variable name. This precondition is not checked by this function, so the caller is assumed to know what it's doing. This function is NOT part of the public API. """ if len(datasets) == 0: raise ValueError( "At least one Dataset is required to resolve variable names " "for combined hypercube." ) combined_ids, concat_dims = _infer_concat_order_from_coords(list(datasets)) if fill_value is None: # check that datasets form complete hypercube _check_shape_tile_ids(combined_ids) else: # check only that all datasets have same dimension depth for these # vars _check_dimension_depth_tile_ids(combined_ids) # Concatenate along all of concat_dims one by one to create single ds concatenated = _combine_nd( combined_ids, concat_dims=concat_dims, data_vars=data_vars, coords=coords, compat=compat, fill_value=fill_value, join=join, combine_attrs=combine_attrs, ) # Check the overall coordinates are monotonically increasing for dim in concat_dims: indexes = concatenated.indexes.get(dim) if not (indexes.is_monotonic_increasing or indexes.is_monotonic_decreasing): raise ValueError( "Resulting object does not have monotonic" f" global indexes along dimension {dim}" ) return concatenated def combine_by_coords( data_objects: Iterable[Dataset | DataArray] = [], compat: CompatOptions | CombineKwargDefault = _COMPAT_DEFAULT, data_vars: Literal["all", "minimal", "different"] | None | list[str] | CombineKwargDefault = _DATA_VARS_DEFAULT, coords: str | CombineKwargDefault = _COORDS_DEFAULT, fill_value: object = dtypes.NA, join: JoinOptions | CombineKwargDefault = _JOIN_DEFAULT, combine_attrs: CombineAttrsOptions = "no_conflicts", ) -> Dataset | DataArray: """ Attempt to auto-magically combine the given datasets (or data arrays) into one by using dimension coordinates. 
This function attempts to combine a group of datasets along any number of dimensions into a single entity by inspecting coords and metadata and using a combination of concat and merge. Will attempt to order the datasets such that the values in their dimension coordinates are monotonic along all dimensions. If it cannot determine the order in which to concatenate the datasets, it will raise a ValueError. Non-coordinate dimensions will be ignored, as will any coordinate dimensions which do not vary between each dataset. Aligns coordinates, but different variables on datasets can cause it to fail under some scenarios. In complex cases, you may need to clean up your data and use concat/merge explicitly (also see `combine_nested`). Works well if, for example, you have N years of data and M data variables, and each combination of a distinct time period and set of data variables is saved as its own dataset. Also useful for if you have a simulation which is parallelized in multiple dimensions, but has global coordinates saved in each file specifying the positions of points within the global domain. Parameters ---------- data_objects : Iterable of Datasets or DataArrays Data objects to combine. compat : {"identical", "equals", "broadcast_equals", "no_conflicts", "override"}, \ default: "no_conflicts" String indicating how to compare variables of the same name for potential conflicts: - "broadcast_equals": all values must be equal when variables are broadcast against each other to ensure common dimensions. - "equals": all values and dimensions must be the same. - "identical": all values, dimensions and attributes must be the same. - "no_conflicts": only values which are not null in both datasets must be equal. The returned dataset then contains the combination of all non-null values. - "override": skip comparing and pick variable from first dataset data_vars : {"minimal", "different", "all", None} or list of str, default: "all" These data variables will be concatenated together: - "minimal": Only data variables in which the dimension already appears are included. - "different": Data variables which are not equal (ignoring attributes) across all datasets are also concatenated (as well as all for which dimension already appears). Beware: this option may load the data payload of data variables into memory if they are not already loaded. - "all": All data variables will be concatenated. - None: Means ``"all"`` if ``concat_dim`` is not present in any of the ``objs``, and ``"minimal"`` if ``concat_dim`` is present in any of ``objs``. - list of str: The listed data variables will be concatenated, in addition to the "minimal" data variables. coords : {"minimal", "different", "all"} or list of str, default: "different" These coordinate variables will be concatenated together: - "minimal": Only coordinates in which the dimension already appears are included. If concatenating over a dimension _not_ present in any of the objects, then all data variables will be concatenated along that new dimension. - "different": Coordinates which are not equal (ignoring attributes) across all datasets are also concatenated (as well as all for which dimension already appears). Beware: this option may load the data payload of coordinate variables into memory if they are not already loaded. - "all": All coordinate variables will be concatenated, except those corresponding to other dimensions. - list of str: The listed coordinate variables will be concatenated, in addition to the "minimal" coordinates. 
fill_value : scalar or dict-like, optional Value to use for newly missing values. If a dict-like, maps variable names to fill values. Use a data array's name to refer to its values. If None, raises a ValueError if the passed Datasets do not create a complete hypercube. join : {"outer", "inner", "left", "right", "exact"}, default: "outer" String indicating how to combine differing indexes in objects - "outer": use the union of object indexes - "inner": use the intersection of object indexes - "left": use indexes from the first object with each dimension - "right": use indexes from the last object with each dimension - "exact": instead of aligning, raise `ValueError` when indexes to be aligned are not equal - "override": if indexes are of same size, rewrite indexes to be those of the first object with that dimension. Indexes for the same dimension must have the same size in all objects. combine_attrs : {"drop", "identical", "no_conflicts", "drop_conflicts", \ "override"} or callable, default: "no_conflicts" A callable or a string indicating how to combine attrs of the objects being merged: - "drop": empty attrs on returned Dataset. - "identical": all attrs must be the same on every object. - "no_conflicts": attrs from all objects are combined, any that have the same name must also have the same value. - "drop_conflicts": attrs from all objects are combined, any that have the same name but different values are dropped. - "override": skip comparing and copy attrs from the first dataset to the result. If a callable, it must expect a sequence of ``attrs`` dicts and a context object as its only parameters. Returns ------- combined : xarray.Dataset or xarray.DataArray Will return a Dataset unless all the inputs are unnamed DataArrays, in which case a DataArray will be returned. See also -------- concat merge combine_nested Examples -------- Combining two datasets using their common dimension coordinates. Notice they are concatenated based on the values in their dimension coordinates, not on their position in the list passed to `combine_by_coords`. >>> x1 = xr.Dataset( ... { ... "temperature": (("y", "x"), 20 * np.random.rand(6).reshape(2, 3)), ... "precipitation": (("y", "x"), np.random.rand(6).reshape(2, 3)), ... }, ... coords={"y": [0, 1], "x": [10, 20, 30]}, ... ) >>> x2 = xr.Dataset( ... { ... "temperature": (("y", "x"), 20 * np.random.rand(6).reshape(2, 3)), ... "precipitation": (("y", "x"), np.random.rand(6).reshape(2, 3)), ... }, ... coords={"y": [2, 3], "x": [10, 20, 30]}, ... ) >>> x3 = xr.Dataset( ... { ... "temperature": (("y", "x"), 20 * np.random.rand(6).reshape(2, 3)), ... "precipitation": (("y", "x"), np.random.rand(6).reshape(2, 3)), ... }, ... coords={"y": [2, 3], "x": [40, 50, 60]}, ... 
) >>> x1 Size: 136B Dimensions: (y: 2, x: 3) Coordinates: * y (y) int64 16B 0 1 * x (x) int64 24B 10 20 30 Data variables: temperature (y, x) float64 48B 10.98 14.3 12.06 10.9 8.473 12.92 precipitation (y, x) float64 48B 0.4376 0.8918 0.9637 0.3834 0.7917 0.5289 >>> x2 Size: 136B Dimensions: (y: 2, x: 3) Coordinates: * y (y) int64 16B 2 3 * x (x) int64 24B 10 20 30 Data variables: temperature (y, x) float64 48B 11.36 18.51 1.421 1.743 0.4044 16.65 precipitation (y, x) float64 48B 0.7782 0.87 0.9786 0.7992 0.4615 0.7805 >>> x3 Size: 136B Dimensions: (y: 2, x: 3) Coordinates: * y (y) int64 16B 2 3 * x (x) int64 24B 40 50 60 Data variables: temperature (y, x) float64 48B 2.365 12.8 2.867 18.89 10.44 8.293 precipitation (y, x) float64 48B 0.2646 0.7742 0.4562 0.5684 0.01879 0.6176 >>> xr.combine_by_coords([x2, x1]) Size: 248B Dimensions: (y: 4, x: 3) Coordinates: * y (y) int64 32B 0 1 2 3 * x (x) int64 24B 10 20 30 Data variables: temperature (y, x) float64 96B 10.98 14.3 12.06 ... 1.743 0.4044 16.65 precipitation (y, x) float64 96B 0.4376 0.8918 0.9637 ... 0.4615 0.7805 >>> xr.combine_by_coords([x3, x1], join="outer") Size: 464B Dimensions: (y: 4, x: 6) Coordinates: * y (y) int64 32B 0 1 2 3 * x (x) int64 48B 10 20 30 40 50 60 Data variables: temperature (y, x) float64 192B 10.98 14.3 12.06 ... 18.89 10.44 8.293 precipitation (y, x) float64 192B 0.4376 0.8918 0.9637 ... 0.01879 0.6176 >>> xr.combine_by_coords([x3, x1], join="override") Size: 256B Dimensions: (y: 2, x: 6) Coordinates: * y (y) int64 16B 0 1 * x (x) int64 48B 10 20 30 40 50 60 Data variables: temperature (y, x) float64 96B 10.98 14.3 12.06 ... 18.89 10.44 8.293 precipitation (y, x) float64 96B 0.4376 0.8918 0.9637 ... 0.01879 0.6176 >>> xr.combine_by_coords([x1, x2, x3], join="outer") Size: 464B Dimensions: (y: 4, x: 6) Coordinates: * y (y) int64 32B 0 1 2 3 * x (x) int64 48B 10 20 30 40 50 60 Data variables: temperature (y, x) float64 192B 10.98 14.3 12.06 ... 18.89 10.44 8.293 precipitation (y, x) float64 192B 0.4376 0.8918 0.9637 ... 0.01879 0.6176 You can also combine DataArray objects, but the behaviour will differ depending on whether or not the DataArrays are named. If all DataArrays are named then they will be promoted to Datasets before combining, and then the resultant Dataset will be returned, e.g. >>> named_da1 = xr.DataArray( ... name="a", data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x" ... ) >>> named_da1 Size: 16B array([1., 2.]) Coordinates: * x (x) int64 16B 0 1 >>> named_da2 = xr.DataArray( ... name="a", data=[3.0, 4.0], coords={"x": [2, 3]}, dims="x" ... ) >>> named_da2 Size: 16B array([3., 4.]) Coordinates: * x (x) int64 16B 2 3 >>> xr.combine_by_coords([named_da1, named_da2]) Size: 64B Dimensions: (x: 4) Coordinates: * x (x) int64 32B 0 1 2 3 Data variables: a (x) float64 32B 1.0 2.0 3.0 4.0 If all the DataArrays are unnamed, a single DataArray will be returned, e.g. >>> unnamed_da1 = xr.DataArray(data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x") >>> unnamed_da2 = xr.DataArray(data=[3.0, 4.0], coords={"x": [2, 3]}, dims="x") >>> xr.combine_by_coords([unnamed_da1, unnamed_da2]) Size: 32B array([1., 2., 3., 4.]) Coordinates: * x (x) int64 32B 0 1 2 3 Finally, if you attempt to combine a mix of unnamed DataArrays with either named DataArrays or Datasets, a ValueError will be raised (as this is an ambiguous operation). """ if any(isinstance(data_object, DataTree) for data_object in data_objects): raise NotImplementedError( "combine_by_coords() does not yet support DataTree objects." 
) if not data_objects: return Dataset() objs_are_unnamed_dataarrays = [ isinstance(data_object, DataArray) and data_object.name is None for data_object in data_objects ] if any(objs_are_unnamed_dataarrays): if all(objs_are_unnamed_dataarrays): # Combine into a single larger DataArray temp_datasets = [ unnamed_dataarray._to_temp_dataset() for unnamed_dataarray in data_objects ] combined_temp_dataset = _combine_single_variable_hypercube( temp_datasets, fill_value=fill_value, data_vars=data_vars, coords=coords, compat=compat, join=join, combine_attrs=combine_attrs, ) return DataArray()._from_temp_dataset(combined_temp_dataset) else: # Must be a mix of unnamed dataarrays with either named dataarrays or with datasets # Can't combine these as we wouldn't know whether to merge or concatenate the arrays raise ValueError( "Can't automatically combine unnamed DataArrays with named DataArrays or Datasets." ) else: # Promote any named DataArrays to single-variable Datasets to simplify combining data_objects = [ obj.to_dataset() if isinstance(obj, DataArray) else obj for obj in data_objects ] # Group by data vars grouped_by_vars = groupby_defaultdict(data_objects, key=vars_as_keys) # Perform the multidimensional combine on each group of data variables # before merging back together concatenated_grouped_by_data_vars = tuple( _combine_single_variable_hypercube( tuple(datasets_with_same_vars), fill_value=fill_value, data_vars=data_vars, coords=coords, compat=compat, join=join, combine_attrs=combine_attrs, ) for vars, datasets_with_same_vars in grouped_by_vars ) return merge( concatenated_grouped_by_data_vars, compat=compat, fill_value=fill_value, join=join, combine_attrs=combine_attrs, ) python-xarray-2026.01.0/xarray/structure/concat.py0000664000175000017500000011457215136607163022244 0ustar alastairalastairfrom __future__ import annotations from collections.abc import Hashable, Iterable from typing import TYPE_CHECKING, Any, Literal, Union, overload import numpy as np import pandas as pd from xarray.core import dtypes, utils from xarray.core.coordinates import Coordinates from xarray.core.duck_array_ops import lazy_array_equiv from xarray.core.indexes import Index, PandasIndex from xarray.core.types import T_DataArray, T_Dataset, T_Variable from xarray.core.utils import emit_user_level_warning from xarray.core.variable import Variable from xarray.core.variable import concat as concat_vars from xarray.structure.alignment import align, reindex_variables from xarray.structure.merge import ( _VALID_COMPAT, collect_variables_and_indexes, merge_attrs, merge_collected, ) from xarray.util.deprecation_helpers import ( _COMPAT_CONCAT_DEFAULT, _COORDS_DEFAULT, _DATA_VARS_DEFAULT, _JOIN_DEFAULT, CombineKwargDefault, ) if TYPE_CHECKING: from xarray.core.datatree import DataTree from xarray.core.types import ( CombineAttrsOptions, CompatOptions, ConcatOptions, JoinOptions, ) T_DataVars = Union[ConcatOptions, Iterable[Hashable], None] @overload def concat( objs: Iterable[DataTree], dim: Hashable | T_Variable | T_DataArray | pd.Index | Any, data_vars: T_DataVars | CombineKwargDefault = _DATA_VARS_DEFAULT, coords: ConcatOptions | Iterable[Hashable] | CombineKwargDefault = _COORDS_DEFAULT, compat: CompatOptions | CombineKwargDefault = _COMPAT_CONCAT_DEFAULT, positions: Iterable[Iterable[int]] | None = None, fill_value: object = dtypes.NA, join: JoinOptions | CombineKwargDefault = _JOIN_DEFAULT, combine_attrs: CombineAttrsOptions = "override", create_index_for_new_dim: bool = True, ) -> DataTree: ... 
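# The Dataset and DataArray overloads below mirror the DataTree overload above;
# at runtime, concat dispatches on the type of the first object in ``objs``.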
# TODO: replace dim: Any by 1D array_likes @overload def concat( objs: Iterable[T_Dataset], dim: Hashable | T_Variable | T_DataArray | pd.Index | Any, data_vars: T_DataVars | CombineKwargDefault = _DATA_VARS_DEFAULT, coords: ConcatOptions | Iterable[Hashable] | CombineKwargDefault = _COORDS_DEFAULT, compat: CompatOptions | CombineKwargDefault = _COMPAT_CONCAT_DEFAULT, positions: Iterable[Iterable[int]] | None = None, fill_value: object = dtypes.NA, join: JoinOptions | CombineKwargDefault = _JOIN_DEFAULT, combine_attrs: CombineAttrsOptions = "override", create_index_for_new_dim: bool = True, ) -> T_Dataset: ... @overload def concat( objs: Iterable[T_DataArray], dim: Hashable | T_Variable | T_DataArray | pd.Index | Any, data_vars: T_DataVars | CombineKwargDefault = _DATA_VARS_DEFAULT, coords: ConcatOptions | Iterable[Hashable] | CombineKwargDefault = _COORDS_DEFAULT, compat: CompatOptions | CombineKwargDefault = _COMPAT_CONCAT_DEFAULT, positions: Iterable[Iterable[int]] | None = None, fill_value: object = dtypes.NA, join: JoinOptions | CombineKwargDefault = _JOIN_DEFAULT, combine_attrs: CombineAttrsOptions = "override", create_index_for_new_dim: bool = True, ) -> T_DataArray: ... def concat( objs, dim, data_vars: T_DataVars | CombineKwargDefault = _DATA_VARS_DEFAULT, coords: ConcatOptions | Iterable[Hashable] | CombineKwargDefault = _COORDS_DEFAULT, compat: CompatOptions | CombineKwargDefault = _COMPAT_CONCAT_DEFAULT, positions=None, fill_value=dtypes.NA, join: JoinOptions | CombineKwargDefault = _JOIN_DEFAULT, combine_attrs: CombineAttrsOptions = "override", create_index_for_new_dim: bool = True, ): """Concatenate xarray objects along a new or existing dimension. Parameters ---------- objs : sequence of DataArray, Dataset or DataTree xarray objects to concatenate together. Each object is expected to consist of variables and coordinates with matching shapes except for along the concatenated dimension. dim : Hashable or Variable or DataArray or pandas.Index Name of the dimension to concatenate along. This can either be a new dimension name, in which case it is added along axis=0, or an existing dimension name, in which case the location of the dimension is unchanged. If dimension is provided as a Variable, DataArray or Index, its name is used as the dimension to concatenate along and the values are added as a coordinate. data_vars : {"minimal", "different", "all", None} or list of str, default: "all" These data variables will be concatenated together: * "minimal": Only data variables in which the dimension already appears are included. * "different": Data variables which are not equal (ignoring attributes) across all datasets are also concatenated (as well as all for which dimension already appears). Beware: this option may load the data payload of data variables into memory if they are not already loaded. * "all": All data variables will be concatenated. * None: Means ``"all"`` if ``dim`` is not present in any of the ``objs``, and ``"minimal"`` if ``dim`` is present in any of ``objs``. * list of str: The listed data variables will be concatenated, in addition to the "minimal" data variables. If objects are DataArrays, data_vars must be "all" or None. coords : {"minimal", "different", "all"} or list of str, default: "different" These coordinate variables will be concatenated together: * "minimal": Only coordinates in which the dimension already appears are included. 
* "different": Coordinates which are not equal (ignoring attributes) across all datasets are also concatenated (as well as all for which dimension already appears). Beware: this option may load the data payload of coordinate variables into memory if they are not already loaded. * "all": All coordinate variables will be concatenated, except those corresponding to other dimensions. * list of str: The listed coordinate variables will be concatenated, in addition to the "minimal" coordinates. compat : {"identical", "equals", "broadcast_equals", "no_conflicts", "override"}, \ default: "equals" String indicating how to compare non-concatenated variables of the same name for potential conflicts. This is passed down to merge. - "broadcast_equals": all values must be equal when variables are broadcast against each other to ensure common dimensions. - "equals": all values and dimensions must be the same. - "identical": all values, dimensions and attributes must be the same. - "no_conflicts": only values which are not null in both datasets must be equal. The returned dataset then contains the combination of all non-null values. - "override": skip comparing and pick variable from first dataset positions : None or list of integer arrays, optional List of integer arrays which specifies the integer positions to which to assign each dataset along the concatenated dimension. If not supplied, objects are concatenated in the provided order. fill_value : scalar or dict-like, optional Value to use for newly missing values. If a dict-like, maps variable names to fill values. Use a data array's name to refer to its values. join : {"outer", "inner", "left", "right", "exact"}, default: "outer" String indicating how to combine differing indexes (excluding dim) in objects - "outer": use the union of object indexes - "inner": use the intersection of object indexes - "left": use indexes from the first object with each dimension - "right": use indexes from the last object with each dimension - "exact": instead of aligning, raise `ValueError` when indexes to be aligned are not equal - "override": if indexes are of same size, rewrite indexes to be those of the first object with that dimension. Indexes for the same dimension must have the same size in all objects. combine_attrs : {"drop", "identical", "no_conflicts", "drop_conflicts", \ "override"} or callable, default: "override" A callable or a string indicating how to combine attrs of the objects being merged: - "drop": empty attrs on returned Dataset. - "identical": all attrs must be the same on every object. - "no_conflicts": attrs from all objects are combined, any that have the same name must also have the same value. - "drop_conflicts": attrs from all objects are combined, any that have the same name but different values are dropped. - "override": skip comparing and copy attrs from the first dataset to the result. If a callable, it must expect a sequence of ``attrs`` dicts and a context object as its only parameters. create_index_for_new_dim : bool, default: True Whether to create a new ``PandasIndex`` object when the objects being concatenated contain scalar variables named ``dim``. Returns ------- concatenated : type of objs See also -------- merge Examples -------- >>> da = xr.DataArray( ... np.arange(6).reshape(2, 3), [("x", ["a", "b"]), ("y", [10, 20, 30])] ... 
) >>> da Size: 48B array([[0, 1, 2], [3, 4, 5]]) Coordinates: * x (x) >> xr.concat([da.isel(y=slice(0, 1)), da.isel(y=slice(1, None))], dim="y") Size: 48B array([[0, 1, 2], [3, 4, 5]]) Coordinates: * x (x) >> xr.concat([da.isel(x=0), da.isel(x=1)], "x", coords="minimal") Size: 48B array([[0, 1, 2], [3, 4, 5]]) Coordinates: * x (x) >> xr.concat([da.isel(x=0), da.isel(x=1)], "new_dim", coords="all") Size: 48B array([[0, 1, 2], [3, 4, 5]]) Coordinates: x (new_dim) >> xr.concat( ... [da.isel(x=0), da.isel(x=1)], ... pd.Index([-90, -100], name="new_dim"), ... coords="all", ... ) Size: 48B array([[0, 1, 2], [3, 4, 5]]) Coordinates: * new_dim (new_dim) int64 16B -90 -100 x (new_dim) >> ds = xr.Dataset(coords={"x": 0}) >>> xr.concat([ds, ds], dim="x") Size: 16B Dimensions: (x: 2) Coordinates: * x (x) int64 16B 0 0 Data variables: *empty* >>> xr.concat([ds, ds], dim="x").indexes Indexes: x Index([0, 0], dtype='int64', name='x') >>> xr.concat([ds, ds], dim="x", create_index_for_new_dim=False).indexes Indexes: *empty* """ # TODO: add ignore_index arguments copied from pandas.concat # TODO: support concatenating scalar coordinates even if the concatenated # dimension already exists from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset from xarray.core.datatree import DataTree try: first_obj, objs = utils.peek_at(objs) except StopIteration as err: raise ValueError("must supply at least one object to concatenate") from err if not isinstance(compat, CombineKwargDefault) and compat not in set( _VALID_COMPAT ) - {"minimal"}: raise ValueError( f"compat={compat!r} invalid: must be 'broadcast_equals', 'equals', 'identical', 'no_conflicts' or 'override'" ) if isinstance(first_obj, DataTree): return _datatree_concat( objs, dim=dim, data_vars=data_vars, coords=coords, compat=compat, positions=positions, fill_value=fill_value, join=join, combine_attrs=combine_attrs, create_index_for_new_dim=create_index_for_new_dim, ) elif isinstance(first_obj, DataArray): return _dataarray_concat( objs, dim=dim, data_vars=data_vars, coords=coords, compat=compat, positions=positions, fill_value=fill_value, join=join, combine_attrs=combine_attrs, create_index_for_new_dim=create_index_for_new_dim, ) elif isinstance(first_obj, Dataset): return _dataset_concat( objs, dim=dim, data_vars=data_vars, coords=coords, compat=compat, positions=positions, fill_value=fill_value, join=join, combine_attrs=combine_attrs, create_index_for_new_dim=create_index_for_new_dim, ) else: raise TypeError( "can only concatenate xarray Dataset and DataArray " f"objects, got {type(first_obj)}" ) def _calc_concat_dim_index( dim_or_data: Hashable | Any, ) -> tuple[Hashable, PandasIndex | None]: """Infer the dimension name and 1d index / coordinate variable (if appropriate) for concatenating along the new dimension. 
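    For example, a plain hashable such as ``"time"`` is returned as ``("time", None)``,
    while ``pd.Index([10, 20], name="time")`` is returned as ``("time", <PandasIndex>)``
    so that the index values become a coordinate along the new dimension; an unnamed
    array-like falls back to the dimension name ``"concat_dim"``.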
""" from xarray.core.dataarray import DataArray dim: Hashable | None if utils.hashable(dim_or_data): dim = dim_or_data index = None else: if not isinstance(dim_or_data, DataArray | Variable): dim = getattr(dim_or_data, "name", None) if dim is None: dim = "concat_dim" else: (dim,) = dim_or_data.dims coord_dtype = getattr(dim_or_data, "dtype", None) index = PandasIndex(dim_or_data, dim, coord_dtype=coord_dtype) return dim, index def _calc_concat_over( datasets: list[T_Dataset], dim: Hashable, all_dims: set[Hashable], data_vars: T_DataVars | Iterable[Hashable] | CombineKwargDefault, coords: ConcatOptions | Iterable[Hashable] | CombineKwargDefault, compat: CompatOptions | CombineKwargDefault, ) -> tuple[set[Hashable], dict[Hashable, bool], list[int], set[Hashable]]: """ Determine which dataset variables need to be concatenated in the result, """ # variables to be concatenated concat_over = set() # variables checked for equality equals: dict[Hashable, bool] = {} # skip merging these variables. # if concatenating over a dimension 'x' that is associated with an index over 2 variables, # 'x' and 'y', then we assert join="equals" on `y` and don't need to merge it. # that assertion happens in the align step prior to this function being called skip_merge: set[Hashable] = set() if dim in all_dims: concat_over_existing_dim = True concat_over.add(dim) else: concat_over_existing_dim = False if data_vars == "minimal" and coords == "minimal" and not concat_over_existing_dim: raise ValueError( "Cannot specify both data_vars='minimal' and coords='minimal' when " "concatenating over a new dimension." ) if data_vars is None or ( isinstance(data_vars, CombineKwargDefault) and data_vars._value is None ): data_vars = "minimal" if concat_over_existing_dim else "all" concat_dim_lengths = [] for ds in datasets: if concat_over_existing_dim and dim not in ds.dims and dim in ds: ds = ds.set_coords(dim) concat_over.update(k for k, v in ds.variables.items() if dim in v.dims) for _, idx_vars in ds.xindexes.group_by_index(): if any(dim in v.dims for v in idx_vars.values()): skip_merge.update(idx_vars.keys()) concat_dim_lengths.append(ds.sizes.get(dim, 1)) def process_subset_opt( opt: ConcatOptions | Iterable[Hashable] | CombineKwargDefault, subset: Literal["coords", "data_vars"], ) -> None: original = set(concat_over) compat_str = ( compat._value if isinstance(compat, CombineKwargDefault) else compat ) assert compat_str is not None if isinstance(opt, str | CombineKwargDefault): if opt == "different": if isinstance(compat, CombineKwargDefault) and compat != "override": if not isinstance(opt, CombineKwargDefault): emit_user_level_warning( compat.warning_message( "This change will result in the following ValueError: " f"Cannot specify both {subset}='different' and compat='override'.", recommend_set_options=False, ), FutureWarning, ) if compat == "override": raise ValueError( f"Cannot specify both {subset}='different' and compat='override'." + ( compat.error_message() if isinstance(compat, CombineKwargDefault) else "" ) ) # all nonindexes that are not the same in each dataset for k in getattr(datasets[0], subset): if k not in concat_over: equal = None variables = [ ds.variables[k] for ds in datasets if k in ds.variables ] if len(variables) == 1: # coords="different" doesn't make sense when only one object # contains a particular variable. break elif len(variables) != len(datasets) and opt == "different": raise ValueError( f"{k!r} not present in all datasets and coords='different'. 
" f"Either add {k!r} to datasets where it is missing or " "specify coords='minimal'." ) # first check without comparing values i.e. no computes for var in variables[1:]: equal = getattr(variables[0], compat_str)( var, equiv=lazy_array_equiv ) if equal is not True: # exit early if we know these are not equal or that # equality cannot be determined i.e. one or all of # the variables wraps a numpy array break if equal is False: concat_over.add(k) elif equal is None: # Compare the variable of all datasets vs. the one # of the first dataset. Perform the minimum amount of # loads in order to avoid multiple loads from disk # while keeping the RAM footprint low. v_lhs = datasets[0].variables[k].load() # We'll need to know later on if variables are equal. computed = [] for ds_rhs in datasets[1:]: v_rhs = ds_rhs.variables[k].compute() computed.append(v_rhs) if not getattr(v_lhs, compat_str)(v_rhs): concat_over.add(k) equals[k] = False # computed variables are not to be re-computed # again in the future for ds, v in zip( datasets[1:], computed, strict=False ): ds.variables[k].data = v.data break else: equal = True if TYPE_CHECKING: assert equal is not None equals[k] = equal elif opt == "all": concat_over.update( set().union( *[set(getattr(d, subset)) - set(d.dims) for d in datasets] ) ) elif opt == "minimal": pass else: raise ValueError(f"unexpected value for {subset}: {opt}") if ( isinstance(opt, CombineKwargDefault) and opt._value is not None and original != concat_over and concat_over_existing_dim ): warnings.append( opt.warning_message( "This is likely to lead to different results when multiple datasets " "have matching variables with overlapping values.", ) ) else: valid_vars = tuple(getattr(datasets[0], subset)) invalid_vars = [k for k in opt if k not in valid_vars] if invalid_vars: if subset == "coords": raise ValueError( f"the variables {invalid_vars} in coords are not " f"found in the coordinates of the first dataset {valid_vars}" ) else: # note: data_vars are not listed in the error message here, # because there may be lots of them raise ValueError( f"the variables {invalid_vars} in data_vars are not " f"found in the data variables of the first dataset" ) concat_over.update(opt) warnings: list[str] = [] process_subset_opt(data_vars, "data_vars") process_subset_opt(coords, "coords") for warning in warnings: emit_user_level_warning(warning, FutureWarning) return concat_over, equals, concat_dim_lengths, skip_merge # determine dimensional coordinate names and a dict mapping name to DataArray def _parse_datasets( datasets: list[T_Dataset], ) -> tuple[ set[Hashable], dict[Hashable, Variable], dict[Hashable, int], set[Hashable], set[Hashable], list[Hashable], ]: dims: set[Hashable] = set() all_coord_names: set[Hashable] = set() data_vars: set[Hashable] = set() # list of data_vars dim_coords: dict[Hashable, Variable] = {} # maps dim name to variable dims_sizes: dict[Hashable, int] = {} # shared dimension sizes to expand variables variables_order: dict[Hashable, Variable] = {} # variables in order of appearance for ds in datasets: dims_sizes.update(ds.sizes) all_coord_names.update(ds.coords) data_vars.update(ds.data_vars) variables_order.update(ds.variables) # preserves ordering of dimensions for dim in ds.dims: if dim in dims: continue if dim in ds.coords and dim not in dim_coords: dim_coords[dim] = ds.coords[dim].variable dims = dims | set(ds.dims) return ( dims, dim_coords, dims_sizes, all_coord_names, data_vars, list(variables_order), ) def _dataset_concat( datasets: Iterable[T_Dataset], dim: 
Hashable | T_Variable | T_DataArray | pd.Index, data_vars: T_DataVars | CombineKwargDefault, coords: ConcatOptions | Iterable[Hashable] | CombineKwargDefault, compat: CompatOptions | CombineKwargDefault, positions: Iterable[Iterable[int]] | None, fill_value: Any, join: JoinOptions | CombineKwargDefault, combine_attrs: CombineAttrsOptions, create_index_for_new_dim: bool, *, preexisting_dim: bool = False, ) -> T_Dataset: """ Concatenate a sequence of datasets along a new or existing dimension """ from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset datasets = list(datasets) if not all(isinstance(dataset, Dataset) for dataset in datasets): raise TypeError( "The elements in the input list need to be either all 'Dataset's or all 'DataArray's" ) dim_var: Variable | None if isinstance(dim, DataArray): dim_var = dim.variable elif isinstance(dim, Variable): dim_var = dim else: dim_var = None dim_name, index = _calc_concat_dim_index(dim) # Make sure we're working on a copy (we'll be loading variables) datasets = [ds.copy() for ds in datasets] datasets = list( align( *datasets, join=join, copy=False, exclude=[dim_name], fill_value=fill_value ) ) all_dims, dim_coords, dims_sizes, coord_names, data_names, vars_order = ( _parse_datasets(datasets) ) if preexisting_dim: # When concatenating DataTree objects, a dimension may be pre-existing # because it exists elsewhere on the trees, even if it does not exist # on the dataset objects at this node. all_dims.add(dim_name) indexed_dim_names = set(dim_coords) both_data_and_coords = coord_names & data_names if both_data_and_coords: raise ValueError( f"{both_data_and_coords!r} is a coordinate in some datasets but not others." ) # we don't want the concat dimension in the result dataset yet dim_coords.pop(dim_name, None) dims_sizes.pop(dim_name, None) # case where concat dimension is a coordinate or data_var but not a dimension if ( dim_name in coord_names or dim_name in data_names ) and dim_name not in indexed_dim_names: datasets = [ ds.expand_dims(dim_name, create_index_for_new_dim=create_index_for_new_dim) for ds in datasets ] all_dims.add(dim_name) # This isn't being used any more, but keeping it up to date # just in case we decide to use it later. 
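        # (the branch above covers inputs like
        # ``xr.concat([xr.Dataset(coords={"x": 0}), xr.Dataset(coords={"x": 1})], "x")``,
        # where "x" is a scalar coordinate rather than a dimension: each dataset is
        # expanded so that "x" becomes a length-1 dimension before concatenation)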
indexed_dim_names.add(dim_name) # determine which variables to concatenate concat_over, equals, concat_dim_lengths, skip_merge = _calc_concat_over( datasets, dim_name, all_dims, data_vars, coords, compat ) # determine which variables to merge, and then merge them according to compat variables_to_merge = (coord_names | data_names) - concat_over - skip_merge result_vars = {} result_indexes = {} if variables_to_merge: grouped = { k: v for k, v in collect_variables_and_indexes(datasets).items() if k in variables_to_merge } merged_vars, merged_indexes = merge_collected( grouped, compat=compat, equals=equals ) result_vars.update(merged_vars) result_indexes.update(merged_indexes) result_vars.update(dim_coords) # assign attrs and encoding from first dataset result_attrs = merge_attrs([ds.attrs for ds in datasets], combine_attrs) result_encoding = datasets[0].encoding # check that global attributes are fixed across all datasets if necessary if compat == "identical": for ds in datasets[1:]: if not utils.dict_equiv(ds.attrs, result_attrs): raise ValueError("Dataset global attributes not equal.") # we've already verified everything is consistent; now, calculate # shared dimension sizes so we can expand the necessary variables def ensure_common_dims(vars, concat_dim_lengths): # ensure each variable with the given name shares the same # dimensions and the same shape for all of them except along the # concat dimension common_dims = tuple(utils.OrderedSet(d for v in vars for d in v.dims)) if dim_name not in common_dims: common_dims = (dim_name,) + common_dims for var, dim_len in zip(vars, concat_dim_lengths, strict=True): if var.dims != common_dims: common_shape = tuple(dims_sizes.get(d, dim_len) for d in common_dims) var = var.set_dims(common_dims, common_shape) yield var # get the indexes to concatenate together, create a PandasIndex # for any scalar coordinate variable found with ``name`` matching ``dim``. # TODO: depreciate concat a mix of scalar and dimensional indexed coordinates? # TODO: (benbovy - explicit indexes): check index types and/or coordinates # of all datasets? def get_indexes(name): for ds in datasets: if name in ds._indexes: yield ds._indexes[name] elif name == dim_name: var = ds._variables[name] if not var.dims: data = var.set_dims(dim_name).values if create_index_for_new_dim: yield PandasIndex(data, dim_name, coord_dtype=var.dtype) # create concatenation index, needed for later reindexing # use np.cumulative_sum(concat_dim_lengths, include_initial=True) when we support numpy>=2 file_start_indexes = np.append(0, np.cumsum(concat_dim_lengths)) concat_index_size = file_start_indexes[-1] variable_index_mask = np.ones(concat_index_size, dtype=bool) variable_reindexer = None # stack up each variable and/or index to fill-out the dataset (in order) # n.b. this loop preserves variable order, needed for groupby. ndatasets = len(datasets) for name in vars_order: if name in concat_over and name not in result_indexes: variables = [] # Initialize the mask to all True then set False if any name is missing in # the datasets: variable_index_mask.fill(True) var_concat_dim_length = [] for i, ds in enumerate(datasets): if name in ds.variables: variables.append(ds[name].variable) var_concat_dim_length.append(concat_dim_lengths[i]) else: # raise if coordinate not in all datasets if name in coord_names: raise ValueError( f"coordinate {name!r} not present in all datasets." 
) # Mask out the indexes without the name: start = file_start_indexes[i] end = file_start_indexes[i + 1] variable_index_mask[slice(start, end)] = False vars = ensure_common_dims(variables, var_concat_dim_length) # Try to concatenate the indexes, concatenate the variables when no index # is found on all datasets. indexes: list[Index] = list(get_indexes(name)) if indexes: if len(indexes) < ndatasets: raise ValueError( f"{name!r} must have either an index or no index in all datasets, " f"found {len(indexes)}/{len(datasets)} datasets with an index." ) combined_idx = indexes[0].concat(indexes, dim_name, positions) if name in datasets[0]._indexes: idx_vars = datasets[0].xindexes.get_all_coords(name) else: # index created from a scalar coordinate idx_vars = {name: datasets[0][name].variable} result_indexes.update(dict.fromkeys(idx_vars, combined_idx)) combined_idx_vars = combined_idx.create_variables(idx_vars) for k, v in combined_idx_vars.items(): v.attrs = merge_attrs( [ds.variables[k].attrs for ds in datasets], combine_attrs=combine_attrs, ) result_vars[k] = v else: combined_var = concat_vars( vars, dim_name, positions, combine_attrs=combine_attrs ) # reindex if variable is not present in all datasets if not variable_index_mask.all(): if variable_reindexer is None: # allocate only once variable_reindexer = np.empty( concat_index_size, # cannot use uint since we need -1 as a sentinel for reindexing dtype=np.min_scalar_type(-concat_index_size), ) np.cumsum(variable_index_mask, out=variable_reindexer) # variable_index_mask is boolean, so the first element is 1. # offset by 1 to start at 0. variable_reindexer -= 1 variable_reindexer[~variable_index_mask] = -1 combined_var = reindex_variables( variables={name: combined_var}, dim_pos_indexers={dim_name: variable_reindexer}, fill_value=fill_value, )[name] result_vars[name] = combined_var elif name in result_vars: # preserves original variable order result_vars[name] = result_vars.pop(name) absent_coord_names = coord_names - set(result_vars) if absent_coord_names: raise ValueError( f"Variables {absent_coord_names!r} are coordinates in some datasets but not others." 
) result_data_vars = {} coord_vars = {} for name, result_var in result_vars.items(): if name in coord_names: coord_vars[name] = result_var else: result_data_vars[name] = result_var if index is not None: if dim_var is not None: index_vars = index.create_variables({dim_name: dim_var}) else: index_vars = index.create_variables() coord_vars[dim_name] = index_vars[dim_name] result_indexes[dim_name] = index coords_obj = Coordinates(coord_vars, indexes=result_indexes) result = type(datasets[0])(result_data_vars, coords=coords_obj, attrs=result_attrs) result.encoding = result_encoding return result def _dataarray_concat( arrays: Iterable[T_DataArray], dim: Hashable | T_Variable | T_DataArray | pd.Index, data_vars: T_DataVars | Iterable[Hashable] | CombineKwargDefault, coords: ConcatOptions | Iterable[Hashable] | CombineKwargDefault, compat: CompatOptions | CombineKwargDefault, positions: Iterable[Iterable[int]] | None, fill_value: object, join: JoinOptions | CombineKwargDefault, combine_attrs: CombineAttrsOptions, create_index_for_new_dim: bool, ) -> T_DataArray: from xarray.core.dataarray import DataArray arrays = list(arrays) if not all(isinstance(array, DataArray) for array in arrays): raise TypeError( "The elements in the input list need to be either all 'Dataset's or all 'DataArray's" ) # Allow passing `all` or `None` even though we always use `data_vars='all'` # when passing off to `_dataset_concat`. if not isinstance(data_vars, CombineKwargDefault) and data_vars not in [ "all", None, ]: raise ValueError( "data_vars is not a valid argument when concatenating DataArray objects" ) datasets = [] for n, arr in enumerate(arrays): if n == 0: name = arr.name elif name != arr.name: if compat == "identical": raise ValueError("array names not identical") else: arr = arr.rename(name) datasets.append(arr._to_temp_dataset()) ds = _dataset_concat( datasets, dim=dim, data_vars="all", coords=coords, compat=compat, positions=positions, fill_value=fill_value, join=join, combine_attrs=combine_attrs, create_index_for_new_dim=create_index_for_new_dim, ) merged_attrs = merge_attrs([da.attrs for da in arrays], combine_attrs) result = arrays[0]._from_temp_dataset(ds, name) result.attrs = merged_attrs return result def _datatree_concat( objs: Iterable[DataTree], dim: Hashable | Variable | T_DataArray | pd.Index | Any, data_vars: T_DataVars | Iterable[Hashable] | CombineKwargDefault, coords: ConcatOptions | Iterable[Hashable] | CombineKwargDefault, compat: CompatOptions | CombineKwargDefault, positions: Iterable[Iterable[int]] | None, fill_value: Any, join: JoinOptions | CombineKwargDefault, combine_attrs: CombineAttrsOptions, create_index_for_new_dim: bool, ) -> DataTree: """ Concatenate a sequence of datatrees along a new or existing dimension """ from xarray.core.datatree import DataTree from xarray.core.treenode import TreeIsomorphismError, group_subtrees dim_name, _ = _calc_concat_dim_index(dim) objs = list(objs) if not all(isinstance(obj, DataTree) for obj in objs): raise TypeError("All objects to concatenate must be DataTree objects") if compat == "identical": if any(obj.name != objs[0].name for obj in objs[1:]): raise ValueError("DataTree names not identical") dim_in_tree = any(dim_name in node.dims for node in objs[0].subtree) results = {} try: for path, nodes in group_subtrees(*objs): datasets_to_concat = [node.to_dataset() for node in nodes] results[path] = _dataset_concat( datasets_to_concat, dim=dim, data_vars=data_vars, coords=coords, compat=compat, positions=positions, fill_value=fill_value, join=join, 
combine_attrs=combine_attrs, create_index_for_new_dim=create_index_for_new_dim, preexisting_dim=dim_in_tree, ) except TreeIsomorphismError as e: raise ValueError("All trees must be isomorphic to be concatenated") from e return DataTree.from_dict(results, name=objs[0].name) python-xarray-2026.01.0/xarray/structure/alignment.py0000664000175000017500000013032415136607163022744 0ustar alastairalastairfrom __future__ import annotations import functools import operator from collections import defaultdict from collections.abc import Callable, Hashable, Iterable, Mapping from contextlib import suppress from itertools import starmap from typing import TYPE_CHECKING, Any, Final, Generic, TypeVar, get_args, overload import numpy as np import pandas as pd from xarray.core import dtypes from xarray.core.indexes import ( Index, Indexes, PandasIndex, PandasMultiIndex, indexes_all_equal, safe_cast_to_index, ) from xarray.core.types import JoinOptions, T_Alignable from xarray.core.utils import emit_user_level_warning, is_dict_like, is_full_slice from xarray.core.variable import Variable, as_compatible_data, calculate_dimensions from xarray.util.deprecation_helpers import CombineKwargDefault if TYPE_CHECKING: from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset from xarray.core.types import ( Alignable, T_DataArray, T_Dataset, T_DuckArray, ) class AlignmentError(ValueError): """Error class for alignment failures due to incompatible arguments.""" def reindex_variables( variables: Mapping[Any, Variable], dim_pos_indexers: Mapping[Any, Any], copy: bool = True, fill_value: Any = dtypes.NA, sparse: bool = False, ) -> dict[Hashable, Variable]: """Conform a dictionary of variables onto a new set of variables reindexed with dimension positional indexers and possibly filled with missing values. Not public API. """ new_variables = {} dim_sizes = calculate_dimensions(variables) masked_dims = set() unchanged_dims = set() for dim, indxr in dim_pos_indexers.items(): # Negative values in dim_pos_indexers mean values missing in the new index # See ``Index.reindex_like``. if (indxr < 0).any(): masked_dims.add(dim) elif np.array_equal(indxr, np.arange(dim_sizes.get(dim, 0))): unchanged_dims.add(dim) for name, var in variables.items(): if isinstance(fill_value, dict): fill_value_ = fill_value.get(name, dtypes.NA) else: fill_value_ = fill_value if sparse: var = var._as_sparse(fill_value=fill_value_) indxr = tuple( slice(None) if d in unchanged_dims else dim_pos_indexers.get(d, slice(None)) for d in var.dims ) needs_masking = any(d in masked_dims for d in var.dims) if needs_masking: new_var = var._getitem_with_mask(indxr, fill_value=fill_value_) elif all(is_full_slice(k) for k in indxr): # no reindexing necessary # here we need to manually deal with copying data, since # we neither created a new ndarray nor used fancy indexing new_var = var.copy(deep=copy) else: new_var = var[indxr] new_variables[name] = new_var return new_variables def _normalize_indexes( indexes: Mapping[Any, Any | T_DuckArray], ) -> Indexes: """Normalize the indexes/indexers given for re-indexing or alignment. Wrap any arbitrary array or `pandas.Index` as an Xarray `PandasIndex` associated with its corresponding dimension coordinate variable. 
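    For example, passing ``{"x": [10, 20, 30]}`` returns an ``Indexes`` object
    containing a single ``PandasIndex`` named ``"x"`` along dimension ``"x"``,
    together with its corresponding coordinate variable.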
""" xr_indexes: dict[Hashable, Index] = {} xr_variables: dict[Hashable, Variable] if isinstance(indexes, Indexes): xr_variables = dict(indexes.variables) else: xr_variables = {} for k, idx in indexes.items(): if not isinstance(idx, Index): if getattr(idx, "dims", (k,)) != (k,): raise AlignmentError( f"Indexer has dimensions {idx.dims} that are different " f"from that to be indexed along '{k}'" ) data: T_DuckArray = as_compatible_data(idx) pd_idx = safe_cast_to_index(data) if pd_idx.name != k: pd_idx = pd_idx.copy() pd_idx.name = k if isinstance(pd_idx, pd.MultiIndex): idx = PandasMultiIndex(pd_idx, k) else: idx = PandasIndex(pd_idx, k, coord_dtype=data.dtype) xr_variables.update(idx.create_variables()) xr_indexes[k] = idx return Indexes(xr_indexes, xr_variables) CoordNamesAndDims = tuple[tuple[Hashable, tuple[Hashable, ...]], ...] MatchingIndexKey = tuple[CoordNamesAndDims, type[Index]] IndexesToAlign = dict[MatchingIndexKey, Index] IndexVarsToAlign = dict[MatchingIndexKey, dict[Hashable, Variable]] class Aligner(Generic[T_Alignable]): """Implements all the complex logic for the re-indexing and alignment of Xarray objects. For internal use only, not public API. Usage: aligner = Aligner(*objects, **kwargs) aligner.align() aligned_objects = aligner.results """ objects: tuple[T_Alignable, ...] results: tuple[T_Alignable, ...] objects_matching_index_vars: tuple[ dict[MatchingIndexKey, dict[Hashable, Variable]], ... ] join: JoinOptions | CombineKwargDefault exclude_dims: frozenset[Hashable] exclude_vars: frozenset[Hashable] copy: bool fill_value: Any sparse: bool indexes: dict[MatchingIndexKey, Index] index_vars: dict[MatchingIndexKey, dict[Hashable, Variable]] all_indexes: dict[MatchingIndexKey, list[Index]] all_index_vars: dict[MatchingIndexKey, list[dict[Hashable, Variable]]] aligned_indexes: dict[MatchingIndexKey, Index] aligned_index_vars: dict[MatchingIndexKey, dict[Hashable, Variable]] reindex: dict[MatchingIndexKey, bool] keep_original_indexes: set[MatchingIndexKey] reindex_kwargs: dict[str, Any] unindexed_dim_sizes: dict[Hashable, set] new_indexes: Indexes[Index] def __init__( self, objects: Iterable[T_Alignable], join: JoinOptions | CombineKwargDefault = "inner", indexes: Mapping[Any, Any] | None = None, exclude_dims: str | Iterable[Hashable] = frozenset(), exclude_vars: Iterable[Hashable] = frozenset(), method: str | None = None, tolerance: float | Iterable[float] | str | None = None, copy: bool = True, fill_value: Any = dtypes.NA, sparse: bool = False, ): self.objects = tuple(objects) self.objects_matching_indexes: tuple[Any, ...] 
= () self.objects_matching_index_vars = () if not isinstance(join, CombineKwargDefault) and join not in get_args( JoinOptions ): raise ValueError(f"invalid value for join: {join}") self.join = join self.copy = copy self.fill_value = fill_value self.sparse = sparse if method is None and tolerance is None: self.reindex_kwargs = {} else: self.reindex_kwargs = {"method": method, "tolerance": tolerance} if isinstance(exclude_dims, str): exclude_dims = [exclude_dims] self.exclude_dims = frozenset(exclude_dims) self.exclude_vars = frozenset(exclude_vars) if indexes is None: indexes = {} self.indexes, self.index_vars = self._collect_indexes( _normalize_indexes(indexes) ) self.all_indexes = {} self.all_index_vars = {} self.unindexed_dim_sizes = {} self.aligned_indexes = {} self.aligned_index_vars = {} self.reindex = {} self.keep_original_indexes = set() self.results = tuple() def _collect_indexes( self, indexes: Indexes ) -> tuple[IndexesToAlign, IndexVarsToAlign]: """Collect input and/or object indexes for alignment. Return new dictionaries of xarray Index objects and coordinate variables, whose keys are used to later retrieve all the indexes to compare with each other (based on the name and dimensions of their associated coordinate variables as well as the Index type). """ collected_indexes = {} collected_index_vars = {} for idx, idx_vars in indexes.group_by_index(): idx_coord_names_and_dims = [] idx_all_dims: set[Hashable] = set() for name, var in idx_vars.items(): dims = var.dims idx_coord_names_and_dims.append((name, dims)) idx_all_dims.update(dims) key: MatchingIndexKey = (tuple(idx_coord_names_and_dims), type(idx)) if idx_all_dims: exclude_dims = idx_all_dims & self.exclude_dims if exclude_dims == idx_all_dims: # Do not collect an index if all the dimensions it uses are # also excluded from the alignment continue elif exclude_dims: # If the dimensions used by index partially overlap with the dimensions # excluded from alignment, it is possible to check index equality along # non-excluded dimensions only. However, in this case each of the aligned # objects must retain (a copy of) their original index. Re-indexing and # overriding the index are not supported. if self.join == "override": excl_dims_str = ", ".join(str(d) for d in exclude_dims) incl_dims_str = ", ".join( str(d) for d in idx_all_dims - exclude_dims ) raise AlignmentError( f"cannot exclude dimension(s) {excl_dims_str} from alignment " "with `join='override` because these are used by an index " f"together with non-excluded dimensions {incl_dims_str}" "(cannot safely override the index)." 
) else: self.keep_original_indexes.add(key) collected_indexes[key] = idx collected_index_vars[key] = idx_vars return collected_indexes, collected_index_vars def find_matching_indexes(self) -> None: all_indexes: dict[MatchingIndexKey, list[Index]] all_index_vars: dict[MatchingIndexKey, list[dict[Hashable, Variable]]] all_indexes_dim_sizes: dict[MatchingIndexKey, dict[Hashable, set]] objects_matching_indexes: list[dict[MatchingIndexKey, Index]] objects_matching_index_vars: list[ dict[MatchingIndexKey, dict[Hashable, Variable]] ] all_indexes = defaultdict(list) all_index_vars = defaultdict(list) all_indexes_dim_sizes = defaultdict(lambda: defaultdict(set)) objects_matching_indexes = [] objects_matching_index_vars = [] for obj in self.objects: obj_indexes, obj_index_vars = self._collect_indexes(obj.xindexes) objects_matching_indexes.append(obj_indexes) objects_matching_index_vars.append(obj_index_vars) for key, idx in obj_indexes.items(): all_indexes[key].append(idx) for key, index_vars in obj_index_vars.items(): all_index_vars[key].append(index_vars) for dim, size in calculate_dimensions(index_vars).items(): all_indexes_dim_sizes[key][dim].add(size) self.objects_matching_indexes = tuple(objects_matching_indexes) self.objects_matching_index_vars = tuple(objects_matching_index_vars) self.all_indexes = all_indexes self.all_index_vars = all_index_vars if self.join == "override": for dim_sizes in all_indexes_dim_sizes.values(): for dim, sizes in dim_sizes.items(): if len(sizes) > 1: raise AlignmentError( "cannot align objects with join='override' with matching indexes " f"along dimension {dim!r} that don't have the same size" ) def find_matching_unindexed_dims(self) -> None: unindexed_dim_sizes = defaultdict(set) for obj in self.objects: for dim in obj.dims: if dim not in self.exclude_dims and dim not in obj.xindexes.dims: unindexed_dim_sizes[dim].add(obj.sizes[dim]) self.unindexed_dim_sizes = unindexed_dim_sizes def _need_reindex(self, dim, cmp_indexes) -> bool: """Whether or not we need to reindex variables for a set of matching indexes. We don't reindex when all matching indexes are equal for two reasons: - It's faster for the usual case (already aligned objects). - It ensures it's possible to do operations that don't require alignment on indexes with duplicate values (which cannot be reindexed with pandas). This is useful, e.g., for overwriting such duplicate indexes. 
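        For example, two objects that share an identical ``pandas.Index`` over ``x``
        (even one containing duplicate labels) are left as-is, whereas a size mismatch
        for that dimension in objects where it is unindexed forces a reindex.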
""" if not indexes_all_equal(cmp_indexes, self.exclude_dims): # always reindex when matching indexes are not equal return True unindexed_dims_sizes = {} for d in dim: if d in self.unindexed_dim_sizes: sizes = self.unindexed_dim_sizes[d] if len(sizes) > 1: # reindex if different sizes are found for unindexed dims return True else: unindexed_dims_sizes[d] = next(iter(sizes)) if unindexed_dims_sizes: indexed_dims_sizes = {} for cmp in cmp_indexes: index_vars = cmp[1] for var in index_vars.values(): indexed_dims_sizes.update(var.sizes) for d, size in unindexed_dims_sizes.items(): if indexed_dims_sizes.get(d, -1) != size: # reindex if unindexed dimension size doesn't match return True return False def _get_index_joiner(self, index_cls) -> Callable: if self.join in ["outer", "inner"]: return functools.partial( functools.reduce, functools.partial(index_cls.join, how=self.join), ) elif self.join == "left": return operator.itemgetter(0) elif self.join == "right": return operator.itemgetter(-1) elif self.join == "override": # We rewrite all indexes and then use join='left' return operator.itemgetter(0) else: # join='exact' return dummy lambda (error is raised) return lambda _: None def align_indexes(self) -> None: """Compute all aligned indexes and their corresponding coordinate variables.""" aligned_indexes: dict[MatchingIndexKey, Index] = {} aligned_index_vars: dict[MatchingIndexKey, dict[Hashable, Variable]] = {} reindex: dict[MatchingIndexKey, bool] = {} new_indexes: dict[Hashable, Index] = {} new_index_vars: dict[Hashable, Variable] = {} def update_dicts( key: MatchingIndexKey, idx: Index, idx_vars: dict[Hashable, Variable], need_reindex: bool, ): reindex[key] = need_reindex aligned_indexes[key] = idx aligned_index_vars[key] = idx_vars for name, var in idx_vars.items(): if name in new_indexes: other_idx = new_indexes[name] other_var = new_index_vars[name] raise AlignmentError( f"cannot align objects on coordinate {name!r} because of conflicting indexes\n" f"first index: {idx!r}\nsecond index: {other_idx!r}\n" f"first variable: {var!r}\nsecond variable: {other_var!r}\n" ) new_indexes[name] = idx new_index_vars[name] = var for key, matching_indexes in self.all_indexes.items(): matching_index_vars = self.all_index_vars[key] dims = {d for coord in matching_index_vars[0].values() for d in coord.dims} index_cls = key[1] if self.join == "override": joined_index = matching_indexes[0] joined_index_vars = matching_index_vars[0] need_reindex = False elif key in self.indexes: joined_index = self.indexes[key] joined_index_vars = self.index_vars[key] cmp_indexes = list( zip( [joined_index] + matching_indexes, [joined_index_vars] + matching_index_vars, strict=True, ) ) need_reindex = self._need_reindex(dims, cmp_indexes) else: if len(matching_indexes) > 1: need_reindex = self._need_reindex( dims, list(zip(matching_indexes, matching_index_vars, strict=True)), ) else: need_reindex = False if need_reindex: if ( isinstance(self.join, CombineKwargDefault) and self.join != "exact" ): emit_user_level_warning( self.join.warning_message( "This change will result in the following ValueError: " "cannot be aligned with join='exact' because " "index/labels/sizes are not equal along " "these coordinates (dimensions): " + ", ".join( f"{name!r} {dims!r}" for name, dims in key[0] ), recommend_set_options=False, ), FutureWarning, ) if self.join == "exact": raise AlignmentError( "cannot align objects with join='exact' where " "index/labels/sizes are not equal along " "these coordinates (dimensions): " + ", ".join(f"{name!r} 
{dims!r}" for name, dims in key[0]) + ( self.join.error_message() if isinstance(self.join, CombineKwargDefault) else "" ) ) joiner = self._get_index_joiner(index_cls) joined_index = joiner(matching_indexes) if self.join == "left": joined_index_vars = matching_index_vars[0] elif self.join == "right": joined_index_vars = matching_index_vars[-1] else: joined_index_vars = joined_index.create_variables() else: joined_index = matching_indexes[0] joined_index_vars = matching_index_vars[0] update_dicts(key, joined_index, joined_index_vars, need_reindex) # Explicitly provided indexes that are not found in objects to align # may relate to unindexed dimensions so we add them too for key, idx in self.indexes.items(): if key not in aligned_indexes: index_vars = self.index_vars[key] update_dicts(key, idx, index_vars, False) self.aligned_indexes = aligned_indexes self.aligned_index_vars = aligned_index_vars self.reindex = reindex self.new_indexes = Indexes(new_indexes, new_index_vars) def assert_unindexed_dim_sizes_equal(self) -> None: for dim, sizes in self.unindexed_dim_sizes.items(): index_size = self.new_indexes.dims.get(dim) if index_size is not None: sizes.add(index_size) add_err_msg = ( f" (note: an index is found along that dimension " f"with size={index_size!r})" ) else: add_err_msg = "" if len(sizes) > 1: raise AlignmentError( f"cannot reindex or align along dimension {dim!r} " f"because of conflicting dimension sizes: {sizes!r}" + add_err_msg ) def override_indexes(self) -> None: objects = list(self.objects) for i, obj in enumerate(objects[1:]): new_indexes = {} new_variables = {} matching_indexes = self.objects_matching_indexes[i + 1] for key, aligned_idx in self.aligned_indexes.items(): obj_idx = matching_indexes.get(key) if obj_idx is not None: for name, var in self.aligned_index_vars[key].items(): new_indexes[name] = aligned_idx new_variables[name] = var.copy(deep=self.copy) objects[i + 1] = obj._overwrite_indexes(new_indexes, new_variables) self.results = tuple(objects) def _get_dim_pos_indexers( self, matching_indexes: dict[MatchingIndexKey, Index], ) -> dict[Hashable, Any]: dim_pos_indexers: dict[Hashable, Any] = {} dim_index: dict[Hashable, Index] = {} for key, aligned_idx in self.aligned_indexes.items(): obj_idx = matching_indexes.get(key) if obj_idx is not None and self.reindex[key]: indexers = obj_idx.reindex_like(aligned_idx, **self.reindex_kwargs) for dim, idxer in indexers.items(): if dim in self.exclude_dims: raise AlignmentError( f"cannot reindex or align along dimension {dim!r} because " "it is explicitly excluded from alignment. 
This is likely caused by " "wrong results returned by the `reindex_like` method of this index:\n" f"{obj_idx!r}" ) if dim in dim_pos_indexers and not np.array_equal( idxer, dim_pos_indexers[dim] ): raise AlignmentError( f"cannot reindex or align along dimension {dim!r} because " "of conflicting re-indexers returned by multiple indexes\n" f"first index: {obj_idx!r}\nsecond index: {dim_index[dim]!r}\n" ) dim_pos_indexers[dim] = idxer dim_index[dim] = obj_idx return dim_pos_indexers def _get_indexes_and_vars( self, obj: T_Alignable, matching_indexes: dict[MatchingIndexKey, Index], matching_index_vars: dict[MatchingIndexKey, dict[Hashable, Variable]], ) -> tuple[dict[Hashable, Index], dict[Hashable, Variable]]: new_indexes = {} new_variables = {} for key, aligned_idx in self.aligned_indexes.items(): aligned_idx_vars = self.aligned_index_vars[key] obj_idx = matching_indexes.get(key) obj_idx_vars = matching_index_vars.get(key) if obj_idx is None: # add the aligned index if it relates to unindexed dimensions in obj dims = {d for var in aligned_idx_vars.values() for d in var.dims} if dims <= set(obj.dims): obj_idx = aligned_idx if obj_idx is not None: # TODO: always copy object's index when no re-indexing is required? # (instead of assigning the aligned index) # (need performance assessment) if key in self.keep_original_indexes: assert self.reindex[key] is False new_idx = obj_idx.copy(deep=self.copy) new_idx_vars = new_idx.create_variables(obj_idx_vars) else: new_idx = aligned_idx new_idx_vars = { k: v.copy(deep=self.copy) for k, v in aligned_idx_vars.items() } new_indexes.update(dict.fromkeys(new_idx_vars, new_idx)) new_variables.update(new_idx_vars) return new_indexes, new_variables def _reindex_one( self, obj: T_Alignable, matching_indexes: dict[MatchingIndexKey, Index], matching_index_vars: dict[MatchingIndexKey, dict[Hashable, Variable]], ) -> T_Alignable: new_indexes, new_variables = self._get_indexes_and_vars( obj, matching_indexes, matching_index_vars ) dim_pos_indexers = self._get_dim_pos_indexers(matching_indexes) return obj._reindex_callback( self, dim_pos_indexers, new_variables, new_indexes, self.fill_value, self.exclude_dims, self.exclude_vars, ) def reindex_all(self) -> None: self.results = tuple( starmap( self._reindex_one, zip( self.objects, self.objects_matching_indexes, self.objects_matching_index_vars, strict=True, ), ) ) def align(self) -> None: if not self.indexes and len(self.objects) == 1: # fast path for the trivial case (obj,) = self.objects self.results = (obj.copy(deep=self.copy),) return self.find_matching_indexes() self.find_matching_unindexed_dims() self.align_indexes() self.assert_unindexed_dim_sizes_equal() if self.join == "override": self.override_indexes() elif self.join == "exact" and not self.copy: self.results = self.objects else: self.reindex_all() T_Obj1 = TypeVar("T_Obj1", bound="Alignable") T_Obj2 = TypeVar("T_Obj2", bound="Alignable") T_Obj3 = TypeVar("T_Obj3", bound="Alignable") T_Obj4 = TypeVar("T_Obj4", bound="Alignable") T_Obj5 = TypeVar("T_Obj5", bound="Alignable") @overload def align( obj1: T_Obj1, /, *, join: JoinOptions | CombineKwargDefault = "inner", copy: bool = True, indexes=None, exclude: str | Iterable[Hashable] = frozenset(), fill_value=dtypes.NA, ) -> tuple[T_Obj1]: ... @overload def align( obj1: T_Obj1, obj2: T_Obj2, /, *, join: JoinOptions | CombineKwargDefault = "inner", copy: bool = True, indexes=None, exclude: str | Iterable[Hashable] = frozenset(), fill_value=dtypes.NA, ) -> tuple[T_Obj1, T_Obj2]: ... 
@overload def align( obj1: T_Obj1, obj2: T_Obj2, obj3: T_Obj3, /, *, join: JoinOptions | CombineKwargDefault = "inner", copy: bool = True, indexes=None, exclude: str | Iterable[Hashable] = frozenset(), fill_value=dtypes.NA, ) -> tuple[T_Obj1, T_Obj2, T_Obj3]: ... @overload def align( obj1: T_Obj1, obj2: T_Obj2, obj3: T_Obj3, obj4: T_Obj4, /, *, join: JoinOptions | CombineKwargDefault = "inner", copy: bool = True, indexes=None, exclude: str | Iterable[Hashable] = frozenset(), fill_value=dtypes.NA, ) -> tuple[T_Obj1, T_Obj2, T_Obj3, T_Obj4]: ... @overload def align( obj1: T_Obj1, obj2: T_Obj2, obj3: T_Obj3, obj4: T_Obj4, obj5: T_Obj5, /, *, join: JoinOptions | CombineKwargDefault = "inner", copy: bool = True, indexes=None, exclude: str | Iterable[Hashable] = frozenset(), fill_value=dtypes.NA, ) -> tuple[T_Obj1, T_Obj2, T_Obj3, T_Obj4, T_Obj5]: ... @overload def align( *objects: T_Alignable, join: JoinOptions | CombineKwargDefault = "inner", copy: bool = True, indexes=None, exclude: str | Iterable[Hashable] = frozenset(), fill_value=dtypes.NA, ) -> tuple[T_Alignable, ...]: ... def align( *objects: T_Alignable, join: JoinOptions | CombineKwargDefault = "inner", copy: bool = True, indexes=None, exclude: str | Iterable[Hashable] = frozenset(), fill_value=dtypes.NA, ) -> tuple[T_Alignable, ...]: """ Given any number of Dataset and/or DataArray objects, returns new objects with aligned indexes and dimension sizes. Array from the aligned objects are suitable as input to mathematical operators, because along each dimension they have the same index and size. Missing values (if ``join != 'inner'``) are filled with ``fill_value``. The default fill value is NaN. Parameters ---------- *objects : Dataset or DataArray Objects to align. join : {"outer", "inner", "left", "right", "exact", "override"}, optional Method for joining the indexes of the passed objects along each dimension: - "outer": use the union of object indexes - "inner": use the intersection of object indexes - "left": use indexes from the first object with each dimension - "right": use indexes from the last object with each dimension - "exact": instead of aligning, raise `ValueError` when indexes to be aligned are not equal - "override": if indexes are of same size, rewrite indexes to be those of the first object with that dimension. Indexes for the same dimension must have the same size in all objects. copy : bool, default: True If ``copy=True``, data in the return values is always copied. If ``copy=False`` and reindexing is unnecessary, or can be performed with only slice operations, then the output may share memory with the input. In either case, new xarray objects are always returned. indexes : dict-like, optional Any indexes explicitly provided with the `indexes` argument should be used in preference to the aligned indexes. exclude : str, iterable of hashable or None, optional Dimensions that must be excluded from alignment fill_value : scalar or dict-like, optional Value to use for newly missing values. If a dict-like, maps variable names to fill values. Use a data array's name to refer to its values. Returns ------- aligned : tuple of DataArray or Dataset Tuple of objects with the same type as `*objects` with aligned coordinates. Raises ------ AlignmentError If any dimensions without labels on the arguments have different sizes, or a different size than the size of the aligned dimension labels. Examples -------- >>> x = xr.DataArray( ... [[25, 35], [10, 24]], ... dims=("lat", "lon"), ... 
coords={"lat": [35.0, 40.0], "lon": [100.0, 120.0]}, ... ) >>> y = xr.DataArray( ... [[20, 5], [7, 13]], ... dims=("lat", "lon"), ... coords={"lat": [35.0, 42.0], "lon": [100.0, 120.0]}, ... ) >>> x Size: 32B array([[25, 35], [10, 24]]) Coordinates: * lat (lat) float64 16B 35.0 40.0 * lon (lon) float64 16B 100.0 120.0 >>> y Size: 32B array([[20, 5], [ 7, 13]]) Coordinates: * lat (lat) float64 16B 35.0 42.0 * lon (lon) float64 16B 100.0 120.0 >>> a, b = xr.align(x, y) >>> a Size: 16B array([[25, 35]]) Coordinates: * lat (lat) float64 8B 35.0 * lon (lon) float64 16B 100.0 120.0 >>> b Size: 16B array([[20, 5]]) Coordinates: * lat (lat) float64 8B 35.0 * lon (lon) float64 16B 100.0 120.0 >>> a, b = xr.align(x, y, join="outer") >>> a Size: 48B array([[25., 35.], [10., 24.], [nan, nan]]) Coordinates: * lat (lat) float64 24B 35.0 40.0 42.0 * lon (lon) float64 16B 100.0 120.0 >>> b Size: 48B array([[20., 5.], [nan, nan], [ 7., 13.]]) Coordinates: * lat (lat) float64 24B 35.0 40.0 42.0 * lon (lon) float64 16B 100.0 120.0 >>> a, b = xr.align(x, y, join="outer", fill_value=-999) >>> a Size: 48B array([[ 25, 35], [ 10, 24], [-999, -999]]) Coordinates: * lat (lat) float64 24B 35.0 40.0 42.0 * lon (lon) float64 16B 100.0 120.0 >>> b Size: 48B array([[ 20, 5], [-999, -999], [ 7, 13]]) Coordinates: * lat (lat) float64 24B 35.0 40.0 42.0 * lon (lon) float64 16B 100.0 120.0 >>> a, b = xr.align(x, y, join="left") >>> a Size: 32B array([[25, 35], [10, 24]]) Coordinates: * lat (lat) float64 16B 35.0 40.0 * lon (lon) float64 16B 100.0 120.0 >>> b Size: 32B array([[20., 5.], [nan, nan]]) Coordinates: * lat (lat) float64 16B 35.0 40.0 * lon (lon) float64 16B 100.0 120.0 >>> a, b = xr.align(x, y, join="right") >>> a Size: 32B array([[25., 35.], [nan, nan]]) Coordinates: * lat (lat) float64 16B 35.0 42.0 * lon (lon) float64 16B 100.0 120.0 >>> b Size: 32B array([[20, 5], [ 7, 13]]) Coordinates: * lat (lat) float64 16B 35.0 42.0 * lon (lon) float64 16B 100.0 120.0 >>> a, b = xr.align(x, y, join="exact") Traceback (most recent call last): ... xarray.structure.alignment.AlignmentError: cannot align objects with join='exact' ... >>> a, b = xr.align(x, y, join="override") >>> a Size: 32B array([[25, 35], [10, 24]]) Coordinates: * lat (lat) float64 16B 35.0 40.0 * lon (lon) float64 16B 100.0 120.0 >>> b Size: 32B array([[20, 5], [ 7, 13]]) Coordinates: * lat (lat) float64 16B 35.0 40.0 * lon (lon) float64 16B 100.0 120.0 """ aligner = Aligner( objects, join=join, copy=copy, indexes=indexes, exclude_dims=exclude, fill_value=fill_value, ) aligner.align() return aligner.results def deep_align( objects: Iterable[Any], join: JoinOptions | CombineKwargDefault = "inner", copy: bool = True, indexes=None, exclude: str | Iterable[Hashable] = frozenset(), raise_on_invalid: bool = True, fill_value=dtypes.NA, ) -> list[Any]: """Align objects for merging, recursing into dictionary values. This function is not public API. 
""" from xarray.core.coordinates import Coordinates from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset if indexes is None: indexes = {} def is_alignable(obj): return isinstance(obj, Coordinates | DataArray | Dataset) positions: list[int] = [] keys: list[type[object] | Hashable] = [] out: list[Any] = [] targets: list[Alignable] = [] no_key: Final = object() not_replaced: Final = object() for position, variables in enumerate(objects): if is_alignable(variables): positions.append(position) keys.append(no_key) targets.append(variables) out.append(not_replaced) elif is_dict_like(variables): current_out = {} for k, v in variables.items(): if is_alignable(v) and k not in indexes: # Skip variables in indexes for alignment, because these # should to be overwritten instead: # https://github.com/pydata/xarray/issues/725 # https://github.com/pydata/xarray/issues/3377 # TODO(shoyer): doing this here feels super-hacky -- can we # move it explicitly into merge instead? positions.append(position) keys.append(k) targets.append(v) current_out[k] = not_replaced else: current_out[k] = v out.append(current_out) elif raise_on_invalid: raise ValueError( "object to align is neither an xarray.Dataset, " f"an xarray.DataArray nor a dictionary: {variables!r}" ) else: out.append(variables) aligned = align( *targets, join=join, copy=copy, indexes=indexes, exclude=exclude, fill_value=fill_value, ) for position, key, aligned_obj in zip(positions, keys, aligned, strict=True): if key is no_key: out[position] = aligned_obj else: out[position][key] = aligned_obj return out def reindex( obj: T_Alignable, indexers: Mapping[Any, Any], method: str | None = None, tolerance: float | Iterable[float] | str | None = None, copy: bool = True, fill_value: Any = dtypes.NA, sparse: bool = False, exclude_vars: Iterable[Hashable] = frozenset(), ) -> T_Alignable: """Re-index either a Dataset or a DataArray. Not public API. """ # TODO: (benbovy - explicit indexes): uncomment? # --> from reindex docstrings: "any mismatched dimension is simply ignored" # bad_keys = [k for k in indexers if k not in obj._indexes and k not in obj.dims] # if bad_keys: # raise ValueError( # f"indexer keys {bad_keys} do not correspond to any indexed coordinate " # "or unindexed dimension in the object to reindex" # ) aligner = Aligner( (obj,), indexes=indexers, method=method, tolerance=tolerance, copy=copy, fill_value=fill_value, sparse=sparse, exclude_vars=exclude_vars, ) aligner.align() return aligner.results[0] def reindex_like( obj: T_Alignable, other: Dataset | DataArray, method: str | None = None, tolerance: float | Iterable[float] | str | None = None, copy: bool = True, fill_value: Any = dtypes.NA, ) -> T_Alignable: """Re-index either a Dataset or a DataArray like another Dataset/DataArray. Not public API. """ if not other._indexes: # This check is not performed in Aligner. 
for dim in other.dims: if dim in obj.dims: other_size = other.sizes[dim] obj_size = obj.sizes[dim] if other_size != obj_size: raise ValueError( "different size for unlabeled " f"dimension on argument {dim!r}: {other_size!r} vs {obj_size!r}" ) return reindex( obj, indexers=other.xindexes, method=method, tolerance=tolerance, copy=copy, fill_value=fill_value, ) def _get_broadcast_dims_map_common_coords(args, exclude): common_coords = {} dims_map = {} for arg in args: for dim in arg.dims: if dim not in common_coords and dim not in exclude: dims_map[dim] = arg.sizes[dim] if dim in arg._indexes: common_coords.update(arg.xindexes.get_all_coords(dim)) return dims_map, common_coords def _broadcast_helper( arg: T_Alignable, exclude, dims_map, common_coords ) -> T_Alignable: from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset def _set_dims(var): # Add excluded dims to a copy of dims_map var_dims_map = dims_map.copy() for dim in exclude: with suppress(ValueError): # ignore dim not in var.dims var_dims_map[dim] = var.shape[var.dims.index(dim)] return var.set_dims(var_dims_map) def _broadcast_array(array: T_DataArray) -> T_DataArray: data = _set_dims(array.variable) coords = dict(array.coords) coords.update(common_coords) return array.__class__( data, coords, data.dims, name=array.name, attrs=array.attrs ) def _broadcast_dataset(ds: T_Dataset) -> T_Dataset: data_vars = {k: _set_dims(ds.variables[k]) for k in ds.data_vars} coords = dict(ds.coords) coords.update(common_coords) return ds.__class__(data_vars, coords, ds.attrs) # remove casts once https://github.com/python/mypy/issues/12800 is resolved if isinstance(arg, DataArray): return _broadcast_array(arg) # type: ignore[return-value,unused-ignore] elif isinstance(arg, Dataset): return _broadcast_dataset(arg) # type: ignore[return-value,unused-ignore] else: raise ValueError("all input must be Dataset or DataArray objects") @overload def broadcast( obj1: T_Obj1, /, *, exclude: str | Iterable[Hashable] | None = None ) -> tuple[T_Obj1]: ... @overload def broadcast( obj1: T_Obj1, obj2: T_Obj2, /, *, exclude: str | Iterable[Hashable] | None = None ) -> tuple[T_Obj1, T_Obj2]: ... @overload def broadcast( obj1: T_Obj1, obj2: T_Obj2, obj3: T_Obj3, /, *, exclude: str | Iterable[Hashable] | None = None, ) -> tuple[T_Obj1, T_Obj2, T_Obj3]: ... @overload def broadcast( obj1: T_Obj1, obj2: T_Obj2, obj3: T_Obj3, obj4: T_Obj4, /, *, exclude: str | Iterable[Hashable] | None = None, ) -> tuple[T_Obj1, T_Obj2, T_Obj3, T_Obj4]: ... @overload def broadcast( obj1: T_Obj1, obj2: T_Obj2, obj3: T_Obj3, obj4: T_Obj4, obj5: T_Obj5, /, *, exclude: str | Iterable[Hashable] | None = None, ) -> tuple[T_Obj1, T_Obj2, T_Obj3, T_Obj4, T_Obj5]: ... @overload def broadcast( *args: T_Alignable, exclude: str | Iterable[Hashable] | None = None ) -> tuple[T_Alignable, ...]: ... def broadcast( *args: T_Alignable, exclude: str | Iterable[Hashable] | None = None ) -> tuple[T_Alignable, ...]: """Explicitly broadcast any number of DataArray or Dataset objects against one another. xarray objects automatically broadcast against each other in arithmetic operations, so this function should not be necessary for normal use. If no change is needed, the input data is returned to the output without being copied. Parameters ---------- *args : DataArray or Dataset Arrays to broadcast against each other. 
exclude : str, iterable of hashable or None, optional Dimensions that must not be broadcasted Returns ------- broadcast : tuple of DataArray or tuple of Dataset The same data as the input arrays, but with additional dimensions inserted so that all data arrays have the same dimensions and shape. Examples -------- Broadcast two data arrays against one another to fill out their dimensions: >>> a = xr.DataArray([1, 2, 3], dims="x") >>> b = xr.DataArray([5, 6], dims="y") >>> a Size: 24B array([1, 2, 3]) Dimensions without coordinates: x >>> b Size: 16B array([5, 6]) Dimensions without coordinates: y >>> a2, b2 = xr.broadcast(a, b) >>> a2 Size: 48B array([[1, 1], [2, 2], [3, 3]]) Dimensions without coordinates: x, y >>> b2 Size: 48B array([[5, 6], [5, 6], [5, 6]]) Dimensions without coordinates: x, y Fill out the dimensions of all data variables in a dataset: >>> ds = xr.Dataset({"a": a, "b": b}) >>> (ds2,) = xr.broadcast(ds) # use tuple unpacking to extract one dataset >>> ds2 Size: 96B Dimensions: (x: 3, y: 2) Dimensions without coordinates: x, y Data variables: a (x, y) int64 48B 1 1 2 2 3 3 b (x, y) int64 48B 5 6 5 6 5 6 """ if exclude is None: exclude = set() args = align(*args, join="outer", copy=False, exclude=exclude) dims_map, common_coords = _get_broadcast_dims_map_common_coords(args, exclude) result = [_broadcast_helper(arg, exclude, dims_map, common_coords) for arg in args] return tuple(result) python-xarray-2026.01.0/xarray/structure/merge.py0000664000175000017500000013117215136607163022067 0ustar alastairalastairfrom __future__ import annotations from collections import defaultdict from collections.abc import Hashable, Iterable, Mapping, Sequence from collections.abc import Set as AbstractSet from typing import TYPE_CHECKING, Any, NamedTuple, Union, cast, overload import pandas as pd from xarray.core import dtypes from xarray.core.duck_array_ops import lazy_array_equiv from xarray.core.indexes import ( Index, create_default_index_implicit, filter_indexes_from_coords, indexes_equal, ) from xarray.core.utils import ( Frozen, compat_dict_union, dict_equiv, emit_user_level_warning, equivalent, ) from xarray.core.variable import ( IndexVariable, Variable, as_variable, calculate_dimensions, ) from xarray.structure.alignment import deep_align from xarray.util.deprecation_helpers import ( _COMPAT_DEFAULT, _JOIN_DEFAULT, CombineKwargDefault, ) if TYPE_CHECKING: from xarray.core.coordinates import Coordinates from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset from xarray.core.datatree import DataTree from xarray.core.types import ( CombineAttrsOptions, CompatOptions, DataVars, JoinOptions, ) DimsLike = Union[Hashable, Sequence[Hashable]] ArrayLike = Any VariableLike = Union[ ArrayLike, tuple[DimsLike, ArrayLike], tuple[DimsLike, ArrayLike, Mapping], tuple[DimsLike, ArrayLike, Mapping, Mapping], ] XarrayValue = Union[DataArray, Variable, VariableLike] DatasetLike = Union[Dataset, Coordinates, Mapping[Any, XarrayValue]] CoercibleValue = Union[XarrayValue, pd.Series, pd.DataFrame] CoercibleMapping = Union[Dataset, Mapping[Any, CoercibleValue]] PANDAS_TYPES = (pd.Series, pd.DataFrame) _VALID_COMPAT = Frozen( { "identical": 0, "equals": 1, "broadcast_equals": 2, "minimal": 3, "no_conflicts": 4, "override": 5, } ) class Context: """object carrying the information of a call""" def __init__(self, func): self.func = func def broadcast_dimension_size(variables: list[Variable]) -> dict[Hashable, int]: """Extract dimension sizes from a dictionary of variables. 
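    For example, variables with dims ``("x", "y")`` and shape ``(2, 3)`` plus
    ``("y",)`` with shape ``(3,)`` yield ``{"x": 2, "y": 3}``.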
Raises ValueError if any dimensions have different sizes. """ dims: dict[Hashable, int] = {} for var in variables: for dim, size in zip(var.dims, var.shape, strict=True): if dim in dims and size != dims[dim]: raise ValueError(f"index {dim!r} not aligned") dims[dim] = size return dims class MergeError(ValueError): """Error class for merge failures due to incompatible arguments.""" # inherits from ValueError for backward compatibility # TODO: move this to an xarray.exceptions module? def unique_variable( name: Hashable, variables: list[Variable], compat: CompatOptions | CombineKwargDefault = "broadcast_equals", equals: bool | None = None, ) -> tuple[bool | None, Variable]: """Return the unique variable from a list of variables or raise MergeError. Parameters ---------- name : hashable Name for this variable. variables : list of Variable List of Variable objects, all of which go by the same name in different inputs. compat : {"identical", "equals", "broadcast_equals", "no_conflicts", "override"}, optional Type of equality check to use. equals : None or bool, optional corresponding to result of compat test Returns ------- Variable to use in the result. Raises ------ MergeError: if any of the variables are not equal. """ out = variables[0] if len(variables) == 1 or compat == "override": return equals, out combine_method = None if compat == "minimal": compat = "broadcast_equals" if compat == "broadcast_equals": dim_lengths = broadcast_dimension_size(variables) out = out.set_dims(dim_lengths) if compat == "no_conflicts": combine_method = "fillna" # we return the lazy equals, so we can warn about behaviour changes lazy_equals = equals if equals is None: compat_str = ( compat._value if isinstance(compat, CombineKwargDefault) else compat ) assert compat_str is not None # first check without comparing values i.e. no computes for var in variables[1:]: equals = getattr(out, compat_str)(var, equiv=lazy_array_equiv) if equals is not True: break lazy_equals = equals if equals is None: # now compare values with minimum number of computes out = out.compute() for var in variables[1:]: equals = getattr(out, compat_str)(var) if not equals: break if not equals: raise MergeError( f"conflicting values for variable {name!r} on objects to be combined. " "You can skip this check by specifying compat='override'." ) if combine_method: for var in variables[1:]: out = getattr(out, combine_method)(var) return lazy_equals, out def _assert_compat_valid(compat): if not isinstance(compat, CombineKwargDefault) and compat not in _VALID_COMPAT: raise ValueError(f"compat={compat!r} invalid: must be {set(_VALID_COMPAT)}") MergeElement = tuple[Variable, Index | None] def _assert_prioritized_valid( grouped: dict[Hashable, list[MergeElement]], prioritized: Mapping[Any, MergeElement], ) -> None: """Make sure that elements given in prioritized will not corrupt any index given in grouped. 
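    An index is considered at risk of corruption when the names in
    ``prioritized`` overlap only part of the coordinate names tied to that
    index (see the partial-overlap check below).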
""" prioritized_names = set(prioritized) grouped_by_index: dict[int, list[Hashable]] = defaultdict(list) indexes: dict[int, Index] = {} for name, elements_list in grouped.items(): for _, index in elements_list: if index is not None: grouped_by_index[id(index)].append(name) indexes[id(index)] = index # An index may be corrupted when the set of its corresponding coordinate name(s) # partially overlaps the set of names given in prioritized for index_id, index_coord_names in grouped_by_index.items(): index_names = set(index_coord_names) common_names = index_names & prioritized_names if common_names and len(common_names) != len(index_names): common_names_str = ", ".join(f"{k!r}" for k in common_names) index_names_str = ", ".join(f"{k!r}" for k in index_coord_names) raise ValueError( f"cannot set or update variable(s) {common_names_str}, which would corrupt " f"the following index built from coordinates {index_names_str}:\n" f"{indexes[index_id]!r}" ) def merge_collected( grouped: dict[Any, list[MergeElement]], prioritized: Mapping[Any, MergeElement] | None = None, compat: CompatOptions | CombineKwargDefault = "minimal", combine_attrs: CombineAttrsOptions = "override", equals: dict[Any, bool] | None = None, ) -> tuple[dict[Hashable, Variable], dict[Hashable, Index]]: """Merge dicts of variables, while resolving conflicts appropriately. Parameters ---------- grouped : mapping prioritized : mapping compat : str Type of equality check to use when checking for conflicts. combine_attrs : {"drop", "identical", "no_conflicts", "drop_conflicts", \ "override"} or callable, default: "override" A callable or a string indicating how to combine attrs of the objects being merged: - "drop": empty attrs on returned Dataset. - "identical": all attrs must be the same on every object. - "no_conflicts": attrs from all objects are combined, any that have the same name must also have the same value. - "drop_conflicts": attrs from all objects are combined, any that have the same name but different values are dropped. - "override": skip comparing and copy attrs from the first dataset to the result. If a callable, it must expect a sequence of ``attrs`` dicts and a context object as its only parameters. equals : mapping, optional corresponding to result of compat test Returns ------- Dict with keys taken by the union of keys on list_of_mappings, and Variable values corresponding to those that should be found on the merged result. """ if prioritized is None: prioritized = {} if equals is None: equals = {} _assert_compat_valid(compat) _assert_prioritized_valid(grouped, prioritized) merged_vars: dict[Hashable, Variable] = {} merged_indexes: dict[Hashable, Index] = {} index_cmp_cache: dict[tuple[int, int], bool | None] = {} for name, elements_list in grouped.items(): if name in prioritized: variable, index = prioritized[name] merged_vars[name] = variable if index is not None: merged_indexes[name] = index else: attrs: dict[Any, Any] = {} indexed_elements = [ (variable, index) for variable, index in elements_list if index is not None ] if indexed_elements: # TODO(shoyer): consider adjusting this logic. Are we really # OK throwing away variable without an index in favor of # indexed variables, without even checking if values match? 
variable, index = indexed_elements[0] for other_var, other_index in indexed_elements[1:]: if not indexes_equal( index, other_index, variable, other_var, index_cmp_cache ): raise MergeError( f"conflicting values/indexes on objects to be combined for coordinate {name!r}\n" f"first index: {index!r}\nsecond index: {other_index!r}\n" f"first variable: {variable!r}\nsecond variable: {other_var!r}\n" ) if compat == "identical": for other_variable, _ in indexed_elements[1:]: if not dict_equiv(variable.attrs, other_variable.attrs): raise MergeError( "conflicting attribute values on combined " f"variable {name!r}:\nfirst value: {variable.attrs!r}\nsecond value: {other_variable.attrs!r}" ) attrs = merge_attrs( [var.attrs for var, _ in indexed_elements], combine_attrs=combine_attrs, ) merged_vars[name] = variable merged_indexes[name] = index else: variables = [variable for variable, _ in elements_list] try: equals_this_var, merged_vars[name] = unique_variable( name, variables, compat, equals.get(name) ) # This is very likely to result in false positives, but there is no way # to tell if the output will change without computing. if ( isinstance(compat, CombineKwargDefault) and compat == "no_conflicts" and len(variables) > 1 and not equals_this_var ): emit_user_level_warning( compat.warning_message( "This is likely to lead to different results when " "combining overlapping variables with the same name.", ), FutureWarning, ) except MergeError: if compat != "minimal": # we need more than "minimal" compatibility (for which # we drop conflicting coordinates) raise if name in merged_vars: attrs = merge_attrs( [var.attrs for var in variables], combine_attrs=combine_attrs ) if name in merged_vars and (merged_vars[name].attrs or attrs): # Ensure that assigning attrs does not affect the original input variable. merged_vars[name] = merged_vars[name].copy(deep=False) merged_vars[name].attrs = attrs return merged_vars, merged_indexes def collect_variables_and_indexes( list_of_mappings: Iterable[DatasetLike], indexes: Mapping[Any, Any] | None = None, ) -> dict[Hashable, list[MergeElement]]: """Collect variables and indexes from list of mappings of xarray objects. Mappings can be Dataset or Coordinates objects, in which case both variables and indexes are extracted from it. It can also have values of one of the following types: - an xarray.Variable - a tuple `(dims, data[, attrs[, encoding]])` that can be converted in an xarray.Variable - or an xarray.DataArray If a mapping of indexes is given, those indexes are assigned to all variables with a matching key/name. For dimension variables with no matching index, a default (pandas) index is assigned. DataArray indexes that don't match mapping keys are also extracted. 
""" from xarray.core.coordinates import Coordinates from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset if indexes is None: indexes = {} grouped: dict[Hashable, list[MergeElement]] = defaultdict(list) def append(name, variable, index): grouped[name].append((variable, index)) def append_all(variables, indexes): for name, variable in variables.items(): append(name, variable, indexes.get(name)) for mapping in list_of_mappings: if isinstance(mapping, Coordinates | Dataset): append_all(mapping.variables, mapping.xindexes) continue for name, variable in mapping.items(): if isinstance(variable, DataArray): coords_ = variable._coords.copy() # use private API for speed indexes_ = dict(variable._indexes) # explicitly overwritten variables should take precedence coords_.pop(name, None) indexes_.pop(name, None) append_all(coords_, indexes_) variable = as_variable(variable, name=name, auto_convert=False) if name in indexes: append(name, variable, indexes[name]) elif variable.dims == (name,): idx, idx_vars = create_default_index_implicit(variable) append_all(idx_vars, dict.fromkeys(idx_vars, idx)) else: append(name, variable, None) return grouped def collect_from_coordinates( list_of_coords: list[Coordinates], ) -> dict[Hashable, list[MergeElement]]: """Collect variables and indexes to be merged from Coordinate objects.""" grouped: dict[Hashable, list[MergeElement]] = defaultdict(list) for coords in list_of_coords: variables = coords.variables indexes = coords.xindexes for name, variable in variables.items(): grouped[name].append((variable, indexes.get(name))) return grouped def merge_coordinates_without_align( objects: list[Coordinates], prioritized: Mapping[Any, MergeElement] | None = None, exclude_dims: AbstractSet = frozenset(), combine_attrs: CombineAttrsOptions = "override", compat: CompatOptions | CombineKwargDefault = "minimal", ) -> tuple[dict[Hashable, Variable], dict[Hashable, Index]]: """Merge variables/indexes from coordinates without automatic alignments. This function is used for merging coordinate from pre-existing xarray objects. """ collected = collect_from_coordinates(objects) if exclude_dims: filtered: dict[Hashable, list[MergeElement]] = {} for name, elements in collected.items(): new_elements = [ (variable, index) for variable, index in elements if exclude_dims.isdisjoint(variable.dims) ] if new_elements: filtered[name] = new_elements else: filtered = collected # TODO: indexes should probably be filtered in collected elements # before merging them merged_coords, merged_indexes = merge_collected( filtered, prioritized, compat=compat, combine_attrs=combine_attrs ) merged_indexes = filter_indexes_from_coords(merged_indexes, set(merged_coords)) return merged_coords, merged_indexes def determine_coords( list_of_mappings: Iterable[DatasetLike], ) -> tuple[set[Hashable], set[Hashable]]: """Given a list of dicts with xarray object values, identify coordinates. Parameters ---------- list_of_mappings : list of dict or list of Dataset Of the same form as the arguments to expand_variable_dicts. Returns ------- coord_names : set of variable names noncoord_names : set of variable names All variable found in the input should appear in either the set of coordinate or non-coordinate names. 
""" from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset coord_names: set[Hashable] = set() noncoord_names: set[Hashable] = set() for mapping in list_of_mappings: if isinstance(mapping, Dataset): coord_names.update(mapping.coords) noncoord_names.update(mapping.data_vars) else: for name, var in mapping.items(): if isinstance(var, DataArray): coords = set(var._coords) # use private API for speed # explicitly overwritten variables should take precedence coords.discard(name) coord_names.update(coords) return coord_names, noncoord_names def coerce_pandas_values(objects: Iterable[CoercibleMapping]) -> list[DatasetLike]: """Convert pandas values found in a list of labeled objects. Parameters ---------- objects : list of Dataset or mapping The mappings may contain any sort of objects coercible to xarray.Variables as keys, including pandas objects. Returns ------- List of Dataset or dictionary objects. Any inputs or values in the inputs that were pandas objects have been converted into native xarray objects. """ from xarray.core.coordinates import Coordinates from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset out: list[DatasetLike] = [] for obj in objects: variables: DatasetLike if isinstance(obj, Dataset | Coordinates): variables = obj else: variables = {} if isinstance(obj, PANDAS_TYPES): obj = dict(obj.items()) for k, v in obj.items(): if isinstance(v, PANDAS_TYPES): v = DataArray(v) variables[k] = v out.append(variables) return out def _get_priority_vars_and_indexes( objects: Sequence[DatasetLike], priority_arg: int | None, compat: CompatOptions | CombineKwargDefault = "equals", ) -> dict[Hashable, MergeElement]: """Extract the priority variable from a list of mappings. We need this method because in some cases the priority argument itself might have conflicting values (e.g., if it is a dict with two DataArray values with conflicting coordinate values). Parameters ---------- objects : sequence of dict-like of Variable Dictionaries in which to find the priority variables. priority_arg : int or None Integer object whose variable should take priority. compat : {"identical", "equals", "broadcast_equals", "no_conflicts", "override"}, optional String indicating how to compare non-concatenated variables of the same name for potential conflicts. This is passed down to merge. - "broadcast_equals": all values must be equal when variables are broadcast against each other to ensure common dimensions. - "equals": all values and dimensions must be the same. - "identical": all values, dimensions and attributes must be the same. - "no_conflicts": only values which are not null in both datasets must be equal. The returned dataset then contains the combination of all non-null values. - "override": skip comparing and pick variable from first dataset Returns ------- A dictionary of variables and associated indexes (if any) to prioritize. 
""" if priority_arg is None: return {} collected = collect_variables_and_indexes([objects[priority_arg]]) variables, indexes = merge_collected(collected, compat=compat) grouped: dict[Hashable, MergeElement] = {} for name, variable in variables.items(): grouped[name] = (variable, indexes.get(name)) return grouped def merge_coords( objects: Iterable[CoercibleMapping], compat: CompatOptions = "minimal", join: JoinOptions = "outer", priority_arg: int | None = None, indexes: Mapping[Any, Index] | None = None, fill_value: object = dtypes.NA, ) -> tuple[dict[Hashable, Variable], dict[Hashable, Index]]: """Merge coordinate variables. See merge_core below for argument descriptions. This works similarly to merge_core, except everything we don't worry about whether variables are coordinates or not. """ _assert_compat_valid(compat) coerced = coerce_pandas_values(objects) aligned = deep_align( coerced, join=join, copy=False, indexes=indexes, fill_value=fill_value ) collected = collect_variables_and_indexes(aligned, indexes=indexes) prioritized = _get_priority_vars_and_indexes(aligned, priority_arg, compat=compat) variables, out_indexes = merge_collected(collected, prioritized, compat=compat) return variables, out_indexes def equivalent_attrs(a: Any, b: Any) -> bool: """Check if two attribute values are equivalent. Returns False if the comparison raises ValueError or TypeError. This handles cases like numpy arrays with ambiguous truth values and xarray Datasets which can't be directly converted to numpy arrays. Since equivalent() now handles non-boolean returns by returning False, this wrapper mainly catches exceptions from comparisons that can't be evaluated at all. """ try: return equivalent(a, b) except (ValueError, TypeError): # These exceptions indicate the comparison is truly ambiguous # (e.g., nested numpy arrays that would raise "ambiguous truth value") return False def merge_attrs(variable_attrs, combine_attrs, context=None): """Combine attributes from different variables according to combine_attrs""" if not variable_attrs: # no attributes to merge return None if callable(combine_attrs): return combine_attrs(variable_attrs, context=context) elif combine_attrs == "drop": return {} elif combine_attrs == "override": return dict(variable_attrs[0]) elif combine_attrs == "no_conflicts": result = dict(variable_attrs[0]) for attrs in variable_attrs[1:]: try: result = compat_dict_union(result, attrs) except ValueError as e: raise MergeError( "combine_attrs='no_conflicts', but some values are not " f"the same. Merging {result} with {attrs}" ) from e return result elif combine_attrs == "drop_conflicts": result = {} dropped_keys = set() for attrs in variable_attrs: for key, value in attrs.items(): if key in dropped_keys: continue if key not in result: result[key] = value elif not equivalent_attrs(result[key], value): del result[key] dropped_keys.add(key) return result elif combine_attrs == "identical": result = dict(variable_attrs[0]) for attrs in variable_attrs[1:]: if not dict_equiv(result, attrs): raise MergeError( f"combine_attrs='identical', but attrs differ. First is {result} " f", other is {attrs}." 
) return result else: raise ValueError(f"Unrecognised value for combine_attrs={combine_attrs}") class _MergeResult(NamedTuple): variables: dict[Hashable, Variable] coord_names: set[Hashable] dims: dict[Hashable, int] indexes: dict[Hashable, Index] attrs: dict[Hashable, Any] def merge_core( objects: Iterable[CoercibleMapping], compat: CompatOptions | CombineKwargDefault, join: JoinOptions | CombineKwargDefault, combine_attrs: CombineAttrsOptions = "override", priority_arg: int | None = None, explicit_coords: Iterable[Hashable] | None = None, indexes: Mapping[Any, Any] | None = None, fill_value: object = dtypes.NA, skip_align_args: list[int] | None = None, ) -> _MergeResult: """Core logic for merging labeled objects. This is not public API. Parameters ---------- objects : list of mapping All values must be convertible to labeled arrays. compat : {"identical", "equals", "broadcast_equals", "no_conflicts", "override"}, optional Compatibility checks to use when merging variables. join : {"outer", "inner", "left", "right"}, optional How to combine objects with different indexes. combine_attrs : {"drop", "identical", "no_conflicts", "drop_conflicts", \ "override"} or callable, default: "override" How to combine attributes of objects priority_arg : int, optional Optional argument in `objects` that takes precedence over the others. explicit_coords : set, optional An explicit list of variables from `objects` that are coordinates. indexes : dict, optional Dictionary with values given by xarray.Index objects or anything that may be cast to pandas.Index objects. fill_value : scalar, optional Value to use for newly missing values skip_align_args : list of int, optional Optional arguments in `objects` that are not included in alignment. Returns ------- variables : dict Dictionary of Variable objects. coord_names : set Set of coordinate names. dims : dict Dictionary mapping from dimension names to sizes. 
attrs : dict Dictionary of attributes """ from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset _assert_compat_valid(compat) objects = list(objects) if skip_align_args is None: skip_align_args = [] skip_align_objs = [(pos, objects.pop(pos)) for pos in skip_align_args] coerced = coerce_pandas_values(objects) aligned = deep_align( coerced, join=join, copy=False, indexes=indexes, fill_value=fill_value, ) for pos, obj in skip_align_objs: aligned.insert(pos, obj) collected = collect_variables_and_indexes(aligned, indexes=indexes) prioritized = _get_priority_vars_and_indexes(aligned, priority_arg, compat=compat) variables, out_indexes = merge_collected( collected, prioritized, compat=compat, combine_attrs=combine_attrs, ) dims = calculate_dimensions(variables) coord_names, noncoord_names = determine_coords(coerced) if compat == "minimal": # coordinates may be dropped in merged results coord_names.intersection_update(variables) if explicit_coords is not None: coord_names.update(explicit_coords) for dim in dims: if dim in variables: coord_names.add(dim) ambiguous_coords = coord_names.intersection(noncoord_names) if ambiguous_coords: raise MergeError( "unable to determine if these variables should be " f"coordinates or not in the merged result: {ambiguous_coords}" ) attrs = merge_attrs( [var.attrs for var in coerced if isinstance(var, Dataset | DataArray)], combine_attrs, ) return _MergeResult(variables, coord_names, dims, out_indexes, attrs) def merge_trees( trees: Sequence[DataTree], compat: CompatOptions | CombineKwargDefault = _COMPAT_DEFAULT, join: JoinOptions | CombineKwargDefault = _JOIN_DEFAULT, fill_value: object = dtypes.NA, combine_attrs: CombineAttrsOptions = "override", ) -> DataTree: """Merge specialized to DataTree objects.""" from xarray.core.dataset import Dataset from xarray.core.datatree import DataTree from xarray.core.datatree_mapping import add_path_context_to_errors if fill_value is not dtypes.NA: # fill_value support dicts, which probably should be mapped to sub-groups? raise NotImplementedError( "fill_value is not yet supported for DataTree objects in merge" ) node_lists: defaultdict[str, list[DataTree]] = defaultdict(list) for tree in trees: for key, node in tree.subtree_with_keys: node_lists[key].append(node) root_datasets = [node.dataset for node in node_lists.pop(".")] with add_path_context_to_errors("."): root_ds = merge( root_datasets, compat=compat, join=join, combine_attrs=combine_attrs ) result = DataTree(dataset=root_ds) def level(kv): # all trees with the same path have the same level _, trees = kv return trees[0].level for key, nodes in sorted(node_lists.items(), key=level): # Merge datasets, including inherited indexes to ensure alignment. datasets = [node.dataset for node in nodes] with add_path_context_to_errors(key): merge_result = merge_core( datasets, compat=compat, join=join, combine_attrs=combine_attrs, ) merged_ds = Dataset._construct_direct(**merge_result._asdict()) result[key] = DataTree(dataset=merged_ds) return result @overload def merge( objects: Iterable[DataTree], compat: CompatOptions | CombineKwargDefault = ..., join: JoinOptions | CombineKwargDefault = ..., fill_value: object = ..., combine_attrs: CombineAttrsOptions = ..., ) -> DataTree: ... @overload def merge( objects: Iterable[DataArray | Dataset | Coordinates | dict], compat: CompatOptions | CombineKwargDefault = ..., join: JoinOptions | CombineKwargDefault = ..., fill_value: object = ..., combine_attrs: CombineAttrsOptions = ..., ) -> Dataset: ... 
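# Minimal usage sketch (illustrative only; the docstring below gives the
# authoritative examples): per the overloads above, DataTree inputs produce a
# DataTree, while DataArray/Dataset/Coordinates/dict inputs produce a Dataset,
# e.g.
#
#     ds = xr.merge(
#         [
#             xr.DataArray([1, 2], dims="x", name="a"),
#             xr.DataArray([3, 4], dims="x", name="b"),
#         ]
#     )
#
# returns a Dataset with data variables "a" and "b" on the shared dimension "x".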
def merge( objects: Iterable[DataTree | DataArray | Dataset | Coordinates | dict], compat: CompatOptions | CombineKwargDefault = _COMPAT_DEFAULT, join: JoinOptions | CombineKwargDefault = _JOIN_DEFAULT, fill_value: object = dtypes.NA, combine_attrs: CombineAttrsOptions = "override", ) -> DataTree | Dataset: """Merge any number of xarray objects into a single Dataset as variables. Parameters ---------- objects : iterable of DataArray, Dataset, DataTree or dict Merge together all variables from these objects. If any of them are DataArray objects, they must have a name. compat : {"identical", "equals", "broadcast_equals", "no_conflicts", \ "override", "minimal"}, default: "no_conflicts" String indicating how to compare variables of the same name for potential conflicts: - "identical": all values, dimensions and attributes must be the same. - "equals": all values and dimensions must be the same. - "broadcast_equals": all values must be equal when variables are broadcast against each other to ensure common dimensions. - "no_conflicts": only values which are not null in both datasets must be equal. The returned dataset then contains the combination of all non-null values. - "override": skip comparing and pick variable from first dataset - "minimal": drop conflicting coordinates join : {"outer", "inner", "left", "right", "exact", "override"}, default: "outer" String indicating how to combine differing indexes in objects. - "outer": use the union of object indexes - "inner": use the intersection of object indexes - "left": use indexes from the first object with each dimension - "right": use indexes from the last object with each dimension - "exact": instead of aligning, raise `ValueError` when indexes to be aligned are not equal - "override": if indexes are of same size, rewrite indexes to be those of the first object with that dimension. Indexes for the same dimension must have the same size in all objects. fill_value : scalar or dict-like, optional Value to use for newly missing values. If a dict-like, maps variable names to fill values. Use a data array's name to refer to its values. combine_attrs : {"drop", "identical", "no_conflicts", "drop_conflicts", \ "override"} or callable, default: "override" A callable or a string indicating how to combine attrs of the objects being merged: - "drop": empty attrs on returned Dataset. - "identical": all attrs must be the same on every object. - "no_conflicts": attrs from all objects are combined, any that have the same name must also have the same value. - "drop_conflicts": attrs from all objects are combined, any that have the same name but different values are dropped. - "override": skip comparing and copy attrs from the first dataset to the result. If a callable, it must expect a sequence of ``attrs`` dicts and a context object as its only parameters. Returns ------- Dataset or DataTree Objects with combined variables from the inputs. If any inputs are a DataTree, this will also be a DataTree. Otherwise it will be a Dataset. Examples -------- >>> x = xr.DataArray( ... [[1.0, 2.0], [3.0, 5.0]], ... dims=("lat", "lon"), ... coords={"lat": [35.0, 40.0], "lon": [100.0, 120.0]}, ... name="var1", ... ) >>> y = xr.DataArray( ... [[5.0, 6.0], [7.0, 8.0]], ... dims=("lat", "lon"), ... coords={"lat": [35.0, 42.0], "lon": [100.0, 150.0]}, ... name="var2", ... ) >>> z = xr.DataArray( ... [[0.0, 3.0], [4.0, 9.0]], ... dims=("time", "lon"), ... coords={"time": [30.0, 60.0], "lon": [100.0, 150.0]}, ... name="var3", ... 
) >>> x Size: 32B array([[1., 2.], [3., 5.]]) Coordinates: * lat (lat) float64 16B 35.0 40.0 * lon (lon) float64 16B 100.0 120.0 >>> y Size: 32B array([[5., 6.], [7., 8.]]) Coordinates: * lat (lat) float64 16B 35.0 42.0 * lon (lon) float64 16B 100.0 150.0 >>> z Size: 32B array([[0., 3.], [4., 9.]]) Coordinates: * time (time) float64 16B 30.0 60.0 * lon (lon) float64 16B 100.0 150.0 >>> xr.merge([x, y, z], join="outer") Size: 256B Dimensions: (lat: 3, lon: 3, time: 2) Coordinates: * lat (lat) float64 24B 35.0 40.0 42.0 * lon (lon) float64 24B 100.0 120.0 150.0 * time (time) float64 16B 30.0 60.0 Data variables: var1 (lat, lon) float64 72B 1.0 2.0 nan 3.0 5.0 nan nan nan nan var2 (lat, lon) float64 72B 5.0 nan 6.0 nan nan nan 7.0 nan 8.0 var3 (time, lon) float64 48B 0.0 nan 3.0 4.0 nan 9.0 >>> xr.merge([x, y, z], compat="identical", join="outer") Size: 256B Dimensions: (lat: 3, lon: 3, time: 2) Coordinates: * lat (lat) float64 24B 35.0 40.0 42.0 * lon (lon) float64 24B 100.0 120.0 150.0 * time (time) float64 16B 30.0 60.0 Data variables: var1 (lat, lon) float64 72B 1.0 2.0 nan 3.0 5.0 nan nan nan nan var2 (lat, lon) float64 72B 5.0 nan 6.0 nan nan nan 7.0 nan 8.0 var3 (time, lon) float64 48B 0.0 nan 3.0 4.0 nan 9.0 >>> xr.merge([x, y, z], compat="equals", join="outer") Size: 256B Dimensions: (lat: 3, lon: 3, time: 2) Coordinates: * lat (lat) float64 24B 35.0 40.0 42.0 * lon (lon) float64 24B 100.0 120.0 150.0 * time (time) float64 16B 30.0 60.0 Data variables: var1 (lat, lon) float64 72B 1.0 2.0 nan 3.0 5.0 nan nan nan nan var2 (lat, lon) float64 72B 5.0 nan 6.0 nan nan nan 7.0 nan 8.0 var3 (time, lon) float64 48B 0.0 nan 3.0 4.0 nan 9.0 >>> xr.merge([x, y, z], compat="equals", join="outer", fill_value=-999.0) Size: 256B Dimensions: (lat: 3, lon: 3, time: 2) Coordinates: * lat (lat) float64 24B 35.0 40.0 42.0 * lon (lon) float64 24B 100.0 120.0 150.0 * time (time) float64 16B 30.0 60.0 Data variables: var1 (lat, lon) float64 72B 1.0 2.0 -999.0 3.0 ... -999.0 -999.0 -999.0 var2 (lat, lon) float64 72B 5.0 -999.0 6.0 -999.0 ... 
7.0 -999.0 8.0 var3 (time, lon) float64 48B 0.0 -999.0 3.0 4.0 -999.0 9.0 >>> xr.merge([x, y, z], join="override") Size: 144B Dimensions: (lat: 2, lon: 2, time: 2) Coordinates: * lat (lat) float64 16B 35.0 40.0 * lon (lon) float64 16B 100.0 120.0 * time (time) float64 16B 30.0 60.0 Data variables: var1 (lat, lon) float64 32B 1.0 2.0 3.0 5.0 var2 (lat, lon) float64 32B 5.0 6.0 7.0 8.0 var3 (time, lon) float64 32B 0.0 3.0 4.0 9.0 >>> xr.merge([x, y, z], join="inner") Size: 64B Dimensions: (lat: 1, lon: 1, time: 2) Coordinates: * lat (lat) float64 8B 35.0 * lon (lon) float64 8B 100.0 * time (time) float64 16B 30.0 60.0 Data variables: var1 (lat, lon) float64 8B 1.0 var2 (lat, lon) float64 8B 5.0 var3 (time, lon) float64 16B 0.0 4.0 >>> xr.merge([x, y, z], compat="identical", join="inner") Size: 64B Dimensions: (lat: 1, lon: 1, time: 2) Coordinates: * lat (lat) float64 8B 35.0 * lon (lon) float64 8B 100.0 * time (time) float64 16B 30.0 60.0 Data variables: var1 (lat, lon) float64 8B 1.0 var2 (lat, lon) float64 8B 5.0 var3 (time, lon) float64 16B 0.0 4.0 >>> xr.merge([x, y, z], compat="broadcast_equals", join="outer") Size: 256B Dimensions: (lat: 3, lon: 3, time: 2) Coordinates: * lat (lat) float64 24B 35.0 40.0 42.0 * lon (lon) float64 24B 100.0 120.0 150.0 * time (time) float64 16B 30.0 60.0 Data variables: var1 (lat, lon) float64 72B 1.0 2.0 nan 3.0 5.0 nan nan nan nan var2 (lat, lon) float64 72B 5.0 nan 6.0 nan nan nan 7.0 nan 8.0 var3 (time, lon) float64 48B 0.0 nan 3.0 4.0 nan 9.0 >>> xr.merge([x, y, z], join="exact") Traceback (most recent call last): ... xarray.structure.alignment.AlignmentError: cannot align objects with join='exact' where ... Raises ------ xarray.MergeError If any variables with the same name have conflicting values. See also -------- concat combine_nested combine_by_coords """ from xarray.core.coordinates import Coordinates from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset from xarray.core.datatree import DataTree objects = list(objects) if any(isinstance(obj, DataTree) for obj in objects): if not all(isinstance(obj, DataTree) for obj in objects): raise TypeError( "merge does not support mixed type arguments when one argument " f"is a DataTree: {objects}" ) trees = cast(list[DataTree], objects) return merge_trees( trees, compat=compat, join=join, combine_attrs=combine_attrs, fill_value=fill_value, ) dict_like_objects = [] for obj in objects: if not isinstance(obj, DataArray | Dataset | Coordinates | dict): raise TypeError( "objects must be an iterable containing only DataTree(s), " f"Dataset(s), DataArray(s), and dictionaries: {objects}" ) if isinstance(obj, DataArray): obj = obj.to_dataset(promote_attrs=True) elif isinstance(obj, Coordinates): obj = obj.to_dataset() dict_like_objects.append(obj) merge_result = merge_core( dict_like_objects, compat=compat, join=join, combine_attrs=combine_attrs, fill_value=fill_value, ) return Dataset._construct_direct(**merge_result._asdict()) def dataset_merge_method( dataset: Dataset, other: CoercibleMapping, overwrite_vars: Hashable | Iterable[Hashable], compat: CompatOptions | CombineKwargDefault, join: JoinOptions | CombineKwargDefault, fill_value: Any, combine_attrs: CombineAttrsOptions, ) -> _MergeResult: """Guts of the Dataset.merge method.""" # we are locked into supporting overwrite_vars for the Dataset.merge # method due for backwards compatibility # TODO: consider deprecating it? 
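    # Normalize overwrite_vars into a set so the membership checks below work
    # the same for a single hashable as for an iterable of names, then decide
    # which positional argument (if any) takes priority in merge_core.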
if not isinstance(overwrite_vars, str) and isinstance(overwrite_vars, Iterable): overwrite_vars = set(overwrite_vars) else: overwrite_vars = {overwrite_vars} if not overwrite_vars: objs = [dataset, other] priority_arg = None elif overwrite_vars == set(other): objs = [dataset, other] priority_arg = 1 else: other_overwrite: dict[Hashable, CoercibleValue] = {} other_no_overwrite: dict[Hashable, CoercibleValue] = {} for k, v in other.items(): if k in overwrite_vars: other_overwrite[k] = v else: other_no_overwrite[k] = v objs = [dataset, other_no_overwrite, other_overwrite] priority_arg = 2 return merge_core( objs, compat=compat, join=join, priority_arg=priority_arg, fill_value=fill_value, combine_attrs=combine_attrs, ) def dataset_update_method(dataset: Dataset, other: CoercibleMapping) -> _MergeResult: """Guts of the Dataset.update method. This drops a duplicated coordinates from `other` if `other` is not an `xarray.Dataset`, e.g., if it's a dict with DataArray values (GH2068, GH2180). """ from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset if not isinstance(other, Dataset): other = dict(other) for key, value in other.items(): if isinstance(value, DataArray): # drop conflicting coordinates coord_names = [ c for c in value.coords if c not in value.dims and c in dataset.coords ] if coord_names: value = value.drop_vars(coord_names) if isinstance(value.variable, IndexVariable): variable = value.variable.to_base_variable() value = value._replace(variable=variable) other[key] = value return merge_core( [dataset, other], compat="broadcast_equals", join="outer", priority_arg=1, indexes=dataset.xindexes, combine_attrs="override", ) def merge_data_and_coords(data_vars: DataVars, coords) -> _MergeResult: """Used in Dataset.__init__.""" from xarray.core.coordinates import Coordinates, create_coords_with_default_indexes if isinstance(coords, Coordinates): coords = coords.copy() else: coords = create_coords_with_default_indexes(coords, data_vars) # exclude coords from alignment (all variables in a Coordinates object should # already be aligned together) and use coordinates' indexes to align data_vars return merge_core( [data_vars, coords], compat="broadcast_equals", join="outer", combine_attrs="override", explicit_coords=tuple(coords), indexes=coords.xindexes, priority_arg=1, skip_align_args=[1], ) python-xarray-2026.01.0/xarray/structure/__init__.py0000664000175000017500000000000015136607163022510 0ustar alastairalastairpython-xarray-2026.01.0/xarray/tests/0000775000175000017500000000000015136607163017513 5ustar alastairalastairpython-xarray-2026.01.0/xarray/tests/test_dataset_typing.yml0000664000175000017500000002024515136607163024317 0ustar alastairalastair- case: test_mypy_pipe_lambda_noarg_return_type main: | from xarray import Dataset ds = Dataset().pipe(lambda data: data) reveal_type(ds) # N: Revealed type is "xarray.core.dataset.Dataset" - case: test_mypy_pipe_lambda_posarg_return_type main: | from xarray import Dataset ds = Dataset().pipe(lambda data, arg: arg, "foo") reveal_type(ds) # N: Revealed type is "builtins.str" - case: test_mypy_pipe_lambda_chaining_return_type main: | from xarray import Dataset answer = Dataset().pipe(lambda data, arg: arg, "foo").count("o") reveal_type(answer) # N: Revealed type is "builtins.int" - case: test_mypy_pipe_lambda_missing_arg main: | from xarray import Dataset # Call to pipe missing argument for lambda parameter `arg` ds = Dataset().pipe(lambda data, arg: data) out: | main:4: error: No overload variant of "pipe" of 
"DataWithCoords" matches argument type "Callable[[Any, Any], Any]" [call-overload] main:4: note: Possible overload variants: main:4: note: def [P`2, T] pipe(self, func: Callable[[Dataset, **P], T], *args: P.args, **kwargs: P.kwargs) -> T main:4: note: def [T] pipe(self, func: tuple[Callable[..., T], str], *args: Any, **kwargs: Any) -> T - case: test_mypy_pipe_lambda_extra_arg main: | from xarray import Dataset # Call to pipe with extra argument for lambda ds = Dataset().pipe(lambda data: data, "oops!") out: | main:4: error: No overload variant of "pipe" of "DataWithCoords" matches argument types "Callable[[Any], Any]", "str" [call-overload] main:4: note: Possible overload variants: main:4: note: def [P`2, T] pipe(self, func: Callable[[Dataset, **P], T], *args: P.args, **kwargs: P.kwargs) -> T main:4: note: def [T] pipe(self, func: tuple[Callable[..., T], str], *args: Any, **kwargs: Any) -> T - case: test_mypy_pipe_function_missing_posarg main: | from xarray import Dataset def f(ds: Dataset, arg: int) -> Dataset: return ds # Call to pipe missing argument for function parameter `arg` ds = Dataset().pipe(f) out: | main:7: error: No overload variant of "pipe" of "DataWithCoords" matches argument type "Callable[[Dataset, int], Dataset]" [call-overload] main:7: note: Possible overload variants: main:7: note: def [P`2, T] pipe(self, func: Callable[[Dataset, **P], T], *args: P.args, **kwargs: P.kwargs) -> T main:7: note: def [T] pipe(self, func: tuple[Callable[..., T], str], *args: Any, **kwargs: Any) -> T - case: test_mypy_pipe_function_extra_posarg main: | from xarray import Dataset def f(ds: Dataset, arg: int) -> Dataset: return ds # Call to pipe missing keyword for kwonly parameter `kwonly` ds = Dataset().pipe(f, 42, "oops!") out: | main:7: error: No overload variant of "pipe" of "DataWithCoords" matches argument types "Callable[[Dataset, int], Dataset]", "int", "str" [call-overload] main:7: note: Possible overload variants: main:7: note: def [P`2, T] pipe(self, func: Callable[[Dataset, **P], T], *args: P.args, **kwargs: P.kwargs) -> T main:7: note: def [T] pipe(self, func: tuple[Callable[..., T], str], *args: Any, **kwargs: Any) -> T - case: test_mypy_pipe_function_missing_kwarg main: | from xarray import Dataset def f(ds: Dataset, arg: int, *, kwonly: int) -> Dataset: return ds # Call to pipe missing argument for kwonly parameter `kwonly` ds = Dataset().pipe(f, 42) out: | main:7: error: No overload variant of "pipe" of "DataWithCoords" matches argument types "Callable[[Dataset, int, NamedArg(int, 'kwonly')], Dataset]", "int" [call-overload] main:7: note: Possible overload variants: main:7: note: def [P`2, T] pipe(self, func: Callable[[Dataset, **P], T], *args: P.args, **kwargs: P.kwargs) -> T main:7: note: def [T] pipe(self, func: tuple[Callable[..., T], str], *args: Any, **kwargs: Any) -> T - case: test_mypy_pipe_function_missing_keyword main: | from xarray import Dataset def f(ds: Dataset, arg: int, *, kwonly: int) -> Dataset: return ds # Call to pipe missing keyword for kwonly parameter `kwonly` ds = Dataset().pipe(f, 42, 99) out: | main:7: error: No overload variant of "pipe" of "DataWithCoords" matches argument types "Callable[[Dataset, int, NamedArg(int, 'kwonly')], Dataset]", "int", "int" [call-overload] main:7: note: Possible overload variants: main:7: note: def [P`2, T] pipe(self, func: Callable[[Dataset, **P], T], *args: P.args, **kwargs: P.kwargs) -> T main:7: note: def [T] pipe(self, func: tuple[Callable[..., T], str], *args: Any, **kwargs: Any) -> T - case: 
test_mypy_pipe_function_unexpected_keyword skip: True # mypy 1.18.1 outputs "defined here" notes without line numbers that pytest-mypy-plugins can't parse # See: https://github.com/python/mypy/issues/19257 (mypy issue about missing line numbers) main: | from xarray import Dataset def f(ds: Dataset, arg: int, *, kwonly: int) -> Dataset: return ds # Call to pipe using wrong keyword: `kw` instead of `kwonly` ds = Dataset().pipe(f, 42, kw=99) out: | main:7: error: Unexpected keyword argument "kw" for "pipe" of "DataWithCoords" [call-arg] # Note: mypy 1.18.1 also outputs a "defined here" note that pytest-mypy-plugins can't parse - case: test_mypy_pipe_tuple_return_type_dataset main: | from xarray import Dataset def f(arg: int, ds: Dataset) -> Dataset: return ds ds = Dataset().pipe((f, "ds"), 42) reveal_type(ds) # N: Revealed type is "xarray.core.dataset.Dataset" - case: test_mypy_pipe_tuple_return_type_other main: | from xarray import Dataset def f(arg: int, ds: Dataset) -> int: return arg answer = Dataset().pipe((f, "ds"), 42) reveal_type(answer) # N: Revealed type is "builtins.int" - case: test_mypy_pipe_tuple_missing_arg main: | from xarray import Dataset def f(arg: int, ds: Dataset) -> Dataset: return ds # Since we cannot provide a precise type annotation when passing a tuple to # pipe, there's not enough information for type analysis to indicate that # we are missing an argument for parameter `arg`, so we get no error here. ds = Dataset().pipe((f, "ds")) reveal_type(ds) # N: Revealed type is "xarray.core.dataset.Dataset" # Rather than passing a tuple, passing a lambda that calls `f` with args in # the correct order allows for proper type analysis, indicating (perhaps # somewhat cryptically) that we failed to pass an argument for `arg`. ds = Dataset().pipe(lambda data, arg: f(arg, data)) out: | main:17: error: No overload variant of "pipe" of "DataWithCoords" matches argument type "Callable[[Any, Any], Dataset]" [call-overload] main:17: note: Possible overload variants: main:17: note: def [P`9, T] pipe(self, func: Callable[[Dataset, **P], T], *args: P.args, **kwargs: P.kwargs) -> T main:17: note: def [T] pipe(self, func: tuple[Callable[..., T], str], *args: Any, **kwargs: Any) -> T - case: test_mypy_pipe_tuple_extra_arg main: | from xarray import Dataset def f(arg: int, ds: Dataset) -> Dataset: return ds # Since we cannot provide a precise type annotation when passing a tuple to # pipe, there's not enough information for type analysis to indicate that # we are providing too many args for `f`, so we get no error here. ds = Dataset().pipe((f, "ds"), 42, "foo") reveal_type(ds) # N: Revealed type is "xarray.core.dataset.Dataset" # Rather than passing a tuple, passing a lambda that calls `f` with args in # the correct order allows for proper type analysis, indicating (perhaps # somewhat cryptically) that we passed too many arguments. 
ds = Dataset().pipe(lambda data, arg: f(arg, data), 42, "foo") out: | main:17: error: No overload variant of "pipe" of "DataWithCoords" matches argument types "Callable[[Any, Any], Dataset]", "int", "str" [call-overload] main:17: note: Possible overload variants: main:17: note: def [P`9, T] pipe(self, func: Callable[[Dataset, **P], T], *args: P.args, **kwargs: P.kwargs) -> T main:17: note: def [T] pipe(self, func: tuple[Callable[..., T], str], *args: Any, **kwargs: Any) -> T python-xarray-2026.01.0/xarray/tests/test_coarsen.py0000664000175000017500000002714515136607163022567 0ustar alastairalastairfrom __future__ import annotations import numpy as np import pandas as pd import pytest import xarray as xr from xarray import DataArray, Dataset, set_options from xarray.core import duck_array_ops from xarray.tests import ( assert_allclose, assert_equal, assert_identical, has_dask, raise_if_dask_computes, requires_cftime, ) def test_coarsen_absent_dims_error(ds: Dataset) -> None: with pytest.raises( ValueError, match=r"Window dimensions \('foo',\) not found in Dataset dimensions", ): ds.coarsen(foo=2) @pytest.mark.parametrize("dask", [True, False]) @pytest.mark.parametrize(("boundary", "side"), [("trim", "left"), ("pad", "right")]) def test_coarsen_dataset(ds, dask, boundary, side): if dask and has_dask: ds = ds.chunk({"x": 4}) actual = ds.coarsen(time=2, x=3, boundary=boundary, side=side).max() assert_equal( actual["z1"], ds["z1"].coarsen(x=3, boundary=boundary, side=side).max() ) # coordinate should be mean by default assert_equal( actual["time"], ds["time"].coarsen(time=2, boundary=boundary, side=side).mean() ) @pytest.mark.parametrize("dask", [True, False]) def test_coarsen_coords(ds, dask): if dask and has_dask: ds = ds.chunk({"x": 4}) # check if coord_func works actual = ds.coarsen(time=2, x=3, boundary="trim", coord_func={"time": "max"}).max() assert_equal(actual["z1"], ds["z1"].coarsen(x=3, boundary="trim").max()) assert_equal(actual["time"], ds["time"].coarsen(time=2, boundary="trim").max()) # raise if exact with pytest.raises(ValueError): ds.coarsen(x=3).mean() # should be no error ds.isel(x=slice(0, 3 * (len(ds["x"]) // 3))).coarsen(x=3).mean() # working test with pd.time da = xr.DataArray( np.linspace(0, 365, num=364), dims="time", coords={"time": pd.date_range("1999-12-15", periods=364)}, ) actual = da.coarsen(time=2).mean() # type: ignore[attr-defined] @requires_cftime def test_coarsen_coords_cftime(): times = xr.date_range("2000", periods=6, use_cftime=True) da = xr.DataArray(range(6), [("time", times)]) actual = da.coarsen(time=3).mean() # type: ignore[attr-defined] expected_times = xr.date_range("2000-01-02", freq="3D", periods=2, use_cftime=True) np.testing.assert_array_equal(actual.time, expected_times) @pytest.mark.parametrize( "funcname, argument", [ ("reduce", (np.mean,)), ("mean", ()), ], ) def test_coarsen_keep_attrs(funcname, argument) -> None: global_attrs = {"units": "test", "long_name": "testing"} da_attrs = {"da_attr": "test"} attrs_coords = {"attrs_coords": "test"} da_not_coarsend_attrs = {"da_not_coarsend_attr": "test"} data = np.linspace(10, 15, 100) coords = np.linspace(1, 10, 100) ds = Dataset( data_vars={ "da": ("coord", data, da_attrs), "da_not_coarsend": ("no_coord", data, da_not_coarsend_attrs), }, coords={"coord": ("coord", coords, attrs_coords)}, attrs=global_attrs, ) # attrs are kept by default func = getattr(ds.coarsen(dim={"coord": 5}), funcname) result = func(*argument) assert result.attrs == global_attrs assert result.da.attrs == da_attrs assert 
result.da_not_coarsend.attrs == da_not_coarsend_attrs assert result.coord.attrs == attrs_coords assert result.da.name == "da" assert result.da_not_coarsend.name == "da_not_coarsend" # discard attrs func = getattr(ds.coarsen(dim={"coord": 5}), funcname) result = func(*argument, keep_attrs=False) assert result.attrs == {} assert result.da.attrs == {} assert result.da_not_coarsend.attrs == {} assert result.coord.attrs == {} assert result.da.name == "da" assert result.da_not_coarsend.name == "da_not_coarsend" # test discard attrs using global option func = getattr(ds.coarsen(dim={"coord": 5}), funcname) with set_options(keep_attrs=False): result = func(*argument) assert result.attrs == {} assert result.da.attrs == {} assert result.da_not_coarsend.attrs == {} assert result.coord.attrs == {} assert result.da.name == "da" assert result.da_not_coarsend.name == "da_not_coarsend" # keyword takes precedence over global option func = getattr(ds.coarsen(dim={"coord": 5}), funcname) with set_options(keep_attrs=False): result = func(*argument, keep_attrs=True) assert result.attrs == global_attrs assert result.da.attrs == da_attrs assert result.da_not_coarsend.attrs == da_not_coarsend_attrs assert result.coord.attrs == attrs_coords assert result.da.name == "da" assert result.da_not_coarsend.name == "da_not_coarsend" func = getattr(ds.coarsen(dim={"coord": 5}), funcname) with set_options(keep_attrs=True): result = func(*argument, keep_attrs=False) assert result.attrs == {} assert result.da.attrs == {} assert result.da_not_coarsend.attrs == {} assert result.coord.attrs == {} assert result.da.name == "da" assert result.da_not_coarsend.name == "da_not_coarsend" @pytest.mark.slow @pytest.mark.parametrize("ds", (1, 2), indirect=True) @pytest.mark.parametrize("window", (1, 2, 3, 4)) @pytest.mark.parametrize("name", ("sum", "mean", "std", "var", "min", "max", "median")) def test_coarsen_reduce(ds: Dataset, window, name) -> None: # Use boundary="trim" to accommodate all window sizes used in tests coarsen_obj = ds.coarsen(time=window, boundary="trim") # add nan prefix to numpy methods to get similar behavior as bottleneck actual = coarsen_obj.reduce(getattr(np, f"nan{name}")) expected = getattr(coarsen_obj, name)() assert_allclose(actual, expected) # make sure the order of data_var are not changed. assert list(ds.data_vars.keys()) == list(actual.data_vars.keys()) # Make sure the dimension order is restored for key, src_var in ds.data_vars.items(): assert src_var.dims == actual[key].dims @pytest.mark.parametrize( "funcname, argument", [ ("reduce", (np.mean,)), ("mean", ()), ], ) def test_coarsen_da_keep_attrs(funcname, argument) -> None: attrs_da = {"da_attr": "test"} attrs_coords = {"attrs_coords": "test"} data = np.linspace(10, 15, 100) coords = np.linspace(1, 10, 100) da = DataArray( data, dims=("coord"), coords={"coord": ("coord", coords, attrs_coords)}, attrs=attrs_da, name="name", ) # attrs are kept by default func = getattr(da.coarsen(dim={"coord": 5}), funcname) result = func(*argument) assert result.attrs == attrs_da assert da.coord.attrs == attrs_coords assert result.name == "name" # discard attrs func = getattr(da.coarsen(dim={"coord": 5}), funcname) result = func(*argument, keep_attrs=False) assert result.attrs == {} # XXX: no assert? _ = da.coord.attrs == {} assert result.name == "name" # test discard attrs using global option func = getattr(da.coarsen(dim={"coord": 5}), funcname) with set_options(keep_attrs=False): result = func(*argument) assert result.attrs == {} # XXX: no assert? 
_ = da.coord.attrs == {} assert result.name == "name" # keyword takes precedence over global option func = getattr(da.coarsen(dim={"coord": 5}), funcname) with set_options(keep_attrs=False): result = func(*argument, keep_attrs=True) assert result.attrs == attrs_da # XXX: no assert? _ = da.coord.attrs == {} assert result.name == "name" func = getattr(da.coarsen(dim={"coord": 5}), funcname) with set_options(keep_attrs=True): result = func(*argument, keep_attrs=False) assert result.attrs == {} # XXX: no assert? _ = da.coord.attrs == {} assert result.name == "name" @pytest.mark.parametrize("da", (1, 2), indirect=True) @pytest.mark.parametrize("window", (1, 2, 3, 4)) @pytest.mark.parametrize("name", ("sum", "mean", "std", "max")) def test_coarsen_da_reduce(da, window, name) -> None: if da.isnull().sum() > 1 and window == 1: pytest.skip("These parameters lead to all-NaN slices") # Use boundary="trim" to accommodate all window sizes used in tests coarsen_obj = da.coarsen(time=window, boundary="trim") # add nan prefix to numpy methods to get similar # behavior as bottleneck actual = coarsen_obj.reduce(getattr(np, f"nan{name}")) expected = getattr(coarsen_obj, name)() assert_allclose(actual, expected) class TestCoarsenConstruct: @pytest.mark.parametrize("dask", [True, False]) def test_coarsen_construct(self, dask: bool) -> None: ds = Dataset( { "vart": ("time", np.arange(48), {"a": "b"}), "varx": ("x", np.arange(10), {"a": "b"}), "vartx": (("x", "time"), np.arange(480).reshape(10, 48), {"a": "b"}), "vary": ("y", np.arange(12)), }, coords={"time": np.arange(48), "y": np.arange(12)}, attrs={"foo": "bar"}, ) if dask and has_dask: ds = ds.chunk({"x": 4, "time": 10}) expected = xr.Dataset(attrs={"foo": "bar"}) expected["vart"] = ( ("year", "month"), duck_array_ops.reshape(ds.vart.data, (-1, 12)), {"a": "b"}, ) expected["varx"] = ( ("x", "x_reshaped"), duck_array_ops.reshape(ds.varx.data, (-1, 5)), {"a": "b"}, ) expected["vartx"] = ( ("x", "x_reshaped", "year", "month"), duck_array_ops.reshape(ds.vartx.data, (2, 5, 4, 12)), {"a": "b"}, ) expected["vary"] = ds.vary expected.coords["time"] = ( ("year", "month"), duck_array_ops.reshape(ds.time.data, (-1, 12)), ) with raise_if_dask_computes(): actual = ds.coarsen(time=12, x=5).construct( {"time": ("year", "month"), "x": ("x", "x_reshaped")} ) assert_identical(actual, expected) with raise_if_dask_computes(): actual = ds.coarsen(time=12, x=5).construct( time=("year", "month"), x=("x", "x_reshaped") ) assert_identical(actual, expected) with raise_if_dask_computes(): actual = ds.coarsen(time=12, x=5).construct( {"time": ("year", "month"), "x": ("x", "x_reshaped")}, keep_attrs=False ) for var in actual: assert actual[var].attrs == {} assert actual.attrs == {} with raise_if_dask_computes(): actual = ds.vartx.coarsen(time=12, x=5).construct( {"time": ("year", "month"), "x": ("x", "x_reshaped")} ) assert_identical(actual, expected["vartx"]) with pytest.raises(ValueError): ds.coarsen(time=12).construct(foo="bar") with pytest.raises(ValueError): ds.coarsen(time=12, x=2).construct(time=("year", "month")) with pytest.raises(ValueError): ds.coarsen(time=12).construct() with pytest.raises(ValueError): ds.coarsen(time=12).construct(time="bar") with pytest.raises(ValueError): ds.coarsen(time=12).construct(time=("bar",)) def test_coarsen_construct_keeps_all_coords(self): da = xr.DataArray(np.arange(24), dims=["time"]) da = da.assign_coords(day=365 * da) result = da.coarsen(time=12).construct(time=("year", "month")) assert list(da.coords) == list(result.coords) ds = 
da.to_dataset(name="T") ds_result = ds.coarsen(time=12).construct(time=("year", "month")) assert list(da.coords) == list(ds_result.coords) python-xarray-2026.01.0/xarray/tests/namespace.py0000664000175000017500000000024115136607163022016 0ustar alastairalastairfrom xarray.core import duck_array_ops def reshape(array, shape, **kwargs): return type(array)(duck_array_ops.reshape(array.array, shape=shape, **kwargs)) python-xarray-2026.01.0/xarray/tests/test_parallelcompat.py0000664000175000017500000002072415136607163024131 0ustar alastairalastairfrom __future__ import annotations from importlib.metadata import EntryPoint from typing import Any import numpy as np import pytest from xarray import set_options from xarray.core.types import T_Chunks, T_DuckArray, T_NormalizedChunks from xarray.namedarray._typing import _Chunks from xarray.namedarray.daskmanager import DaskManager from xarray.namedarray.parallelcompat import ( KNOWN_CHUNKMANAGERS, ChunkManagerEntrypoint, get_chunked_array_type, guess_chunkmanager, list_chunkmanagers, load_chunkmanagers, ) from xarray.tests import requires_dask class DummyChunkedArray(np.ndarray): """ Mock-up of a chunked array class. Adds a (non-functional) .chunks attribute by following this example in the numpy docs https://numpy.org/doc/stable/user/basics.subclassing.html#simple-example-adding-an-extra-attribute-to-ndarray """ chunks: T_NormalizedChunks def __new__( cls, shape, dtype=float, buffer=None, offset=0, strides=None, order=None, chunks=None, ): obj = super().__new__(cls, shape, dtype, buffer, offset, strides, order) obj.chunks = chunks return obj def __array_finalize__(self, obj): if obj is None: return self.chunks = getattr(obj, "chunks", None) # type: ignore[assignment] def rechunk(self, chunks, **kwargs): copied = self.copy() copied.chunks = chunks return copied class DummyChunkManager(ChunkManagerEntrypoint): """Mock-up of ChunkManager class for DummyChunkedArray""" def __init__(self): self.array_cls = DummyChunkedArray def is_chunked_array(self, data: Any) -> bool: return isinstance(data, DummyChunkedArray) def chunks(self, data: DummyChunkedArray) -> T_NormalizedChunks: return data.chunks def normalize_chunks( self, chunks: T_Chunks | T_NormalizedChunks, shape: tuple[int, ...] 
| None = None, limit: int | None = None, dtype: np.dtype | None = None, previous_chunks: T_NormalizedChunks | None = None, ) -> T_NormalizedChunks: from dask.array.core import normalize_chunks return normalize_chunks(chunks, shape, limit, dtype, previous_chunks) def from_array( self, data: T_DuckArray | np.typing.ArrayLike, chunks: _Chunks, **kwargs ) -> DummyChunkedArray: from dask import array as da return da.from_array(data, chunks, **kwargs) def rechunk(self, data: DummyChunkedArray, chunks, **kwargs) -> DummyChunkedArray: return data.rechunk(chunks, **kwargs) def compute(self, *data: DummyChunkedArray, **kwargs) -> tuple[np.ndarray, ...]: # type: ignore[override] from dask.array import compute return compute(*data, **kwargs) def apply_gufunc( self, func, signature, *args, axes=None, axis=None, keepdims=False, output_dtypes=None, output_sizes=None, vectorize=None, allow_rechunk=False, meta=None, **kwargs, ): from dask.array.gufunc import apply_gufunc return apply_gufunc( func, signature, *args, axes=axes, axis=axis, keepdims=keepdims, output_dtypes=output_dtypes, output_sizes=output_sizes, vectorize=vectorize, allow_rechunk=allow_rechunk, meta=meta, **kwargs, ) @pytest.fixture def register_dummy_chunkmanager(monkeypatch): """ Mocks the registering of an additional ChunkManagerEntrypoint. This preserves the presence of the existing DaskManager, so a test that relies on this and DaskManager both being returned from list_chunkmanagers() at once would still work. The monkeypatching changes the behavior of list_chunkmanagers when called inside xarray.namedarray.parallelcompat, but not when called from this tests file. """ # Should include DaskManager iff dask is available to be imported preregistered_chunkmanagers = list_chunkmanagers() monkeypatch.setattr( "xarray.namedarray.parallelcompat.list_chunkmanagers", lambda: {"dummy": DummyChunkManager()} | preregistered_chunkmanagers, ) yield class TestGetChunkManager: def test_get_chunkmanger(self, register_dummy_chunkmanager) -> None: chunkmanager = guess_chunkmanager("dummy") assert isinstance(chunkmanager, DummyChunkManager) def test_get_chunkmanger_via_set_options(self, register_dummy_chunkmanager) -> None: with set_options(chunk_manager="dummy"): chunkmanager = guess_chunkmanager(None) assert isinstance(chunkmanager, DummyChunkManager) def test_fail_on_known_but_missing_chunkmanager( self, register_dummy_chunkmanager, monkeypatch ) -> None: monkeypatch.setitem(KNOWN_CHUNKMANAGERS, "test", "test-package") with pytest.raises( ImportError, match=r"chunk manager 'test' is not available.+test-package" ): guess_chunkmanager("test") def test_fail_on_nonexistent_chunkmanager( self, register_dummy_chunkmanager ) -> None: with pytest.raises(ValueError, match="unrecognized chunk manager 'foo'"): guess_chunkmanager("foo") @requires_dask def test_get_dask_if_installed(self) -> None: chunkmanager = guess_chunkmanager(None) assert isinstance(chunkmanager, DaskManager) def test_no_chunk_manager_available(self, monkeypatch) -> None: monkeypatch.setattr("xarray.namedarray.parallelcompat.list_chunkmanagers", dict) with pytest.raises(ImportError, match="no chunk managers available"): guess_chunkmanager("foo") def test_no_chunk_manager_available_but_known_manager_requested( self, monkeypatch ) -> None: monkeypatch.setattr("xarray.namedarray.parallelcompat.list_chunkmanagers", dict) with pytest.raises(ImportError, match="chunk manager 'dask' is not available"): guess_chunkmanager("dask") @requires_dask def test_choose_dask_over_other_chunkmanagers( self, 
register_dummy_chunkmanager ) -> None: chunk_manager = guess_chunkmanager(None) assert isinstance(chunk_manager, DaskManager) class TestGetChunkedArrayType: def test_detect_chunked_arrays(self, register_dummy_chunkmanager) -> None: dummy_arr = DummyChunkedArray([1, 2, 3]) chunk_manager = get_chunked_array_type(dummy_arr) assert isinstance(chunk_manager, DummyChunkManager) def test_ignore_inmemory_arrays(self, register_dummy_chunkmanager) -> None: dummy_arr = DummyChunkedArray([1, 2, 3]) chunk_manager = get_chunked_array_type(*[dummy_arr, 1.0, np.array([5, 6])]) assert isinstance(chunk_manager, DummyChunkManager) with pytest.raises(TypeError, match="Expected a chunked array"): get_chunked_array_type(5.0) def test_raise_if_no_arrays_chunked(self, register_dummy_chunkmanager) -> None: with pytest.raises(TypeError, match="Expected a chunked array "): get_chunked_array_type(*[1.0, np.array([5, 6])]) def test_raise_if_no_matching_chunkmanagers(self) -> None: dummy_arr = DummyChunkedArray([1, 2, 3]) with pytest.raises( TypeError, match="Could not find a Chunk Manager which recognises" ): get_chunked_array_type(dummy_arr) @requires_dask def test_detect_dask_if_installed(self) -> None: import dask.array as da dask_arr = da.from_array([1, 2, 3], chunks=(1,)) chunk_manager = get_chunked_array_type(dask_arr) assert isinstance(chunk_manager, DaskManager) @requires_dask def test_raise_on_mixed_array_types(self, register_dummy_chunkmanager) -> None: import dask.array as da dummy_arr = DummyChunkedArray([1, 2, 3]) dask_arr = da.from_array([1, 2, 3], chunks=(1,)) with pytest.raises(TypeError, match="received multiple types"): get_chunked_array_type(*[dask_arr, dummy_arr]) def test_bogus_entrypoint() -> None: # Create a bogus entry-point as if the user broke their setup.cfg # or is actively developing their new chunk manager entry_point = EntryPoint( "bogus", "xarray.bogus.doesnotwork", "xarray.chunkmanagers" ) with pytest.warns(UserWarning, match="Failed to load chunk manager"): assert len(load_chunkmanagers([entry_point])) == 0 python-xarray-2026.01.0/xarray/tests/test_datatree_typing.yml0000664000175000017500000002051715136607163024465 0ustar alastairalastair- case: test_mypy_pipe_lambda_noarg_return_type main: | from xarray import DataTree dt = DataTree().pipe(lambda data: data) reveal_type(dt) # N: Revealed type is "xarray.core.datatree.DataTree" - case: test_mypy_pipe_lambda_posarg_return_type main: | from xarray import DataTree dt = DataTree().pipe(lambda data, arg: arg, "foo") reveal_type(dt) # N: Revealed type is "builtins.str" - case: test_mypy_pipe_lambda_chaining_return_type main: | from xarray import DataTree answer = DataTree().pipe(lambda data, arg: arg, "foo").count("o") reveal_type(answer) # N: Revealed type is "builtins.int" - case: test_mypy_pipe_lambda_missing_arg main: | from xarray import DataTree # Call to pipe missing argument for lambda parameter `arg` dt = DataTree().pipe(lambda data, arg: data) out: | main:4: error: No overload variant of "pipe" of "DataTree" matches argument type "Callable[[Any, Any], Any]" [call-overload] main:4: note: Possible overload variants: main:4: note: def [P`2, T] pipe(self, func: Callable[[DataTree, **P], T], *args: P.args, **kwargs: P.kwargs) -> T main:4: note: def [T] pipe(self, func: tuple[Callable[..., T], str], *args: Any, **kwargs: Any) -> T - case: test_mypy_pipe_lambda_extra_arg main: | from xarray import DataTree # Call to pipe with extra argument for lambda dt = DataTree().pipe(lambda data: data, "oops!") out: | main:4: error: No overload 
variant of "pipe" of "DataTree" matches argument types "Callable[[Any], Any]", "str" [call-overload] main:4: note: Possible overload variants: main:4: note: def [P`2, T] pipe(self, func: Callable[[DataTree, **P], T], *args: P.args, **kwargs: P.kwargs) -> T main:4: note: def [T] pipe(self, func: tuple[Callable[..., T], str], *args: Any, **kwargs: Any) -> T - case: test_mypy_pipe_function_missing_posarg main: | from xarray import DataTree def f(dt: DataTree, arg: int) -> DataTree: return dt # Call to pipe missing argument for function parameter `arg` dt = DataTree().pipe(f) out: | main:7: error: No overload variant of "pipe" of "DataTree" matches argument type "Callable[[DataTree, int], DataTree]" [call-overload] main:7: note: Possible overload variants: main:7: note: def [P`2, T] pipe(self, func: Callable[[DataTree, **P], T], *args: P.args, **kwargs: P.kwargs) -> T main:7: note: def [T] pipe(self, func: tuple[Callable[..., T], str], *args: Any, **kwargs: Any) -> T - case: test_mypy_pipe_function_extra_posarg main: | from xarray import DataTree def f(dt: DataTree, arg: int) -> DataTree: return dt # Call to pipe missing keyword for kwonly parameter `kwonly` dt = DataTree().pipe(f, 42, "oops!") out: | main:7: error: No overload variant of "pipe" of "DataTree" matches argument types "Callable[[DataTree, int], DataTree]", "int", "str" [call-overload] main:7: note: Possible overload variants: main:7: note: def [P`2, T] pipe(self, func: Callable[[DataTree, **P], T], *args: P.args, **kwargs: P.kwargs) -> T main:7: note: def [T] pipe(self, func: tuple[Callable[..., T], str], *args: Any, **kwargs: Any) -> T - case: test_mypy_pipe_function_missing_kwarg main: | from xarray import DataTree def f(dt: DataTree, arg: int, *, kwonly: int) -> DataTree: return dt # Call to pipe missing argument for kwonly parameter `kwonly` dt = DataTree().pipe(f, 42) out: | main:7: error: No overload variant of "pipe" of "DataTree" matches argument types "Callable[[DataTree, int, NamedArg(int, 'kwonly')], DataTree]", "int" [call-overload] main:7: note: Possible overload variants: main:7: note: def [P`2, T] pipe(self, func: Callable[[DataTree, **P], T], *args: P.args, **kwargs: P.kwargs) -> T main:7: note: def [T] pipe(self, func: tuple[Callable[..., T], str], *args: Any, **kwargs: Any) -> T - case: test_mypy_pipe_function_missing_keyword main: | from xarray import DataTree def f(dt: DataTree, arg: int, *, kwonly: int) -> DataTree: return dt # Call to pipe missing keyword for kwonly parameter `kwonly` dt = DataTree().pipe(f, 42, 99) out: | main:7: error: No overload variant of "pipe" of "DataTree" matches argument types "Callable[[DataTree, int, NamedArg(int, 'kwonly')], DataTree]", "int", "int" [call-overload] main:7: note: Possible overload variants: main:7: note: def [P`2, T] pipe(self, func: Callable[[DataTree, **P], T], *args: P.args, **kwargs: P.kwargs) -> T main:7: note: def [T] pipe(self, func: tuple[Callable[..., T], str], *args: Any, **kwargs: Any) -> T - case: test_mypy_pipe_function_unexpected_keyword skip: True # mypy 1.18+ outputs "defined here" notes without line numbers (e.g., "xarray/core/datatree.py: note:...") # pytest-mypy-plugins expects all lines to match "file:line: severity: message" format and can't parse these notes. # This is a mypy behavior, not a bug. The test would need pytest-mypy-plugins to support notes without line numbers. 
main: | from xarray import DataTree def f(dt: DataTree, arg: int, *, kwonly: int) -> DataTree: return dt # Call to pipe using wrong keyword: `kw` instead of `kwonly` dt = DataTree().pipe(f, 42, kw=99) out: | main:7: error: Unexpected keyword argument "kw" for "pipe" of "DataTree" [call-arg] # Note: mypy 1.18.1 also outputs a "defined here" note that pytest-mypy-plugins can't parse - case: test_mypy_pipe_tuple_return_type_datatree main: | from xarray import DataTree def f(arg: int, dt: DataTree) -> DataTree: return dt dt = DataTree().pipe((f, "dt"), 42) reveal_type(dt) # N: Revealed type is "xarray.core.datatree.DataTree" - case: test_mypy_pipe_tuple_return_type_other main: | from xarray import DataTree def f(arg: int, dt: DataTree) -> int: return arg answer = DataTree().pipe((f, "dt"), 42) reveal_type(answer) # N: Revealed type is "builtins.int" - case: test_mypy_pipe_tuple_missing_arg main: | from xarray import DataTree def f(arg: int, dt: DataTree) -> DataTree: return dt # Since we cannot provide a precise type annotation when passing a tuple to # pipe, there's not enough information for type analysis to indicate that # we are missing an argument for parameter `arg`, so we get no error here. dt = DataTree().pipe((f, "dt")) reveal_type(dt) # N: Revealed type is "xarray.core.datatree.DataTree" # Rather than passing a tuple, passing a lambda that calls `f` with args in # the correct order allows for proper type analysis, indicating (perhaps # somewhat cryptically) that we failed to pass an argument for `arg`. dt = DataTree().pipe(lambda data, arg: f(arg, data)) out: | main:17: error: No overload variant of "pipe" of "DataTree" matches argument type "Callable[[Any, Any], DataTree]" [call-overload] main:17: note: Possible overload variants: main:17: note: def [P`9, T] pipe(self, func: Callable[[DataTree, **P], T], *args: P.args, **kwargs: P.kwargs) -> T main:17: note: def [T] pipe(self, func: tuple[Callable[..., T], str], *args: Any, **kwargs: Any) -> T - case: test_mypy_pipe_tuple_extra_arg main: | from xarray import DataTree def f(arg: int, dt: DataTree) -> DataTree: return dt # Since we cannot provide a precise type annotation when passing a tuple to # pipe, there's not enough information for type analysis to indicate that # we are providing too many args for `f`, so we get no error here. dt = DataTree().pipe((f, "dt"), 42, "foo") reveal_type(dt) # N: Revealed type is "xarray.core.datatree.DataTree" # Rather than passing a tuple, passing a lambda that calls `f` with args in # the correct order allows for proper type analysis, indicating (perhaps # somewhat cryptically) that we passed too many arguments. 
dt = DataTree().pipe(lambda data, arg: f(arg, data), 42, "foo") out: | main:17: error: No overload variant of "pipe" of "DataTree" matches argument types "Callable[[Any, Any], DataTree]", "int", "str" [call-overload] main:17: note: Possible overload variants: main:17: note: def [P`9, T] pipe(self, func: Callable[[DataTree, **P], T], *args: P.args, **kwargs: P.kwargs) -> T main:17: note: def [T] pipe(self, func: tuple[Callable[..., T], str], *args: Any, **kwargs: Any) -> T python-xarray-2026.01.0/xarray/tests/test_deprecation_helpers.py0000664000175000017500000001102615136607163025143 0ustar alastairalastairimport pytest from xarray.util.deprecation_helpers import _deprecate_positional_args def test_deprecate_positional_args_warns_for_function(): @_deprecate_positional_args("v0.1") def f1(a, b, *, c="c", d="d"): return a, b, c, d result = f1(1, 2) assert result == (1, 2, "c", "d") result = f1(1, 2, c=3, d=4) assert result == (1, 2, 3, 4) with pytest.warns(FutureWarning, match=r".*v0.1"): result = f1(1, 2, 3) # type: ignore[misc] assert result == (1, 2, 3, "d") with pytest.warns(FutureWarning, match=r"Passing 'c' as positional"): result = f1(1, 2, 3) # type: ignore[misc] assert result == (1, 2, 3, "d") with pytest.warns(FutureWarning, match=r"Passing 'c, d' as positional"): result = f1(1, 2, 3, 4) # type: ignore[misc] assert result == (1, 2, 3, 4) @_deprecate_positional_args("v0.1") def f2(a="a", *, b="b", c="c", d="d"): return a, b, c, d with pytest.warns(FutureWarning, match=r"Passing 'b' as positional"): result = f2(1, 2) # type: ignore[misc] assert result == (1, 2, "c", "d") @_deprecate_positional_args("v0.1") def f3(a, *, b="b", **kwargs): return a, b, kwargs with pytest.warns(FutureWarning, match=r"Passing 'b' as positional"): result = f3(1, 2) # type: ignore[misc] assert result == (1, 2, {}) with pytest.warns(FutureWarning, match=r"Passing 'b' as positional"): result = f3(1, 2, f="f") # type: ignore[misc] assert result == (1, 2, {"f": "f"}) @_deprecate_positional_args("v0.1") def f4(a, /, *, b="b", **kwargs): return a, b, kwargs result = f4(1) assert result == (1, "b", {}) result = f4(1, b=2, f="f") assert result == (1, 2, {"f": "f"}) with pytest.warns(FutureWarning, match=r"Passing 'b' as positional"): result = f4(1, 2, f="f") # type: ignore[misc] assert result == (1, 2, {"f": "f"}) with pytest.raises(TypeError, match=r"Keyword-only param without default"): @_deprecate_positional_args("v0.1") def f5(a, *, b, c=3, **kwargs): pass def test_deprecate_positional_args_warns_for_class(): class A1: @_deprecate_positional_args("v0.1") def method(self, a, b, *, c="c", d="d"): return a, b, c, d result = A1().method(1, 2) assert result == (1, 2, "c", "d") result = A1().method(1, 2, c=3, d=4) assert result == (1, 2, 3, 4) with pytest.warns(FutureWarning, match=r".*v0.1"): result = A1().method(1, 2, 3) # type: ignore[misc] assert result == (1, 2, 3, "d") with pytest.warns(FutureWarning, match=r"Passing 'c' as positional"): result = A1().method(1, 2, 3) # type: ignore[misc] assert result == (1, 2, 3, "d") with pytest.warns(FutureWarning, match=r"Passing 'c, d' as positional"): result = A1().method(1, 2, 3, 4) # type: ignore[misc] assert result == (1, 2, 3, 4) class A2: @_deprecate_positional_args("v0.1") def method(self, a=1, b=1, *, c="c", d="d"): return a, b, c, d with pytest.warns(FutureWarning, match=r"Passing 'c' as positional"): result = A2().method(1, 2, 3) # type: ignore[misc] assert result == (1, 2, 3, "d") with pytest.warns(FutureWarning, match=r"Passing 'c, d' as positional"): result = 
A2().method(1, 2, 3, 4) # type: ignore[misc] assert result == (1, 2, 3, 4) class A3: @_deprecate_positional_args("v0.1") def method(self, a, *, b="b", **kwargs): return a, b, kwargs with pytest.warns(FutureWarning, match=r"Passing 'b' as positional"): result = A3().method(1, 2) # type: ignore[misc] assert result == (1, 2, {}) with pytest.warns(FutureWarning, match=r"Passing 'b' as positional"): result = A3().method(1, 2, f="f") # type: ignore[misc] assert result == (1, 2, {"f": "f"}) class A4: @_deprecate_positional_args("v0.1") def method(self, a, /, *, b="b", **kwargs): return a, b, kwargs result = A4().method(1) assert result == (1, "b", {}) result = A4().method(1, b=2, f="f") assert result == (1, 2, {"f": "f"}) with pytest.warns(FutureWarning, match=r"Passing 'b' as positional"): result = A4().method(1, 2, f="f") # type: ignore[misc] assert result == (1, 2, {"f": "f"}) with pytest.raises(TypeError, match=r"Keyword-only param without default"): class A5: @_deprecate_positional_args("v0.1") def __init__(self, a, *, b, c=3, **kwargs): pass python-xarray-2026.01.0/xarray/tests/CLAUDE.md0000664000175000017500000000602515136607163020775 0ustar alastairalastair# Testing Guidelines for xarray ## Handling Optional Dependencies xarray has many optional dependencies that may not be available in all testing environments. Always use the standard decorators and patterns when writing tests that require specific dependencies. ### Standard Decorators **ALWAYS use decorators** like `@requires_dask`, `@requires_cftime`, etc. instead of conditional `if` statements. All available decorators are defined in `xarray/tests/__init__.py` (look for `requires_*` decorators). ### DO NOT use conditional imports or skipif ❌ **WRONG - Do not do this:** ```python def test_mean_with_cftime(): if has_dask: # WRONG! ds = ds.chunk({}) result = ds.mean() ``` ❌ **ALSO WRONG - Avoid pytest.mark.skipif in parametrize:** ```python @pytest.mark.parametrize( "chunk", [ pytest.param( True, marks=pytest.mark.skipif(not has_dask, reason="requires dask") ), False, ], ) def test_something(chunk): ... ``` ✅ **CORRECT - Do this instead:** ```python def test_mean_with_cftime(): # Test without dask result = ds.mean() @requires_dask def test_mean_with_cftime_dask(): # Separate test for dask functionality ds = ds.chunk({}) result = ds.mean() ``` ✅ **OR for parametrized tests, split them:** ```python def test_something_without_dask(): # Test the False case ... @requires_dask def test_something_with_dask(): # Test the True case with dask ... ``` ### Multiple dependencies When a test requires multiple optional dependencies: ```python @requires_dask @requires_scipy def test_interpolation_with_dask(): ... ``` ### Importing optional dependencies in tests For imports within test functions, use `pytest.importorskip`: ```python def test_cftime_functionality(): cftime = pytest.importorskip("cftime") # Now use cftime ``` ### Common patterns 1. **Split tests by dependency** - Don't mix optional dependency code with base functionality: ```python def test_base_functionality(): # Core test without optional deps result = ds.mean() assert result is not None @requires_dask def test_dask_functionality(): # Dask-specific test ds_chunked = ds.chunk({}) result = ds_chunked.mean() assert result is not None ``` 2. **Use fixtures for dependency-specific setup**: ```python @pytest.fixture def dask_array(): pytest.importorskip("dask.array") import dask.array as da return da.from_array([1, 2, 3], chunks=2) ``` 3. 
**Check available implementations**: ```python from xarray.core.duck_array_ops import available_implementations @pytest.mark.parametrize("implementation", available_implementations()) def test_with_available_backends(implementation): ... ``` ### Key Points - CI environments intentionally exclude certain dependencies (e.g., `all-but-dask`, `bare-minimum`) - A test failing in "all-but-dask" because it uses dask is a test bug, not a CI issue - Look at similar existing tests for patterns to follow python-xarray-2026.01.0/xarray/tests/test_concat.py0000664000175000017500000020566315136607163022407 0ustar alastairalastairfrom __future__ import annotations from collections.abc import Callable from contextlib import AbstractContextManager, nullcontext from copy import deepcopy from typing import TYPE_CHECKING, Any, Literal import numpy as np import pandas as pd import pytest from xarray import ( AlignmentError, DataArray, Dataset, Variable, concat, open_dataset, set_options, ) from xarray.core import dtypes, types from xarray.core.coordinates import Coordinates from xarray.core.datatree import DataTree from xarray.core.indexes import PandasIndex from xarray.structure import merge from xarray.tests import ( ConcatenatableArray, InaccessibleArray, UnexpectedDataAccess, assert_array_equal, assert_equal, assert_identical, requires_dask, requires_pyarrow, requires_scipy_or_netCDF4, ) from xarray.tests.indexes import XYIndex from xarray.tests.test_dataset import create_test_data if TYPE_CHECKING: from xarray.core.types import CombineAttrsOptions, JoinOptions # helper method to create multiple tests datasets to concat def create_concat_datasets( num_datasets: int = 2, seed: int | None = None, include_day: bool = True ) -> list[Dataset]: rng = np.random.default_rng(seed) lat = rng.standard_normal(size=(1, 4)) lon = rng.standard_normal(size=(1, 4)) result = [] variables = ["temperature", "pressure", "humidity", "precipitation", "cloud_cover"] for i in range(num_datasets): if include_day: data_tuple = ( ["x", "y", "day"], rng.standard_normal(size=(1, 4, 2)), ) data_vars = dict.fromkeys(variables, data_tuple) result.append( Dataset( data_vars=data_vars, coords={ "lat": (["x", "y"], lat), "lon": (["x", "y"], lon), "day": ["day" + str(i * 2 + 1), "day" + str(i * 2 + 2)], }, ) ) else: data_tuple = ( ["x", "y"], rng.standard_normal(size=(1, 4)), ) data_vars = dict.fromkeys(variables, data_tuple) result.append( Dataset( data_vars=data_vars, coords={"lat": (["x", "y"], lat), "lon": (["x", "y"], lon)}, ) ) return result # helper method to create multiple tests datasets to concat with specific types def create_typed_datasets( num_datasets: int = 2, seed: int | None = None ) -> list[Dataset]: var_strings = ["a", "b", "c", "d", "e", "f", "g", "h"] rng = np.random.default_rng(seed) lat = rng.standard_normal(size=(1, 4)) lon = rng.standard_normal(size=(1, 4)) return [ Dataset( data_vars={ "float": (["x", "y", "day"], rng.standard_normal(size=(1, 4, 2))), "float2": (["x", "y", "day"], rng.standard_normal(size=(1, 4, 2))), "string": ( ["x", "y", "day"], rng.choice(var_strings, size=(1, 4, 2)), ), "int": (["x", "y", "day"], rng.integers(0, 10, size=(1, 4, 2))), "datetime64": ( ["x", "y", "day"], np.arange( np.datetime64("2017-01-01"), np.datetime64("2017-01-09") ).reshape(1, 4, 2), ), "timedelta64": ( ["x", "y", "day"], np.reshape([pd.Timedelta(days=i) for i in range(8)], [1, 4, 2]), ), }, coords={ "lat": (["x", "y"], lat), "lon": (["x", "y"], lon), "day": ["day" + str(i * 2 + 1), "day" + str(i * 2 + 2)], }, ) for i in 
range(num_datasets) ] def test_concat_compat() -> None: ds1 = Dataset( { "has_x_y": (("y", "x"), [[1, 2]]), "has_x": ("x", [1, 2]), "no_x_y": ("z", [1, 2]), }, coords={"x": [0, 1], "y": [0], "z": [-1, -2]}, ) ds2 = Dataset( { "has_x_y": (("y", "x"), [[3, 4]]), "has_x": ("x", [1, 2]), "no_x_y": (("q", "z"), [[1, 2]]), }, coords={"x": [0, 1], "y": [1], "z": [-1, -2], "q": [0]}, ) result = concat([ds1, ds2], dim="y", data_vars="minimal", compat="broadcast_equals") assert_equal(ds2.no_x_y, result.no_x_y.transpose()) for var in ["has_x", "no_x_y"]: assert "y" not in result[var].dims and "y" not in result[var].coords with pytest.raises(ValueError, match=r"'q' not present in all datasets"): concat([ds1, ds2], dim="q", data_vars="all", join="outer") with pytest.raises(ValueError, match=r"'q' not present in all datasets"): concat([ds2, ds1], dim="q", data_vars="all", join="outer") def test_concat_missing_var() -> None: datasets = create_concat_datasets(2, seed=123) expected = concat(datasets, dim="day") vars_to_drop = ["humidity", "precipitation", "cloud_cover"] expected = expected.drop_vars(vars_to_drop) expected["pressure"][..., 2:] = np.nan datasets[0] = datasets[0].drop_vars(vars_to_drop) datasets[1] = datasets[1].drop_vars(vars_to_drop + ["pressure"]) actual = concat(datasets, dim="day") assert list(actual.data_vars.keys()) == ["temperature", "pressure"] assert_identical(actual, expected) @pytest.mark.parametrize("var", ["var4", pytest.param("var5", marks=requires_pyarrow)]) def test_concat_extension_array(var) -> None: data1 = create_test_data(use_extension_array=True) data2 = create_test_data(use_extension_array=True) concatenated = concat([data1, data2], dim="dim1") assert pd.Series( concatenated[var] == type(data2[var].variable.data)._concat_same_type( [ data1[var].variable.data, data2[var].variable.data, ] ) ).all() # need to wrap in series because pyarrow bool does not support `all` def test_concat_missing_multiple_consecutive_var() -> None: datasets = create_concat_datasets(3, seed=123) expected = concat(datasets, dim="day") vars_to_drop = ["humidity", "pressure"] expected["pressure"][..., :4] = np.nan expected["humidity"][..., :4] = np.nan datasets[0] = datasets[0].drop_vars(vars_to_drop) datasets[1] = datasets[1].drop_vars(vars_to_drop) actual = concat(datasets, dim="day") assert list(actual.data_vars.keys()) == [ "temperature", "precipitation", "cloud_cover", "pressure", "humidity", ] assert_identical(actual, expected) def test_concat_all_empty() -> None: ds1 = Dataset() ds2 = Dataset() expected = Dataset() actual = concat([ds1, ds2], dim="new_dim") assert_identical(actual, expected) def test_concat_second_empty() -> None: ds1 = Dataset(data_vars={"a": ("y", [0.1])}, coords={"x": 0.1}) ds2 = Dataset(coords={"x": 0.1}) expected = Dataset(data_vars={"a": ("y", [0.1, np.nan])}, coords={"x": 0.1}) actual = concat([ds1, ds2], dim="y") assert_identical(actual, expected) expected = Dataset( data_vars={"a": ("y", [0.1, np.nan])}, coords={"x": ("y", [0.1, 0.1])} ) actual = concat([ds1, ds2], dim="y", coords="all") assert_identical(actual, expected) def test_concat_second_empty_with_scalar_data_var_only_on_first() -> None: # Check concatenating scalar data_var only present in ds1 ds1 = Dataset(data_vars={"a": ("y", [0.1]), "b": 0.1}, coords={"x": 0.1}) ds2 = Dataset(coords={"x": 0.1}) expected = Dataset( data_vars={"a": ("y", [0.1, np.nan]), "b": ("y", [0.1, np.nan])}, coords={"x": ("y", [0.1, 0.1])}, ) actual = concat([ds1, ds2], dim="y", coords="all", data_vars="all") 
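# With data_vars="all" and coords="all", the scalar data variable "b" (present
# only in ds1) is broadcast along the new "y" dimension and NaN-filled where it
# is missing, while the shared scalar coordinate "x" is simply repeated along "y".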
assert_identical(actual, expected) expected = Dataset( data_vars={"a": ("y", [0.1, np.nan]), "b": 0.1}, coords={"x": 0.1} ) actual = concat( [ds1, ds2], dim="y", coords="different", data_vars="different", compat="equals" ) assert_identical(actual, expected) def test_concat_multiple_missing_variables() -> None: datasets = create_concat_datasets(2, seed=123) expected = concat(datasets, dim="day") vars_to_drop = ["pressure", "cloud_cover"] expected["pressure"][..., 2:] = np.nan expected["cloud_cover"][..., 2:] = np.nan datasets[1] = datasets[1].drop_vars(vars_to_drop) actual = concat(datasets, dim="day") # check the variables orders are the same assert list(actual.data_vars.keys()) == [ "temperature", "pressure", "humidity", "precipitation", "cloud_cover", ] assert_identical(actual, expected) @pytest.mark.parametrize("include_day", [True, False]) def test_concat_multiple_datasets_missing_vars(include_day: bool) -> None: vars_to_drop = [ "temperature", "pressure", "humidity", "precipitation", "cloud_cover", ] # must specify if concat_dim='day' is not part of the vars kwargs = {"data_vars": "all"} if not include_day else {} datasets = create_concat_datasets( len(vars_to_drop), seed=123, include_day=include_day ) expected = concat(datasets, dim="day", **kwargs) # type: ignore[call-overload] for i, name in enumerate(vars_to_drop): if include_day: expected[name][..., i * 2 : (i + 1) * 2] = np.nan else: expected[name][i : i + 1, ...] = np.nan # set up the test data datasets = [ ds.drop_vars(varname) for ds, varname in zip(datasets, vars_to_drop, strict=True) ] actual = concat(datasets, dim="day", **kwargs) # type: ignore[call-overload] assert list(actual.data_vars.keys()) == [ "pressure", "humidity", "precipitation", "cloud_cover", "temperature", ] assert_identical(actual, expected) def test_concat_multiple_datasets_with_multiple_missing_variables() -> None: vars_to_drop_in_first = ["temperature", "pressure"] vars_to_drop_in_second = ["humidity", "precipitation", "cloud_cover"] datasets = create_concat_datasets(2, seed=123) expected = concat(datasets, dim="day") for name in vars_to_drop_in_first: expected[name][..., :2] = np.nan for name in vars_to_drop_in_second: expected[name][..., 2:] = np.nan # set up the test data datasets[0] = datasets[0].drop_vars(vars_to_drop_in_first) datasets[1] = datasets[1].drop_vars(vars_to_drop_in_second) actual = concat(datasets, dim="day") assert list(actual.data_vars.keys()) == [ "humidity", "precipitation", "cloud_cover", "temperature", "pressure", ] assert_identical(actual, expected) def test_concat_type_of_missing_fill() -> None: datasets = create_typed_datasets(2, seed=123) expected1 = concat(datasets, dim="day", fill_value=dtypes.NA) expected2 = concat(datasets[::-1], dim="day", fill_value=dtypes.NA) vars = ["float", "float2", "string", "int", "datetime64", "timedelta64"] expected = [expected2, expected1] for i, exp in enumerate(expected): sl = slice(i * 2, (i + 1) * 2) exp["float2"][..., sl] = np.nan exp["datetime64"][..., sl] = np.nan exp["timedelta64"][..., sl] = np.nan var = exp["int"] * 1.0 var[..., sl] = np.nan exp["int"] = var var = exp["string"].astype(object) var[..., sl] = np.nan exp["string"] = var # set up the test data datasets[1] = datasets[1].drop_vars(vars[1:]) actual = concat(datasets, dim="day", fill_value=dtypes.NA) assert_identical(actual, expected[1]) # reversed actual = concat(datasets[::-1], dim="day", fill_value=dtypes.NA) assert_identical(actual, expected[0]) def test_concat_order_when_filling_missing() -> None: vars_to_drop_in_first: 
list[str] = [] # drop middle vars_to_drop_in_second = ["humidity"] datasets = create_concat_datasets(2, seed=123) expected1 = concat(datasets, dim="day") for name in vars_to_drop_in_second: expected1[name][..., 2:] = np.nan expected2 = concat(datasets[::-1], dim="day") for name in vars_to_drop_in_second: expected2[name][..., :2] = np.nan # set up the test data datasets[0] = datasets[0].drop_vars(vars_to_drop_in_first) datasets[1] = datasets[1].drop_vars(vars_to_drop_in_second) actual = concat(datasets, dim="day") assert list(actual.data_vars.keys()) == [ "temperature", "pressure", "humidity", "precipitation", "cloud_cover", ] assert_identical(actual, expected1) actual = concat(datasets[::-1], dim="day") assert list(actual.data_vars.keys()) == [ "temperature", "pressure", "precipitation", "cloud_cover", "humidity", ] assert_identical(actual, expected2) @pytest.fixture def concat_var_names() -> Callable: # create var names list with one missing value def get_varnames(var_cnt: int = 10, list_cnt: int = 10) -> list[list[str]]: orig = [f"d{i:02d}" for i in range(var_cnt)] var_names = [] for _i in range(list_cnt): l1 = orig.copy() var_names.append(l1) return var_names return get_varnames @pytest.fixture def create_concat_ds() -> Callable: def create_ds( var_names: list[list[str]], dim: bool = False, coord: bool = False, drop_idx: list[int] | None = None, ) -> list[Dataset]: out_ds = [] ds = Dataset() ds = ds.assign_coords({"x": np.arange(2)}) ds = ds.assign_coords({"y": np.arange(3)}) ds = ds.assign_coords({"z": np.arange(4)}) for i, dsl in enumerate(var_names): vlist = dsl.copy() if drop_idx is not None: vlist.pop(drop_idx[i]) foo_data = np.arange(48, dtype=float).reshape(2, 2, 3, 4) dsi = ds.copy() if coord: dsi = ds.assign({"time": (["time"], [i * 2, i * 2 + 1])}) for k in vlist: dsi = dsi.assign({k: (["time", "x", "y", "z"], foo_data.copy())}) if not dim: dsi = dsi.isel(time=0) out_ds.append(dsi) return out_ds return create_ds @pytest.mark.parametrize("dim", [True, False]) @pytest.mark.parametrize("coord", [True, False]) def test_concat_fill_missing_variables( concat_var_names, create_concat_ds, dim: bool, coord: bool ) -> None: var_names = concat_var_names() drop_idx = [0, 7, 6, 4, 4, 8, 0, 6, 2, 0] expected = concat( create_concat_ds(var_names, dim=dim, coord=coord), dim="time", data_vars="all" ) for i, idx in enumerate(drop_idx): if dim: expected[var_names[0][idx]][i * 2 : i * 2 + 2] = np.nan else: expected[var_names[0][idx]][i] = np.nan concat_ds = create_concat_ds(var_names, dim=dim, coord=coord, drop_idx=drop_idx) actual = concat(concat_ds, dim="time", data_vars="all") assert list(actual.data_vars.keys()) == [ "d01", "d02", "d03", "d04", "d05", "d06", "d07", "d08", "d09", "d00", ] assert_identical(actual, expected) class TestConcatDataset: @pytest.fixture def data(self, request) -> Dataset: use_extension_array = request.param if hasattr(request, "param") else False return create_test_data(use_extension_array=use_extension_array).drop_dims( "dim3" ) def rectify_dim_order(self, data: Dataset, dataset) -> Dataset: # return a new dataset with all variable dimensions transposed into # the order in which they are found in `data` return Dataset( {k: v.transpose(*data[k].dims) for k, v in dataset.data_vars.items()}, dataset.coords, attrs=dataset.attrs, ) @pytest.mark.parametrize("coords", ["different", "minimal"]) @pytest.mark.parametrize( "dim,data", [["dim1", True], ["dim2", False]], indirect=["data"] ) def test_concat_simple(self, data: Dataset, dim, coords) -> None: datasets = [g for _, g 
in data.groupby(dim)] assert_identical(data, concat(datasets, dim, coords=coords, compat="equals")) def test_concat_merge_variables_present_in_some_datasets( self, data: Dataset ) -> None: # coordinates present in some datasets but not others ds1 = Dataset(data_vars={"a": ("y", [0.1])}, coords={"x": 0.1}) ds2 = Dataset(data_vars={"a": ("y", [0.2])}, coords={"z": 0.2}) actual = concat([ds1, ds2], dim="y", coords="minimal") expected = Dataset({"a": ("y", [0.1, 0.2])}, coords={"x": 0.1, "z": 0.2}) assert_identical(expected, actual) # data variables present in some datasets but not others split_data = [data.isel(dim1=slice(3)), data.isel(dim1=slice(3, None))] data0, data1 = deepcopy(split_data) data1["foo"] = ("bar", np.random.randn(10)) actual = concat([data0, data1], "dim1", data_vars="minimal") expected = data.copy().assign(foo=data1.foo) assert_identical(expected, actual) # expand foo actual = concat([data0, data1], "dim1", data_vars="all") foo = np.ones((8, 10), dtype=data1.foo.dtype) * np.nan foo[3:] = data1.foo.values[None, ...] expected = data.copy().assign(foo=(["dim1", "bar"], foo)) assert_identical(expected, actual) @pytest.mark.parametrize("data", [False], indirect=["data"]) def test_concat_2(self, data: Dataset) -> None: dim = "dim2" datasets = [g.squeeze(dim) for _, g in data.groupby(dim, squeeze=False)] concat_over = [k for k, v in data.coords.items() if dim in v.dims and k != dim] actual = concat(datasets, data[dim], coords=concat_over) assert_identical(data, self.rectify_dim_order(data, actual)) @pytest.mark.parametrize("coords", ["different", "minimal", "all"]) @pytest.mark.parametrize("dim", ["dim1", "dim2"]) def test_concat_coords_kwarg( self, data: Dataset, dim: str, coords: Literal["all", "minimal", "different"] ) -> None: data = data.copy(deep=True) # make sure the coords argument behaves as expected data.coords["extra"] = ("dim4", np.arange(3)) datasets = [g for _, g in data.groupby(dim)] actual = concat( datasets, data[dim], coords=coords, data_vars="all", compat="equals" ) if coords == "all": expected = np.array([data["extra"].values for _ in range(data.sizes[dim])]) assert_array_equal(actual["extra"].values, expected) else: assert_equal(data["extra"], actual["extra"]) def test_concat(self, data: Dataset) -> None: split_data = [ data.isel(dim1=slice(3)), data.isel(dim1=3), data.isel(dim1=slice(4, None)), ] assert_identical(data, concat(split_data, "dim1")) def test_concat_dim_precedence(self, data: Dataset) -> None: # verify that the dim argument takes precedence over # concatenating dataset variables of the same name dim = (2 * data["dim1"]).rename("dim1") datasets = [g for _, g in data.groupby("dim1", squeeze=False)] expected = data.copy() expected["dim1"] = dim assert_identical(expected, concat(datasets, dim)) def test_concat_data_vars_typing(self) -> None: # Testing typing, can be removed if the next function works with annotations. 
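# data_vars="minimal" concatenates only the data variables in which the concat
# dimension already appears; "foo" already carries the "x" dimension, so the
# two halves recombine into the original dataset.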
data = Dataset({"foo": ("x", np.random.randn(10))}) objs: list[Dataset] = [data.isel(x=slice(5)), data.isel(x=slice(5, None))] actual = concat(objs, dim="x", data_vars="minimal") assert_identical(data, actual) @pytest.mark.parametrize("data_vars", ["minimal", "different", "all", [], ["foo"]]) def test_concat_data_vars(self, data_vars) -> None: data = Dataset({"foo": ("x", np.random.randn(10))}) objs: list[Dataset] = [data.isel(x=slice(5)), data.isel(x=slice(5, None))] actual = concat(objs, dim="x", data_vars=data_vars, compat="equals") assert_identical(data, actual) @pytest.mark.parametrize("coords", ["different", "all", ["c"]]) def test_concat_coords(self, coords) -> None: data = Dataset({"foo": ("x", np.random.randn(10))}) expected = data.assign_coords(c=("x", [0] * 5 + [1] * 5)) objs = [ data.isel(x=slice(5)).assign_coords(c=0), data.isel(x=slice(5, None)).assign_coords(c=1), ] if coords == "different": actual = concat(objs, dim="x", coords=coords, compat="equals") else: actual = concat(objs, dim="x", coords=coords) assert_identical(expected, actual) @pytest.mark.parametrize("coords", ["minimal", []]) def test_concat_coords_raises_merge_error(self, coords) -> None: data = Dataset({"foo": ("x", np.random.randn(10))}) objs = [ data.isel(x=slice(5)).assign_coords(c=0), data.isel(x=slice(5, None)).assign_coords(c=1), ] with pytest.raises(merge.MergeError, match="conflicting values"): concat(objs, dim="x", coords=coords, compat="equals") @pytest.mark.parametrize("data_vars", ["different", "all", ["foo"]]) def test_concat_constant_index(self, data_vars) -> None: # GH425 ds1 = Dataset({"foo": 1.5}, {"y": 1}) ds2 = Dataset({"foo": 2.5}, {"y": 1}) expected = Dataset({"foo": ("y", [1.5, 2.5]), "y": [1, 1]}) if data_vars == "different": actual = concat([ds1, ds2], "y", data_vars=data_vars, compat="equals") else: actual = concat([ds1, ds2], "y", data_vars=data_vars) assert_identical(expected, actual) def test_concat_constant_index_None(self) -> None: ds1 = Dataset({"foo": 1.5}, {"y": 1}) ds2 = Dataset({"foo": 2.5}, {"y": 1}) actual = concat([ds1, ds2], "new_dim", data_vars=None, compat="equals") expected = Dataset( {"foo": ("new_dim", [1.5, 2.5])}, coords={"y": 1}, ) assert_identical(actual, expected) def test_concat_constant_index_minimal(self) -> None: ds1 = Dataset({"foo": 1.5}, {"y": 1}) ds2 = Dataset({"foo": 2.5}, {"y": 1}) with set_options(use_new_combine_kwarg_defaults=False): with pytest.raises(merge.MergeError, match="conflicting values"): concat([ds1, ds2], dim="new_dim", data_vars="minimal") with set_options(use_new_combine_kwarg_defaults=True): with pytest.raises( ValueError, match="data_vars='minimal' and coords='minimal'" ): concat([ds1, ds2], dim="new_dim", data_vars="minimal") def test_concat_size0(self) -> None: data = create_test_data() split_data = [data.isel(dim1=slice(0, 0)), data] actual = concat(split_data, "dim1") assert_identical(data, actual) actual = concat(split_data[::-1], "dim1") assert_identical(data, actual) def test_concat_autoalign(self) -> None: ds1 = Dataset({"foo": DataArray([1, 2], coords=[("x", [1, 2])])}) ds2 = Dataset({"foo": DataArray([1, 2], coords=[("x", [1, 3])])}) actual = concat([ds1, ds2], "y", data_vars="all", join="outer") expected = Dataset( { "foo": DataArray( [[1, 2, np.nan], [1, np.nan, 2]], dims=["y", "x"], coords={"x": [1, 2, 3]}, ) } ) assert_identical(expected, actual) def test_concat_errors(self) -> None: data = create_test_data() split_data = [data.isel(dim1=slice(3)), data.isel(dim1=slice(3, None))] with pytest.raises(ValueError, 
match=r"must supply at least one"): concat([], "dim1") with pytest.raises(ValueError, match=r"Cannot specify both .*='different'"): concat( [data, data], dim="concat_dim", data_vars="different", compat="override" ) with pytest.raises(ValueError, match=r"must supply at least one"): concat([], "dim1") with pytest.raises(ValueError, match=r"are not found in the coordinates"): concat([data, data], "new_dim", coords=["not_found"]) with pytest.raises(ValueError, match=r"are not found in the data variables"): concat([data, data], "new_dim", data_vars=["not_found"]) with pytest.raises(ValueError, match=r"global attributes not"): # call deepcopy separately to get unique attrs data0 = deepcopy(split_data[0]) data1 = deepcopy(split_data[1]) data1.attrs["foo"] = "bar" concat([data0, data1], "dim1", compat="identical") assert_identical(data, concat([data0, data1], "dim1", compat="equals")) with pytest.raises(ValueError, match=r"compat.* invalid"): concat(split_data, "dim1", compat="foobar") # type: ignore[call-overload] with pytest.raises(ValueError, match=r"compat.* invalid"): concat(split_data, "dim1", compat="minimal") with pytest.raises(ValueError, match=r"unexpected value for"): concat([data, data], "new_dim", coords="foobar") with pytest.raises( ValueError, match=r"coordinate in some datasets but not others" ): concat([Dataset({"x": 0}), Dataset({"x": [1]})], dim="z") with pytest.raises( ValueError, match=r"coordinate in some datasets but not others" ): concat([Dataset({"x": 0}), Dataset({}, {"x": 1})], dim="z") def test_concat_join_kwarg(self) -> None: ds1 = Dataset({"a": (("x", "y"), [[0]])}, coords={"x": [0], "y": [0]}) ds2 = Dataset({"a": (("x", "y"), [[0]])}, coords={"x": [1], "y": [0.0001]}) expected: dict[JoinOptions, Any] = {} expected["outer"] = Dataset( {"a": (("x", "y"), [[0, np.nan], [np.nan, 0]])}, {"x": [0, 1], "y": [0, 0.0001]}, ) expected["inner"] = Dataset( {"a": (("x", "y"), [[], []])}, {"x": [0, 1], "y": []} ) expected["left"] = Dataset( {"a": (("x", "y"), np.array([0, np.nan], ndmin=2).T)}, coords={"x": [0, 1], "y": [0]}, ) expected["right"] = Dataset( {"a": (("x", "y"), np.array([np.nan, 0], ndmin=2).T)}, coords={"x": [0, 1], "y": [0.0001]}, ) expected["override"] = Dataset( {"a": (("x", "y"), np.array([0, 0], ndmin=2).T)}, coords={"x": [0, 1], "y": [0]}, ) with pytest.raises(ValueError, match=r"cannot align.*exact.*dimensions.*'y'"): actual = concat([ds1, ds2], join="exact", dim="x") for join, expected_item in expected.items(): actual = concat([ds1, ds2], join=join, dim="x") assert_equal(actual, expected_item) # regression test for #3681 actual = concat( [ds1.drop_vars("x"), ds2.drop_vars("x")], join="override", dim="y" ) expected2 = Dataset( {"a": (("x", "y"), np.array([0, 0], ndmin=2))}, coords={"y": [0, 0.0001]} ) assert_identical(actual, expected2) @pytest.mark.parametrize( "combine_attrs, var1_attrs, var2_attrs, expected_attrs, expect_exception", [ ( "no_conflicts", {"a": 1, "b": 2}, {"a": 1, "c": 3}, {"a": 1, "b": 2, "c": 3}, False, ), ("no_conflicts", {"a": 1, "b": 2}, {}, {"a": 1, "b": 2}, False), ("no_conflicts", {}, {"a": 1, "c": 3}, {"a": 1, "c": 3}, False), ( "no_conflicts", {"a": 1, "b": 2}, {"a": 4, "c": 3}, {"a": 1, "b": 2, "c": 3}, True, ), ("drop", {"a": 1, "b": 2}, {"a": 1, "c": 3}, {}, False), ("identical", {"a": 1, "b": 2}, {"a": 1, "b": 2}, {"a": 1, "b": 2}, False), ("identical", {"a": 1, "b": 2}, {"a": 1, "c": 3}, {"a": 1, "b": 2}, True), ( "override", {"a": 1, "b": 2}, {"a": 4, "b": 5, "c": 3}, {"a": 1, "b": 2}, False, ), ( "drop_conflicts", {"a": 41, 
"b": 42, "c": 43}, {"b": 2, "c": 43, "d": 44}, {"a": 41, "c": 43, "d": 44}, False, ), ( lambda attrs, context: {"a": -1, "b": 0, "c": 1} if any(attrs) else {}, {"a": 41, "b": 42, "c": 43}, {"b": 2, "c": 43, "d": 44}, {"a": -1, "b": 0, "c": 1}, False, ), ], ) def test_concat_combine_attrs_kwarg( self, combine_attrs, var1_attrs, var2_attrs, expected_attrs, expect_exception ): ds1 = Dataset({"a": ("x", [0])}, coords={"x": [0]}, attrs=var1_attrs) ds2 = Dataset({"a": ("x", [0])}, coords={"x": [1]}, attrs=var2_attrs) if expect_exception: with pytest.raises(ValueError, match=f"combine_attrs='{combine_attrs}'"): concat([ds1, ds2], dim="x", combine_attrs=combine_attrs) else: actual = concat([ds1, ds2], dim="x", combine_attrs=combine_attrs) expected = Dataset( {"a": ("x", [0, 0])}, {"x": [0, 1]}, attrs=expected_attrs ) assert_identical(actual, expected) @pytest.mark.parametrize( "combine_attrs, attrs1, attrs2, expected_attrs, expect_exception", [ ( "no_conflicts", {"a": 1, "b": 2}, {"a": 1, "c": 3}, {"a": 1, "b": 2, "c": 3}, False, ), ("no_conflicts", {"a": 1, "b": 2}, {}, {"a": 1, "b": 2}, False), ("no_conflicts", {}, {"a": 1, "c": 3}, {"a": 1, "c": 3}, False), ( "no_conflicts", {"a": 1, "b": 2}, {"a": 4, "c": 3}, {"a": 1, "b": 2, "c": 3}, True, ), ("drop", {"a": 1, "b": 2}, {"a": 1, "c": 3}, {}, False), ("identical", {"a": 1, "b": 2}, {"a": 1, "b": 2}, {"a": 1, "b": 2}, False), ("identical", {"a": 1, "b": 2}, {"a": 1, "c": 3}, {"a": 1, "b": 2}, True), ( "override", {"a": 1, "b": 2}, {"a": 4, "b": 5, "c": 3}, {"a": 1, "b": 2}, False, ), ( "drop_conflicts", {"a": 41, "b": 42, "c": 43}, {"b": 2, "c": 43, "d": 44}, {"a": 41, "c": 43, "d": 44}, False, ), ( lambda attrs, context: {"a": -1, "b": 0, "c": 1} if any(attrs) else {}, {"a": 41, "b": 42, "c": 43}, {"b": 2, "c": 43, "d": 44}, {"a": -1, "b": 0, "c": 1}, False, ), ], ) def test_concat_combine_attrs_kwarg_variables( self, combine_attrs, attrs1, attrs2, expected_attrs, expect_exception ): """check that combine_attrs is used on data variables and coords""" ds1 = Dataset({"a": ("x", [0], attrs1)}, coords={"x": ("x", [0], attrs1)}) ds2 = Dataset({"a": ("x", [0], attrs2)}, coords={"x": ("x", [1], attrs2)}) if expect_exception: with pytest.raises(ValueError, match=f"combine_attrs='{combine_attrs}'"): concat([ds1, ds2], dim="x", combine_attrs=combine_attrs) else: actual = concat([ds1, ds2], dim="x", combine_attrs=combine_attrs) expected = Dataset( {"a": ("x", [0, 0], expected_attrs)}, {"x": ("x", [0, 1], expected_attrs)}, ) assert_identical(actual, expected) def test_concat_promote_shape_with_mixed_dims_within_variables(self) -> None: objs = [Dataset({}, {"x": 0}), Dataset({"x": [1]})] actual = concat(objs, "x") expected = Dataset({"x": [0, 1]}) assert_identical(actual, expected) objs = [Dataset({"x": [0]}), Dataset({}, {"x": 1})] actual = concat(objs, "x") assert_identical(actual, expected) def test_concat_promote_shape_with_mixed_dims_between_variables(self) -> None: objs = [Dataset({"x": [2], "y": 3}), Dataset({"x": [4], "y": 5})] actual = concat(objs, "x", data_vars="all") expected = Dataset({"x": [2, 4], "y": ("x", [3, 5])}) assert_identical(actual, expected) def test_concat_promote_shape_with_mixed_dims_in_coord_variable(self) -> None: objs = [Dataset({"x": [0]}, {"y": -1}), Dataset({"x": [1]}, {"y": ("x", [-2])})] actual = concat(objs, "x") expected = Dataset({"x": [0, 1]}, {"y": ("x", [-1, -2])}) assert_identical(actual, expected) def test_concat_promote_shape_for_scalars_with_mixed_lengths_along_concat_dim( self, ) -> None: # values should repeat 
objs = [Dataset({"x": [0]}, {"y": -1}), Dataset({"x": [1, 2]}, {"y": -2})] actual = concat(objs, "x", coords="different", compat="equals") expected = Dataset({"x": [0, 1, 2]}, {"y": ("x", [-1, -2, -2])}) assert_identical(actual, expected) actual = concat(objs, "x", coords="all") assert_identical(actual, expected) def test_concat_promote_shape_broadcast_1d_x_1d_goes_to_2d(self) -> None: objs = [ Dataset({"z": ("x", [-1])}, {"x": [0], "y": [0]}), Dataset({"z": ("y", [1])}, {"x": [1], "y": [0]}), ] actual = concat(objs, "x") expected = Dataset({"z": (("x", "y"), [[-1], [1]])}, {"x": [0, 1], "y": [0]}) assert_identical(actual, expected) def test_concat_promote_shape_with_scalar_coordinates(self) -> None: # regression GH6384 objs = [ Dataset({}, {"x": pd.Interval(-1, 0, closed="right")}), Dataset({"x": [pd.Interval(0, 1, closed="right")]}), ] actual = concat(objs, "x") expected = Dataset( {"x": pd.IntervalIndex.from_tuples([(-1, 0), (0, 1)], closed="right")} ) assert_identical(actual, expected) def test_concat_promote_shape_with_coordinates_of_particular_dtypes(self) -> None: # regression GH6416 (coord dtype) and GH6434 time_data1 = np.array(["2022-01-01", "2022-02-01"], dtype="datetime64[ns]") time_data2 = np.array("2022-03-01", dtype="datetime64[ns]") time_expected = np.array( ["2022-01-01", "2022-02-01", "2022-03-01"], dtype="datetime64[ns]" ) objs = [Dataset({}, {"time": time_data1}), Dataset({}, {"time": time_data2})] actual = concat(objs, "time") expected = Dataset({}, {"time": time_expected}) assert_identical(actual, expected) assert isinstance(actual.indexes["time"], pd.DatetimeIndex) def test_concat_do_not_promote(self) -> None: # GH438 objs = [ Dataset({"y": ("t", [1])}, {"x": 1, "t": [0]}), Dataset({"y": ("t", [2])}, {"x": 1, "t": [0]}), ] expected = Dataset({"y": ("t", [1, 2])}, {"x": 1, "t": [0, 0]}) actual = concat(objs, "t") assert_identical(expected, actual) objs = [ Dataset({"y": ("t", [1])}, {"x": 1, "t": [0]}), Dataset({"y": ("t", [2])}, {"x": 2, "t": [0]}), ] with set_options(use_new_combine_kwarg_defaults=False): with pytest.raises(ValueError): concat(objs, "t", coords="minimal") with set_options(use_new_combine_kwarg_defaults=True): with pytest.raises(ValueError): concat(objs, "t", compat="equals") def test_concat_dim_is_variable(self) -> None: objs = [Dataset({"x": 0}), Dataset({"x": 1})] coord = Variable("y", [3, 4], attrs={"foo": "bar"}) expected = Dataset({"x": ("y", [0, 1]), "y": coord}) actual = concat(objs, coord, data_vars="all") assert_identical(actual, expected) def test_concat_dim_is_dataarray(self) -> None: objs = [Dataset({"x": 0}), Dataset({"x": 1})] coord = DataArray([3, 4], dims="y", attrs={"foo": "bar"}) expected = Dataset({"x": ("y", [0, 1]), "y": coord}) actual = concat(objs, coord, data_vars="all") assert_identical(actual, expected) def test_concat_multiindex(self) -> None: midx = pd.MultiIndex.from_product([[1, 2, 3], ["a", "b"]]) midx_coords = Coordinates.from_pandas_multiindex(midx, "x") expected = Dataset(coords=midx_coords) actual = concat( [expected.isel(x=slice(2)), expected.isel(x=slice(2, None))], "x" ) assert expected.equals(actual) assert isinstance(actual.x.to_index(), pd.MultiIndex) def test_concat_along_new_dim_multiindex(self) -> None: # see https://github.com/pydata/xarray/issues/6881 level_names = ["x_level_0", "x_level_1"] midx = pd.MultiIndex.from_product([[1, 2, 3], ["a", "b"]], names=level_names) midx_coords = Coordinates.from_pandas_multiindex(midx, "x") ds = Dataset(coords=midx_coords) concatenated = concat([ds], "new") actual = 
list(concatenated.xindexes.get_all_coords("x")) expected = ["x"] + level_names assert actual == expected @pytest.mark.parametrize("fill_value", [dtypes.NA, 2, 2.0, {"a": 2, "b": 1}]) def test_concat_fill_value(self, fill_value) -> None: datasets = [ Dataset({"a": ("x", [2, 3]), "b": ("x", [-2, 1])}, {"x": [1, 2]}), Dataset({"a": ("x", [1, 2]), "b": ("x", [3, -1])}, {"x": [0, 1]}), ] if fill_value == dtypes.NA: # if we supply the default, we expect the missing value for a # float array fill_value_a = fill_value_b = np.nan elif isinstance(fill_value, dict): fill_value_a = fill_value["a"] fill_value_b = fill_value["b"] else: fill_value_a = fill_value_b = fill_value expected = Dataset( { "a": (("t", "x"), [[fill_value_a, 2, 3], [1, 2, fill_value_a]]), "b": (("t", "x"), [[fill_value_b, -2, 1], [3, -1, fill_value_b]]), }, {"x": [0, 1, 2]}, ) actual = concat( datasets, dim="t", fill_value=fill_value, data_vars="all", join="outer" ) assert_identical(actual, expected) @pytest.mark.parametrize("dtype", [str, bytes]) @pytest.mark.parametrize("dim", ["x1", "x2"]) def test_concat_str_dtype(self, dtype, dim) -> None: data = np.arange(4).reshape([2, 2]) da1 = Dataset( { "data": (["x1", "x2"], data), "x1": [0, 1], "x2": np.array(["a", "b"], dtype=dtype), } ) da2 = Dataset( { "data": (["x1", "x2"], data), "x1": np.array([1, 2]), "x2": np.array(["c", "d"], dtype=dtype), } ) actual = concat([da1, da2], dim=dim, join="outer") assert np.issubdtype(actual.x2.dtype, dtype) def test_concat_avoids_index_auto_creation(self) -> None: # TODO once passing indexes={} directly to Dataset constructor is allowed then no need to create coords first coords = Coordinates( {"x": ConcatenatableArray(np.array([1, 2, 3]))}, indexes={} ) datasets = [ Dataset( {"a": (["x", "y"], ConcatenatableArray(np.zeros((3, 3))))}, coords=coords, ) for _ in range(2) ] # should not raise on concat combined = concat(datasets, dim="x") assert combined["a"].shape == (6, 3) assert combined["a"].dims == ("x", "y") # nor have auto-created any indexes assert combined.indexes == {} # should not raise on stack combined = concat(datasets, dim="z", data_vars="all") assert combined["a"].shape == (2, 3, 3) assert combined["a"].dims == ("z", "x", "y") # nor have auto-created any indexes assert combined.indexes == {} def test_concat_avoids_index_auto_creation_new_1d_coord(self) -> None: # create 0D coordinates (without indexes) datasets = [ Dataset( coords={"x": ConcatenatableArray(np.array(10))}, ) for _ in range(2) ] with pytest.raises(UnexpectedDataAccess): concat(datasets, dim="x", create_index_for_new_dim=True) # should not raise on concat iff create_index_for_new_dim=False combined = concat(datasets, dim="x", create_index_for_new_dim=False) assert combined["x"].shape == (2,) assert combined["x"].dims == ("x",) # nor have auto-created any indexes assert combined.indexes == {} def test_concat_promote_shape_without_creating_new_index(self) -> None: # different shapes but neither have indexes ds1 = Dataset(coords={"x": 0}) ds2 = Dataset(data_vars={"x": [1]}).drop_indexes("x") actual = concat([ds1, ds2], dim="x", create_index_for_new_dim=False) expected = Dataset(data_vars={"x": [0, 1]}).drop_indexes("x") assert_identical(actual, expected, check_default_indexes=False) assert actual.indexes == {} @requires_scipy_or_netCDF4 def test_concat_combine_attrs_nan_after_netcdf_roundtrip(self, tmp_path) -> None: # Test for issue #10833: NaN attributes should be preserved # with combine_attrs="drop_conflicts" after NetCDF roundtrip import numpy as np # Create arrays 
with matching NaN fill_value attribute ds1 = Dataset( {"a": ("x", [0, 1])}, attrs={"fill_value": np.nan, "sensor": "G18", "field": "CTH"}, ) ds2 = Dataset( {"a": ("x", [2, 3])}, attrs={"fill_value": np.nan, "sensor": "G16", "field": "CTH"}, ) # Save to NetCDF and reload (converts Python float NaN to NumPy scalar NaN) path1 = tmp_path / "ds1.nc" path2 = tmp_path / "ds2.nc" ds1.to_netcdf(path1) ds2.to_netcdf(path2) ds1_loaded = open_dataset(path1) ds2_loaded = open_dataset(path2) # Verify that NaN attributes are preserved after concat actual = concat( [ds1_loaded, ds2_loaded], dim="y", combine_attrs="drop_conflicts" ) # fill_value should be preserved (not dropped) since both have NaN assert "fill_value" in actual.attrs assert np.isnan(actual.attrs["fill_value"]) # field should be preserved (identical in both) assert actual.attrs["field"] == "CTH" # sensor should be dropped (conflicts) assert "sensor" not in actual.attrs ds1_loaded.close() ds2_loaded.close() class TestConcatDataArray: def test_concat(self) -> None: ds = Dataset( { "foo": (["x", "y"], np.random.random((2, 3))), "bar": (["x", "y"], np.random.random((2, 3))), }, {"x": [0, 1]}, ) foo = ds["foo"] bar = ds["bar"] # from dataset array: expected = DataArray( np.array([foo.values, bar.values]), dims=["w", "x", "y"], coords={"x": [0, 1]}, ) actual = concat([foo, bar], "w") assert_equal(expected, actual) # from iteration: grouped = [g.squeeze() for _, g in foo.groupby("x", squeeze=False)] stacked = concat(grouped, ds["x"]) assert_identical(foo, stacked) # with an index as the 'dim' argument stacked = concat(grouped, pd.Index(ds["x"], name="x")) assert_identical(foo, stacked) actual2 = concat( [foo.isel(x=0), foo.isel(x=1)], pd.Index([0, 1]), coords="all" ).reset_coords(drop=True) expected = foo[:2].rename({"x": "concat_dim"}) assert_identical(expected, actual2) actual3 = concat( [foo.isel(x=0), foo.isel(x=1)], [0, 1], coords="all" ).reset_coords(drop=True) expected = foo[:2].rename({"x": "concat_dim"}) assert_identical(expected, actual3) with pytest.raises(ValueError, match=r"not identical"): concat([foo, bar], dim="w", compat="identical") with pytest.raises(ValueError, match=r"not a valid argument"): concat([foo, bar], dim="w", data_vars="different") def test_concat_encoding(self) -> None: # Regression test for GH1297 ds = Dataset( { "foo": (["x", "y"], np.random.random((2, 3))), "bar": (["x", "y"], np.random.random((2, 3))), }, {"x": [0, 1]}, ) foo = ds["foo"] foo.encoding = {"complevel": 5} ds.encoding = {"unlimited_dims": "x"} assert concat([foo, foo], dim="x").encoding == foo.encoding assert concat([ds, ds], dim="x").encoding == ds.encoding @requires_dask def test_concat_lazy(self) -> None: import dask.array as da arrays = [ DataArray( da.from_array(InaccessibleArray(np.zeros((3, 3))), 3), dims=["x", "y"] ) for _ in range(2) ] # should not raise combined = concat(arrays, dim="z") assert combined.shape == (2, 3, 3) assert combined.dims == ("z", "x", "y") def test_concat_avoids_index_auto_creation(self) -> None: # TODO once passing indexes={} directly to DataArray constructor is allowed then no need to create coords first coords = Coordinates( {"x": ConcatenatableArray(np.array([1, 2, 3]))}, indexes={} ) arrays = [ DataArray( ConcatenatableArray(np.zeros((3, 3))), dims=["x", "y"], coords=coords, ) for _ in range(2) ] # should not raise on concat combined = concat(arrays, dim="x") assert combined.shape == (6, 3) assert combined.dims == ("x", "y") # nor have auto-created any indexes assert combined.indexes == {} # should not raise on 
stack combined = concat(arrays, dim="z") assert combined.shape == (2, 3, 3) assert combined.dims == ("z", "x", "y") # nor have auto-created any indexes assert combined.indexes == {} @pytest.mark.parametrize("fill_value", [dtypes.NA, 2, 2.0]) def test_concat_fill_value(self, fill_value) -> None: foo = DataArray([1, 2], coords=[("x", [1, 2])]) bar = DataArray([1, 2], coords=[("x", [1, 3])]) if fill_value == dtypes.NA: # if we supply the default, we expect the missing value for a # float array fill_value = np.nan expected = DataArray( [[1, 2, fill_value], [1, fill_value, 2]], dims=["y", "x"], coords={"x": [1, 2, 3]}, ) actual = concat((foo, bar), dim="y", fill_value=fill_value, join="outer") assert_identical(actual, expected) def test_concat_join_kwarg(self) -> None: ds1 = Dataset( {"a": (("x", "y"), [[0]])}, coords={"x": [0], "y": [0]} ).to_dataarray() ds2 = Dataset( {"a": (("x", "y"), [[0]])}, coords={"x": [1], "y": [0.0001]} ).to_dataarray() expected: dict[JoinOptions, Any] = {} expected["outer"] = Dataset( {"a": (("x", "y"), [[0, np.nan], [np.nan, 0]])}, {"x": [0, 1], "y": [0, 0.0001]}, ) expected["inner"] = Dataset( {"a": (("x", "y"), [[], []])}, {"x": [0, 1], "y": []} ) expected["left"] = Dataset( {"a": (("x", "y"), np.array([0, np.nan], ndmin=2).T)}, coords={"x": [0, 1], "y": [0]}, ) expected["right"] = Dataset( {"a": (("x", "y"), np.array([np.nan, 0], ndmin=2).T)}, coords={"x": [0, 1], "y": [0.0001]}, ) expected["override"] = Dataset( {"a": (("x", "y"), np.array([0, 0], ndmin=2).T)}, coords={"x": [0, 1], "y": [0]}, ) with pytest.raises(ValueError, match=r"cannot align.*exact.*dimensions.*'y'"): actual = concat([ds1, ds2], join="exact", dim="x") for join, expected_item in expected.items(): actual = concat([ds1, ds2], join=join, dim="x") assert_equal(actual, expected_item.to_dataarray()) def test_concat_combine_attrs_kwarg(self) -> None: da1 = DataArray([0], coords=[("x", [0])], attrs={"b": 42}) da2 = DataArray([0], coords=[("x", [1])], attrs={"b": 42, "c": 43}) expected: dict[CombineAttrsOptions, Any] = {} expected["drop"] = DataArray([0, 0], coords=[("x", [0, 1])]) expected["no_conflicts"] = DataArray( [0, 0], coords=[("x", [0, 1])], attrs={"b": 42, "c": 43} ) expected["override"] = DataArray( [0, 0], coords=[("x", [0, 1])], attrs={"b": 42} ) with pytest.raises(ValueError, match=r"combine_attrs='identical'"): actual = concat([da1, da2], dim="x", combine_attrs="identical") with pytest.raises(ValueError, match=r"combine_attrs='no_conflicts'"): da3 = da2.copy(deep=True) da3.attrs["b"] = 44 actual = concat([da1, da3], dim="x", combine_attrs="no_conflicts") for combine_attrs, expected_item in expected.items(): actual = concat([da1, da2], dim="x", combine_attrs=combine_attrs) assert_identical(actual, expected_item) @pytest.mark.parametrize("dtype", [str, bytes]) @pytest.mark.parametrize("dim", ["x1", "x2"]) def test_concat_str_dtype(self, dtype, dim) -> None: data = np.arange(4).reshape([2, 2]) da1 = DataArray( data=data, dims=["x1", "x2"], coords={"x1": [0, 1], "x2": np.array(["a", "b"], dtype=dtype)}, ) da2 = DataArray( data=data, dims=["x1", "x2"], coords={"x1": np.array([1, 2]), "x2": np.array(["c", "d"], dtype=dtype)}, ) actual = concat([da1, da2], dim=dim, join="outer") assert np.issubdtype(actual.x2.dtype, dtype) def test_concat_coord_name(self) -> None: da = DataArray([0], dims="a") da_concat = concat([da, da], dim=DataArray([0, 1], dims="b")) assert list(da_concat.coords) == ["b"] da_concat_std = concat([da, da], dim=DataArray([0, 1])) assert list(da_concat_std.coords) == ["dim_0"] 
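# --- Illustrative sketch (editorial addition, not part of the original test
# suite). A minimal, hedged example of the behaviour exercised just above:
# concatenating along a brand-new dimension supplied as a DataArray with an
# explicit dimension name. The helper name `_example_concat_along_named_dim`
# is hypothetical and exists only for illustration.
def _example_concat_along_named_dim():
    import numpy as np
    from xarray import DataArray, concat

    da = DataArray(np.arange(3), dims="x")
    # Passing a DataArray with dims="run" as `dim` labels the new dimension
    # "run" and attaches its values [0, 1] as the "run" coordinate; as the
    # test above shows, an unnamed DataArray falls back to the default "dim_0".
    label = DataArray([0, 1], dims="run")
    stacked = concat([da, da], dim=label)
    assert stacked.dims == ("run", "x")
    assert list(stacked.coords) == ["run"]
    return stacked
# Calling _example_concat_along_named_dim() returns a (2, 3) DataArray indexed
# by the new "run" coordinate.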
@pytest.mark.parametrize("attr1", ({"a": {"meta": [10, 20, 30]}}, {"a": [1, 2, 3]}, {})) @pytest.mark.parametrize("attr2", ({"a": [1, 2, 3]}, {})) def test_concat_attrs_first_variable(attr1, attr2) -> None: arrs = [ DataArray([[1], [2]], dims=["x", "y"], attrs=attr1), DataArray([[3], [4]], dims=["x", "y"], attrs=attr2), ] concat_attrs = concat(arrs, "y").attrs assert concat_attrs == attr1 def test_concat_merge_single_non_dim_coord() -> None: da1 = DataArray([1, 2, 3], dims="x", coords={"x": [1, 2, 3], "y": 1}) da2 = DataArray([4, 5, 6], dims="x", coords={"x": [4, 5, 6]}) expected = DataArray(range(1, 7), dims="x", coords={"x": range(1, 7), "y": 1}) actual = concat([da1, da2], "x", coords="minimal", compat="override") assert_identical(actual, expected) actual = concat([da1, da2], "x", coords="different", compat="equals") assert_identical(actual, expected) with pytest.raises(ValueError, match=r"'y' not present in all datasets."): concat([da1, da2], dim="x", coords="all") da1 = DataArray([1, 2, 3], dims="x", coords={"x": [1, 2, 3], "y": 1}) da2 = DataArray([4, 5, 6], dims="x", coords={"x": [4, 5, 6]}) da3 = DataArray([7, 8, 9], dims="x", coords={"x": [7, 8, 9], "y": 1}) with pytest.raises(ValueError, match=r"'y' not present in all datasets"): concat([da1, da2, da3], dim="x", coords="all") with pytest.raises(ValueError, match=r"'y' not present in all datasets"): concat([da1, da2, da3], dim="x", coords="different", compat="equals") def test_concat_preserve_coordinate_order() -> None: x = np.arange(0, 5) y = np.arange(0, 10) time = np.arange(0, 4) data = np.zeros((4, 10, 5), dtype=bool) ds1 = Dataset( {"data": (["time", "y", "x"], data[0:2])}, coords={"time": time[0:2], "y": y, "x": x}, ) ds2 = Dataset( {"data": (["time", "y", "x"], data[2:4])}, coords={"time": time[2:4], "y": y, "x": x}, ) expected = Dataset( {"data": (["time", "y", "x"], data)}, coords={"time": time, "y": y, "x": x}, ) actual = concat([ds1, ds2], dim="time") # check dimension order for act, exp in zip(actual.dims, expected.dims, strict=True): assert act == exp assert actual.sizes[act] == expected.sizes[exp] # check coordinate order for act, exp in zip(actual.coords, expected.coords, strict=True): assert act == exp assert_identical(actual.coords[act], expected.coords[exp]) def test_concat_typing_check() -> None: ds = Dataset({"foo": 1}, {"bar": 2}) da = Dataset({"foo": 3}, {"bar": 4}).to_dataarray(dim="foo") # concatenate a list of non-homogeneous types must raise TypeError with pytest.raises( TypeError, match="The elements in the input list need to be either all 'Dataset's or all 'DataArray's", ): concat([ds, da], dim="foo") # type: ignore[list-item] with pytest.raises( TypeError, match="The elements in the input list need to be either all 'Dataset's or all 'DataArray's", ): concat([da, ds], dim="foo") # type: ignore[list-item] def test_concat_not_all_indexes() -> None: ds1 = Dataset(coords={"x": ("x", [1, 2])}) # ds2.x has no default index ds2 = Dataset(coords={"x": ("y", [3, 4])}) with pytest.raises( ValueError, match=r"'x' must have either an index or no index in all datasets.*" ): concat([ds1, ds2], dim="x") def test_concat_index_not_same_dim() -> None: ds1 = Dataset(coords={"x": ("x", [1, 2])}) ds2 = Dataset(coords={"x": ("y", [3, 4])}) # TODO: use public API for setting a non-default index, when available ds2._indexes["x"] = PandasIndex([3, 4], "y") with pytest.raises( ValueError, match=r"Cannot concatenate along dimension 'x' indexes with dimensions.*", ): concat([ds1, ds2], dim="x") class TestNewDefaults: def 
test_concat_second_empty_with_scalar_data_var_only_on_first(self) -> None: ds1 = Dataset(data_vars={"a": ("y", [0.1]), "b": 0.1}, coords={"x": 0.1}) ds2 = Dataset(coords={"x": 0.1}) expected = Dataset( data_vars={"a": ("y", [0.1, np.nan]), "b": 0.1}, coords={"x": 0.1} ) with set_options(use_new_combine_kwarg_defaults=False): with pytest.warns( FutureWarning, match="will change from compat='equals' to compat='override'", ): actual = concat( [ds1, ds2], dim="y", coords="different", data_vars="different" ) assert_identical(actual, expected) with set_options(use_new_combine_kwarg_defaults=True): with pytest.raises(ValueError, match="might be related to new default"): concat([ds1, ds2], dim="y", coords="different", data_vars="different") def test_concat_multiple_datasets_missing_vars(self) -> None: vars_to_drop = [ "temperature", "pressure", "humidity", "precipitation", "cloud_cover", ] datasets = create_concat_datasets( len(vars_to_drop), seed=123, include_day=False ) # set up the test data datasets = [ ds.drop_vars(varname) for ds, varname in zip(datasets, vars_to_drop, strict=True) ] with set_options(use_new_combine_kwarg_defaults=False): old = concat(datasets, dim="day") with set_options(use_new_combine_kwarg_defaults=True): new = concat(datasets, dim="day") assert_identical(old, new) @pytest.mark.parametrize("coords", ["different", "minimal", "all"]) def test_concat_coords_kwarg( self, coords: Literal["all", "minimal", "different"] ) -> None: data = create_test_data().drop_dims("dim3") # make sure the coords argument behaves as expected data.coords["extra"] = ("dim4", np.arange(3)) datasets = [g for _, g in data.groupby("dim1")] with set_options(use_new_combine_kwarg_defaults=False): expectation: AbstractContextManager = ( pytest.warns( FutureWarning, match="will change from compat='equals' to compat='override'", ) if coords == "different" else nullcontext() ) with expectation: old = concat(datasets, data["dim1"], coords=coords) with set_options(use_new_combine_kwarg_defaults=True): if coords == "different": with pytest.raises(ValueError): concat(datasets, data["dim1"], coords=coords) else: new = concat(datasets, data["dim1"], coords=coords) assert_identical(old, new) def test_concat_promote_shape_for_scalars_with_mixed_lengths_along_concat_dim( self, ) -> None: # values should repeat objs = [Dataset({"x": [0]}, {"y": -1}), Dataset({"x": [1, 2]}, {"y": -2})] expected = Dataset({"x": [0, 1, 2]}, {"y": ("x", [-1, -2, -2])}) with set_options(use_new_combine_kwarg_defaults=False): with pytest.warns( FutureWarning, match="will change from coords='different' to coords='minimal'", ): old = concat(objs, "x") assert_identical(old, expected) with set_options(use_new_combine_kwarg_defaults=True): new = concat(objs, "x") with pytest.raises(AssertionError): assert_identical(new, old) with pytest.raises(ValueError, match="might be related to new default"): concat(objs, "x", coords="different") with pytest.raises(merge.MergeError, match="conflicting values"): concat(objs, "x", compat="equals") new = concat(objs, "x", coords="different", compat="equals") assert_identical(old, new) def test_concat_multi_dim_index() -> None: ds1 = ( Dataset( {"foo": (("x", "y"), np.random.randn(2, 2))}, coords={"x": [1, 2], "y": [3, 4]}, ) .drop_indexes(["x", "y"]) .set_xindex(["x", "y"], XYIndex) ) ds2 = ( Dataset( {"foo": (("x", "y"), np.random.randn(2, 2))}, coords={"x": [1, 2], "y": [5, 6]}, ) .drop_indexes(["x", "y"]) .set_xindex(["x", "y"], XYIndex) ) expected = ( Dataset( { "foo": ( ("x", "y"), 
np.concatenate([ds1.foo.data, ds2.foo.data], axis=-1), ) }, coords={"x": [1, 2], "y": [3, 4, 5, 6]}, ) .drop_indexes(["x", "y"]) .set_xindex(["x", "y"], XYIndex) ) # note: missing 'override' joins: list[types.JoinOptions] = ["inner", "outer", "exact", "left", "right"] for join in joins: actual = concat([ds1, ds2], dim="y", join=join) assert_identical(actual, expected, check_default_indexes=False) with pytest.raises(AlignmentError): actual = concat([ds1, ds2], dim="x", join="exact") # TODO: fix these, or raise better error message with pytest.raises(AssertionError): joins_lr: list[types.JoinOptions] = ["left", "right"] for join in joins_lr: actual = concat([ds1, ds2], dim="x", join=join) class TestConcatDataTree: def test_concat_datatree_along_existing_dim(self): dt1 = DataTree.from_dict(data={"/a": ("x", [1]), "/b": 3}, coords={"/x": [0]}) dt2 = DataTree.from_dict(data={"/a": ("x", [2]), "/b": 3}, coords={"/x": [1]}) expected = DataTree.from_dict( data={"/a": ("x", [1, 2]), "/b": 3}, coords={"/x": [0, 1]} ) actual = concat([dt1, dt2], dim="x", data_vars="minimal", coords="minimal") assert actual.identical(expected) def test_concat_datatree_along_existing_dim_defaults(self): # scalar coordinate dt1 = DataTree.from_dict(data={"/a": ("x", [1])}, coords={"/x": [0], "/b": 3}) dt2 = DataTree.from_dict(data={"/a": ("x", [2])}, coords={"/x": [1], "/b": 3}) expected = DataTree.from_dict( data={"/a": ("x", [1, 2])}, coords={"/x": [0, 1], "b": 3} ) actual = concat([dt1, dt2], dim="x") assert actual.identical(expected) # scalar data variable dt1 = DataTree.from_dict(data={"/a": ("x", [1]), "/b": 3}, coords={"/x": [0]}) dt2 = DataTree.from_dict(data={"/a": ("x", [2]), "/b": 3}, coords={"/x": [1]}) expected = DataTree.from_dict( data={"/a": ("x", [1, 2]), "/b": ("x", [3, 3])}, coords={"/x": [0, 1]} ) with pytest.warns( FutureWarning, match="will change from data_vars='all' to data_vars=None" ): actual = concat([dt1, dt2], dim="x") assert actual.identical(expected) with set_options(use_new_combine_kwarg_defaults=True): expected = DataTree.from_dict( data={"/a": ("x", [1, 2]), "/b": 3}, coords={"/x": [0, 1]} ) actual = concat([dt1, dt2], dim="x") assert actual.identical(expected) def test_concat_datatree_isomorphic_error(self): dt1 = DataTree.from_dict(data={"/data": ("x", [1]), "/a": None}) dt2 = DataTree.from_dict(data={"/data": ("x", [2]), "/b": None}) with pytest.raises( ValueError, match="All trees must be isomorphic to be concatenated" ): concat([dt1, dt2], dim="x", data_vars="minimal", coords="minimal") def test_concat_datatree_datavars_all(self): dt1 = DataTree.from_dict(data={"/a": 1, "/c/b": ("y", [10])}) dt2 = DataTree.from_dict(data={"/a": 2, "/c/b": ("y", [20])}) dim = pd.Index([100, 200], name="x") actual = concat([dt1, dt2], dim=dim, data_vars="all", coords="minimal") expected = DataTree.from_dict( data={ "/a": (("x",), [1, 2]), "/c/b": (("x", "y"), [[10], [20]]), }, coords={"/x": dim}, ) assert actual.identical(expected) def test_concat_datatree_coords_all(self): dt1 = DataTree.from_dict(data={"/child/d": ("y", [10])}, coords={"/c": 1}) dt2 = DataTree.from_dict(data={"/child/d": ("y", [10])}, coords={"/c": 2}) dim = pd.Index([0, 1], name="x") actual = concat( [dt1, dt2], dim=dim, data_vars="minimal", coords="all", compat="equals" ) expected = DataTree.from_dict( data={"/child/d": ("y", [10])}, coords={ "/c": (("x",), [1, 2]), "/x": dim, "/child/x": dim, }, ) assert actual.identical(expected) def test_concat_datatree_datavars_different(self): dt1 = DataTree.from_dict(data={"/a": 0, "/b": 
1}) dt2 = DataTree.from_dict(data={"/a": 0, "/b": 2}) dim = pd.Index([0, 1], name="x") actual = concat( [dt1, dt2], dim=dim, data_vars="different", coords="minimal", compat="equals", ) expected = DataTree.from_dict( data={"/a": 0, "/b": (("x",), [1, 2])}, coords={"/x": dim} ) assert actual.identical(expected) def test_concat_datatree_nodes(self): dt1 = DataTree.from_dict(data={"/a/d": ("x", [1])}, coords={"/x": [0]}) dt2 = DataTree.from_dict(data={"/a/d": ("x", [2])}, coords={"/x": [1]}) actual = concat([dt1, dt2], dim="x", data_vars="minimal", coords="minimal") expected = DataTree.from_dict( data={"/a/d": ("x", [1, 2])}, coords={"/x": [0, 1]} ) assert actual.identical(expected) def test_concat_datatree_names(self): dt1 = DataTree(Dataset({"a": ("x", [1])}), name="a") dt2 = DataTree(Dataset({"a": ("x", [2])}), name="b") result = concat( [dt1, dt2], dim="x", data_vars="minimal", coords="minimal", compat="equals" ) assert result.name == "a" expected = DataTree(Dataset({"a": ("x", [1, 2])}), name="a") assert result.identical(expected) with pytest.raises(ValueError, match="DataTree names not identical"): concat( [dt1, dt2], dim="x", data_vars="minimal", coords="minimal", compat="identical", ) def test_concat_along_new_dim_raises_for_minimal(self): dt1 = DataTree.from_dict({"/a/d": 1}) dt2 = DataTree.from_dict({"/a/d": 2}) with pytest.raises( ValueError, match="data_vars='minimal' and coords='minimal'" ): concat([dt1, dt2], dim="y", data_vars="minimal", coords="minimal") def test_concat_data_in_child_only(self): dt1 = DataTree.from_dict( data={"/child/a": ("x", [1])}, coords={"/child/x": [0]} ) dt2 = DataTree.from_dict( data={"/child/a": ("x", [2])}, coords={"/child/x": [1]} ) actual = concat([dt1, dt2], dim="x", data_vars="minimal", coords="minimal") expected = DataTree.from_dict( data={"/child/a": ("x", [1, 2])}, coords={"/child/x": [0, 1]} ) assert actual.identical(expected) def test_concat_data_in_child_only_defaults(self): dt1 = DataTree.from_dict( data={"/child/a": ("x", [1])}, coords={"/child/x": [0]} ) dt2 = DataTree.from_dict( data={"/child/a": ("x", [2])}, coords={"/child/x": [1]} ) actual = concat([dt1, dt2], dim="x") expected = DataTree.from_dict( data={"/child/a": ("x", [1, 2])}, coords={"/child/x": [0, 1]} ) assert actual.identical(expected) def test_concat_data_in_child_new_dim(self): dt1 = DataTree.from_dict(data={"/child/a": 1}, coords={"/child/x": 0}) dt2 = DataTree.from_dict(data={"/child/a": 2}, coords={"/child/x": 1}) actual = concat([dt1, dt2], dim="x") expected = DataTree.from_dict( data={"/child/a": ("x", [1, 2])}, coords={"/child/x": [0, 1]} ) assert actual.identical(expected) def test_concat_different_dims_in_different_child(self): dt1 = DataTree.from_dict(coords={"/first/x": [1], "/second/x": [2]}) dt2 = DataTree.from_dict(coords={"/first/x": [3], "/second/x": [4]}) actual = concat([dt1, dt2], dim="x") expected = DataTree.from_dict(coords={"/first/x": [1, 3], "/second/x": [2, 4]}) assert actual.identical(expected) python-xarray-2026.01.0/xarray/tests/test_tutorial.py0000664000175000017500000000275515136607163023000 0ustar alastairalastairfrom __future__ import annotations from xarray import DataArray, DataTree, tutorial from xarray.testing import assert_identical from xarray.tests import network @network class TestLoadDataset: def test_download_from_github(self, tmp_path) -> None: cache_dir = tmp_path / tutorial._default_cache_dir_name ds = tutorial.load_dataset("tiny", cache_dir=cache_dir) tiny = DataArray(range(5), name="tiny").to_dataset() assert_identical(ds, 
tiny) def test_download_from_github_load_without_cache(self, tmp_path) -> None: cache_dir = tmp_path / tutorial._default_cache_dir_name ds_nocache = tutorial.load_dataset("tiny", cache=False, cache_dir=cache_dir) ds_cache = tutorial.load_dataset("tiny", cache_dir=cache_dir) assert_identical(ds_cache, ds_nocache) @network class TestLoadDataTree: def test_download_from_github(self, tmp_path) -> None: cache_dir = tmp_path / tutorial._default_cache_dir_name ds = tutorial.load_datatree("tiny", cache_dir=cache_dir) tiny = DataTree.from_dict({"/": DataArray(range(5), name="tiny").to_dataset()}) assert_identical(ds, tiny) def test_download_from_github_load_without_cache(self, tmp_path) -> None: cache_dir = tmp_path / tutorial._default_cache_dir_name ds_nocache = tutorial.load_datatree("tiny", cache=False, cache_dir=cache_dir) ds_cache = tutorial.load_datatree("tiny", cache_dir=cache_dir) assert_identical(ds_cache, ds_nocache) python-xarray-2026.01.0/xarray/tests/test_indexing.py0000664000175000017500000012222415136607163022734 0ustar alastairalastairfrom __future__ import annotations import itertools from typing import Any, Union import numpy as np import pandas as pd import pytest from xarray import DataArray, Dataset, Variable, concat from xarray.core import indexing, nputils from xarray.core.indexes import PandasIndex, PandasMultiIndex from xarray.core.types import T_Xarray from xarray.tests import ( IndexerMaker, ReturnItem, assert_array_equal, assert_identical, raise_if_dask_computes, requires_dask, requires_pandas_3, ) from xarray.tests.arrays import DuckArrayWrapper B = IndexerMaker(indexing.BasicIndexer) class TestIndexCallable: def test_getitem(self): def getter(key): return key * 2 indexer = indexing.IndexCallable(getter) assert indexer[3] == 6 assert indexer[0] == 0 assert indexer[-1] == -2 def test_setitem(self): def getter(key): return key * 2 def setter(key, value): raise NotImplementedError("Setter not implemented") indexer = indexing.IndexCallable(getter, setter) with pytest.raises(NotImplementedError): indexer[3] = 6 class TestIndexers: def set_to_zero(self, x, i): x = x.copy() x[i] = 0 return x def test_expanded_indexer(self) -> None: x = np.random.randn(10, 11, 12, 13, 14) y = np.arange(5) arr = ReturnItem() for i in [ arr[:], arr[...], arr[0, :, 10], arr[..., 10], arr[:5, ..., 0], arr[..., 0, :], arr[y], arr[y, y], arr[..., y, y], arr[..., 0, 1, 2, 3, 4], ]: j = indexing.expanded_indexer(i, x.ndim) assert_array_equal(x[i], x[j]) assert_array_equal(self.set_to_zero(x, i), self.set_to_zero(x, j)) with pytest.raises(IndexError, match=r"too many indices"): indexing.expanded_indexer(arr[1, 2, 3], 2) def test_stacked_multiindex_min_max(self) -> None: data = np.random.randn(3, 23, 4) da = DataArray( data, name="value", dims=["replicate", "rsample", "exp"], coords=dict( replicate=[0, 1, 2], exp=["a", "b", "c", "d"], rsample=list(range(23)) ), ) da2 = da.stack(sample=("replicate", "rsample")) s = da2.sample assert_array_equal(da2.loc["a", s.max()], data[2, 22, 0]) assert_array_equal(da2.loc["b", s.min()], data[0, 0, 1]) def test_group_indexers_by_index(self) -> None: mindex = pd.MultiIndex.from_product([["a", "b"], [1, 2]], names=("one", "two")) data = DataArray( np.zeros((4, 2, 2)), coords={"x": mindex, "y": [1, 2]}, dims=("x", "y", "z") ) data.coords["y2"] = ("y", [2.0, 3.0]) grouped_indexers = indexing.group_indexers_by_index( data, {"z": 0, "one": "a", "two": 1, "y": 0}, {} ) for idx, indexers in grouped_indexers: if idx is None: assert indexers == {"z": 0} elif 
idx.equals(data.xindexes["x"]): assert indexers == {"one": "a", "two": 1} elif idx.equals(data.xindexes["y"]): assert indexers == {"y": 0} assert len(grouped_indexers) == 3 with pytest.raises( KeyError, match=r"'w' is not a valid dimension or coordinate" ): indexing.group_indexers_by_index(data, {"w": "a"}, {}) with pytest.raises(ValueError, match=r"cannot supply.*"): indexing.group_indexers_by_index(data, {"z": 1}, {"method": "nearest"}) def test_group_indexers_by_index_creates_index_for_unindexed_coord(self) -> None: # Test that selecting on a coordinate without an index creates a PandasIndex on the fly data = DataArray( np.zeros((2, 3)), coords={"x": [0, 1], "y": [10, 20, 30]}, dims=("x", "y") ) data.coords["y2"] = ("y", [2.0, 3.0, 4.0]) # y2 is a coordinate but has no index assert "y2" in data.coords assert "y2" not in data.xindexes # group_indexers_by_index should create a PandasIndex on the fly grouped_indexers = indexing.group_indexers_by_index(data, {"y2": 3.0}, {}) assert len(grouped_indexers) == 1 idx, indexers = grouped_indexers[0] assert isinstance(idx, PandasIndex) assert indexers == {"y2": 3.0} def test_map_index_queries(self) -> None: def create_sel_results( x_indexer, x_index, other_vars, drop_coords, drop_indexes, rename_dims, ): dim_indexers = {"x": x_indexer} index_vars = x_index.create_variables() indexes = dict.fromkeys(index_vars, x_index) variables = {} variables.update(index_vars) variables.update(other_vars) return indexing.IndexSelResult( dim_indexers=dim_indexers, indexes=indexes, variables=variables, drop_coords=drop_coords, drop_indexes=drop_indexes, rename_dims=rename_dims, ) def test_indexer( data: T_Xarray, x: Any, expected: indexing.IndexSelResult, ) -> None: results = indexing.map_index_queries(data, {"x": x}) assert results.dim_indexers.keys() == expected.dim_indexers.keys() assert_array_equal(results.dim_indexers["x"], expected.dim_indexers["x"]) assert results.indexes.keys() == expected.indexes.keys() for k in results.indexes: assert results.indexes[k].equals(expected.indexes[k]) assert results.variables.keys() == expected.variables.keys() for k in results.variables: assert_array_equal(results.variables[k], expected.variables[k]) assert set(results.drop_coords) == set(expected.drop_coords) assert set(results.drop_indexes) == set(expected.drop_indexes) assert results.rename_dims == expected.rename_dims data = Dataset({"x": ("x", [1, 2, 3])}) mindex = pd.MultiIndex.from_product( [["a", "b"], [1, 2], [-1, -2]], names=("one", "two", "three") ) mdata = DataArray(range(8), [("x", mindex)]) test_indexer(data, 1, indexing.IndexSelResult({"x": 0})) test_indexer(data, np.int32(1), indexing.IndexSelResult({"x": 0})) test_indexer(data, Variable([], 1), indexing.IndexSelResult({"x": 0})) test_indexer(mdata, ("a", 1, -1), indexing.IndexSelResult({"x": 0})) expected = create_sel_results( [True, True, False, False, False, False, False, False], PandasIndex(pd.Index([-1, -2]), "three"), {"one": Variable((), "a"), "two": Variable((), 1)}, ["x"], ["one", "two"], {"x": "three"}, ) test_indexer(mdata, ("a", 1), expected) expected = create_sel_results( slice(0, 4, None), PandasMultiIndex( pd.MultiIndex.from_product([[1, 2], [-1, -2]], names=("two", "three")), "x", ), {"one": Variable((), "a")}, [], ["one"], {}, ) test_indexer(mdata, "a", expected) expected = create_sel_results( [True, True, True, True, False, False, False, False], PandasMultiIndex( pd.MultiIndex.from_product([[1, 2], [-1, -2]], names=("two", "three")), "x", ), {"one": Variable((), "a")}, [], ["one"], {}, ) 
test_indexer(mdata, ("a",), expected) test_indexer( mdata, [("a", 1, -1), ("b", 2, -2)], indexing.IndexSelResult({"x": [0, 7]}) ) test_indexer( mdata, slice("a", "b"), indexing.IndexSelResult({"x": slice(0, 8, None)}) ) test_indexer( mdata, slice(("a", 1), ("b", 1)), indexing.IndexSelResult({"x": slice(0, 6, None)}), ) test_indexer( mdata, {"one": "a", "two": 1, "three": -1}, indexing.IndexSelResult({"x": 0}), ) expected = create_sel_results( [True, True, False, False, False, False, False, False], PandasIndex(pd.Index([-1, -2]), "three"), {"one": Variable((), "a"), "two": Variable((), 1)}, ["x"], ["one", "two"], {"x": "three"}, ) test_indexer(mdata, {"one": "a", "two": 1}, expected) expected = create_sel_results( [True, False, True, False, False, False, False, False], PandasIndex(pd.Index([1, 2]), "two"), {"one": Variable((), "a"), "three": Variable((), -1)}, ["x"], ["one", "three"], {"x": "two"}, ) test_indexer(mdata, {"one": "a", "three": -1}, expected) expected = create_sel_results( [True, True, True, True, False, False, False, False], PandasMultiIndex( pd.MultiIndex.from_product([[1, 2], [-1, -2]], names=("two", "three")), "x", ), {"one": Variable((), "a")}, [], ["one"], {}, ) test_indexer(mdata, {"one": "a"}, expected) def test_read_only_view(self) -> None: arr = DataArray( np.random.rand(3, 3), coords={"x": np.arange(3), "y": np.arange(3)}, dims=("x", "y"), ) # Create a 2D DataArray arr = arr.expand_dims({"z": 3}, -1) # New dimension 'z' arr["z"] = np.arange(3) # New coords to dimension 'z' with pytest.raises(ValueError, match=r"Do you want to .copy()"): arr.loc[0, 0, 0] = 999 class TestLazyArray: @pytest.mark.parametrize( ["indexer", "size", "expected"], ( (4, 5, 4), (-1, 3, 2), (slice(None), 4, slice(0, 4, 1)), (slice(1, -3), 7, slice(1, 4, 1)), (np.array([-1, 3, -2]), 5, np.array([4, 3, 3])), ), ) def normalize_indexer(self, indexer, size, expected): actual = indexing.normalize_indexer(indexer, size) if isinstance(expected, np.ndarray): np.testing.assert_equal(actual, expected) else: assert actual == expected def test_slice_slice(self) -> None: arr = ReturnItem() for size in [100, 99]: # We test even/odd size cases x = np.arange(size) slices = [ arr[:3], arr[:4], arr[2:4], arr[:1], arr[:-1], arr[5:-1], arr[-5:-1], arr[::-1], arr[5::-1], arr[:3:-1], arr[:30:-1], arr[10:4:], arr[::4], arr[4:4:4], arr[:4:-4], arr[::-2], ] for i in slices: for j in slices: expected = x[i][j] new_slice = indexing.slice_slice(i, j, size=size) actual = x[new_slice] assert_array_equal(expected, actual) @pytest.mark.parametrize( ["old_slice", "array", "size"], ( (slice(None, 8), np.arange(2, 6), 10), (slice(2, None), np.arange(2, 6), 10), (slice(1, 10, 2), np.arange(1, 4), 15), (slice(10, None, -1), np.array([2, 5, 7]), 12), (slice(2, None, 2), np.array([3, -2, 5, -1]), 13), (slice(8, None), np.array([1, -2, 2, -1, -7]), 20), ), ) def test_slice_slice_by_array(self, old_slice, array, size): actual = indexing.slice_slice_by_array(old_slice, array, size) expected = np.arange(size)[old_slice][array] assert_array_equal(actual, expected) @pytest.mark.parametrize( ["old_indexer", "indexer", "size", "expected"], ( pytest.param( slice(None), slice(None, 3), 5, slice(0, 3, 1), id="full_slice-slice" ), pytest.param( slice(None), np.arange(2, 4), 5, np.arange(2, 4), id="full_slice-array" ), pytest.param(slice(None), 3, 5, 3, id="full_slice-int"), pytest.param( slice(2, 12, 3), slice(1, 3), 16, slice(5, 11, 3), id="slice_step-slice" ), pytest.param( slice(2, 12, 3), np.array([1, 3]), 16, np.array([5, 11]), 
id="slice_step-array", ), pytest.param( np.arange(5), slice(1, 3), 7, np.arange(1, 3), id="array-slice" ), pytest.param( np.arange(0, 8, 2), np.arange(1, 3), 9, np.arange(2, 6, 2), id="array-array", ), pytest.param(np.arange(3), 2, 5, 2, id="array-int"), ), ) def test_index_indexer_1d(self, old_indexer, indexer, size, expected): actual = indexing._index_indexer_1d(old_indexer, indexer, size) if isinstance(expected, np.ndarray): np.testing.assert_equal(actual, expected) else: assert actual == expected def test_lazily_indexed_array(self) -> None: original = np.random.rand(10, 20, 30) x = indexing.NumpyIndexingAdapter(original) v = Variable(["i", "j", "k"], original) lazy = indexing.LazilyIndexedArray(x) v_lazy = Variable(["i", "j", "k"], lazy) arr = ReturnItem() # test orthogonally applied indexers indexers = [arr[:], 0, -2, arr[:3], [0, 1, 2, 3], [0], np.arange(10) < 5] for i in indexers: for j in indexers: for k in indexers: if isinstance(j, np.ndarray) and j.dtype.kind == "b": j = np.arange(20) < 5 if isinstance(k, np.ndarray) and k.dtype.kind == "b": k = np.arange(30) < 5 expected = np.asarray(v[i, j, k]) for actual in [ v_lazy[i, j, k], v_lazy[:, j, k][i], v_lazy[:, :, k][:, j][i], ]: assert expected.shape == actual.shape assert_array_equal(expected, actual) assert isinstance(actual._data, indexing.LazilyIndexedArray) assert isinstance(v_lazy._data, indexing.LazilyIndexedArray) # make sure actual.key is appropriate type if all( isinstance(k, int | slice) for k in v_lazy._data.key.tuple ): assert isinstance(v_lazy._data.key, indexing.BasicIndexer) else: assert isinstance(v_lazy._data.key, indexing.OuterIndexer) # test sequentially applied indexers indexers = [ (3, 2), (arr[:], 0), (arr[:2], -1), (arr[:4], [0]), ([4, 5], 0), ([0, 1, 2], [0, 1]), ([0, 3, 5], arr[:2]), ] for i, j in indexers: expected_b = v[i][j] actual = v_lazy[i][j] assert expected_b.shape == actual.shape assert_array_equal(expected_b, actual) # test transpose if actual.ndim > 1: order = np.random.choice(actual.ndim, actual.ndim) order = np.array(actual.dims) transposed = actual.transpose(*order) assert_array_equal(expected_b.transpose(*order), transposed) assert isinstance( actual._data, indexing.LazilyVectorizedIndexedArray | indexing.LazilyIndexedArray, ) assert isinstance(actual._data, indexing.LazilyIndexedArray) assert isinstance(actual._data.array, indexing.NumpyIndexingAdapter) def test_vectorized_lazily_indexed_array(self) -> None: original = np.random.rand(10, 20, 30) x = indexing.NumpyIndexingAdapter(original) v_eager = Variable(["i", "j", "k"], x) lazy = indexing.LazilyIndexedArray(x) v_lazy = Variable(["i", "j", "k"], lazy) arr = ReturnItem() def check_indexing(v_eager, v_lazy, indexers): for indexer in indexers: actual = v_lazy[indexer] expected = v_eager[indexer] assert expected.shape == actual.shape assert isinstance( actual._data, indexing.LazilyVectorizedIndexedArray | indexing.LazilyIndexedArray, ) assert_array_equal(expected, actual) v_eager = expected v_lazy = actual # test orthogonal indexing indexers = [(arr[:], 0, 1), (Variable("i", [0, 1]),)] check_indexing(v_eager, v_lazy, indexers) # vectorized indexing indexers = [ (Variable("i", [0, 1]), Variable("i", [0, 1]), slice(None)), (slice(1, 3, 2), 0), ] check_indexing(v_eager, v_lazy, indexers) indexers = [ (slice(None, None, 2), 0, slice(None, 10)), (Variable("i", [3, 2, 4, 3]), Variable("i", [3, 2, 1, 0])), (Variable(["i", "j"], [[0, 1], [1, 2]]),), ] check_indexing(v_eager, v_lazy, indexers) indexers = [ (Variable("i", [3, 2, 4, 3]), Variable("i", 
[3, 2, 1, 0])), (Variable(["i", "j"], [[0, 1], [1, 2]]),), ] check_indexing(v_eager, v_lazy, indexers) def test_lazily_indexed_array_vindex_setitem(self) -> None: lazy = indexing.LazilyIndexedArray(np.random.rand(10, 20, 30)) # vectorized indexing indexer = indexing.VectorizedIndexer( (np.array([0, 1]), np.array([0, 1]), slice(None, None, None)) ) with pytest.raises( NotImplementedError, match=r"Lazy item assignment with the vectorized indexer is not yet", ): lazy.vindex[indexer] = 0 @pytest.mark.parametrize( "indexer_class, key, value", [ (indexing.OuterIndexer, (0, 1, slice(None, None, None)), 10), (indexing.BasicIndexer, (0, 1, slice(None, None, None)), 10), ], ) def test_lazily_indexed_array_setitem(self, indexer_class, key, value) -> None: original = np.random.rand(10, 20, 30) x = indexing.NumpyIndexingAdapter(original) lazy = indexing.LazilyIndexedArray(x) if indexer_class is indexing.BasicIndexer: indexer = indexer_class(key) lazy[indexer] = value elif indexer_class is indexing.OuterIndexer: indexer = indexer_class(key) lazy.oindex[indexer] = value assert_array_equal(original[key], value) class TestCopyOnWriteArray: def test_setitem(self) -> None: original = np.arange(10) wrapped = indexing.CopyOnWriteArray(original) wrapped[B[:]] = 0 assert_array_equal(original, np.arange(10)) assert_array_equal(wrapped, np.zeros(10)) def test_sub_array(self) -> None: original = np.arange(10) wrapped = indexing.CopyOnWriteArray(original) child = wrapped[B[:5]] assert isinstance(child, indexing.CopyOnWriteArray) child[B[:]] = 0 assert_array_equal(original, np.arange(10)) assert_array_equal(wrapped, np.arange(10)) assert_array_equal(child, np.zeros(5)) def test_index_scalar(self) -> None: # regression test for GH1374 x = indexing.CopyOnWriteArray(np.array(["foo", "bar"])) assert np.array(x[B[0]][B[()]]) == "foo" class TestMemoryCachedArray: def test_wrapper(self) -> None: original = indexing.LazilyIndexedArray(np.arange(10)) wrapped = indexing.MemoryCachedArray(original) assert_array_equal(wrapped, np.arange(10)) assert isinstance(wrapped.array, indexing.NumpyIndexingAdapter) def test_sub_array(self) -> None: original = indexing.LazilyIndexedArray(np.arange(10)) wrapped = indexing.MemoryCachedArray(original) child = wrapped[B[:5]] assert isinstance(child, indexing.MemoryCachedArray) assert_array_equal(child, np.arange(5)) assert isinstance(child.array, indexing.NumpyIndexingAdapter) assert isinstance(wrapped.array, indexing.LazilyIndexedArray) @pytest.mark.asyncio async def test_async_wrapper(self) -> None: original = indexing.LazilyIndexedArray(np.arange(10)) wrapped = indexing.MemoryCachedArray(original) await wrapped.async_get_duck_array() assert_array_equal(wrapped, np.arange(10)) assert isinstance(wrapped.array, indexing.NumpyIndexingAdapter) @pytest.mark.asyncio async def test_async_sub_array(self) -> None: original = indexing.LazilyIndexedArray(np.arange(10)) wrapped = indexing.MemoryCachedArray(original) child = wrapped[B[:5]] assert isinstance(child, indexing.MemoryCachedArray) await child.async_get_duck_array() assert_array_equal(child, np.arange(5)) assert isinstance(child.array, indexing.NumpyIndexingAdapter) assert isinstance(wrapped.array, indexing.LazilyIndexedArray) def test_setitem(self) -> None: original = np.arange(10) wrapped = indexing.MemoryCachedArray(original) wrapped[B[:]] = 0 assert_array_equal(original, np.zeros(10)) def test_index_scalar(self) -> None: # regression test for GH1374 x = indexing.MemoryCachedArray(np.array(["foo", "bar"])) assert np.array(x[B[0]][B[()]]) == 
"foo" def test_base_explicit_indexer() -> None: with pytest.raises(TypeError): indexing.ExplicitIndexer(()) class Subclass(indexing.ExplicitIndexer): pass value = Subclass((1, 2, 3)) assert value.tuple == (1, 2, 3) assert repr(value) == "Subclass((1, 2, 3))" @pytest.mark.parametrize( "indexer_cls", [indexing.BasicIndexer, indexing.OuterIndexer, indexing.VectorizedIndexer], ) def test_invalid_for_all(indexer_cls) -> None: with pytest.raises(TypeError): indexer_cls(None) with pytest.raises(TypeError): indexer_cls(([],)) with pytest.raises(TypeError): indexer_cls((None,)) with pytest.raises(TypeError): indexer_cls(("foo",)) with pytest.raises(TypeError): indexer_cls((1.0,)) with pytest.raises(TypeError): indexer_cls((slice("foo"),)) with pytest.raises(TypeError): indexer_cls((np.array(["foo"]),)) with pytest.raises(TypeError): indexer_cls(True) with pytest.raises(TypeError): indexer_cls(np.array(True)) def check_integer(indexer_cls): value = indexer_cls((1, np.uint64(2))).tuple assert all(isinstance(v, int) for v in value) assert value == (1, 2) def check_slice(indexer_cls): (value,) = indexer_cls((slice(1, None, np.int64(2)),)).tuple assert value == slice(1, None, 2) assert isinstance(value.step, int) def check_array1d(indexer_cls): (value,) = indexer_cls((np.arange(3, dtype=np.int32),)).tuple assert value.dtype == np.int64 np.testing.assert_array_equal(value, [0, 1, 2]) def check_array2d(indexer_cls): array = np.array([[1, 2], [3, 4]], dtype=np.int64) (value,) = indexer_cls((array,)).tuple assert value.dtype == np.int64 np.testing.assert_array_equal(value, array) def test_basic_indexer() -> None: check_integer(indexing.BasicIndexer) check_slice(indexing.BasicIndexer) with pytest.raises(TypeError): check_array1d(indexing.BasicIndexer) with pytest.raises(TypeError): check_array2d(indexing.BasicIndexer) def test_outer_indexer() -> None: check_integer(indexing.OuterIndexer) check_slice(indexing.OuterIndexer) check_array1d(indexing.OuterIndexer) with pytest.raises(TypeError): check_array2d(indexing.OuterIndexer) def test_vectorized_indexer() -> None: with pytest.raises(TypeError): check_integer(indexing.VectorizedIndexer) check_slice(indexing.VectorizedIndexer) check_array1d(indexing.VectorizedIndexer) check_array2d(indexing.VectorizedIndexer) with pytest.raises(ValueError, match=r"numbers of dimensions"): indexing.VectorizedIndexer( (np.array(1, dtype=np.int64), np.arange(5, dtype=np.int64)) ) class Test_vectorized_indexer: @pytest.fixture(autouse=True) def setup(self): self.data = indexing.NumpyIndexingAdapter(np.random.randn(10, 12, 13)) self.indexers = [ np.array([[0, 3, 2]]), np.array([[0, 3, 3], [4, 6, 7]]), slice(2, -2, 2), slice(2, -2, 3), slice(None), ] def test_arrayize_vectorized_indexer(self) -> None: for i, j, k in itertools.product(self.indexers, repeat=3): vindex = indexing.VectorizedIndexer((i, j, k)) # type: ignore[arg-type] vindex_array = indexing._arrayize_vectorized_indexer( vindex, self.data.shape ) np.testing.assert_array_equal( self.data.vindex[vindex], self.data.vindex[vindex_array] ) actual = indexing._arrayize_vectorized_indexer( indexing.VectorizedIndexer((slice(None),)), shape=(5,) ) np.testing.assert_array_equal(actual.tuple, [np.arange(5)]) actual = indexing._arrayize_vectorized_indexer( indexing.VectorizedIndexer((np.arange(5),) * 3), shape=(8, 10, 12) ) expected = np.stack([np.arange(5)] * 3) np.testing.assert_array_equal(np.stack(actual.tuple), expected) actual = indexing._arrayize_vectorized_indexer( indexing.VectorizedIndexer((np.arange(5), slice(None))), 
shape=(8, 10) ) a, b = actual.tuple np.testing.assert_array_equal(a, np.arange(5)[:, np.newaxis]) np.testing.assert_array_equal(b, np.arange(10)[np.newaxis, :]) actual = indexing._arrayize_vectorized_indexer( indexing.VectorizedIndexer((slice(None), np.arange(5))), shape=(8, 10) ) a, b = actual.tuple np.testing.assert_array_equal(a, np.arange(8)[np.newaxis, :]) np.testing.assert_array_equal(b, np.arange(5)[:, np.newaxis]) def get_indexers( shape: tuple[int, ...], mode: str ) -> Union[indexing.VectorizedIndexer, indexing.OuterIndexer, indexing.BasicIndexer]: if mode == "vectorized": indexed_shape = (3, 4) indexer_v = tuple(np.random.randint(0, s, size=indexed_shape) for s in shape) return indexing.VectorizedIndexer(indexer_v) elif mode == "outer": indexer_o = tuple(np.random.randint(0, s, s + 2) for s in shape) return indexing.OuterIndexer(indexer_o) elif mode == "outer_scalar": indexer_os: tuple[Any, ...] = ( np.random.randint(0, 3, 4), 0, slice(None, None, 2), ) return indexing.OuterIndexer(indexer_os[: len(shape)]) elif mode == "outer_scalar2": indexer_os2: tuple[Any, ...] = ( np.random.randint(0, 3, 4), -2, slice(None, None, 2), ) return indexing.OuterIndexer(indexer_os2[: len(shape)]) elif mode == "outer1vec": indexer_o1v: list[Any] = [slice(2, -3) for s in shape] indexer_o1v[1] = np.random.randint(0, shape[1], shape[1] + 2) return indexing.OuterIndexer(tuple(indexer_o1v)) elif mode == "basic": # basic indexer indexer_b: list[Any] = [slice(2, -3) for s in shape] indexer_b[0] = 3 return indexing.BasicIndexer(tuple(indexer_b)) elif mode == "basic1": # basic indexer return indexing.BasicIndexer((3,)) elif mode == "basic2": # basic indexer indexer_b2 = [0, 2, 4] return indexing.BasicIndexer(tuple(indexer_b2[: len(shape)])) elif mode == "basic3": # basic indexer indexer_b3: list[Any] = [slice(None) for s in shape] indexer_b3[0] = slice(-2, 2, -2) indexer_b3[1] = slice(1, -1, 2) return indexing.BasicIndexer(tuple(indexer_b3[: len(shape)])) raise ValueError(f"Unknown mode: {mode}") @pytest.mark.parametrize("size", [100, 99]) @pytest.mark.parametrize( "sl", [slice(1, -1, 1), slice(None, -1, 2), slice(-1, 1, -1), slice(-1, 1, -2)] ) def test_decompose_slice(size, sl) -> None: x = np.arange(size) slice1, slice2 = indexing._decompose_slice(sl, size) expected = x[sl] actual = x[slice1][slice2] assert_array_equal(expected, actual) @pytest.mark.parametrize("shape", [(10, 5, 8), (10, 3)]) @pytest.mark.parametrize( "indexer_mode", [ "vectorized", "outer", "outer_scalar", "outer_scalar2", "outer1vec", "basic", "basic1", "basic2", "basic3", ], ) @pytest.mark.parametrize( "indexing_support", [ indexing.IndexingSupport.BASIC, indexing.IndexingSupport.OUTER, indexing.IndexingSupport.OUTER_1VECTOR, indexing.IndexingSupport.VECTORIZED, ], ) def test_decompose_indexers(shape, indexer_mode, indexing_support) -> None: data = np.random.randn(*shape) indexer = get_indexers(shape, indexer_mode) backend_ind, np_ind = indexing.decompose_indexer(indexer, shape, indexing_support) indexing_adapter = indexing.NumpyIndexingAdapter(data) # Dispatch to appropriate indexing method if indexer_mode.startswith("vectorized"): expected = indexing_adapter.vindex[indexer] elif indexer_mode.startswith("outer"): expected = indexing_adapter.oindex[indexer] else: expected = indexing_adapter[indexer] # Basic indexing if isinstance(backend_ind, indexing.VectorizedIndexer): array = indexing_adapter.vindex[backend_ind] elif isinstance(backend_ind, indexing.OuterIndexer): array = indexing_adapter.oindex[backend_ind] else: array = 
indexing_adapter[backend_ind] if len(np_ind.tuple) > 0: array_indexing_adapter = indexing.NumpyIndexingAdapter(array) if isinstance(np_ind, indexing.VectorizedIndexer): array = array_indexing_adapter.vindex[np_ind] elif isinstance(np_ind, indexing.OuterIndexer): array = array_indexing_adapter.oindex[np_ind] else: array = array_indexing_adapter[np_ind] np.testing.assert_array_equal(expected, array) if not all(isinstance(k, indexing.integer_types) for k in np_ind.tuple): combined_ind = indexing._combine_indexers(backend_ind, shape, np_ind) assert isinstance(combined_ind, indexing.VectorizedIndexer) array = indexing_adapter.vindex[combined_ind] np.testing.assert_array_equal(expected, array) def test_implicit_indexing_adapter() -> None: array = np.arange(10, dtype=np.int64) implicit = indexing.ImplicitToExplicitIndexingAdapter( indexing.NumpyIndexingAdapter(array), indexing.BasicIndexer ) np.testing.assert_array_equal(array, np.asarray(implicit)) np.testing.assert_array_equal(array, implicit[:]) def test_implicit_indexing_adapter_copy_on_write() -> None: array = np.arange(10, dtype=np.int64) implicit = indexing.ImplicitToExplicitIndexingAdapter( indexing.CopyOnWriteArray(array) ) assert isinstance(implicit[:], indexing.ImplicitToExplicitIndexingAdapter) def test_outer_indexer_consistency_with_broadcast_indexes_vectorized() -> None: def nonzero(x): if isinstance(x, np.ndarray) and x.dtype.kind == "b": x = x.nonzero()[0] return x original = np.random.rand(10, 20, 30) v = Variable(["i", "j", "k"], original) arr = ReturnItem() # test orthogonally applied indexers indexers = [ arr[:], 0, -2, arr[:3], np.array([0, 1, 2, 3]), np.array([0]), np.arange(10) < 5, ] for i, j, k in itertools.product(indexers, repeat=3): if isinstance(j, np.ndarray) and j.dtype.kind == "b": # match size j = np.arange(20) < 4 if isinstance(k, np.ndarray) and k.dtype.kind == "b": k = np.arange(30) < 8 _, expected, new_order = v._broadcast_indexes_vectorized((i, j, k)) expected_data = nputils.NumpyVIndexAdapter(v.data)[expected.tuple] if new_order: old_order = range(len(new_order)) expected_data = np.moveaxis(expected_data, old_order, new_order) outer_index = indexing.OuterIndexer((nonzero(i), nonzero(j), nonzero(k))) actual = indexing._outer_to_numpy_indexer(outer_index, v.shape) actual_data = v.data[actual] np.testing.assert_array_equal(actual_data, expected_data) def test_create_mask_outer_indexer() -> None: indexer = indexing.OuterIndexer((np.array([0, -1, 2]),)) expected = np.array([False, True, False]) actual = indexing.create_mask(indexer, (5,)) np.testing.assert_array_equal(expected, actual) indexer = indexing.OuterIndexer((1, slice(2), np.array([0, -1, 2]))) expected = np.array(2 * [[False, True, False]]) actual = indexing.create_mask(indexer, (5, 5, 5)) np.testing.assert_array_equal(expected, actual) def test_create_mask_vectorized_indexer() -> None: indexer = indexing.VectorizedIndexer((np.array([0, -1, 2]), np.array([0, 1, -1]))) expected = np.array([False, True, True]) actual = indexing.create_mask(indexer, (5,)) np.testing.assert_array_equal(expected, actual) indexer = indexing.VectorizedIndexer( (np.array([0, -1, 2]), slice(None), np.array([0, 1, -1])) ) expected = np.array([[False, True, True]] * 2).T actual = indexing.create_mask(indexer, (5, 2)) np.testing.assert_array_equal(expected, actual) def test_create_mask_basic_indexer() -> None: indexer = indexing.BasicIndexer((-1,)) actual = indexing.create_mask(indexer, (3,)) np.testing.assert_array_equal(True, actual) indexer = indexing.BasicIndexer((0,)) actual = 
indexing.create_mask(indexer, (3,)) np.testing.assert_array_equal(False, actual) def test_create_mask_dask() -> None: da = pytest.importorskip("dask.array") indexer = indexing.OuterIndexer((1, slice(2), np.array([0, -1, 2]))) expected = np.array(2 * [[False, True, False]]) actual = indexing.create_mask( indexer, (5, 5, 5), da.empty((2, 3), chunks=((1, 1), (2, 1))) ) assert actual.chunks == ((1, 1), (2, 1)) np.testing.assert_array_equal(expected, actual) indexer_vec = indexing.VectorizedIndexer( (np.array([0, -1, 2]), slice(None), np.array([0, 1, -1])) ) expected = np.array([[False, True, True]] * 2).T actual = indexing.create_mask( indexer_vec, (5, 2), da.empty((3, 2), chunks=((3,), (2,))) ) assert isinstance(actual, da.Array) np.testing.assert_array_equal(expected, actual) with pytest.raises(ValueError): indexing.create_mask(indexer_vec, (5, 2), da.empty((5,), chunks=(1,))) def test_create_mask_error() -> None: with pytest.raises(TypeError, match=r"unexpected key type"): indexing.create_mask((1, 2), (3, 4)) # type: ignore[arg-type] @pytest.mark.parametrize( "indices, expected", [ (np.arange(5), np.arange(5)), (np.array([0, -1, -1]), np.array([0, 0, 0])), (np.array([-1, 1, -1]), np.array([1, 1, 1])), (np.array([-1, -1, 2]), np.array([2, 2, 2])), (np.array([-1]), np.array([0])), (np.array([0, -1, 1, -1, -1]), np.array([0, 0, 1, 1, 1])), (np.array([0, -1, -1, -1, 1]), np.array([0, 0, 0, 0, 1])), ], ) def test_posify_mask_subindexer(indices, expected) -> None: actual = indexing._posify_mask_subindexer(indices) np.testing.assert_array_equal(expected, actual) class ArrayWithNamespace: def __array_namespace__(self, version=None): pass class ArrayWithArrayFunction: def __array_function__(self, func, types, args, kwargs): pass class ArrayWithNamespaceAndArrayFunction: def __array_namespace__(self, version=None): pass def __array_function__(self, func, types, args, kwargs): pass def as_dask_array(arr, chunks): try: import dask.array as da except ImportError: return None return da.from_array(arr, chunks=chunks) @pytest.mark.parametrize( ["array", "expected_type"], ( pytest.param( indexing.CopyOnWriteArray(np.array([1, 2])), indexing.CopyOnWriteArray, id="ExplicitlyIndexed", ), pytest.param( np.array([1, 2]), indexing.NumpyIndexingAdapter, id="numpy.ndarray" ), pytest.param( pd.Index([1, 2]), indexing.PandasIndexingAdapter, id="pandas.Index" ), pytest.param( as_dask_array(np.array([1, 2]), chunks=(1,)), indexing.DaskIndexingAdapter, id="dask.array", marks=requires_dask, ), pytest.param( ArrayWithNamespace(), indexing.ArrayApiIndexingAdapter, id="array_api" ), pytest.param( ArrayWithArrayFunction(), indexing.NdArrayLikeIndexingAdapter, id="array_like", ), pytest.param( ArrayWithNamespaceAndArrayFunction(), indexing.ArrayApiIndexingAdapter, id="array_api_with_fallback", ), ), ) def test_as_indexable(array, expected_type): actual = indexing.as_indexable(array) assert isinstance(actual, expected_type) def test_indexing_1d_object_array() -> None: items = (np.arange(3), np.arange(6)) arr = DataArray(np.array(items, dtype=object)) actual = arr[0] expected_data = np.empty((), dtype=object) expected_data[()] = items[0] expected = DataArray(expected_data) assert [actual.data.item()] == [expected.data.item()] @requires_dask def test_indexing_dask_array() -> None: import dask.array da = DataArray( np.ones(10 * 3 * 3).reshape((10, 3, 3)), dims=("time", "x", "y"), ).chunk(dict(time=-1, x=1, y=1)) with raise_if_dask_computes(): actual = da.isel(time=dask.array.from_array([9], chunks=(1,))) expected = 
da.isel(time=[9]) assert_identical(actual, expected) @requires_dask def test_indexing_dask_array_scalar() -> None: # GH4276 import dask.array a = dask.array.from_array(np.linspace(0.0, 1.0)) da = DataArray(a, dims="x") x_selector = da.argmax(dim=...) assert not isinstance(x_selector, DataArray) with raise_if_dask_computes(): actual = da.isel(x_selector) expected = da.isel(x=-1) assert_identical(actual, expected) @requires_dask def test_vectorized_indexing_dask_array() -> None: # https://github.com/pydata/xarray/issues/2511#issuecomment-563330352 darr = DataArray(data=[0.2, 0.4, 0.6], coords={"z": range(3)}, dims=("z",)) indexer = DataArray( data=np.random.randint(0, 3, 8).reshape(4, 2).astype(int), coords={"y": range(4), "x": range(2)}, dims=("y", "x"), ) expected = darr[indexer] # fails because we can't index pd.Index lazily (yet). # We could make this succeed by auto-chunking the values # and constructing a lazy index variable, and not automatically # create an index for it. with pytest.raises(ValueError, match="Cannot index with"): with raise_if_dask_computes(): darr.chunk()[indexer.chunk({"y": 2})] with pytest.raises(ValueError, match="Cannot index with"): with raise_if_dask_computes(): actual = darr[indexer.chunk({"y": 2})] with raise_if_dask_computes(): actual = darr.drop_vars("z").chunk()[indexer.chunk({"y": 2})] assert_identical(actual, expected.drop_vars("z")) with raise_if_dask_computes(): actual_variable = darr.variable.chunk()[indexer.variable.chunk({"y": 2})] assert_identical(actual_variable, expected.variable) @requires_dask def test_advanced_indexing_dask_array() -> None: # GH4663 import dask.array as da ds = Dataset( dict( a=("x", da.from_array(np.random.randint(0, 100, 100))), b=(("x", "y"), da.random.random((100, 10))), ) ) expected = ds.b.sel(x=ds.a.compute()) with raise_if_dask_computes(): actual = ds.b.sel(x=ds.a) assert_identical(expected, actual) with raise_if_dask_computes(): actual = ds.b.sel(x=ds.a.data) assert_identical(expected, actual) def test_backend_indexing_non_numpy() -> None: """This model indexing of a Zarr store that reads to GPU memory.""" array = DuckArrayWrapper(np.array([1, 2, 3])) indexed = indexing.explicit_indexing_adapter( indexing.BasicIndexer((slice(1),)), shape=array.shape, indexing_support=indexing.IndexingSupport.BASIC, raw_indexing_method=array.__getitem__, ) np.testing.assert_array_equal(indexed.array, np.array([1])) @requires_pandas_3 def test_pandas_StringDtype_index_coerces_to_numpy() -> None: da = DataArray([0, 1], coords={"x": ["x1", "x2"]}) actual = concat([da, da], dim=pd.Index(["y1", "y2"], name="y")) assert isinstance(actual["y"].dtype, np.dtypes.StringDType) python-xarray-2026.01.0/xarray/tests/test_plot.py0000664000175000017500000037732715136607163022125 0ustar alastairalastairfrom __future__ import annotations import contextlib import inspect import math from collections.abc import Callable, Generator, Hashable from copy import copy from datetime import date, timedelta from typing import Any, Literal, cast import numpy as np import pandas as pd import pytest import xarray as xr import xarray.plot as xplt from xarray import DataArray, Dataset from xarray.namedarray.utils import module_available from xarray.plot.dataarray_plot import _infer_interval_breaks from xarray.plot.dataset_plot import _infer_meta_data from xarray.plot.utils import ( _assert_valid_xy, _build_discrete_cmap, _color_palette, _determine_cmap_params, _maybe_gca, get_axis, label_from_attrs, ) from xarray.tests import ( assert_array_equal, assert_equal, 
assert_no_warnings, requires_cartopy, requires_cftime, requires_dask, requires_matplotlib, requires_seaborn, ) # this should not be imported to test if the automatic lazy import works has_nc_time_axis = module_available("nc_time_axis") # import mpl and change the backend before other mpl imports try: import matplotlib as mpl import matplotlib.dates import matplotlib.pyplot as plt import mpl_toolkits except ImportError: pass with contextlib.suppress(ImportError): import cartopy @contextlib.contextmanager def figure_context(*args, **kwargs): """context manager which autocloses a figure (even if the test failed)""" try: yield None finally: plt.close("all") @pytest.fixture(autouse=True) def test_all_figures_closed(): """meta-test to ensure all figures are closed at the end of a test Notes: Scope is kept to module (only invoke this function once per test module) else tests cannot be run in parallel (locally). Disadvantage: only catches one open figure per run. May still give a false positive if tests are run in parallel. """ yield None open_figs = len(plt.get_fignums()) if open_figs: raise RuntimeError( f"tests did not close all figures ({open_figs} figures open)" ) @pytest.mark.flaky @pytest.mark.skip(reason="maybe flaky") def text_in_fig() -> set[str]: """ Return the set of all text in the figure """ return {t.get_text() for t in plt.gcf().findobj(mpl.text.Text)} def find_possible_colorbars() -> list[mpl.collections.QuadMesh]: # nb. this function also matches meshes from pcolormesh return plt.gcf().findobj(mpl.collections.QuadMesh) def substring_in_axes(substring: str, ax: mpl.axes.Axes) -> bool: """ Return True if a substring is found anywhere in an axes """ alltxt: set[str] = {t.get_text() for t in ax.findobj(mpl.text.Text)} return any(substring in txt for txt in alltxt) def substring_not_in_axes(substring: str, ax: mpl.axes.Axes) -> bool: """ Return True if a substring is not found anywhere in an axes """ alltxt: set[str] = {t.get_text() for t in ax.findobj(mpl.text.Text)} check = [(substring not in txt) for txt in alltxt] return all(check) def property_in_axes_text( property, property_str, target_txt, ax: mpl.axes.Axes ) -> bool: """ Return True if the specified text in an axes has the property assigned to property_str """ alltxt: list[mpl.text.Text] = ax.findobj(mpl.text.Text) return all( plt.getp(t, property) == property_str for t in alltxt if t.get_text() == target_txt ) def easy_array(shape: tuple[int, ...], start: float = 0, stop: float = 1) -> np.ndarray: """ Make an array with desired shape using np.linspace shape is a tuple like (2, 3) """ a = np.linspace(start, stop, num=math.prod(shape)) return a.reshape(shape) def get_colorbar_label(colorbar) -> str: if colorbar.orientation == "vertical": return colorbar.ax.get_ylabel() else: return colorbar.ax.get_xlabel() @requires_matplotlib class PlotTestCase: @pytest.fixture(autouse=True) def setup(self) -> Generator: yield # Remove all matplotlib figures plt.close("all") def pass_in_axis(self, plotmethod, subplot_kw=None) -> None: _fig, axs = plt.subplots(ncols=2, subplot_kw=subplot_kw, squeeze=False) ax = axs[0, 0] plotmethod(ax=ax) assert ax.has_data() @pytest.mark.slow def imshow_called(self, plotmethod) -> bool: plotmethod() images = plt.gca().findobj(mpl.image.AxesImage) return len(images) > 0 def contourf_called(self, plotmethod) -> bool: plotmethod() # Compatible with mpl before (PathCollection) and after (QuadContourSet) 3.8 def matchfunc(x) -> bool: return isinstance( x, mpl.collections.PathCollection | mpl.contour.QuadContourSet 
) paths = plt.gca().findobj(matchfunc) return len(paths) > 0 class TestPlot(PlotTestCase): @pytest.fixture(autouse=True) def setup_array(self) -> None: self.darray = DataArray(easy_array((2, 3, 4))) def test_accessor(self) -> None: from xarray.plot.accessor import DataArrayPlotAccessor assert DataArray.plot is DataArrayPlotAccessor assert isinstance(self.darray.plot, DataArrayPlotAccessor) def test_label_from_attrs(self) -> None: da = self.darray.copy() assert "" == label_from_attrs(da) da.name = 0 assert "0" == label_from_attrs(da) da.name = "a" da.attrs["units"] = "a_units" da.attrs["long_name"] = "a_long_name" da.attrs["standard_name"] = "a_standard_name" assert "a_long_name [a_units]" == label_from_attrs(da) da.attrs.pop("long_name") assert "a_standard_name [a_units]" == label_from_attrs(da) da.attrs.pop("units") assert "a_standard_name" == label_from_attrs(da) da.attrs["units"] = "a_units" da.attrs.pop("standard_name") assert "a [a_units]" == label_from_attrs(da) da.attrs.pop("units") assert "a" == label_from_attrs(da) # Latex strings can be longer without needing a new line: long_latex_name = r"$Ra_s = \mathrm{mean}(\epsilon_k) / \mu M^2_\infty$" da.attrs = dict(long_name=long_latex_name) assert label_from_attrs(da) == long_latex_name def test1d(self) -> None: self.darray[:, 0, 0].plot() # type: ignore[call-arg] with pytest.raises(ValueError, match=r"x must be one of None, 'dim_0'"): self.darray[:, 0, 0].plot(x="dim_1") # type: ignore[call-arg] with pytest.raises(TypeError, match=r"complex128"): (self.darray[:, 0, 0] + 1j).plot() # type: ignore[call-arg] def test_1d_bool(self) -> None: xr.ones_like(self.darray[:, 0, 0], dtype=bool).plot() # type: ignore[call-arg] def test_1d_x_y_kw(self) -> None: z = np.arange(10) da = DataArray(np.cos(z), dims=["z"], coords=[z], name="f") xy: list[list[str | None]] = [[None, None], [None, "z"], ["z", None]] _f, axs = plt.subplots(3, 1, squeeze=False) for aa, (x, y) in enumerate(xy): da.plot(x=x, y=y, ax=axs.flat[aa]) # type: ignore[call-arg] with pytest.raises(ValueError, match=r"Cannot specify both"): da.plot(x="z", y="z") # type: ignore[call-arg] error_msg = "must be one of None, 'z'" with pytest.raises(ValueError, match=rf"x {error_msg}"): da.plot(x="f") # type: ignore[call-arg] with pytest.raises(ValueError, match=rf"y {error_msg}"): da.plot(y="f") # type: ignore[call-arg] def test_multiindex_level_as_coord(self) -> None: da = xr.DataArray( np.arange(5), dims="x", coords=dict(a=("x", np.arange(5)), b=("x", np.arange(5, 10))), ) da = da.set_index(x=["a", "b"]) for x in ["a", "b"]: h = da.plot(x=x)[0] # type: ignore[call-arg] assert_array_equal(h.get_xdata(), da[x].values) for y in ["a", "b"]: h = da.plot(y=y)[0] # type: ignore[call-arg] assert_array_equal(h.get_ydata(), da[y].values) # Test for bug in GH issue #2725 def test_infer_line_data(self) -> None: current = DataArray( name="I", data=np.array([5, 8]), dims=["t"], coords={ "t": (["t"], np.array([0.1, 0.2])), "V": (["t"], np.array([100, 200])), }, ) # Plot current against voltage line = current.plot.line(x="V")[0] assert_array_equal(line.get_xdata(), current.coords["V"].values) # Plot current against time line = current.plot.line()[0] assert_array_equal(line.get_xdata(), current.coords["t"].values) def test_line_plot_along_1d_coord(self) -> None: # Test for bug in GH #3334 x_coord = xr.DataArray(data=[0.1, 0.2], dims=["x"]) t_coord = xr.DataArray(data=[10, 20], dims=["t"]) da = xr.DataArray( data=np.array([[0, 1], [5, 9]]), dims=["x", "t"], coords={"x": x_coord, "time": t_coord}, ) line = 
da.plot(x="time", hue="x")[0] # type: ignore[call-arg] assert_array_equal(line.get_xdata(), da.coords["time"].values) line = da.plot(y="time", hue="x")[0] # type: ignore[call-arg] assert_array_equal(line.get_ydata(), da.coords["time"].values) def test_line_plot_wrong_hue(self) -> None: da = xr.DataArray( data=np.array([[0, 1], [5, 9]]), dims=["x", "t"], ) with pytest.raises(ValueError, match="hue must be one of"): da.plot(x="t", hue="wrong_coord") # type: ignore[call-arg] def test_2d_line(self) -> None: with pytest.raises(ValueError, match=r"hue"): self.darray[:, :, 0].plot.line() self.darray[:, :, 0].plot.line(hue="dim_1") self.darray[:, :, 0].plot.line(x="dim_1") self.darray[:, :, 0].plot.line(y="dim_1") self.darray[:, :, 0].plot.line(x="dim_0", hue="dim_1") self.darray[:, :, 0].plot.line(y="dim_0", hue="dim_1") with pytest.raises(ValueError, match=r"Cannot"): self.darray[:, :, 0].plot.line(x="dim_1", y="dim_0", hue="dim_1") def test_2d_line_accepts_legend_kw(self) -> None: self.darray[:, :, 0].plot.line(x="dim_0", add_legend=False) assert not plt.gca().get_legend() plt.cla() self.darray[:, :, 0].plot.line(x="dim_0", add_legend=True) legend = plt.gca().get_legend() assert legend is not None # check whether legend title is set assert legend.get_title().get_text() == "dim_1" def test_2d_line_accepts_x_kw(self) -> None: self.darray[:, :, 0].plot.line(x="dim_0") assert plt.gca().get_xlabel() == "dim_0" plt.cla() self.darray[:, :, 0].plot.line(x="dim_1") assert plt.gca().get_xlabel() == "dim_1" def test_2d_line_accepts_hue_kw(self) -> None: self.darray[:, :, 0].plot.line(hue="dim_0") legend = plt.gca().get_legend() assert legend is not None assert legend.get_title().get_text() == "dim_0" plt.cla() self.darray[:, :, 0].plot.line(hue="dim_1") legend = plt.gca().get_legend() assert legend is not None assert legend.get_title().get_text() == "dim_1" def test_2d_coords_line_plot(self) -> None: lon, lat = np.meshgrid(np.linspace(-20, 20, 5), np.linspace(0, 30, 4)) lon += lat / 10 lat += lon / 10 da = xr.DataArray( np.arange(20).reshape(4, 5), dims=["y", "x"], coords={"lat": (("y", "x"), lat), "lon": (("y", "x"), lon)}, ) with figure_context(): hdl = da.plot.line(x="lon", hue="x") assert len(hdl) == 5 with figure_context(): hdl = da.plot.line(x="lon", hue="y") assert len(hdl) == 4 with pytest.raises(ValueError, match="For 2D inputs, hue must be a dimension"): da.plot.line(x="lon", hue="lat") def test_2d_coord_line_plot_coords_transpose_invariant(self) -> None: # checks for bug reported in GH #3933 x = np.arange(10) y = np.arange(20) ds = xr.Dataset(coords={"x": x, "y": y}) for z in [ds.y + ds.x, ds.x + ds.y]: ds = ds.assign_coords(z=z) ds["v"] = ds.x + ds.y ds["v"].plot.line(y="z", hue="x") def test_2d_before_squeeze(self) -> None: a = DataArray(easy_array((1, 5))) a.plot() # type: ignore[call-arg] def test2d_uniform_calls_imshow(self) -> None: assert self.imshow_called(self.darray[:, :, 0].plot.imshow) @pytest.mark.slow def test2d_nonuniform_calls_contourf(self) -> None: a = self.darray[:, :, 0] a.coords["dim_1"] = [2, 1, 89] assert self.contourf_called(a.plot.contourf) def test2d_1d_2d_coordinates_contourf(self) -> None: sz = (20, 10) depth = easy_array(sz) a = DataArray( easy_array(sz), dims=["z", "time"], coords={"depth": (["z", "time"], depth), "time": np.linspace(0, 1, sz[1])}, ) a.plot.contourf(x="time", y="depth") a.plot.contourf(x="depth", y="time") def test2d_1d_2d_coordinates_pcolormesh(self) -> None: # Test with equal coordinates to catch bug from #5097 sz = 10 y2d, x2d = 
np.meshgrid(np.arange(sz), np.arange(sz)) a = DataArray( easy_array((sz, sz)), dims=["x", "y"], coords={"x2d": (["x", "y"], x2d), "y2d": (["x", "y"], y2d)}, ) for x, y in [ ("x", "y"), ("y", "x"), ("x2d", "y"), ("y", "x2d"), ("x", "y2d"), ("y2d", "x"), ("x2d", "y2d"), ("y2d", "x2d"), ]: p = a.plot.pcolormesh(x=x, y=y) v = p.get_paths()[0].vertices assert isinstance(v, np.ndarray) # Check all vertices are different, except last vertex which should be the # same as the first _, unique_counts = np.unique(v[:-1], axis=0, return_counts=True) assert np.all(unique_counts == 1) def test_str_coordinates_pcolormesh(self) -> None: # test for #6775 x = DataArray( [[1, 2, 3], [4, 5, 6]], dims=("a", "b"), coords={"a": [1, 2], "b": ["a", "b", "c"]}, ) x.plot.pcolormesh() x.T.plot.pcolormesh() def test_contourf_cmap_set(self) -> None: a = DataArray(easy_array((4, 4)), dims=["z", "time"]) cmap_expected = mpl.colormaps["viridis"] # use copy to ensure cmap is not changed by contourf() # Set vmin and vmax so that _build_discrete_colormap is called with # extend='both'. extend is passed to # mpl.colors.from_levels_and_colors(), which returns a result with # sensible under and over values if extend='both', but not if # extend='neither' (but if extend='neither' the under and over values # would not be used because the data would all be within the plotted # range) pl = a.plot.contourf(cmap=copy(cmap_expected), vmin=0.1, vmax=0.9) # check the set_bad color cmap = pl.cmap assert cmap is not None assert_array_equal( cmap(np.ma.masked_invalid([np.nan]))[0], cmap_expected(np.ma.masked_invalid([np.nan]))[0], ) # check the set_under color assert cmap(-np.inf) == cmap_expected(-np.inf) # check the set_over color assert cmap(np.inf) == cmap_expected(np.inf) def test_contourf_cmap_set_with_bad_under_over(self) -> None: a = DataArray(easy_array((4, 4)), dims=["z", "time"]) # make a copy using with_extremes because we want a local cmap: cmap_expected = mpl.colormaps["viridis"].with_extremes( bad="w", under="r", over="g" ) # check we actually changed the set_bad color assert np.all( cmap_expected(np.ma.masked_invalid([np.nan]))[0] != mpl.colormaps["viridis"](np.ma.masked_invalid([np.nan]))[0] ) # check we actually changed the set_under color assert cmap_expected(-np.inf) != mpl.colormaps["viridis"](-np.inf) # check we actually changed the set_over color assert cmap_expected(np.inf) != mpl.colormaps["viridis"](-np.inf) # copy to ensure cmap is not changed by contourf() pl = a.plot.contourf(cmap=copy(cmap_expected)) cmap = pl.cmap assert cmap is not None # check the set_bad color has been kept assert_array_equal( cmap(np.ma.masked_invalid([np.nan]))[0], cmap_expected(np.ma.masked_invalid([np.nan]))[0], ) # check the set_under color has been kept assert cmap(-np.inf) == cmap_expected(-np.inf) # check the set_over color has been kept assert cmap(np.inf) == cmap_expected(np.inf) def test3d(self) -> None: self.darray.plot() # type: ignore[call-arg] def test_can_pass_in_axis(self) -> None: self.pass_in_axis(self.darray.plot) def test__infer_interval_breaks(self) -> None: assert_array_equal([-0.5, 0.5, 1.5], _infer_interval_breaks([0, 1])) assert_array_equal( [-0.5, 0.5, 5.0, 9.5, 10.5], _infer_interval_breaks([0, 1, 9, 10]) ) assert_array_equal( pd.date_range("20000101", periods=4) - np.timedelta64(12, "h"), _infer_interval_breaks(pd.date_range("20000101", periods=3)), ) # make a bounded 2D array that we will center and re-infer xref, yref = np.meshgrid(np.arange(6), np.arange(5)) cx = (xref[1:, 1:] + xref[:-1, :-1]) / 2 cy = 
(yref[1:, 1:] + yref[:-1, :-1]) / 2 x = _infer_interval_breaks(cx, axis=1) x = _infer_interval_breaks(x, axis=0) y = _infer_interval_breaks(cy, axis=1) y = _infer_interval_breaks(y, axis=0) np.testing.assert_allclose(xref, x) np.testing.assert_allclose(yref, y) # test that ValueError is raised for non-monotonic 1D inputs with pytest.raises(ValueError): _infer_interval_breaks(np.array([0, 2, 1]), check_monotonic=True) def test__infer_interval_breaks_logscale(self) -> None: """ Check if interval breaks are defined in the logspace if scale="log" """ # Check for 1d arrays x = np.logspace(-4, 3, 8) expected_interval_breaks = 10 ** np.linspace(-4.5, 3.5, 9) np.testing.assert_allclose( _infer_interval_breaks(x, scale="log"), expected_interval_breaks ) # Check for 2d arrays x = np.logspace(-4, 3, 8) y = np.linspace(-5, 5, 11) x, y = np.meshgrid(x, y) expected_interval_breaks = np.vstack([10 ** np.linspace(-4.5, 3.5, 9)] * 12) x = _infer_interval_breaks(x, axis=1, scale="log") x = _infer_interval_breaks(x, axis=0, scale="log") np.testing.assert_allclose(x, expected_interval_breaks) def test__infer_interval_breaks_logscale_invalid_coords(self) -> None: """ Check error is raised when passing non-positive coordinates with logscale """ # Check if error is raised after a zero value in the array x = np.linspace(0, 5, 6) with pytest.raises(ValueError): _infer_interval_breaks(x, scale="log") # Check if error is raised after negative values in the array x = np.linspace(-5, 5, 11) with pytest.raises(ValueError): _infer_interval_breaks(x, scale="log") def test_geo_data(self) -> None: # Regression test for gh2250 # Realistic coordinates taken from the example dataset lat = np.array( [ [16.28, 18.48, 19.58, 19.54, 18.35], [28.07, 30.52, 31.73, 31.68, 30.37], [39.65, 42.27, 43.56, 43.51, 42.11], [50.52, 53.22, 54.55, 54.50, 53.06], ] ) lon = np.array( [ [-126.13, -113.69, -100.92, -88.04, -75.29], [-129.27, -115.62, -101.54, -87.32, -73.26], [-133.10, -118.00, -102.31, -86.42, -70.76], [-137.85, -120.99, -103.28, -85.28, -67.62], ] ) data = np.hypot(lon, lat) da = DataArray( data, dims=("y", "x"), coords={"lon": (("y", "x"), lon), "lat": (("y", "x"), lat)}, ) da.plot(x="lon", y="lat") # type: ignore[call-arg] ax = plt.gca() assert ax.has_data() da.plot(x="lat", y="lon") # type: ignore[call-arg] ax = plt.gca() assert ax.has_data() def test_datetime_dimension(self) -> None: nrow = 3 ncol = 4 time = pd.date_range("2000-01-01", periods=nrow) a = DataArray( easy_array((nrow, ncol)), coords=[("time", time), ("y", range(ncol))] ) a.plot() # type: ignore[call-arg] ax = plt.gca() assert ax.has_data() def test_date_dimension(self) -> None: nrow = 3 ncol = 4 start = date(2000, 1, 1) time = [start + timedelta(days=i) for i in range(nrow)] a = DataArray( easy_array((nrow, ncol)), coords=[("time", time), ("y", range(ncol))] ) a.plot() # type: ignore[call-arg] ax = plt.gca() assert ax.has_data() @pytest.mark.slow @pytest.mark.filterwarnings("ignore:tight_layout cannot") def test_convenient_facetgrid(self) -> None: a = easy_array((10, 15, 4)) d = DataArray(a, dims=["y", "x", "z"]) d.coords["z"] = list("abcd") g = d.plot(x="x", y="y", col="z", col_wrap=2, cmap="cool") # type: ignore[call-arg] assert_array_equal(g.axs.shape, [2, 2]) for ax in g.axs.flat: assert ax.has_data() with pytest.raises(ValueError, match=r"[Ff]acet"): d.plot(x="x", y="y", col="z", ax=plt.gca()) # type: ignore[call-arg] with pytest.raises(ValueError, match=r"[Ff]acet"): d[0].plot(x="x", y="y", col="z", ax=plt.gca()) # type: ignore[call-arg] 
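    # Illustrative sketch, not part of the original suite: a minimal facet-grid
    # call mirroring test_convenient_facetgrid above. The method name is ours;
    # the API usage and the expected 2x2 axes grid come from that test.
    def example_facetgrid_sketch(self) -> None:
        d = DataArray(easy_array((10, 15, 4)), dims=["y", "x", "z"])
        # Faceting the four "z" slices with col_wrap=2 yields a 2x2 grid of
        # axes, one 2d plot per slice.
        g = d.plot(x="x", y="y", col="z", col_wrap=2)  # type: ignore[call-arg]
        assert g.axs.shape == (2, 2)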
@pytest.mark.slow def test_subplot_kws(self) -> None: a = easy_array((10, 15, 4)) d = DataArray(a, dims=["y", "x", "z"]) d.coords["z"] = list("abcd") g = d.plot( # type: ignore[call-arg] x="x", y="y", col="z", col_wrap=2, cmap="cool", subplot_kws=dict(facecolor="r"), ) for ax in g.axs.flat: # mpl V2 assert ax.get_facecolor()[0:3] == mpl.colors.to_rgb("r") @pytest.mark.slow def test_plot_size(self) -> None: self.darray[:, 0, 0].plot(figsize=(13, 5)) # type: ignore[call-arg] assert tuple(plt.gcf().get_size_inches()) == (13, 5) self.darray.plot(figsize=(13, 5)) # type: ignore[call-arg] assert tuple(plt.gcf().get_size_inches()) == (13, 5) self.darray.plot(size=5) # type: ignore[call-arg] assert plt.gcf().get_size_inches()[1] == 5 self.darray.plot(size=5, aspect=2) # type: ignore[call-arg] assert tuple(plt.gcf().get_size_inches()) == (10, 5) with pytest.raises(ValueError, match=r"cannot provide both"): self.darray.plot(ax=plt.gca(), figsize=(3, 4)) # type: ignore[call-arg] with pytest.raises(ValueError, match=r"cannot provide both"): self.darray.plot(size=5, figsize=(3, 4)) # type: ignore[call-arg] with pytest.raises(ValueError, match=r"cannot provide both"): self.darray.plot(size=5, ax=plt.gca()) # type: ignore[call-arg] with pytest.raises(ValueError, match=r"cannot provide `aspect`"): self.darray.plot(aspect=1) # type: ignore[call-arg] @pytest.mark.slow @pytest.mark.filterwarnings("ignore:tight_layout cannot") def test_convenient_facetgrid_4d(self) -> None: a = easy_array((10, 15, 2, 3)) d = DataArray(a, dims=["y", "x", "columns", "rows"]) g = d.plot(x="x", y="y", col="columns", row="rows") # type: ignore[call-arg] assert_array_equal(g.axs.shape, [3, 2]) for ax in g.axs.flat: assert ax.has_data() with pytest.raises(ValueError, match=r"[Ff]acet"): d.plot(x="x", y="y", col="columns", ax=plt.gca()) # type: ignore[call-arg] def test_coord_with_interval(self) -> None: """Test line plot with intervals.""" bins = [-1, 0, 1, 2] self.darray.groupby_bins("dim_0", bins).mean(...).plot() # type: ignore[call-arg] def test_coord_with_interval_x(self) -> None: """Test line plot with intervals explicitly on x axis.""" bins = [-1, 0, 1, 2] self.darray.groupby_bins("dim_0", bins).mean(...).plot(x="dim_0_bins") # type: ignore[call-arg] def test_coord_with_interval_y(self) -> None: """Test line plot with intervals explicitly on y axis.""" bins = [-1, 0, 1, 2] self.darray.groupby_bins("dim_0", bins).mean(...).plot(y="dim_0_bins") # type: ignore[call-arg] def test_coord_with_interval_xy(self) -> None: """Test line plot with intervals on both x and y axes.""" bins = [-1, 0, 1, 2] self.darray.groupby_bins("dim_0", bins).mean(...).dim_0_bins.plot() @pytest.mark.parametrize("dim", ("x", "y")) def test_labels_with_units_with_interval(self, dim) -> None: """Test line plot with intervals and a units attribute.""" bins = [-1, 0, 1, 2] arr = self.darray.groupby_bins("dim_0", bins).mean(...) 
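        # groupby_bins adds a "dim_0_bins" coordinate of pandas Intervals; the
        # plot labels that axis from the interval centers, hence the expected
        # "dim_0_bins_center [m]" label checked below once units are attached.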
arr.dim_0_bins.attrs["units"] = "m" (mappable,) = arr.plot(**{dim: "dim_0_bins"}) # type: ignore[arg-type] ax = mappable.figure.gca() actual = getattr(ax, f"get_{dim}label")() expected = "dim_0_bins_center [m]" assert actual == expected def test_multiplot_over_length_one_dim(self) -> None: a = easy_array((3, 1, 1, 1)) d = DataArray(a, dims=("x", "col", "row", "hue")) d.plot(col="col") # type: ignore[call-arg] d.plot(row="row") # type: ignore[call-arg] d.plot(hue="hue") # type: ignore[call-arg] class TestPlot1D(PlotTestCase): @pytest.fixture(autouse=True) def setUp(self) -> None: d = [0, 1.1, 0, 2] self.darray = DataArray(d, coords={"period": range(len(d))}, dims="period") self.darray.period.attrs["units"] = "s" def test_xlabel_is_index_name(self) -> None: self.darray.plot() # type: ignore[call-arg] assert "period [s]" == plt.gca().get_xlabel() def test_no_label_name_on_x_axis(self) -> None: self.darray.plot(y="period") # type: ignore[call-arg] assert "" == plt.gca().get_xlabel() def test_no_label_name_on_y_axis(self) -> None: self.darray.plot() # type: ignore[call-arg] assert "" == plt.gca().get_ylabel() def test_ylabel_is_data_name(self) -> None: self.darray.name = "temperature" self.darray.attrs["units"] = "degrees_Celsius" self.darray.plot() # type: ignore[call-arg] assert "temperature [degrees_Celsius]" == plt.gca().get_ylabel() def test_xlabel_is_data_name(self) -> None: self.darray.name = "temperature" self.darray.attrs["units"] = "degrees_Celsius" self.darray.plot(y="period") # type: ignore[call-arg] assert "temperature [degrees_Celsius]" == plt.gca().get_xlabel() def test_format_string(self) -> None: self.darray.plot.line("ro") def test_can_pass_in_axis(self) -> None: self.pass_in_axis(self.darray.plot.line) def test_nonnumeric_index(self) -> None: a = DataArray([1, 2, 3], {"letter": ["a", "b", "c"]}, dims="letter") a.plot.line() def test_primitive_returned(self) -> None: p = self.darray.plot.line() assert isinstance(p[0], mpl.lines.Line2D) @pytest.mark.slow def test_plot_nans(self) -> None: self.darray[1] = np.nan self.darray.plot.line() def test_dates_are_concise(self) -> None: import matplotlib.dates as mdates time = pd.date_range("2000-01-01", "2000-01-10") a = DataArray(np.arange(len(time)), [("t", time)]) a.plot.line() ax = plt.gca() assert isinstance(ax.xaxis.get_major_locator(), mdates.AutoDateLocator) assert isinstance(ax.xaxis.get_major_formatter(), mdates.ConciseDateFormatter) def test_xyincrease_false_changes_axes(self) -> None: self.darray.plot.line(xincrease=False, yincrease=False) xlim = plt.gca().get_xlim() ylim = plt.gca().get_ylim() diffs = xlim[1] - xlim[0], ylim[1] - ylim[0] assert all(x < 0 for x in diffs) def test_slice_in_title(self) -> None: self.darray.coords["d"] = 10.009 self.darray.plot.line() title = plt.gca().get_title() assert "d = 10.01" == title def test_slice_in_title_single_item_array(self) -> None: """Edge case for data of shape (1, N) or (N, 1).""" darray = self.darray.expand_dims({"d": np.array([10.009])}) darray.plot.line(x="period") title = plt.gca().get_title() assert "d = [10.009]" == title class TestPlotStep(PlotTestCase): @pytest.fixture(autouse=True) def setUp(self) -> None: self.darray = DataArray(easy_array((2, 3, 4))) def test_step(self) -> None: hdl = self.darray[0, 0].plot.step() assert "steps" in hdl[0].get_drawstyle() @pytest.mark.parametrize("where", ["pre", "post", "mid"]) def test_step_with_where(self, where) -> None: hdl = self.darray[0, 0].plot.step(where=where) assert hdl[0].get_drawstyle() == f"steps-{where}" def 
test_step_with_hue(self) -> None: hdl = self.darray[0].plot.step(hue="dim_2") assert hdl[0].get_drawstyle() == "steps-pre" @pytest.mark.parametrize("where", ["pre", "post", "mid"]) def test_step_with_hue_and_where(self, where) -> None: hdl = self.darray[0].plot.step(hue="dim_2", where=where) assert hdl[0].get_drawstyle() == f"steps-{where}" def test_drawstyle_steps(self) -> None: hdl = self.darray[0].plot(hue="dim_2", drawstyle="steps") # type: ignore[call-arg] assert hdl[0].get_drawstyle() == "steps" @pytest.mark.parametrize("where", ["pre", "post", "mid"]) def test_drawstyle_steps_with_where(self, where) -> None: hdl = self.darray[0].plot(hue="dim_2", drawstyle=f"steps-{where}") # type: ignore[call-arg] assert hdl[0].get_drawstyle() == f"steps-{where}" def test_coord_with_interval_step(self) -> None: """Test step plot with intervals.""" bins = [-1, 0, 1, 2] self.darray.groupby_bins("dim_0", bins).mean(...).plot.step() line = plt.gca().lines[0] assert isinstance(line, mpl.lines.Line2D) assert len(np.asarray(line.get_xdata())) == ((len(bins) - 1) * 2) def test_coord_with_interval_step_x(self) -> None: """Test step plot with intervals explicitly on x axis.""" bins = [-1, 0, 1, 2] self.darray.groupby_bins("dim_0", bins).mean(...).plot.step(x="dim_0_bins") line = plt.gca().lines[0] assert isinstance(line, mpl.lines.Line2D) assert len(np.asarray(line.get_xdata())) == ((len(bins) - 1) * 2) def test_coord_with_interval_step_y(self) -> None: """Test step plot with intervals explicitly on y axis.""" bins = [-1, 0, 1, 2] self.darray.groupby_bins("dim_0", bins).mean(...).plot.step(y="dim_0_bins") line = plt.gca().lines[0] assert isinstance(line, mpl.lines.Line2D) assert len(np.asarray(line.get_xdata())) == ((len(bins) - 1) * 2) def test_coord_with_interval_step_x_and_y_raises_valueeerror(self) -> None: """Test that step plot with intervals both on x and y axes raises an error.""" arr = xr.DataArray( [pd.Interval(0, 1), pd.Interval(1, 2)], coords=[("x", [pd.Interval(0, 1), pd.Interval(1, 2)])], ) with pytest.raises(TypeError, match="intervals against intervals"): arr.plot.step() class TestPlotHistogram(PlotTestCase): @pytest.fixture(autouse=True) def setUp(self) -> None: self.darray = DataArray(easy_array((2, 3, 4))) def test_3d_array(self) -> None: self.darray.plot.hist() # type: ignore[call-arg] def test_xlabel_uses_name(self) -> None: self.darray.name = "testpoints" self.darray.attrs["units"] = "testunits" self.darray.plot.hist() # type: ignore[call-arg] assert "testpoints [testunits]" == plt.gca().get_xlabel() def test_title_is_histogram(self) -> None: self.darray.coords["d"] = 10 self.darray.plot.hist() # type: ignore[call-arg] assert "d = 10" == plt.gca().get_title() def test_can_pass_in_kwargs(self) -> None: nbins = 5 self.darray.plot.hist(bins=nbins) # type: ignore[call-arg] assert nbins == len(plt.gca().patches) def test_can_pass_in_axis(self) -> None: self.pass_in_axis(self.darray.plot.hist) def test_primitive_returned(self) -> None: n, bins, patches = self.darray.plot.hist() # type: ignore[call-arg] assert isinstance(n, np.ndarray) assert isinstance(bins, np.ndarray) assert isinstance(patches, mpl.container.BarContainer) assert isinstance(patches[0], mpl.patches.Rectangle) @pytest.mark.slow def test_plot_nans(self) -> None: self.darray[0, 0, 0] = np.nan self.darray.plot.hist() # type: ignore[call-arg] def test_hist_coord_with_interval(self) -> None: ( self.darray.groupby_bins("dim_0", [-1, 0, 1, 2]) # type: ignore[call-arg] .mean(...) 
.plot.hist(range=(-1, 2)) ) @requires_matplotlib class TestDetermineCmapParams: @pytest.fixture(autouse=True) def setUp(self) -> None: self.data = np.linspace(0, 1, num=100) def test_robust(self) -> None: cmap_params = _determine_cmap_params(self.data, robust=True) assert cmap_params["vmin"] == np.percentile(self.data, 2) assert cmap_params["vmax"] == np.percentile(self.data, 98) assert cmap_params["cmap"] == "viridis" assert cmap_params["extend"] == "both" assert cmap_params["levels"] is None assert cmap_params["norm"] is None def test_center(self) -> None: cmap_params = _determine_cmap_params(self.data, center=0.5) assert cmap_params["vmax"] - 0.5 == 0.5 - cmap_params["vmin"] assert cmap_params["cmap"] == "RdBu_r" assert cmap_params["extend"] == "neither" assert cmap_params["levels"] is None assert cmap_params["norm"] is None def test_cmap_sequential_option(self) -> None: with xr.set_options(cmap_sequential="magma"): cmap_params = _determine_cmap_params(self.data) assert cmap_params["cmap"] == "magma" def test_cmap_sequential_explicit_option(self) -> None: with xr.set_options(cmap_sequential=mpl.colormaps["magma"]): cmap_params = _determine_cmap_params(self.data) assert cmap_params["cmap"] == mpl.colormaps["magma"] def test_cmap_divergent_option(self) -> None: with xr.set_options(cmap_divergent="magma"): cmap_params = _determine_cmap_params(self.data, center=0.5) assert cmap_params["cmap"] == "magma" def test_nan_inf_are_ignored(self) -> None: cmap_params1 = _determine_cmap_params(self.data) data = self.data data[50:55] = np.nan data[56:60] = np.inf cmap_params2 = _determine_cmap_params(data) assert cmap_params1["vmin"] == cmap_params2["vmin"] assert cmap_params1["vmax"] == cmap_params2["vmax"] @pytest.mark.slow def test_integer_levels(self) -> None: data = self.data + 1 # default is to cover full data range but with no guarantee on Nlevels for level in np.arange(2, 10, dtype=int): cmap_params = _determine_cmap_params(data, levels=level) assert cmap_params["vmin"] is None assert cmap_params["vmax"] is None assert cmap_params["norm"].vmin == cmap_params["levels"][0] assert cmap_params["norm"].vmax == cmap_params["levels"][-1] assert cmap_params["extend"] == "neither" # with min max we are more strict cmap_params = _determine_cmap_params( data, levels=5, vmin=0, vmax=5, cmap="Blues" ) assert cmap_params["vmin"] is None assert cmap_params["vmax"] is None assert cmap_params["norm"].vmin == 0 assert cmap_params["norm"].vmax == 5 assert cmap_params["norm"].vmin == cmap_params["levels"][0] assert cmap_params["norm"].vmax == cmap_params["levels"][-1] assert cmap_params["cmap"].name == "Blues" assert cmap_params["extend"] == "neither" assert cmap_params["cmap"].N == 4 assert cmap_params["norm"].N == 5 cmap_params = _determine_cmap_params(data, levels=5, vmin=0.5, vmax=1.5) assert cmap_params["cmap"].name == "viridis" assert cmap_params["extend"] == "max" cmap_params = _determine_cmap_params(data, levels=5, vmin=1.5) assert cmap_params["cmap"].name == "viridis" assert cmap_params["extend"] == "min" cmap_params = _determine_cmap_params(data, levels=5, vmin=1.3, vmax=1.5) assert cmap_params["cmap"].name == "viridis" assert cmap_params["extend"] == "both" def test_list_levels(self) -> None: data = self.data + 1 orig_levels = [0, 1, 2, 3, 4, 5] # vmin and vmax should be ignored if levels are explicitly provided cmap_params = _determine_cmap_params(data, levels=orig_levels, vmin=0, vmax=3) assert cmap_params["vmin"] is None assert cmap_params["vmax"] is None assert cmap_params["norm"].vmin == 0 assert 
cmap_params["norm"].vmax == 5 assert cmap_params["cmap"].N == 5 assert cmap_params["norm"].N == 6 for wrap_levels in cast( list[Callable[[Any], dict[Any, Any]]], [list, np.array, pd.Index, DataArray] ): cmap_params = _determine_cmap_params(data, levels=wrap_levels(orig_levels)) assert_array_equal(cmap_params["levels"], orig_levels) def test_divergentcontrol(self) -> None: neg = self.data - 0.1 pos = self.data # Default with positive data will be a normal cmap cmap_params = _determine_cmap_params(pos) assert cmap_params["vmin"] == 0 assert cmap_params["vmax"] == 1 assert cmap_params["cmap"] == "viridis" # Default with negative data will be a divergent cmap cmap_params = _determine_cmap_params(neg) assert cmap_params["vmin"] == -0.9 assert cmap_params["vmax"] == 0.9 assert cmap_params["cmap"] == "RdBu_r" # Setting vmin or vmax should prevent this only if center is false cmap_params = _determine_cmap_params(neg, vmin=-0.1, center=False) assert cmap_params["vmin"] == -0.1 assert cmap_params["vmax"] == 0.9 assert cmap_params["cmap"] == "viridis" cmap_params = _determine_cmap_params(neg, vmax=0.5, center=False) assert cmap_params["vmin"] == -0.1 assert cmap_params["vmax"] == 0.5 assert cmap_params["cmap"] == "viridis" # Setting center=False too cmap_params = _determine_cmap_params(neg, center=False) assert cmap_params["vmin"] == -0.1 assert cmap_params["vmax"] == 0.9 assert cmap_params["cmap"] == "viridis" # However, I should still be able to set center and have a div cmap cmap_params = _determine_cmap_params(neg, center=0) assert cmap_params["vmin"] == -0.9 assert cmap_params["vmax"] == 0.9 assert cmap_params["cmap"] == "RdBu_r" # Setting vmin or vmax alone will force symmetric bounds around center cmap_params = _determine_cmap_params(neg, vmin=-0.1) assert cmap_params["vmin"] == -0.1 assert cmap_params["vmax"] == 0.1 assert cmap_params["cmap"] == "RdBu_r" cmap_params = _determine_cmap_params(neg, vmax=0.5) assert cmap_params["vmin"] == -0.5 assert cmap_params["vmax"] == 0.5 assert cmap_params["cmap"] == "RdBu_r" cmap_params = _determine_cmap_params(neg, vmax=0.6, center=0.1) assert cmap_params["vmin"] == -0.4 assert cmap_params["vmax"] == 0.6 assert cmap_params["cmap"] == "RdBu_r" # But this is only true if vmin or vmax are negative cmap_params = _determine_cmap_params(pos, vmin=-0.1) assert cmap_params["vmin"] == -0.1 assert cmap_params["vmax"] == 0.1 assert cmap_params["cmap"] == "RdBu_r" cmap_params = _determine_cmap_params(pos, vmin=0.1) assert cmap_params["vmin"] == 0.1 assert cmap_params["vmax"] == 1 assert cmap_params["cmap"] == "viridis" cmap_params = _determine_cmap_params(pos, vmax=0.5) assert cmap_params["vmin"] == 0 assert cmap_params["vmax"] == 0.5 assert cmap_params["cmap"] == "viridis" # If both vmin and vmax are provided, output is non-divergent cmap_params = _determine_cmap_params(neg, vmin=-0.2, vmax=0.6) assert cmap_params["vmin"] == -0.2 assert cmap_params["vmax"] == 0.6 assert cmap_params["cmap"] == "viridis" # regression test for GH3524 # infer diverging colormap from divergent levels cmap_params = _determine_cmap_params(pos, levels=[-0.1, 0, 1]) # specifying levels makes cmap a Colormap object assert cmap_params["cmap"].name == "RdBu_r" def test_norm_sets_vmin_vmax(self) -> None: vmin = self.data.min() vmax = self.data.max() for norm, extend, levels in zip( [ mpl.colors.Normalize(), mpl.colors.Normalize(), mpl.colors.Normalize(vmin + 0.1, vmax - 0.1), mpl.colors.Normalize(None, vmax - 0.1), mpl.colors.Normalize(vmin + 0.1, None), ], ["neither", "neither", "both", "max", 
"min"], [7, None, None, None, None], strict=True, ): test_min = vmin if norm.vmin is None else norm.vmin test_max = vmax if norm.vmax is None else norm.vmax cmap_params = _determine_cmap_params(self.data, norm=norm, levels=levels) assert cmap_params["vmin"] is None assert cmap_params["vmax"] is None assert cmap_params["norm"].vmin == test_min assert cmap_params["norm"].vmax == test_max assert cmap_params["extend"] == extend assert cmap_params["norm"] == norm @requires_matplotlib class TestDiscreteColorMap: @pytest.fixture(autouse=True) def setUp(self): x = np.arange(start=0, stop=10, step=2) y = np.arange(start=9, stop=-7, step=-3) xy = np.dstack(np.meshgrid(x, y)) distance = np.linalg.norm(xy, axis=2) self.darray = DataArray(distance, list(zip(("y", "x"), (y, x), strict=True))) self.data_min = distance.min() self.data_max = distance.max() yield # Remove all matplotlib figures plt.close("all") @pytest.mark.slow def test_recover_from_seaborn_jet_exception(self) -> None: pal = _color_palette("jet", 4) assert type(pal) is np.ndarray assert len(pal) == 4 @pytest.mark.slow def test_build_discrete_cmap(self) -> None: for cmap, levels, extend, filled in [ ("jet", [0, 1], "both", False), ("hot", [-4, 4], "max", True), ]: ncmap, cnorm = _build_discrete_cmap(cmap, levels, extend, filled) assert ncmap.N == len(levels) - 1 assert len(ncmap.colors) == len(levels) - 1 assert cnorm.N == len(levels) assert_array_equal(cnorm.boundaries, levels) assert max(levels) == cnorm.vmax assert min(levels) == cnorm.vmin if filled: assert ncmap.colorbar_extend == extend else: assert ncmap.colorbar_extend == "max" @pytest.mark.slow def test_discrete_colormap_list_of_levels(self) -> None: for extend, levels in [ ("max", [-1, 2, 4, 8, 10]), ("both", [2, 5, 10, 11]), ("neither", [0, 5, 10, 15]), ("min", [2, 5, 10, 15]), ]: for kind in ["imshow", "pcolormesh", "contourf", "contour"]: primitive = getattr(self.darray.plot, kind)(levels=levels) assert_array_equal(levels, primitive.norm.boundaries) assert max(levels) == primitive.norm.vmax assert min(levels) == primitive.norm.vmin if kind != "contour": assert extend == primitive.cmap.colorbar_extend else: assert "max" == primitive.cmap.colorbar_extend assert len(levels) - 1 == len(primitive.cmap.colors) @pytest.mark.slow def test_discrete_colormap_int_levels(self) -> None: for extend, levels, vmin, vmax, cmap in [ ("neither", 7, None, None, None), ("neither", 7, None, 20, mpl.colormaps["RdBu"]), ("both", 7, 4, 8, None), ("min", 10, 4, 15, None), ]: for kind in ["imshow", "pcolormesh", "contourf", "contour"]: primitive = getattr(self.darray.plot, kind)( levels=levels, vmin=vmin, vmax=vmax, cmap=cmap ) assert levels >= len(primitive.norm.boundaries) - 1 if vmax is None: assert primitive.norm.vmax >= self.data_max else: assert primitive.norm.vmax >= vmax if vmin is None: assert primitive.norm.vmin <= self.data_min else: assert primitive.norm.vmin <= vmin if kind != "contour": assert extend == primitive.cmap.colorbar_extend else: assert "max" == primitive.cmap.colorbar_extend assert levels >= len(primitive.cmap.colors) def test_discrete_colormap_list_levels_and_vmin_or_vmax(self) -> None: levels = [0, 5, 10, 15] primitive = self.darray.plot(levels=levels, vmin=-3, vmax=20) # type: ignore[call-arg] assert primitive.norm.vmax == max(levels) assert primitive.norm.vmin == min(levels) def test_discrete_colormap_provided_boundary_norm(self) -> None: norm = mpl.colors.BoundaryNorm([0, 5, 10, 15], 4) primitive = self.darray.plot.contourf(norm=norm) 
np.testing.assert_allclose(list(primitive.levels), norm.boundaries) def test_discrete_colormap_provided_boundary_norm_matching_cmap_levels( self, ) -> None: norm = mpl.colors.BoundaryNorm([0, 5, 10, 15], 4) primitive = self.darray.plot.contourf(norm=norm) cbar = primitive.colorbar assert cbar is not None assert cbar.norm.Ncmap == cbar.norm.N # type: ignore[attr-defined] # Exists, debatable if public though. class Common2dMixin: """ Common tests for 2d plotting go here. These tests assume that a staticmethod for `self.plotfunc` exists. Should have the same name as the method. """ darray: DataArray plotfunc: staticmethod pass_in_axis: Callable # Needs to be overridden in TestSurface for facet grid plots subplot_kws: dict[Any, Any] | None = None @pytest.fixture(autouse=True) def setUp(self) -> None: da = DataArray( easy_array((10, 15), start=-1), dims=["y", "x"], coords={"y": np.arange(10), "x": np.arange(15)}, ) # add 2d coords ds = da.to_dataset(name="testvar") x, y = np.meshgrid(da.x.values, da.y.values) ds["x2d"] = DataArray(x, dims=["y", "x"]) ds["y2d"] = DataArray(y, dims=["y", "x"]) ds = ds.set_coords(["x2d", "y2d"]) # set darray and plot method self.darray: DataArray = ds.testvar # Add CF-compliant metadata self.darray.attrs["long_name"] = "a_long_name" self.darray.attrs["units"] = "a_units" self.darray.x.attrs["long_name"] = "x_long_name" self.darray.x.attrs["units"] = "x_units" self.darray.y.attrs["long_name"] = "y_long_name" self.darray.y.attrs["units"] = "y_units" self.plotmethod = getattr(self.darray.plot, self.plotfunc.__name__) def test_label_names(self) -> None: self.plotmethod() assert "x_long_name [x_units]" == plt.gca().get_xlabel() assert "y_long_name [y_units]" == plt.gca().get_ylabel() def test_1d_raises_valueerror(self) -> None: with pytest.raises(ValueError, match=r"DataArray must be 2d"): self.plotfunc(self.darray[0, :]) def test_bool(self) -> None: xr.ones_like(self.darray, dtype=bool).plot() # type: ignore[call-arg] def test_complex_raises_typeerror(self) -> None: with pytest.raises(TypeError, match=r"complex128"): (self.darray + 1j).plot() # type: ignore[call-arg] def test_3d_raises_valueerror(self) -> None: a = DataArray(easy_array((2, 3, 4))) if self.plotfunc.__name__ == "imshow": pytest.skip() with pytest.raises(ValueError, match=r"DataArray must be 2d"): self.plotfunc(a) def test_nonnumeric_index(self) -> None: a = DataArray(easy_array((3, 2)), coords=[["a", "b", "c"], ["d", "e"]]) if self.plotfunc.__name__ == "surface": # ax.plot_surface errors with nonnumerics: with pytest.raises(TypeError, match="not supported for the input types"): self.plotfunc(a) else: self.plotfunc(a) def test_multiindex_raises_typeerror(self) -> None: a = DataArray( easy_array((3, 2)), dims=("x", "y"), coords=dict(x=("x", [0, 1, 2]), a=("y", [0, 1]), b=("y", [2, 3])), ) a = a.set_index(y=("a", "b")) with pytest.raises(TypeError, match=r"[Pp]lot"): self.plotfunc(a) def test_can_pass_in_axis(self) -> None: self.pass_in_axis(self.plotmethod) def test_xyincrease_defaults(self) -> None: # With default settings the axis must be ordered regardless # of the coords order. 
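        # get_xlim()/get_ylim() return (left, right) and (bottom, top), so
        # bounds[0] < bounds[1] means the axis increases; this should hold for
        # both the ordered and the inverted coordinates plotted below.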
self.plotfunc(DataArray(easy_array((3, 2)), coords=[[1, 2, 3], [1, 2]])) bounds = plt.gca().get_ylim() assert bounds[0] < bounds[1] bounds = plt.gca().get_xlim() assert bounds[0] < bounds[1] # Inverted coords self.plotfunc(DataArray(easy_array((3, 2)), coords=[[3, 2, 1], [2, 1]])) bounds = plt.gca().get_ylim() assert bounds[0] < bounds[1] bounds = plt.gca().get_xlim() assert bounds[0] < bounds[1] def test_xyincrease_false_changes_axes(self) -> None: self.plotmethod(xincrease=False, yincrease=False) xlim = plt.gca().get_xlim() ylim = plt.gca().get_ylim() diffs = xlim[0] - 14, xlim[1] - 0, ylim[0] - 9, ylim[1] - 0 assert all(abs(x) < 1 for x in diffs) def test_xyincrease_true_changes_axes(self) -> None: self.plotmethod(xincrease=True, yincrease=True) xlim = plt.gca().get_xlim() ylim = plt.gca().get_ylim() diffs = xlim[0] - 0, xlim[1] - 14, ylim[0] - 0, ylim[1] - 9 assert all(abs(x) < 1 for x in diffs) def test_dates_are_concise(self) -> None: import matplotlib.dates as mdates time = pd.date_range("2000-01-01", "2000-01-10") a = DataArray(np.random.randn(2, len(time)), [("xx", [1, 2]), ("t", time)]) self.plotfunc(a, x="t") ax = plt.gca() assert isinstance(ax.xaxis.get_major_locator(), mdates.AutoDateLocator) assert isinstance(ax.xaxis.get_major_formatter(), mdates.ConciseDateFormatter) def test_plot_nans(self) -> None: x1 = self.darray[:5] x2 = self.darray.copy() x2[5:] = np.nan clim1 = self.plotfunc(x1).get_clim() clim2 = self.plotfunc(x2).get_clim() assert clim1 == clim2 @pytest.mark.filterwarnings("ignore::UserWarning") @pytest.mark.filterwarnings("ignore:invalid value encountered") def test_can_plot_all_nans(self) -> None: # regression test for issue #1780 self.plotfunc(DataArray(np.full((2, 2), np.nan))) @pytest.mark.filterwarnings("ignore: Attempting to set") def test_can_plot_axis_size_one(self) -> None: if self.plotfunc.__name__ not in ("contour", "contourf"): self.plotfunc(DataArray(np.ones((1, 1)))) def test_disallows_rgb_arg(self) -> None: with pytest.raises(ValueError): # Always invalid for most plots. Invalid for imshow with 2D data. 
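            # rgb is only meaningful for imshow with a 3d (y, x, band) array
            # (see the RGB tests in TestImshow below); for 2d data every plot
            # function is expected to reject it with a ValueError.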
self.plotfunc(DataArray(np.ones((2, 2))), rgb="not None") def test_viridis_cmap(self) -> None: cmap_name = self.plotmethod(cmap="viridis").get_cmap().name assert "viridis" == cmap_name def test_default_cmap(self) -> None: cmap_name = self.plotmethod().get_cmap().name assert "RdBu_r" == cmap_name cmap_name = self.plotfunc(abs(self.darray)).get_cmap().name assert "viridis" == cmap_name @requires_seaborn def test_seaborn_palette_as_cmap(self) -> None: cmap_name = self.plotmethod(levels=2, cmap="husl").get_cmap().name assert "husl" == cmap_name def test_can_change_default_cmap(self) -> None: cmap_name = self.plotmethod(cmap="Blues").get_cmap().name assert "Blues" == cmap_name def test_diverging_color_limits(self) -> None: artist = self.plotmethod() vmin, vmax = artist.get_clim() assert round(abs(-vmin - vmax), 7) == 0 def test_xy_strings(self) -> None: self.plotmethod(x="y", y="x") ax = plt.gca() assert "y_long_name [y_units]" == ax.get_xlabel() assert "x_long_name [x_units]" == ax.get_ylabel() def test_positional_coord_string(self) -> None: self.plotmethod(y="x") ax = plt.gca() assert "x_long_name [x_units]" == ax.get_ylabel() assert "y_long_name [y_units]" == ax.get_xlabel() self.plotmethod(x="x") ax = plt.gca() assert "x_long_name [x_units]" == ax.get_xlabel() assert "y_long_name [y_units]" == ax.get_ylabel() def test_bad_x_string_exception(self) -> None: with pytest.raises(ValueError, match=r"x and y cannot be equal."): self.plotmethod(x="y", y="y") error_msg = "must be one of None, 'x', 'x2d', 'y', 'y2d'" with pytest.raises(ValueError, match=rf"x {error_msg}"): self.plotmethod(x="not_a_real_dim", y="y") with pytest.raises(ValueError, match=rf"x {error_msg}"): self.plotmethod(x="not_a_real_dim") with pytest.raises(ValueError, match=rf"y {error_msg}"): self.plotmethod(y="not_a_real_dim") self.darray.coords["z"] = 100 def test_coord_strings(self) -> None: # 1d coords (same as dims) assert {"x", "y"} == set(self.darray.dims) self.plotmethod(y="y", x="x") def test_non_linked_coords(self) -> None: # plot with coordinate names that are not dimensions newy = self.darray.y + 150 newy.attrs = {} # Clear attrs since binary ops keep them by default self.darray.coords["newy"] = newy # Normal case, without transpose self.plotfunc(self.darray, x="x", y="newy") ax = plt.gca() assert "x_long_name [x_units]" == ax.get_xlabel() assert "newy" == ax.get_ylabel() # ax limits might change between plotfuncs # simply ensure that these high coords were passed over assert np.min(ax.get_ylim()) > 100.0 def test_non_linked_coords_transpose(self) -> None: # plot with coordinate names that are not dimensions, # and with transposed y and x axes # This used to raise an error with pcolormesh and contour # https://github.com/pydata/xarray/issues/788 newy = self.darray.y + 150 newy.attrs = {} # Clear attrs since binary ops keep them by default self.darray.coords["newy"] = newy self.plotfunc(self.darray, x="newy", y="x") ax = plt.gca() assert "newy" == ax.get_xlabel() assert "x_long_name [x_units]" == ax.get_ylabel() # ax limits might change between plotfuncs # simply ensure that these high coords were passed over assert np.min(ax.get_xlim()) > 100.0 def test_multiindex_level_as_coord(self) -> None: da = DataArray( easy_array((3, 2)), dims=("x", "y"), coords=dict(x=("x", [0, 1, 2]), a=("y", [0, 1]), b=("y", [2, 3])), ) da = da.set_index(y=["a", "b"]) for x, y in (("a", "x"), ("b", "x"), ("x", "a"), ("x", "b")): self.plotfunc(da, x=x, y=y) ax = plt.gca() assert x == ax.get_xlabel() assert y == ax.get_ylabel() with 
pytest.raises(ValueError, match=r"levels of the same MultiIndex"): self.plotfunc(da, x="a", y="b") with pytest.raises(ValueError, match=r"y must be one of None, 'a', 'b', 'x'"): self.plotfunc(da, x="a", y="y") def test_default_title(self) -> None: a = DataArray(easy_array((4, 3, 2)), dims=["a", "b", "c"]) a.coords["c"] = [0, 1] a.coords["d"] = "foo" self.plotfunc(a.isel(c=1)) title = plt.gca().get_title() assert title in {"c = 1, d = foo", "d = foo, c = 1"} def test_colorbar_default_label(self) -> None: self.plotmethod(add_colorbar=True) assert "a_long_name [a_units]" in text_in_fig() def test_no_labels(self) -> None: self.darray.name = "testvar" self.darray.attrs["units"] = "test_units" self.plotmethod(add_labels=False) alltxt = text_in_fig() for string in [ "x_long_name [x_units]", "y_long_name [y_units]", "testvar [test_units]", ]: assert string not in alltxt def test_colorbar_kwargs(self) -> None: # replace label self.darray.attrs.pop("long_name") self.darray.attrs["units"] = "test_units" # check default colorbar label self.plotmethod(add_colorbar=True) alltxt = text_in_fig() assert "testvar [test_units]" in alltxt self.darray.attrs.pop("units") self.darray.name = "testvar" self.plotmethod(add_colorbar=True, cbar_kwargs={"label": "MyLabel"}) alltxt = text_in_fig() assert "MyLabel" in alltxt assert "testvar" not in alltxt # you can use anything accepted by the dict constructor as well self.plotmethod(add_colorbar=True, cbar_kwargs=(("label", "MyLabel"),)) alltxt = text_in_fig() assert "MyLabel" in alltxt assert "testvar" not in alltxt # change cbar ax _fig, axs = plt.subplots(1, 2, squeeze=False) ax = axs[0, 0] cax = axs[0, 1] self.plotmethod( ax=ax, cbar_ax=cax, add_colorbar=True, cbar_kwargs={"label": "MyBar"} ) assert ax.has_data() assert cax.has_data() alltxt = text_in_fig() assert "MyBar" in alltxt assert "testvar" not in alltxt # note that there are two ways to achieve this _fig, axs = plt.subplots(1, 2, squeeze=False) ax = axs[0, 0] cax = axs[0, 1] self.plotmethod( ax=ax, add_colorbar=True, cbar_kwargs={"label": "MyBar", "cax": cax} ) assert ax.has_data() assert cax.has_data() alltxt = text_in_fig() assert "MyBar" in alltxt assert "testvar" not in alltxt # see that no colorbar is respected self.plotmethod(add_colorbar=False) assert "testvar" not in text_in_fig() # check that error is raised pytest.raises( ValueError, self.plotmethod, add_colorbar=False, cbar_kwargs={"label": "label"}, ) def test_verbose_facetgrid(self) -> None: a = easy_array((10, 15, 3)) d = DataArray(a, dims=["y", "x", "z"]) g = xplt.FacetGrid(d, col="z", subplot_kws=self.subplot_kws) g.map_dataarray(self.plotfunc, "x", "y") for ax in g.axs.flat: assert ax.has_data() def test_2d_function_and_method_signature_same(self) -> None: func_sig = inspect.signature(self.plotfunc) method_sig = inspect.signature(self.plotmethod) for argname, param in method_sig.parameters.items(): assert func_sig.parameters[argname] == param @pytest.mark.filterwarnings("ignore:tight_layout cannot") def test_convenient_facetgrid(self) -> None: a = easy_array((10, 15, 4)) d = DataArray(a, dims=["y", "x", "z"]) g = self.plotfunc(d, x="x", y="y", col="z", col_wrap=2) assert_array_equal(g.axs.shape, [2, 2]) for (y, x), ax in np.ndenumerate(g.axs): assert ax.has_data() if x == 0: assert "y" == ax.get_ylabel() else: assert "" == ax.get_ylabel() if y == 1: assert "x" == ax.get_xlabel() else: assert "" == ax.get_xlabel() # Inferring labels g = self.plotfunc(d, col="z", col_wrap=2) assert_array_equal(g.axs.shape, [2, 2]) for (y, x), ax in 
np.ndenumerate(g.axs): assert ax.has_data() if x == 0: assert "y" == ax.get_ylabel() else: assert "" == ax.get_ylabel() if y == 1: assert "x" == ax.get_xlabel() else: assert "" == ax.get_xlabel() @pytest.mark.filterwarnings("ignore:tight_layout cannot") def test_convenient_facetgrid_4d(self) -> None: a = easy_array((10, 15, 2, 3)) d = DataArray(a, dims=["y", "x", "columns", "rows"]) g = self.plotfunc(d, x="x", y="y", col="columns", row="rows") assert_array_equal(g.axs.shape, [3, 2]) for ax in g.axs.flat: assert ax.has_data() @pytest.mark.filterwarnings("ignore:This figure includes") def test_facetgrid_map_only_appends_mappables(self) -> None: a = easy_array((10, 15, 2, 3)) d = DataArray(a, dims=["y", "x", "columns", "rows"]) g = self.plotfunc(d, x="x", y="y", col="columns", row="rows") expected = g._mappables g.map(lambda: plt.plot(1, 1)) actual = g._mappables assert expected == actual def test_facetgrid_cmap(self) -> None: # Regression test for GH592 data = np.random.random(size=(20, 25, 12)) + np.linspace(-3, 3, 12) d = DataArray(data, dims=["x", "y", "time"]) fg = d.plot.pcolormesh(col="time") # check that all color limits are the same assert len({m.get_clim() for m in fg._mappables}) == 1 # check that all colormaps are the same assert len({m.get_cmap().name for m in fg._mappables}) == 1 def test_facetgrid_cbar_kwargs(self) -> None: a = easy_array((10, 15, 2, 3)) d = DataArray(a, dims=["y", "x", "columns", "rows"]) g = self.plotfunc( d, x="x", y="y", col="columns", row="rows", cbar_kwargs={"label": "test_label"}, ) # catch contour case if g.cbar is not None: assert get_colorbar_label(g.cbar) == "test_label" def test_facetgrid_no_cbar_ax(self) -> None: a = easy_array((10, 15, 2, 3)) d = DataArray(a, dims=["y", "x", "columns", "rows"]) with pytest.raises(ValueError): self.plotfunc(d, x="x", y="y", col="columns", row="rows", cbar_ax=1) def test_cmap_and_color_both(self) -> None: with pytest.raises(ValueError): self.plotmethod(colors="k", cmap="RdBu") def test_2d_coord_with_interval(self) -> None: for dim in self.darray.dims: gp = self.darray.groupby_bins(dim, range(15), restore_coord_dims=True).mean( [dim] ) for kind in ["imshow", "pcolormesh", "contourf", "contour"]: getattr(gp.plot, kind)() def test_colormap_error_norm_and_vmin_vmax(self) -> None: norm = mpl.colors.LogNorm(0.1, 1e1) with pytest.raises(ValueError): self.darray.plot(norm=norm, vmin=2) # type: ignore[call-arg] with pytest.raises(ValueError): self.darray.plot(norm=norm, vmax=2) # type: ignore[call-arg] @pytest.mark.slow class TestContourf(Common2dMixin, PlotTestCase): plotfunc = staticmethod(xplt.contourf) @pytest.mark.slow def test_contourf_called(self) -> None: # Having both statements ensures the test works properly assert not self.contourf_called(self.darray.plot.imshow) assert self.contourf_called(self.darray.plot.contourf) def test_primitive_artist_returned(self) -> None: artist = self.plotmethod() assert isinstance(artist, mpl.contour.QuadContourSet) @pytest.mark.slow def test_extend(self) -> None: artist = self.plotmethod() assert artist.extend == "neither" self.darray[0, 0] = -100 self.darray[-1, -1] = 100 artist = self.plotmethod(robust=True) assert artist.extend == "both" self.darray[0, 0] = 0 self.darray[-1, -1] = 0 artist = self.plotmethod(vmin=-0, vmax=10) assert artist.extend == "min" artist = self.plotmethod(vmin=-10, vmax=0) assert artist.extend == "max" @pytest.mark.slow def test_2d_coord_names(self) -> None: self.plotmethod(x="x2d", y="y2d") # make sure labels came out ok ax = plt.gca() assert "x2d" == 
ax.get_xlabel() assert "y2d" == ax.get_ylabel() @pytest.mark.slow def test_levels(self) -> None: artist = self.plotmethod(levels=[-0.5, -0.4, 0.1]) assert artist.extend == "both" artist = self.plotmethod(levels=3) assert artist.extend == "neither" def test_colormap_norm(self) -> None: # Using a norm should plot a nice colorbar and look consistent with pcolormesh. norm = mpl.colors.LogNorm(0.1, 1e1) with pytest.warns(UserWarning): artist = self.plotmethod(norm=norm, add_colorbar=True) actual = artist.colorbar.locator() expected = np.array([0.01, 0.1, 1.0, 10.0]) np.testing.assert_allclose(actual, expected) @pytest.mark.slow class TestContour(Common2dMixin, PlotTestCase): plotfunc = staticmethod(xplt.contour) # matplotlib cmap.colors gives an rgbA ndarray # when seaborn is used, instead we get an rgb tuple @staticmethod def _color_as_tuple(c: Any) -> tuple[Any, Any, Any]: return c[0], c[1], c[2] def test_colors(self) -> None: # with single color, we don't want rgb array artist = self.plotmethod(colors="k") assert artist.cmap.colors[0] == "k" # 2 colors, will repeat every other tick: artist = self.plotmethod(colors=["k", "b"]) assert artist.cmap.colors[:2] == ["k", "b"] # 4 colors, will repeat every 4th tick: artist = self.darray.plot.contour( levels=[-0.5, 0.0, 0.5, 1.0], colors=["k", "r", "w", "b"] ) assert artist.cmap.colors[:5] == ["k", "r", "w", "b"] # type: ignore[attr-defined,unused-ignore] # the last color is now under "over" assert self._color_as_tuple(artist.cmap.get_over()) == (0.0, 0.0, 1.0) def test_colors_np_levels(self) -> None: # https://github.com/pydata/xarray/issues/3284 levels = np.array([-0.5, 0.0, 0.5, 1.0]) artist = self.darray.plot.contour(levels=levels, colors=["k", "r", "w", "b"]) cmap = artist.cmap assert isinstance(cmap, mpl.colors.ListedColormap) assert artist.cmap.colors[:5] == ["k", "r", "w", "b"] # type: ignore[attr-defined,unused-ignore] # the last color is now under "over" assert self._color_as_tuple(cmap.get_over()) == (0.0, 0.0, 1.0) def test_cmap_and_color_both(self) -> None: with pytest.raises(ValueError): self.plotmethod(colors="k", cmap="RdBu") def list_of_colors_in_cmap_raises_error(self) -> None: with pytest.raises(ValueError, match=r"list of colors"): self.plotmethod(cmap=["k", "b"]) @pytest.mark.slow def test_2d_coord_names(self) -> None: self.plotmethod(x="x2d", y="y2d") # make sure labels came out ok ax = plt.gca() assert "x2d" == ax.get_xlabel() assert "y2d" == ax.get_ylabel() def test_single_level(self) -> None: # this used to raise an error, but not anymore since # add_colorbar defaults to false self.plotmethod(levels=[0.1]) self.plotmethod(levels=1) def test_colormap_norm(self) -> None: # Using a norm should plot a nice colorbar and look consistent with pcolormesh. 
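        # LogNorm(0.1, 1e1) pins the limits to a decade range, so the
        # colorbar's tick locator should return decade ticks
        # (0.01, 0.1, 1, 10), matching the allclose check below.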
norm = mpl.colors.LogNorm(0.1, 1e1) with pytest.warns(UserWarning): artist = self.plotmethod(norm=norm, add_colorbar=True) actual = artist.colorbar.locator() expected = np.array([0.01, 0.1, 1.0, 10.0]) np.testing.assert_allclose(actual, expected) class TestPcolormesh(Common2dMixin, PlotTestCase): plotfunc = staticmethod(xplt.pcolormesh) def test_primitive_artist_returned(self) -> None: artist = self.plotmethod() assert isinstance(artist, mpl.collections.QuadMesh) def test_everything_plotted(self) -> None: artist = self.plotmethod() assert artist.get_array().size == self.darray.size @pytest.mark.slow def test_2d_coord_names(self) -> None: self.plotmethod(x="x2d", y="y2d") # make sure labels came out ok ax = plt.gca() assert "x2d" == ax.get_xlabel() assert "y2d" == ax.get_ylabel() def test_dont_infer_interval_breaks_for_cartopy(self) -> None: # Regression for GH 781 ax = plt.gca() # Simulate a Cartopy Axis ax.projection = True # type: ignore[attr-defined] artist = self.plotmethod(x="x2d", y="y2d", ax=ax) assert isinstance(artist, mpl.collections.QuadMesh) # Let cartopy handle the axis limits and artist size arr = artist.get_array() assert arr is not None assert arr.size <= self.darray.size class TestPcolormeshLogscale(PlotTestCase): """ Test pcolormesh axes when x and y are in logscale """ plotfunc = staticmethod(xplt.pcolormesh) @pytest.fixture(autouse=True) def setUp(self) -> None: self.boundaries = (-1, 9, -4, 3) shape = (8, 11) x = np.logspace(self.boundaries[0], self.boundaries[1], shape[1]) y = np.logspace(self.boundaries[2], self.boundaries[3], shape[0]) da = DataArray( easy_array(shape, start=-1), dims=["y", "x"], coords={"y": y, "x": x}, name="testvar", ) self.darray = da def test_interval_breaks_logspace(self) -> None: """ Check if the outer vertices of the pcolormesh are the expected values Checks bugfix for #5333 """ artist = self.darray.plot.pcolormesh(xscale="log", yscale="log") # Grab the coordinates of the vertices of the Patches x_vertices = [p.vertices[:, 0] for p in artist.properties()["paths"]] y_vertices = [p.vertices[:, 1] for p in artist.properties()["paths"]] # Get the maximum and minimum values for each set of vertices xmin, xmax = np.min(x_vertices), np.max(x_vertices) ymin, ymax = np.min(y_vertices), np.max(y_vertices) # Check if they are equal to 10 to the power of the outer value of its # corresponding axis plus or minus the interval in the logspace log_interval = 0.5 np.testing.assert_allclose(xmin, 10 ** (self.boundaries[0] - log_interval)) np.testing.assert_allclose(xmax, 10 ** (self.boundaries[1] + log_interval)) np.testing.assert_allclose(ymin, 10 ** (self.boundaries[2] - log_interval)) np.testing.assert_allclose(ymax, 10 ** (self.boundaries[3] + log_interval)) @pytest.mark.slow class TestImshow(Common2dMixin, PlotTestCase): plotfunc = staticmethod(xplt.imshow) @pytest.mark.xfail( reason=( "Failing inside matplotlib. Should probably be fixed upstream because " "other plot functions can handle it. 
" "Remove this test when it works, already in Common2dMixin" ) ) def test_dates_are_concise(self) -> None: import matplotlib.dates as mdates time = pd.date_range("2000-01-01", "2000-01-10") a = DataArray(np.random.randn(2, len(time)), [("xx", [1, 2]), ("t", time)]) self.plotfunc(a, x="t") ax = plt.gca() assert isinstance(ax.xaxis.get_major_locator(), mdates.AutoDateLocator) assert isinstance(ax.xaxis.get_major_formatter(), mdates.ConciseDateFormatter) @pytest.mark.slow def test_imshow_called(self) -> None: # Having both statements ensures the test works properly assert not self.imshow_called(self.darray.plot.contourf) assert self.imshow_called(self.darray.plot.imshow) def test_xy_pixel_centered(self) -> None: self.darray.plot.imshow(yincrease=False) assert np.allclose([-0.5, 14.5], plt.gca().get_xlim()) assert np.allclose([9.5, -0.5], plt.gca().get_ylim()) def test_default_aspect_is_auto(self) -> None: self.darray.plot.imshow() assert "auto" == plt.gca().get_aspect() @pytest.mark.slow def test_cannot_change_mpl_aspect(self) -> None: with pytest.raises(ValueError, match=r"not available in xarray"): self.darray.plot.imshow(aspect="equal") # with numbers we fall back to fig control self.darray.plot.imshow(size=5, aspect=2) assert "auto" == plt.gca().get_aspect() assert tuple(plt.gcf().get_size_inches()) == (10, 5) @pytest.mark.slow def test_primitive_artist_returned(self) -> None: artist = self.plotmethod() assert isinstance(artist, mpl.image.AxesImage) @pytest.mark.slow @requires_seaborn def test_seaborn_palette_needs_levels(self) -> None: with pytest.raises(ValueError): self.plotmethod(cmap="husl") def test_2d_coord_names(self) -> None: with pytest.raises(ValueError, match=r"requires 1D coordinates"): self.plotmethod(x="x2d", y="y2d") def test_plot_rgb_image(self) -> None: DataArray( easy_array((10, 15, 3), start=0), dims=["y", "x", "band"] ).plot.imshow() assert 0 == len(find_possible_colorbars()) def test_plot_rgb_image_explicit(self) -> None: DataArray( easy_array((10, 15, 3), start=0), dims=["y", "x", "band"] ).plot.imshow(y="y", x="x", rgb="band") assert 0 == len(find_possible_colorbars()) def test_plot_rgb_faceted(self) -> None: DataArray( easy_array((2, 2, 10, 15, 3), start=0), dims=["a", "b", "y", "x", "band"] ).plot.imshow(row="a", col="b") assert 0 == len(find_possible_colorbars()) def test_plot_rgba_image_transposed(self) -> None: # We can handle the color axis being in any position DataArray( easy_array((4, 10, 15), start=0), dims=["band", "y", "x"] ).plot.imshow() def test_warns_ambiguous_dim(self) -> None: arr = DataArray(easy_array((3, 3, 3)), dims=["y", "x", "band"]) with pytest.warns(UserWarning): arr.plot.imshow() # but doesn't warn if dimensions specified arr.plot.imshow(rgb="band") arr.plot.imshow(x="x", y="y") def test_rgb_errors_too_many_dims(self) -> None: arr = DataArray(easy_array((3, 3, 3, 3)), dims=["y", "x", "z", "band"]) with pytest.raises(ValueError): arr.plot.imshow(rgb="band") def test_rgb_errors_bad_dim_sizes(self) -> None: arr = DataArray(easy_array((5, 5, 5)), dims=["y", "x", "band"]) with pytest.raises(ValueError): arr.plot.imshow(rgb="band") @pytest.mark.parametrize( ["vmin", "vmax", "robust"], [ (-1, None, False), (None, 2, False), (-1, 1, False), (0, 0, False), (0, None, True), (None, -1, True), ], ) def test_normalize_rgb_imshow( self, vmin: float | None, vmax: float | None, robust: bool ) -> None: da = DataArray(easy_array((5, 5, 3), start=-0.6, stop=1.4)) arr = da.plot.imshow(vmin=vmin, vmax=vmax, robust=robust).get_array() assert arr is not None 
assert 0 <= arr.min() <= arr.max() <= 1 def test_normalize_rgb_one_arg_error(self) -> None: da = DataArray(easy_array((5, 5, 3), start=-0.6, stop=1.4)) # If passed one bound that implies all out of range, error: for vmin, vmax in ((None, -1), (2, None)): with pytest.raises(ValueError): da.plot.imshow(vmin=vmin, vmax=vmax) # If passed two that's just moving the range, *not* an error: for vmin2, vmax2 in ((-1.2, -1), (2, 2.1)): da.plot.imshow(vmin=vmin2, vmax=vmax2) @pytest.mark.parametrize("dtype", [np.uint8, np.int8, np.int16]) def test_imshow_rgb_values_in_valid_range(self, dtype) -> None: da = DataArray(np.arange(75, dtype=dtype).reshape((5, 5, 3))) _, ax = plt.subplots() out = da.plot.imshow(ax=ax).get_array() assert out is not None actual_dtype = out.dtype assert actual_dtype is not None assert actual_dtype == np.uint8 assert (out[..., :3] == da.values).all() # Compare without added alpha assert (out[..., -1] == 255).all() # Compare alpha @pytest.mark.filterwarnings("ignore:Several dimensions of this array") def test_regression_rgb_imshow_dim_size_one(self) -> None: # Regression: https://github.com/pydata/xarray/issues/1966 da = DataArray(easy_array((1, 3, 3), start=0.0, stop=1.0)) da.plot.imshow() def test_origin_overrides_xyincrease(self) -> None: da = DataArray(easy_array((3, 2)), coords=[[-2, 0, 2], [-1, 1]]) with figure_context(): da.plot.imshow(origin="upper") assert plt.xlim()[0] < 0 assert plt.ylim()[1] < 0 with figure_context(): da.plot.imshow(origin="lower") assert plt.xlim()[0] < 0 assert plt.ylim()[0] < 0 class TestSurface(Common2dMixin, PlotTestCase): plotfunc = staticmethod(xplt.surface) subplot_kws = {"projection": "3d"} @pytest.mark.xfail( reason=( "Failing inside matplotlib. Should probably be fixed upstream because " "other plot functions can handle it. 
" "Remove this test when it works, already in Common2dMixin" ) ) def test_dates_are_concise(self) -> None: import matplotlib.dates as mdates time = pd.date_range("2000-01-01", "2000-01-10") a = DataArray(np.random.randn(2, len(time)), [("xx", [1, 2]), ("t", time)]) self.plotfunc(a, x="t") ax = plt.gca() assert isinstance(ax.xaxis.get_major_locator(), mdates.AutoDateLocator) assert isinstance(ax.xaxis.get_major_formatter(), mdates.ConciseDateFormatter) def test_primitive_artist_returned(self) -> None: artist = self.plotmethod() assert isinstance(artist, mpl_toolkits.mplot3d.art3d.Poly3DCollection) @pytest.mark.slow def test_2d_coord_names(self) -> None: self.plotmethod(x="x2d", y="y2d") # make sure labels came out ok ax = plt.gca() assert isinstance(ax, mpl_toolkits.mplot3d.axes3d.Axes3D) assert "x2d" == ax.get_xlabel() assert "y2d" == ax.get_ylabel() assert f"{self.darray.long_name} [{self.darray.units}]" == ax.get_zlabel() def test_xyincrease_false_changes_axes(self) -> None: # Does not make sense for surface plots pytest.skip("does not make sense for surface plots") def test_xyincrease_true_changes_axes(self) -> None: # Does not make sense for surface plots pytest.skip("does not make sense for surface plots") def test_can_pass_in_axis(self) -> None: self.pass_in_axis(self.plotmethod, subplot_kw={"projection": "3d"}) def test_default_cmap(self) -> None: # Does not make sense for surface plots with default arguments pytest.skip("does not make sense for surface plots") def test_diverging_color_limits(self) -> None: # Does not make sense for surface plots with default arguments pytest.skip("does not make sense for surface plots") def test_colorbar_kwargs(self) -> None: # Does not make sense for surface plots with default arguments pytest.skip("does not make sense for surface plots") def test_cmap_and_color_both(self) -> None: # Does not make sense for surface plots with default arguments pytest.skip("does not make sense for surface plots") def test_seaborn_palette_as_cmap(self) -> None: # seaborn does not work with mpl_toolkits.mplot3d with pytest.raises(ValueError): super().test_seaborn_palette_as_cmap() # Need to modify this test for surface(), because all subplots should have labels, # not just left and bottom @pytest.mark.filterwarnings("ignore:tight_layout cannot") def test_convenient_facetgrid(self) -> None: a = easy_array((10, 15, 4)) d = DataArray(a, dims=["y", "x", "z"]) g = self.plotfunc(d, x="x", y="y", col="z", col_wrap=2) # type: ignore[arg-type] # https://github.com/python/mypy/issues/15015 assert_array_equal(g.axs.shape, [2, 2]) for (_y, _x), ax in np.ndenumerate(g.axs): assert ax.has_data() assert "y" == ax.get_ylabel() assert "x" == ax.get_xlabel() # Inferring labels g = self.plotfunc(d, col="z", col_wrap=2) # type: ignore[arg-type] # https://github.com/python/mypy/issues/15015 assert_array_equal(g.axs.shape, [2, 2]) for (_y, _x), ax in np.ndenumerate(g.axs): assert ax.has_data() assert "y" == ax.get_ylabel() assert "x" == ax.get_xlabel() def test_viridis_cmap(self) -> None: return super().test_viridis_cmap() def test_can_change_default_cmap(self) -> None: return super().test_can_change_default_cmap() def test_colorbar_default_label(self) -> None: return super().test_colorbar_default_label() def test_facetgrid_map_only_appends_mappables(self) -> None: return super().test_facetgrid_map_only_appends_mappables() class TestFacetGrid(PlotTestCase): @pytest.fixture(autouse=True) def setUp(self) -> None: d = easy_array((10, 15, 3)) self.darray = DataArray(d, dims=["y", "x", "z"], 
coords={"z": ["a", "b", "c"]}) self.g = xplt.FacetGrid(self.darray, col="z") @pytest.mark.slow def test_no_args(self) -> None: self.g.map_dataarray(xplt.contourf, "x", "y") # Don't want colorbar labeled with 'None' alltxt = text_in_fig() assert "None" not in alltxt for ax in self.g.axs.flat: assert ax.has_data() @pytest.mark.slow def test_names_appear_somewhere(self) -> None: self.darray.name = "testvar" self.g.map_dataarray(xplt.contourf, "x", "y") for k, ax in zip("abc", self.g.axs.flat, strict=True): assert f"z = {k}" == ax.get_title() alltxt = text_in_fig() assert self.darray.name in alltxt for label in ["x", "y"]: assert label in alltxt @pytest.mark.slow def test_text_not_super_long(self) -> None: self.darray.coords["z"] = [100 * letter for letter in "abc"] g = xplt.FacetGrid(self.darray, col="z") g.map_dataarray(xplt.contour, "x", "y") alltxt = text_in_fig() maxlen = max(len(txt) for txt in alltxt) assert maxlen < 50 t0 = g.axs[0, 0].get_title() assert t0.endswith("...") @pytest.mark.slow def test_colorbar(self) -> None: vmin = self.darray.values.min() vmax = self.darray.values.max() expected = np.array((vmin, vmax)) self.g.map_dataarray(xplt.imshow, "x", "y") for image in plt.gcf().findobj(mpl.image.AxesImage): assert isinstance(image, mpl.image.AxesImage) clim = np.array(image.get_clim()) assert np.allclose(expected, clim) assert 1 == len(find_possible_colorbars()) def test_colorbar_scatter(self) -> None: ds = Dataset({"a": (("x", "y"), np.arange(4).reshape(2, 2))}) fg: xplt.FacetGrid = ds.plot.scatter(x="a", y="a", row="x", hue="a") cbar = fg.cbar assert cbar is not None assert hasattr(cbar, "vmin") assert cbar.vmin == 0 assert hasattr(cbar, "vmax") assert cbar.vmax == 3 @pytest.mark.slow def test_empty_cell(self) -> None: g = xplt.FacetGrid(self.darray, col="z", col_wrap=2) g.map_dataarray(xplt.imshow, "x", "y") bottomright = g.axs[-1, -1] assert not bottomright.has_data() assert not bottomright.get_visible() @pytest.mark.slow def test_norow_nocol_error(self) -> None: with pytest.raises(ValueError, match=r"[Rr]ow"): xplt.FacetGrid(self.darray) @pytest.mark.slow def test_groups(self) -> None: self.g.map_dataarray(xplt.imshow, "x", "y") upperleft_dict = self.g.name_dicts[0, 0] upperleft_array = self.darray.loc[upperleft_dict] z0 = self.darray.isel(z=0) assert_equal(upperleft_array, z0) @pytest.mark.slow def test_float_index(self) -> None: self.darray.coords["z"] = [0.1, 0.2, 0.4] g = xplt.FacetGrid(self.darray, col="z") g.map_dataarray(xplt.imshow, "x", "y") @pytest.mark.slow def test_nonunique_index_error(self) -> None: self.darray.coords["z"] = [0.1, 0.2, 0.2] with pytest.raises(ValueError, match=r"[Uu]nique"): xplt.FacetGrid(self.darray, col="z") @pytest.mark.slow def test_robust(self) -> None: z = np.zeros((20, 20, 2)) darray = DataArray(z, dims=["y", "x", "z"]) darray[:, :, 1] = 1 darray[2, 0, 0] = -1000 darray[3, 0, 0] = 1000 g = xplt.FacetGrid(darray, col="z") g.map_dataarray(xplt.imshow, "x", "y", robust=True) # Color limits should be 0, 1 # The largest number displayed in the figure should be less than 21 numbers = set() alltxt = text_in_fig() for txt in alltxt: with contextlib.suppress(ValueError): numbers.add(float(txt)) largest = max(abs(x) for x in numbers) assert largest < 21 @pytest.mark.slow def test_can_set_vmin_vmax(self) -> None: vmin, vmax = 50.0, 1000.0 expected = np.array((vmin, vmax)) self.g.map_dataarray(xplt.imshow, "x", "y", vmin=vmin, vmax=vmax) for image in plt.gcf().findobj(mpl.image.AxesImage): assert isinstance(image, mpl.image.AxesImage) clim = 
np.array(image.get_clim()) assert np.allclose(expected, clim) @pytest.mark.slow def test_vmin_vmax_equal(self) -> None: # regression test for GH3734 fg = self.g.map_dataarray(xplt.imshow, "x", "y", vmin=50, vmax=50) for mappable in fg._mappables: assert mappable.norm.vmin != mappable.norm.vmax @pytest.mark.slow @pytest.mark.filterwarnings("ignore") def test_can_set_norm(self) -> None: norm = mpl.colors.SymLogNorm(0.1) self.g.map_dataarray(xplt.imshow, "x", "y", norm=norm) for image in plt.gcf().findobj(mpl.image.AxesImage): assert isinstance(image, mpl.image.AxesImage) assert image.norm is norm @pytest.mark.slow def test_figure_size(self) -> None: assert_array_equal(self.g.fig.get_size_inches(), (10, 3)) g = xplt.FacetGrid(self.darray, col="z", size=6) assert_array_equal(g.fig.get_size_inches(), (19, 6)) g = self.darray.plot.imshow(col="z", size=6) assert_array_equal(g.fig.get_size_inches(), (19, 6)) g = xplt.FacetGrid(self.darray, col="z", size=4, aspect=0.5) assert_array_equal(g.fig.get_size_inches(), (7, 4)) g = xplt.FacetGrid(self.darray, col="z", figsize=(9, 4)) assert_array_equal(g.fig.get_size_inches(), (9, 4)) with pytest.raises(ValueError, match=r"cannot provide both"): g = xplt.plot(self.darray, row=2, col="z", figsize=(6, 4), size=6) with pytest.raises(ValueError, match=r"Can't use"): g = xplt.plot(self.darray, row=2, col="z", ax=plt.gca(), size=6) @pytest.mark.slow def test_num_ticks(self) -> None: nticks = 99 maxticks = nticks + 1 self.g.map_dataarray(xplt.imshow, "x", "y") self.g.set_ticks(max_xticks=nticks, max_yticks=nticks) for ax in self.g.axs.flat: xticks = len(ax.get_xticks()) yticks = len(ax.get_yticks()) assert xticks <= maxticks assert yticks <= maxticks assert xticks >= nticks / 2.0 assert yticks >= nticks / 2.0 @pytest.mark.slow def test_map(self) -> None: assert self.g._finalized is False self.g.map(plt.contourf, "x", "y", ...) assert self.g._finalized is True self.g.map(lambda: None) @pytest.mark.slow def test_map_dataset(self) -> None: g = xplt.FacetGrid(self.darray.to_dataset(name="foo"), col="z") g.map(plt.contourf, "x", "y", "foo") alltxt = text_in_fig() for label in ["x", "y"]: assert label in alltxt # everything has a label assert "None" not in alltxt # colorbar can't be inferred automatically assert "foo" not in alltxt assert 0 == len(find_possible_colorbars()) g.add_colorbar(label="colors!") assert "colors!" 
in text_in_fig() assert 1 == len(find_possible_colorbars()) @pytest.mark.slow def test_set_axis_labels(self) -> None: g = self.g.map_dataarray(xplt.contourf, "x", "y") g.set_axis_labels("longitude", "latitude") alltxt = text_in_fig() for label in ["longitude", "latitude"]: assert label in alltxt @pytest.mark.slow def test_facetgrid_colorbar(self) -> None: a = easy_array((10, 15, 4)) d = DataArray(a, dims=["y", "x", "z"], name="foo") d.plot.imshow(x="x", y="y", col="z") assert 1 == len(find_possible_colorbars()) d.plot.imshow(x="x", y="y", col="z", add_colorbar=True) assert 1 == len(find_possible_colorbars()) d.plot.imshow(x="x", y="y", col="z", add_colorbar=False) assert 0 == len(find_possible_colorbars()) @pytest.mark.slow def test_facetgrid_polar(self) -> None: # test if polar projection in FacetGrid does not raise an exception self.darray.plot.pcolormesh( col="z", subplot_kws=dict(projection="polar"), sharex=False, sharey=False ) @pytest.mark.slow def test_units_appear_somewhere(self) -> None: # assign coordinates to all dims so we can test for units darray = self.darray.assign_coords( {"x": np.arange(self.darray.x.size), "y": np.arange(self.darray.y.size)} ) darray.x.attrs["units"] = "x_unit" darray.y.attrs["units"] = "y_unit" g = xplt.FacetGrid(darray, col="z") g.map_dataarray(xplt.contourf, "x", "y") alltxt = text_in_fig() # unit should appear as e.g. 'x [x_unit]' for unit_name in ["x_unit", "y_unit"]: assert unit_name in "".join(alltxt) @pytest.mark.filterwarnings("ignore:tight_layout cannot") class TestFacetGrid4d(PlotTestCase): @pytest.fixture(autouse=True) def setUp(self) -> None: a = easy_array((10, 15, 3, 2)) darray = DataArray(a, dims=["y", "x", "col", "row"]) darray.coords["col"] = np.array( ["col" + str(x) for x in darray.coords["col"].values] ) darray.coords["row"] = np.array( ["row" + str(x) for x in darray.coords["row"].values] ) self.darray = darray def test_title_kwargs(self) -> None: g = xplt.FacetGrid(self.darray, col="col", row="row") g.set_titles(template="{value}", weight="bold") # Rightmost column titles should be bold for label, ax in zip( self.darray.coords["row"].values, g.axs[:, -1], strict=True ): assert property_in_axes_text("weight", "bold", label, ax) # Top row titles should be bold for label, ax in zip( self.darray.coords["col"].values, g.axs[0, :], strict=True ): assert property_in_axes_text("weight", "bold", label, ax) @pytest.mark.slow def test_default_labels(self) -> None: g = xplt.FacetGrid(self.darray, col="col", row="row") assert (2, 3) == g.axs.shape g.map_dataarray(xplt.imshow, "x", "y") # Rightmost column should be labeled for label, ax in zip( self.darray.coords["row"].values, g.axs[:, -1], strict=True ): assert substring_in_axes(label, ax) # Top row should be labeled for label, ax in zip( self.darray.coords["col"].values, g.axs[0, :], strict=True ): assert substring_in_axes(label, ax) # ensure that row & col labels can be changed g.set_titles("abc={value}") for label, ax in zip( self.darray.coords["row"].values, g.axs[:, -1], strict=True ): assert substring_in_axes(f"abc={label}", ax) # previous labels were "row=row0" etc. assert substring_not_in_axes("row=", ax) for label, ax in zip( self.darray.coords["col"].values, g.axs[0, :], strict=True ): assert substring_in_axes(f"abc={label}", ax) # previous labels were "col=row0" etc. 
assert substring_not_in_axes("col=", ax) @pytest.mark.filterwarnings("ignore:tight_layout cannot") class TestFacetedLinePlotsLegend(PlotTestCase): @pytest.fixture(autouse=True) def setUp(self) -> None: self.darray = xr.tutorial.scatter_example_dataset() def test_legend_labels(self) -> None: fg = self.darray.A.plot.line(col="x", row="w", hue="z") all_legend_labels = [t.get_text() for t in fg.figlegend.texts] # labels in legend should be ['0', '1', '2', '3'] assert sorted(all_legend_labels) == ["0", "1", "2", "3"] @pytest.mark.filterwarnings("ignore:tight_layout cannot") class TestFacetedLinePlots(PlotTestCase): @pytest.fixture(autouse=True) def setUp(self) -> None: self.darray = DataArray( np.random.randn(10, 6, 3, 4), dims=["hue", "x", "col", "row"], coords=[range(10), range(6), range(3), ["A", "B", "C", "C++"]], name="Cornelius Ortega the 1st", ) self.darray.hue.name = "huename" self.darray.hue.attrs["units"] = "hunits" self.darray.x.attrs["units"] = "xunits" self.darray.col.attrs["units"] = "colunits" self.darray.row.attrs["units"] = "rowunits" def test_facetgrid_shape(self) -> None: g = self.darray.plot(row="row", col="col", hue="hue") # type: ignore[call-arg] assert g.axs.shape == (len(self.darray.row), len(self.darray.col)) g = self.darray.plot(row="col", col="row", hue="hue") # type: ignore[call-arg] assert g.axs.shape == (len(self.darray.col), len(self.darray.row)) def test_unnamed_args(self) -> None: g = self.darray.plot.line("o--", row="row", col="col", hue="hue") lines = [ q for q in g.axs.flat[0].get_children() if isinstance(q, mpl.lines.Line2D) ] # passing 'o--' as argument should set marker and linestyle assert lines[0].get_marker() == "o" assert lines[0].get_linestyle() == "--" def test_default_labels(self) -> None: g = self.darray.plot(row="row", col="col", hue="hue") # type: ignore[call-arg] # Rightmost column should be labeled for label, ax in zip( self.darray.coords["row"].values, g.axs[:, -1], strict=True ): assert substring_in_axes(label, ax) # Top row should be labeled for label, ax in zip( self.darray.coords["col"].values, g.axs[0, :], strict=True ): assert substring_in_axes(str(label), ax) # Leftmost column should have array name for ax in g.axs[:, 0]: assert substring_in_axes(str(self.darray.name), ax) def test_test_empty_cell(self) -> None: g = ( self.darray.isel(row=1) # type: ignore[call-arg] .drop_vars("row") .plot(col="col", hue="hue", col_wrap=2) ) bottomright = g.axs[-1, -1] assert not bottomright.has_data() assert not bottomright.get_visible() def test_set_axis_labels(self) -> None: g = self.darray.plot(row="row", col="col", hue="hue") # type: ignore[call-arg] g.set_axis_labels("longitude", "latitude") alltxt = text_in_fig() assert "longitude" in alltxt assert "latitude" in alltxt def test_axes_in_faceted_plot(self) -> None: with pytest.raises(ValueError): self.darray.plot.line(row="row", col="col", x="x", ax=plt.axes()) def test_figsize_and_size(self) -> None: with pytest.raises(ValueError): self.darray.plot.line(row="row", col="col", x="x", size=3, figsize=(4, 3)) def test_wrong_num_of_dimensions(self) -> None: with pytest.raises(ValueError): self.darray.plot(row="row", hue="hue") # type: ignore[call-arg] self.darray.plot.line(row="row", hue="hue") @requires_matplotlib class TestDatasetQuiverPlots(PlotTestCase): @pytest.fixture(autouse=True) def setUp(self) -> None: das = [ DataArray( np.random.randn(3, 3, 4, 4), dims=["x", "y", "row", "col"], coords=[range(k) for k in [3, 3, 4, 4]], ) for _ in [1, 2] ] ds = Dataset({"u": das[0], "v": das[1]}) 
ds.x.attrs["units"] = "xunits" ds.y.attrs["units"] = "yunits" ds.col.attrs["units"] = "colunits" ds.row.attrs["units"] = "rowunits" ds.u.attrs["units"] = "uunits" ds.v.attrs["units"] = "vunits" ds["mag"] = np.hypot(ds.u, ds.v) self.ds = ds def test_quiver(self) -> None: with figure_context(): hdl = self.ds.isel(row=0, col=0).plot.quiver(x="x", y="y", u="u", v="v") assert isinstance(hdl, mpl.quiver.Quiver) with pytest.raises(ValueError, match=r"specify x, y, u, v"): self.ds.isel(row=0, col=0).plot.quiver(x="x", y="y", u="u") with pytest.raises(ValueError, match=r"hue_style"): self.ds.isel(row=0, col=0).plot.quiver( x="x", y="y", u="u", v="v", hue="mag", hue_style="discrete" ) def test_facetgrid(self) -> None: with figure_context(): fg = self.ds.plot.quiver( x="x", y="y", u="u", v="v", row="row", col="col", scale=1, hue="mag" ) for handle in fg._mappables: assert isinstance(handle, mpl.quiver.Quiver) assert fg.quiverkey is not None assert "uunits" in fg.quiverkey.text.get_text() with figure_context(): fg = self.ds.plot.quiver( x="x", y="y", u="u", v="v", row="row", col="col", scale=1, hue="mag", add_guide=False, ) assert fg.quiverkey is None with pytest.raises(ValueError, match=r"Please provide scale"): self.ds.plot.quiver(x="x", y="y", u="u", v="v", row="row", col="col") @pytest.mark.parametrize( "add_guide, hue_style, legend, colorbar", [ (None, None, False, True), (False, None, False, False), (True, None, False, True), (True, "continuous", False, True), ], ) def test_add_guide(self, add_guide, hue_style, legend, colorbar) -> None: meta_data = _infer_meta_data( self.ds, x="x", y="y", hue="mag", hue_style=hue_style, add_guide=add_guide, funcname="quiver", ) assert meta_data["add_legend"] is legend assert meta_data["add_colorbar"] is colorbar @requires_matplotlib class TestDatasetStreamplotPlots(PlotTestCase): @pytest.fixture(autouse=True) def setUp(self) -> None: das = [ DataArray( np.random.randn(3, 4, 2, 2), dims=["x", "y", "row", "col"], coords=[range(k) for k in [3, 4, 2, 2]], ) for _ in [1, 2] ] ds = Dataset({"u": das[0], "v": das[1]}) ds.x.attrs["units"] = "xunits" ds.y.attrs["units"] = "yunits" ds.col.attrs["units"] = "colunits" ds.row.attrs["units"] = "rowunits" ds.u.attrs["units"] = "uunits" ds.v.attrs["units"] = "vunits" ds["mag"] = np.hypot(ds.u, ds.v) self.ds = ds def test_streamline(self) -> None: with figure_context(): hdl = self.ds.isel(row=0, col=0).plot.streamplot(x="x", y="y", u="u", v="v") assert isinstance(hdl, mpl.collections.LineCollection) with pytest.raises(ValueError, match=r"specify x, y, u, v"): self.ds.isel(row=0, col=0).plot.streamplot(x="x", y="y", u="u") with pytest.raises(ValueError, match=r"hue_style"): self.ds.isel(row=0, col=0).plot.streamplot( x="x", y="y", u="u", v="v", hue="mag", hue_style="discrete" ) def test_facetgrid(self) -> None: with figure_context(): fg = self.ds.plot.streamplot( x="x", y="y", u="u", v="v", row="row", col="col", hue="mag" ) for handle in fg._mappables: assert isinstance(handle, mpl.collections.LineCollection) with figure_context(): fg = self.ds.plot.streamplot( x="x", y="y", u="u", v="v", row="row", col="col", hue="mag", add_guide=False, ) @requires_matplotlib class TestDatasetScatterPlots(PlotTestCase): @pytest.fixture(autouse=True) def setUp(self) -> None: das = [ DataArray( np.random.randn(3, 3, 4, 4), dims=["x", "row", "col", "hue"], coords=[range(k) for k in [3, 3, 4, 4]], ) for _ in [1, 2] ] ds = Dataset({"A": das[0], "B": das[1]}) ds.hue.name = "huename" ds.hue.attrs["units"] = "hunits" ds.x.attrs["units"] = "xunits" 
ds.col.attrs["units"] = "colunits" ds.row.attrs["units"] = "rowunits" ds.A.attrs["units"] = "Aunits" ds.B.attrs["units"] = "Bunits" self.ds = ds def test_accessor(self) -> None: from xarray.plot.accessor import DatasetPlotAccessor assert Dataset.plot is DatasetPlotAccessor assert isinstance(self.ds.plot, DatasetPlotAccessor) @pytest.mark.parametrize( "add_guide, hue_style, legend, colorbar", [ (None, None, False, True), (False, None, False, False), (True, None, False, True), (True, "continuous", False, True), (False, "discrete", False, False), (True, "discrete", True, False), ], ) def test_add_guide( self, add_guide: bool | None, hue_style: Literal["continuous", "discrete"] | None, legend: bool, colorbar: bool, ) -> None: meta_data = _infer_meta_data( self.ds, x="A", y="B", hue="hue", hue_style=hue_style, add_guide=add_guide, funcname="scatter", ) assert meta_data["add_legend"] is legend assert meta_data["add_colorbar"] is colorbar def test_facetgrid_shape(self) -> None: g = self.ds.plot.scatter(x="A", y="B", row="row", col="col") assert g.axs.shape == (len(self.ds.row), len(self.ds.col)) g = self.ds.plot.scatter(x="A", y="B", row="col", col="row") assert g.axs.shape == (len(self.ds.col), len(self.ds.row)) def test_default_labels(self) -> None: g = self.ds.plot.scatter(x="A", y="B", row="row", col="col", hue="hue") # Top row should be labeled for label, ax in zip(self.ds.coords["col"].values, g.axs[0, :], strict=True): assert substring_in_axes(str(label), ax) # Bottom row should have name of x array name and units for ax in g.axs[-1, :]: assert ax.get_xlabel() == "A [Aunits]" # Leftmost column should have name of y array name and units for ax in g.axs[:, 0]: assert ax.get_ylabel() == "B [Bunits]" def test_axes_in_faceted_plot(self) -> None: with pytest.raises(ValueError): self.ds.plot.scatter(x="A", y="B", row="row", ax=plt.axes()) def test_figsize_and_size(self) -> None: with pytest.raises(ValueError): self.ds.plot.scatter(x="A", y="B", row="row", size=3, figsize=(4, 3)) @pytest.mark.parametrize( "x, y, hue, add_legend, add_colorbar, error_type", [ pytest.param( "A", "The Spanish Inquisition", None, None, None, KeyError, id="bad_y" ), pytest.param( "The Spanish Inquisition", "B", None, None, True, ValueError, id="bad_x" ), ], ) def test_bad_args( self, x: Hashable, y: Hashable, hue: Hashable | None, add_legend: bool | None, add_colorbar: bool | None, error_type: type[Exception], ) -> None: with pytest.raises(error_type): self.ds.plot.scatter( x=x, y=y, hue=hue, add_legend=add_legend, add_colorbar=add_colorbar ) def test_datetime_hue(self) -> None: ds2 = self.ds.copy() # TODO: Currently plots as categorical, should it behave as numerical? ds2["hue"] = pd.date_range("2000-1-1", periods=4) ds2.plot.scatter(x="A", y="B", hue="hue") ds2["hue"] = pd.timedelta_range("-1D", periods=4, freq="D", unit="ns") # type: ignore[call-arg,unused-ignore] ds2.plot.scatter(x="A", y="B", hue="hue") def test_facetgrid_hue_style(self) -> None: ds2 = self.ds.copy() # Numbers plots as continuous: g = ds2.plot.scatter(x="A", y="B", row="row", col="col", hue="hue") assert isinstance(g._mappables[-1], mpl.collections.PathCollection) # Datetimes plots as categorical: # TODO: Currently plots as categorical, should it behave as numerical? 
ds2["hue"] = pd.date_range("2000-1-1", periods=4) g = ds2.plot.scatter(x="A", y="B", row="row", col="col", hue="hue") assert isinstance(g._mappables[-1], mpl.collections.PathCollection) # Strings plots as categorical: ds2["hue"] = ["a", "a", "b", "b"] g = ds2.plot.scatter(x="A", y="B", row="row", col="col", hue="hue") assert isinstance(g._mappables[-1], mpl.collections.PathCollection) @pytest.mark.parametrize( ["x", "y", "hue", "markersize"], [("A", "B", "x", "col"), ("x", "row", "A", "B")], ) def test_scatter( self, x: Hashable, y: Hashable, hue: Hashable, markersize: Hashable ) -> None: self.ds.plot.scatter(x=x, y=y, hue=hue, markersize=markersize) with pytest.raises(ValueError, match=r"u, v"): self.ds.plot.scatter(x=x, y=y, u="col", v="row") def test_non_numeric_legend(self) -> None: ds2 = self.ds.copy() ds2["hue"] = ["a", "b", "c", "d"] pc = ds2.plot.scatter(x="A", y="B", markersize="hue") axes = pc.axes assert axes is not None # should make a discrete legend assert hasattr(axes, "legend_") assert axes.legend_ is not None def test_legend_labels(self) -> None: # regression test for #4126: incorrect legend labels ds2 = self.ds.copy() ds2["hue"] = ["a", "a", "b", "b"] pc = ds2.plot.scatter(x="A", y="B", markersize="hue") axes = pc.axes assert axes is not None legend = axes.get_legend() assert legend is not None actual = [t.get_text() for t in legend.texts] expected = ["hue", "a", "b"] assert actual == expected def test_legend_labels_facetgrid(self) -> None: ds2 = self.ds.copy() ds2["hue"] = ["d", "a", "c", "b"] g = ds2.plot.scatter(x="A", y="B", hue="hue", markersize="x", col="col") legend = g.figlegend assert legend is not None actual = tuple(t.get_text() for t in legend.texts) expected = ( "x [xunits]", "$\\mathdefault{0}$", "$\\mathdefault{1}$", "$\\mathdefault{2}$", ) assert actual == expected def test_add_legend_by_default(self) -> None: sc = self.ds.plot.scatter(x="A", y="B", hue="hue") fig = sc.figure assert fig is not None assert len(fig.axes) == 2 class TestDatetimePlot(PlotTestCase): @pytest.fixture(autouse=True) def setUp(self) -> None: """ Create a DataArray with a time-axis that contains datetime objects. 
""" month = np.arange(1, 13, 1) data = np.sin(2 * np.pi * month / 12.0) times = pd.date_range(start="2017-01-01", freq="MS", periods=12) darray = DataArray(data, dims=["time"], coords=[times]) self.darray = darray def test_datetime_line_plot(self) -> None: # test if line plot raises no Exception self.darray.plot.line() def test_datetime_units(self) -> None: # test that matplotlib-native datetime works: _fig, ax = plt.subplots() ax.plot(self.darray["time"], self.darray) # Make sure only mpl converters are used, use type() so only # mpl.dates.AutoDateLocator passes and no other subclasses: assert type(ax.xaxis.get_major_locator()) is mpl.dates.AutoDateLocator def test_datetime_plot1d(self) -> None: # Test that matplotlib-native datetime works: p = self.darray.plot.line() ax = p[0].axes # Make sure only mpl converters are used, use type() so only # mpl.dates.AutoDateLocator passes and no other subclasses: assert type(ax.xaxis.get_major_locator()) is mpl.dates.AutoDateLocator def test_datetime_plot2d(self) -> None: # Test that matplotlib-native datetime works: da = DataArray( np.arange(3 * 4).reshape(3, 4), dims=("x", "y"), coords={ "x": [1, 2, 3], "y": [np.datetime64(f"2000-01-{x:02d}") for x in range(1, 5)], }, ) p = da.plot.pcolormesh() ax = p.axes assert ax is not None # Make sure only mpl converters are used, use type() so only # mpl.dates.AutoDateLocator passes and no other subclasses: assert type(ax.xaxis.get_major_locator()) is mpl.dates.AutoDateLocator @pytest.mark.filterwarnings("ignore:setting an array element with a sequence") @requires_cftime @pytest.mark.skipif(not has_nc_time_axis, reason="nc_time_axis is not installed") class TestCFDatetimePlot(PlotTestCase): @pytest.fixture(autouse=True) def setUp(self) -> None: """ Create a DataArray with a time-axis that contains cftime.datetime objects. """ # case for 1d array data = np.random.rand(4, 12) time = xr.date_range( start="2017", periods=12, freq="1ME", calendar="noleap", use_cftime=True ) darray = DataArray(data, dims=["x", "time"]) darray.coords["time"] = time self.darray = darray def test_cfdatetime_line_plot(self) -> None: self.darray.isel(x=0).plot.line() def test_cfdatetime_pcolormesh_plot(self) -> None: self.darray.plot.pcolormesh() def test_cfdatetime_contour_plot(self) -> None: self.darray.plot.contour() @requires_cftime @pytest.mark.skipif(has_nc_time_axis, reason="nc_time_axis is installed") class TestNcAxisNotInstalled(PlotTestCase): @pytest.fixture(autouse=True) def setUp(self) -> None: """ Create a DataArray with a time-axis that contains cftime.datetime objects. 
""" month = np.arange(1, 13, 1) data = np.sin(2 * np.pi * month / 12.0) darray = DataArray(data, dims=["time"]) darray.coords["time"] = xr.date_range( start="2017", periods=12, freq="1ME", calendar="noleap", use_cftime=True ) self.darray = darray def test_ncaxis_notinstalled_line_plot(self) -> None: with pytest.raises(ImportError, match=r"optional `nc-time-axis`"): self.darray.plot.line() @requires_matplotlib class TestAxesKwargs: @pytest.fixture(params=[1, 2, 3]) def data_array(self, request) -> DataArray: """ Return a simple DataArray """ dims = request.param if dims == 1: return DataArray(easy_array((10,))) elif dims == 2: return DataArray(easy_array((10, 3))) elif dims == 3: return DataArray(easy_array((10, 3, 2))) else: raise ValueError(f"No DataArray implemented for {dims=}.") @pytest.fixture(params=[1, 2]) def data_array_logspaced(self, request) -> DataArray: """ Return a simple DataArray with logspaced coordinates """ dims = request.param if dims == 1: return DataArray( np.arange(7), dims=("x",), coords={"x": np.logspace(-3, 3, 7)} ) elif dims == 2: return DataArray( np.arange(16).reshape(4, 4), dims=("y", "x"), coords={"x": np.logspace(-1, 2, 4), "y": np.logspace(-5, -1, 4)}, ) else: raise ValueError(f"No DataArray implemented for {dims=}.") @pytest.mark.parametrize("xincrease", [True, False]) def test_xincrease_kwarg(self, data_array, xincrease) -> None: with figure_context(): data_array.plot(xincrease=xincrease) assert plt.gca().xaxis_inverted() == (not xincrease) @pytest.mark.parametrize("yincrease", [True, False]) def test_yincrease_kwarg(self, data_array, yincrease) -> None: with figure_context(): data_array.plot(yincrease=yincrease) assert plt.gca().yaxis_inverted() == (not yincrease) @pytest.mark.parametrize("xscale", ["linear", "logit", "symlog"]) def test_xscale_kwarg(self, data_array, xscale) -> None: with figure_context(): data_array.plot(xscale=xscale) assert plt.gca().get_xscale() == xscale @pytest.mark.parametrize("yscale", ["linear", "logit", "symlog"]) def test_yscale_kwarg(self, data_array, yscale) -> None: with figure_context(): data_array.plot(yscale=yscale) assert plt.gca().get_yscale() == yscale def test_xscale_log_kwarg(self, data_array_logspaced) -> None: xscale = "log" with figure_context(): data_array_logspaced.plot(xscale=xscale) assert plt.gca().get_xscale() == xscale def test_yscale_log_kwarg(self, data_array_logspaced) -> None: yscale = "log" with figure_context(): data_array_logspaced.plot(yscale=yscale) assert plt.gca().get_yscale() == yscale def test_xlim_kwarg(self, data_array) -> None: with figure_context(): expected = (0.0, 1000.0) data_array.plot(xlim=[0, 1000]) assert plt.gca().get_xlim() == expected def test_ylim_kwarg(self, data_array) -> None: with figure_context(): data_array.plot(ylim=[0, 1000]) expected = (0.0, 1000.0) assert plt.gca().get_ylim() == expected def test_xticks_kwarg(self, data_array) -> None: with figure_context(): data_array.plot(xticks=np.arange(5)) expected = np.arange(5).tolist() assert_array_equal(plt.gca().get_xticks(), expected) def test_yticks_kwarg(self, data_array) -> None: with figure_context(): data_array.plot(yticks=np.arange(5)) expected = np.arange(5) assert_array_equal(plt.gca().get_yticks(), expected) @requires_matplotlib @pytest.mark.parametrize("plotfunc", ["pcolormesh", "contourf", "contour"]) def test_plot_transposed_nondim_coord(plotfunc) -> None: x = np.linspace(0, 10, 101) h = np.linspace(3, 7, 101) s = np.linspace(0, 1, 51) z = s[:, np.newaxis] * h[np.newaxis, :] da = xr.DataArray( np.sin(x) * 
np.cos(z), dims=["s", "x"], coords={"x": x, "s": s, "z": (("s", "x"), z), "zt": (("x", "s"), z.T)}, ) with figure_context(): getattr(da.plot, plotfunc)(x="x", y="zt") with figure_context(): getattr(da.plot, plotfunc)(x="zt", y="x") @requires_matplotlib @pytest.mark.parametrize("plotfunc", ["pcolormesh", "imshow"]) def test_plot_transposes_properly(plotfunc) -> None: # test that we aren't mistakenly transposing when the 2 dimensions have equal sizes. da = xr.DataArray([np.sin(2 * np.pi / 10 * np.arange(10))] * 10, dims=("y", "x")) with figure_context(): hdl = getattr(da.plot, plotfunc)(x="x", y="y") # get_array doesn't work for contour, contourf. It returns the colormap intervals. # pcolormesh returns 1D array but imshow returns a 2D array so it is necessary # to ravel() on the LHS assert_array_equal(hdl.get_array().ravel(), da.to_masked_array().ravel()) @requires_matplotlib def test_facetgrid_single_contour() -> None: # regression test for GH3569 x, y = np.meshgrid(np.arange(12), np.arange(12)) z = xr.DataArray(np.hypot(x, y)) z2 = xr.DataArray(np.hypot(x, y) + 1) ds = xr.concat([z, z2], dim="time") ds["time"] = [0, 1] with figure_context(): ds.plot.contour(col="time", levels=[4], colors=["k"]) @requires_matplotlib def test_get_axis_raises() -> None: # test get_axis raises an error if trying to do invalid things # cannot provide both ax and figsize with pytest.raises(ValueError, match="both `figsize` and `ax`"): get_axis(figsize=[4, 4], size=None, aspect=None, ax="something") # type: ignore[arg-type] # cannot provide both ax and size with pytest.raises(ValueError, match="both `size` and `ax`"): get_axis(figsize=None, size=200, aspect=4 / 3, ax="something") # type: ignore[arg-type] # cannot provide both size and figsize with pytest.raises(ValueError, match="both `figsize` and `size`"): get_axis(figsize=[4, 4], size=200, aspect=None, ax=None) # cannot provide aspect and size with pytest.raises(ValueError, match="`aspect` argument without `size`"): get_axis(figsize=None, size=None, aspect=4 / 3, ax=None) # cannot provide axis and subplot_kws with pytest.raises(ValueError, match="cannot use subplot_kws with existing ax"): get_axis(figsize=None, size=None, aspect=None, ax=1, something_else=5) # type: ignore[arg-type] @requires_matplotlib @pytest.mark.parametrize( ["figsize", "size", "aspect", "ax", "kwargs"], [ pytest.param((3, 2), None, None, False, {}, id="figsize"), pytest.param( (3.5, 2.5), None, None, False, {"label": "test"}, id="figsize_kwargs" ), pytest.param(None, 5, None, False, {}, id="size"), pytest.param(None, 5.5, None, False, {"label": "test"}, id="size_kwargs"), pytest.param(None, 5, 1, False, {}, id="size+aspect"), pytest.param(None, 5, "auto", False, {}, id="auto_aspect"), pytest.param(None, 5, "equal", False, {}, id="equal_aspect"), pytest.param(None, None, None, True, {}, id="ax"), pytest.param(None, None, None, False, {}, id="default"), pytest.param(None, None, None, False, {"label": "test"}, id="default_kwargs"), ], ) def test_get_axis( figsize: tuple[float, float] | None, size: float | None, aspect: float | None, ax: bool, kwargs: dict[str, Any], ) -> None: with figure_context(): inp_ax = plt.axes() if ax else None out_ax = get_axis( figsize=figsize, size=size, aspect=aspect, ax=inp_ax, **kwargs ) assert isinstance(out_ax, mpl.axes.Axes) @requires_matplotlib @requires_cartopy @pytest.mark.parametrize( ["figsize", "size", "aspect"], [ pytest.param((3, 2), None, None, id="figsize"), pytest.param(None, 5, None, id="size"), pytest.param(None, 5, 1, id="size+aspect"), 
pytest.param(None, None, None, id="default"), ], ) def test_get_axis_cartopy( figsize: tuple[float, float] | None, size: float | None, aspect: float | None ) -> None: kwargs = {"projection": cartopy.crs.PlateCarree()} with figure_context(): out_ax = get_axis(figsize=figsize, size=size, aspect=aspect, **kwargs) assert isinstance(out_ax, cartopy.mpl.geoaxes.GeoAxesSubplot) @requires_matplotlib def test_get_axis_current() -> None: with figure_context(): _, ax = plt.subplots() out_ax = get_axis() assert ax is out_ax @requires_matplotlib def test_maybe_gca() -> None: with figure_context(): ax = _maybe_gca(aspect=1) assert isinstance(ax, mpl.axes.Axes) assert ax.get_aspect() == 1 with figure_context(): # create figure without axes plt.figure() ax = _maybe_gca(aspect=1) assert isinstance(ax, mpl.axes.Axes) assert ax.get_aspect() == 1 with figure_context(): existing_axes = plt.axes() ax = _maybe_gca(aspect=1) # reuses the existing axes assert existing_axes == ax # kwargs are ignored when reusing axes assert ax.get_aspect() == "auto" @requires_matplotlib @pytest.mark.parametrize( "x, y, z, hue, markersize, row, col, add_legend, add_colorbar", [ ("A", "B", None, None, None, None, None, None, None), ("B", "A", None, "w", None, None, None, True, None), ("A", "B", None, "y", "x", None, None, True, True), ("A", "B", "z", None, None, None, None, None, None), ("B", "A", "z", "w", None, None, None, True, None), ("A", "B", "z", "y", "x", None, None, True, True), ("A", "B", "z", "y", "x", "w", None, True, True), ], ) def test_datarray_scatter( x, y, z, hue, markersize, row, col, add_legend, add_colorbar ) -> None: """Test datarray scatter. Merge with TestPlot1D eventually.""" ds = xr.tutorial.scatter_example_dataset() extra_coords = [v for v in [x, hue, markersize] if v is not None] # Base coords: coords = dict(ds.coords) # Add extra coords to the DataArray: coords.update({v: ds[v] for v in extra_coords}) darray = xr.DataArray(ds[y], coords=coords) with figure_context(): darray.plot.scatter( x=x, z=z, hue=hue, markersize=markersize, add_legend=add_legend, add_colorbar=add_colorbar, ) @requires_dask @requires_matplotlib @pytest.mark.parametrize( "plotfunc", ["scatter"], ) def test_dataarray_not_loading_inplace(plotfunc: str) -> None: ds = xr.tutorial.scatter_example_dataset() ds = ds.chunk() with figure_context(): getattr(ds.A.plot, plotfunc)(x="x") from dask.array import Array assert isinstance(ds.A.data, Array) @requires_matplotlib def test_assert_valid_xy() -> None: ds = xr.tutorial.scatter_example_dataset() darray = ds.A # x is valid and should not error: _assert_valid_xy(darray=darray, xy="x", name="x") # None should be valid as well even though it isn't in the valid list: _assert_valid_xy(darray=darray, xy=None, name="x") # A hashable that is not valid should error: with pytest.raises(ValueError, match="x must be one of"): _assert_valid_xy(darray=darray, xy="error_now", name="x") @requires_matplotlib @pytest.mark.parametrize( "val", [pytest.param([], id="empty"), pytest.param(0, id="scalar")] ) @pytest.mark.parametrize( "method", [ "__call__", "line", "step", "contour", "contourf", "hist", "imshow", "pcolormesh", "scatter", "surface", ], ) def test_plot_empty_raises(val: list | float, method: str) -> None: da = xr.DataArray(val) with pytest.raises(TypeError, match="No numeric data"): getattr(da.plot, method)() @requires_matplotlib def test_facetgrid_axes_raises_deprecation_warning() -> None: with pytest.warns( DeprecationWarning, match=( "self.axes is deprecated since 2022.11 in order to align with " 
"matplotlibs plt.subplots, use self.axs instead." ), ): with figure_context(): ds = xr.tutorial.scatter_example_dataset() g = ds.plot.scatter(x="A", y="B", col="x") _ = g.axes @requires_matplotlib def test_plot1d_default_rcparams() -> None: import matplotlib as mpl ds = xr.tutorial.scatter_example_dataset(seed=42) with figure_context(): # scatter markers should by default have white edgecolor to better # see overlapping markers: _fig, ax = plt.subplots(1, 1) ds.plot.scatter(x="A", y="B", marker="o", ax=ax) actual: np.ndarray = mpl.colors.to_rgba_array("w") expected: np.ndarray = ax.collections[0].get_edgecolor() # type: ignore[assignment] np.testing.assert_allclose(actual, expected) # Facetgrids should have the default value as well: fg = ds.plot.scatter(x="A", y="B", col="x", marker="o") ax = fg.axs.ravel()[0] actual = mpl.colors.to_rgba_array("w") expected = ax.collections[0].get_edgecolor() # type: ignore[assignment,unused-ignore] np.testing.assert_allclose(actual, expected) # scatter should not emit any warnings when using unfilled markers: with assert_no_warnings(): _fig, ax = plt.subplots(1, 1) ds.plot.scatter(x="A", y="B", ax=ax, marker="x") # Prioritize edgecolor argument over default plot1d values: _fig, ax = plt.subplots(1, 1) ds.plot.scatter(x="A", y="B", marker="o", ax=ax, edgecolor="k") actual = mpl.colors.to_rgba_array("k") expected = ax.collections[0].get_edgecolor() # type: ignore[assignment] np.testing.assert_allclose(actual, expected) @requires_matplotlib def test_plot1d_filtered_nulls() -> None: ds = xr.tutorial.scatter_example_dataset(seed=42) y = ds.y.where(ds.y > 0.2) expected = y.notnull().sum().item() with figure_context(): pc = y.plot.scatter() actual = pc.get_offsets().shape[0] assert expected == actual @requires_matplotlib def test_9155() -> None: # A test for types from issue #9155 with figure_context(): data = xr.DataArray([1, 2, 3], dims=["x"]) _fig, ax = plt.subplots(ncols=1, nrows=1) data.plot(ax=ax) # type: ignore[call-arg] @requires_matplotlib def test_temp_dataarray() -> None: from xarray.plot.dataset_plot import _temp_dataarray x = np.arange(1, 4) y = np.arange(4, 6) var1 = np.arange(x.size * y.size).reshape((x.size, y.size)) var2 = np.arange(x.size * y.size).reshape((x.size, y.size)) ds = xr.Dataset( { "var1": (["x", "y"], var1), "var2": (["x", "y"], 2 * var2), "var3": (["x"], 3 * x), }, coords={ "x": x, "y": y, "model": np.arange(7), }, ) # No broadcasting: y_ = "var1" locals_ = {"x": "var2"} da = _temp_dataarray(ds, y_, locals_) assert da.shape == (3, 2) # Broadcast from 1 to 2dim: y_ = "var3" locals_ = {"x": "var1"} da = _temp_dataarray(ds, y_, locals_) assert da.shape == (3, 2) # Ignore non-valid coord kwargs: y_ = "var3" locals_ = dict(x="x", extend="var2") da = _temp_dataarray(ds, y_, locals_) assert da.shape == (3,) python-xarray-2026.01.0/xarray/tests/conftest.py0000664000175000017500000001635315136607163021722 0ustar alastairalastairfrom __future__ import annotations import warnings import numpy as np import pandas as pd import pytest import xarray as xr from xarray import DataArray, Dataset, DataTree from xarray.tests import create_test_data, has_cftime, requires_dask @pytest.fixture(autouse=True) def handle_numpy_1_warnings(): """Handle NumPy 1.x DeprecationWarnings for out-of-bound integer conversions. NumPy 1.x raises DeprecationWarning when converting out-of-bounds values (e.g., 255 to int8), while NumPy 2.x raises OverflowError. This fixture suppresses the warning in NumPy 1.x environments to allow tests to pass. 
""" # Only apply for NumPy < 2.0 if np.__version__.startswith("1."): with warnings.catch_warnings(): warnings.filterwarnings( "ignore", "NumPy will stop allowing conversion of out-of-bound Python integers", DeprecationWarning, ) yield else: yield @pytest.fixture(params=["numpy", pytest.param("dask", marks=requires_dask)]) def backend(request): return request.param @pytest.fixture(params=["numbagg", "bottleneck", None]) def compute_backend(request): if request.param is None: options = dict(use_bottleneck=False, use_numbagg=False) elif request.param == "bottleneck": options = dict(use_bottleneck=True, use_numbagg=False) elif request.param == "numbagg": options = dict(use_bottleneck=False, use_numbagg=True) else: raise ValueError with xr.set_options(**options): yield request.param @pytest.fixture(params=[1]) def ds(request, backend): if request.param == 1: ds = Dataset( dict( z1=(["y", "x"], np.random.randn(2, 8)), z2=(["time", "y"], np.random.randn(10, 2)), ), dict( x=("x", np.linspace(0, 1.0, 8)), time=("time", np.linspace(0, 1.0, 10)), c=("y", ["a", "b"]), y=range(2), ), ) elif request.param == 2: ds = Dataset( dict( z1=(["time", "y"], np.random.randn(10, 2)), z2=(["time"], np.random.randn(10)), z3=(["x", "time"], np.random.randn(8, 10)), ), dict( x=("x", np.linspace(0, 1.0, 8)), time=("time", np.linspace(0, 1.0, 10)), c=("y", ["a", "b"]), y=range(2), ), ) elif request.param == 3: ds = create_test_data() else: raise ValueError if backend == "dask": return ds.chunk() return ds @pytest.fixture(params=[1]) def da(request, backend): if request.param == 1: times = pd.date_range("2000-01-01", freq="1D", periods=21) da = DataArray( np.random.random((3, 21, 4)), dims=("a", "time", "x"), coords=dict(time=times), ) if request.param == 2: da = DataArray([0, np.nan, 1, 2, np.nan, 3, 4, 5, np.nan, 6, 7], dims="time") if request.param == "repeating_ints": da = DataArray( np.tile(np.arange(12), 5).reshape(5, 4, 3), coords={"x": list("abc"), "y": list("defg")}, dims=list("zyx"), ) if backend == "dask": return da.chunk() elif backend == "numpy": return da else: raise ValueError @pytest.fixture( params=[ False, pytest.param( True, marks=pytest.mark.skipif(not has_cftime, reason="no cftime") ), ] ) def use_cftime(request): return request.param @pytest.fixture(params=[Dataset, DataArray]) def type(request): return request.param @pytest.fixture(params=[1]) def d(request, backend, type) -> DataArray | Dataset: """ For tests which can test either a DataArray or a Dataset. 
""" result: DataArray | Dataset if request.param == 1: ds = Dataset( dict( a=(["x", "z"], np.arange(24).reshape(2, 12)), b=(["y", "z"], np.arange(100, 136).reshape(3, 12).astype(np.float64)), ), dict( x=("x", np.linspace(0, 1.0, 2)), y=range(3), z=("z", pd.date_range("2000-01-01", periods=12)), w=("x", ["a", "b"]), ), ) if type == DataArray: result = ds["a"].assign_coords(w=ds.coords["w"]) elif type == Dataset: result = ds else: raise ValueError else: raise ValueError if backend == "dask": return result.chunk() elif backend == "numpy": return result else: raise ValueError @pytest.fixture def byte_attrs_dataset(): """For testing issue #9407""" null_byte = b"\x00" other_bytes = bytes(range(1, 256)) ds = Dataset({"x": 1}, coords={"x_coord": [1]}) ds["x"].attrs["null_byte"] = null_byte ds["x"].attrs["other_bytes"] = other_bytes expected = ds.copy() expected["x"].attrs["null_byte"] = "" expected["x"].attrs["other_bytes"] = other_bytes.decode(errors="replace") return { "input": ds, "expected": expected, "h5netcdf_error": r"Invalid value provided for attribute .*: .*\. Null characters .*", } @pytest.fixture(scope="module") def create_test_datatree(): """ Create a test datatree with this structure: Group: / │ Dimensions: (y: 3, x: 2) │ Dimensions without coordinates: y, x │ Data variables: │ a (y) int64 24B 6 7 8 │ set0 (x) int64 16B 9 10 ├── Group: /set1 │ │ Dimensions: () │ │ Data variables: │ │ a int64 8B 0 │ │ b int64 8B 1 │ ├── Group: /set1/set1 │ └── Group: /set1/set2 ├── Group: /set2 │ │ Dimensions: (x: 2) │ │ Dimensions without coordinates: x │ │ Data variables: │ │ a (x) int64 16B 2 3 │ │ b (x) float64 16B 0.1 0.2 │ └── Group: /set2/set1 └── Group: /set3 The structure has deliberately repeated names of tags, variables, and dimensions in order to better check for bugs caused by name conflicts. """ def _create_test_datatree(modify=lambda ds: ds): set1_data = modify(xr.Dataset({"a": 0, "b": 1})) set2_data = modify(xr.Dataset({"a": ("x", [2, 3]), "b": ("x", [0.1, 0.2])})) root_data = modify(xr.Dataset({"a": ("y", [6, 7, 8]), "set0": ("x", [9, 10])})) root = DataTree.from_dict( { "/": root_data, "/set1": set1_data, "/set1/set1": None, "/set1/set2": None, "/set2": set2_data, "/set2/set1": None, "/set3": None, } ) return root return _create_test_datatree @pytest.fixture(scope="module") def simple_datatree(create_test_datatree): """ Invoke create_test_datatree fixture (callback). Returns a DataTree. 
""" return create_test_datatree() @pytest.fixture(params=["s", "ms", "us", "ns"]) def time_unit(request): return request.param python-xarray-2026.01.0/xarray/tests/test_options.py0000664000175000017500000002033415136607163022621 0ustar alastairalastairfrom __future__ import annotations import re import pytest import xarray from xarray import concat, merge from xarray.backends.file_manager import FILE_CACHE from xarray.core.options import OPTIONS, _get_keep_attrs from xarray.tests.test_dataset import create_test_data def test_invalid_option_raises() -> None: with pytest.raises(ValueError): xarray.set_options(not_a_valid_options=True) def test_display_width() -> None: with pytest.raises(ValueError): xarray.set_options(display_width=0) with pytest.raises(ValueError): xarray.set_options(display_width=-10) with pytest.raises(ValueError): xarray.set_options(display_width=3.5) def test_arithmetic_join() -> None: with pytest.raises(ValueError): xarray.set_options(arithmetic_join="invalid") with xarray.set_options(arithmetic_join="exact"): assert OPTIONS["arithmetic_join"] == "exact" def test_enable_cftimeindex() -> None: with pytest.raises(ValueError): xarray.set_options(enable_cftimeindex=None) with pytest.warns(FutureWarning, match="no-op"): with xarray.set_options(enable_cftimeindex=True): assert OPTIONS["enable_cftimeindex"] def test_file_cache_maxsize() -> None: with pytest.raises(ValueError): xarray.set_options(file_cache_maxsize=0) original_size = FILE_CACHE.maxsize with xarray.set_options(file_cache_maxsize=123): assert FILE_CACHE.maxsize == 123 assert FILE_CACHE.maxsize == original_size def test_keep_attrs() -> None: with pytest.raises(ValueError): xarray.set_options(keep_attrs="invalid_str") with xarray.set_options(keep_attrs=True): assert OPTIONS["keep_attrs"] with xarray.set_options(keep_attrs=False): assert not OPTIONS["keep_attrs"] with xarray.set_options(keep_attrs="default"): assert _get_keep_attrs(default=True) assert not _get_keep_attrs(default=False) def test_nested_options() -> None: original = OPTIONS["display_width"] with xarray.set_options(display_width=1): assert OPTIONS["display_width"] == 1 with xarray.set_options(display_width=2): assert OPTIONS["display_width"] == 2 assert OPTIONS["display_width"] == 1 assert OPTIONS["display_width"] == original def test_netcdf_engine_order() -> None: original = OPTIONS["netcdf_engine_order"] with pytest.raises( ValueError, match=re.escape( "option 'netcdf_engine_order' given an invalid value: ['invalid']. 
" "Expected a subset of ['h5netcdf', 'netcdf4', 'scipy']" ), ): xarray.set_options(netcdf_engine_order=["invalid"]) assert OPTIONS["netcdf_engine_order"] == original def test_display_style() -> None: original = "html" assert OPTIONS["display_style"] == original with pytest.raises(ValueError): xarray.set_options(display_style="invalid_str") with xarray.set_options(display_style="text"): assert OPTIONS["display_style"] == "text" assert OPTIONS["display_style"] == original def create_test_dataset_attrs(seed=0): ds = create_test_data(seed) ds.attrs = {"attr1": 5, "attr2": "history", "attr3": {"nested": "more_info"}} return ds def create_test_dataarray_attrs(seed=0, var="var1"): da = create_test_data(seed)[var] da.attrs = {"attr1": 5, "attr2": "history", "attr3": {"nested": "more_info"}} return da class TestAttrRetention: def test_dataset_attr_retention(self) -> None: # Use .mean() for all tests: a typical reduction operation ds = create_test_dataset_attrs() original_attrs = ds.attrs # Test default behaviour (keeps attrs for reduction operations) result = ds.mean() assert result.attrs == original_attrs with xarray.set_options(keep_attrs="default"): result = ds.mean() assert ( result.attrs == original_attrs ) # "default" uses operation's default which is True for reduce with xarray.set_options(keep_attrs=True): result = ds.mean() assert result.attrs == original_attrs with xarray.set_options(keep_attrs=False): result = ds.mean() assert result.attrs == {} def test_dataarray_attr_retention(self) -> None: # Use .mean() for all tests: a typical reduction operation da = create_test_dataarray_attrs() original_attrs = da.attrs # Test default behaviour (keeps attrs for reduction operations) result = da.mean() assert result.attrs == original_attrs with xarray.set_options(keep_attrs="default"): result = da.mean() assert ( result.attrs == original_attrs ) # "default" uses operation's default which is True for reduce with xarray.set_options(keep_attrs=True): result = da.mean() assert result.attrs == original_attrs with xarray.set_options(keep_attrs=False): result = da.mean() assert result.attrs == {} def test_groupby_attr_retention(self) -> None: da = xarray.DataArray([1, 2, 3], [("x", [1, 1, 2])]) da.attrs = {"attr1": 5, "attr2": "history", "attr3": {"nested": "more_info"}} original_attrs = da.attrs # Test default behaviour result = da.groupby("x").sum(keep_attrs=True) assert result.attrs == original_attrs with xarray.set_options(keep_attrs="default"): result = da.groupby("x").sum(keep_attrs=True) assert result.attrs == original_attrs with xarray.set_options(keep_attrs=True): result1 = da.groupby("x") result = result1.sum() assert result.attrs == original_attrs with xarray.set_options(keep_attrs=False): result = da.groupby("x").sum() assert result.attrs == {} def test_concat_attr_retention(self) -> None: ds1 = create_test_dataset_attrs() ds2 = create_test_dataset_attrs() ds2.attrs = {"wrong": "attributes"} original_attrs = ds1.attrs # Test default behaviour of keeping the attrs of the first # dataset in the supplied list # global keep_attrs option current doesn't affect concat result = concat([ds1, ds2], dim="dim1") assert result.attrs == original_attrs def test_merge_attr_retention(self) -> None: da1 = create_test_dataarray_attrs(var="var1") da2 = create_test_dataarray_attrs(var="var2") da2.attrs = {"wrong": "attributes"} original_attrs = da1.attrs # merge currently discards attrs, and the global keep_attrs # option doesn't affect this result = merge([da1, da2]) assert result.attrs == original_attrs def 
test_display_style_text(self) -> None:
        ds = create_test_dataset_attrs()
        with xarray.set_options(display_style="text"):
            text = ds._repr_html_()
            assert text.startswith("<pre>")
            assert "&#x27;nested&#x27;" in text
            assert "&lt;xarray.Dataset&gt;" in text

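    # With display_style="text", _repr_html_() still returns HTML, but it is a
    # <pre> block wrapping the escaped plain-text repr, hence the
    # &lt;/&gt;/&#x27; entities asserted above. With display_style="html" it
    # returns the rich <div>-based repr instead. Rough illustration:
    #
    #     with xarray.set_options(display_style="text"):
    #         xarray.Dataset({"x": 1})._repr_html_()
    #     # returns something like '<pre>&lt;xarray.Dataset&gt; ...</pre>'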
    def test_display_style_html(self) -> None:
        ds = create_test_dataset_attrs()
        with xarray.set_options(display_style="html"):
            html = ds._repr_html_()
            assert html.startswith("
") assert "'nested'" in html def test_display_dataarray_style_text(self) -> None: da = create_test_dataarray_attrs() with xarray.set_options(display_style="text"): text = da._repr_html_() assert text.startswith("
")
            assert "<xarray.DataArray 'var1'" in text

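    # The HTML repr escapes attribute values, so the nested attr is expected to
    # appear as &#x27;nested&#x27; rather than a literal 'nested' in the output
    # checked below.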
    def test_display_dataarray_style_html(self) -> None:
        da = create_test_dataarray_attrs()
        with xarray.set_options(display_style="html"):
            html = da._repr_html_()
            assert html.startswith("
") assert "#x27;nested'" in html @pytest.mark.parametrize( "set_value", [("left"), ("exact")], ) def test_get_options_retention(set_value): """Test to check if get_options will return changes made by set_options""" with xarray.set_options(arithmetic_join=set_value): get_options = xarray.get_options() assert get_options["arithmetic_join"] == set_value python-xarray-2026.01.0/xarray/tests/test_strategies.py0000664000175000017500000003174115136607163023304 0ustar alastairalastairimport warnings import numpy as np import numpy.testing as npt import pytest from packaging.version import Version pytest.importorskip("hypothesis") # isort: split import hypothesis.extra.numpy as npst import hypothesis.strategies as st from hypothesis import given from hypothesis.extra.array_api import make_strategies_namespace import xarray as xr from xarray import broadcast from xarray.core.options import set_options from xarray.core.variable import Variable from xarray.testing.strategies import ( attrs, basic_indexers, dimension_names, dimension_sizes, outer_array_indexers, supported_dtypes, unique_subset_of, variables, vectorized_indexers, ) ALLOWED_ATTRS_VALUES_TYPES = (int, bool, str, np.ndarray) class TestDimensionNamesStrategy: @given(dimension_names()) def test_types(self, dims): assert isinstance(dims, list) for d in dims: assert isinstance(d, str) @given(dimension_names()) def test_unique(self, dims): assert len(set(dims)) == len(dims) @given(st.data(), st.tuples(st.integers(0, 10), st.integers(0, 10)).map(sorted)) def test_number_of_dims(self, data, ndims): min_dims, max_dims = ndims dim_names = data.draw(dimension_names(min_dims=min_dims, max_dims=max_dims)) assert isinstance(dim_names, list) assert min_dims <= len(dim_names) <= max_dims class TestDimensionSizesStrategy: @given(dimension_sizes()) def test_types(self, dims): assert isinstance(dims, dict) for d, n in dims.items(): assert isinstance(d, str) assert len(d) >= 1 assert isinstance(n, int) assert n >= 0 @given(st.data(), st.tuples(st.integers(0, 10), st.integers(0, 10)).map(sorted)) def test_number_of_dims(self, data, ndims): min_dims, max_dims = ndims dim_sizes = data.draw(dimension_sizes(min_dims=min_dims, max_dims=max_dims)) assert isinstance(dim_sizes, dict) assert min_dims <= len(dim_sizes) <= max_dims @given(st.data()) def test_restrict_names(self, data): capitalized_names = st.text(st.characters(), min_size=1).map(str.upper) dim_sizes = data.draw(dimension_sizes(dim_names=capitalized_names)) for dim in dim_sizes.keys(): assert dim.upper() == dim def check_dict_values(dictionary: dict, allowed_attrs_values_types) -> bool: """Helper function to assert that all values in recursive dict match one of a set of types.""" for value in dictionary.values(): if isinstance(value, allowed_attrs_values_types) or value is None: continue elif isinstance(value, dict): # If the value is a dictionary, recursively check it if not check_dict_values(value, allowed_attrs_values_types): return False else: # If the value is not an integer or a dictionary, it's not valid return False return True class TestAttrsStrategy: @given(attrs()) def test_type(self, attrs): assert isinstance(attrs, dict) check_dict_values(attrs, ALLOWED_ATTRS_VALUES_TYPES) class TestVariablesStrategy: @given(variables()) def test_given_nothing(self, var): assert isinstance(var, Variable) @given(st.data()) def test_given_incorrect_types(self, data): with pytest.raises(TypeError, match="dims must be provided as a"): data.draw(variables(dims=["x", "y"])) # type: ignore[arg-type] with 
pytest.raises(TypeError, match="dtype must be provided as a"): data.draw(variables(dtype=np.dtype("int32"))) # type: ignore[arg-type] with pytest.raises(TypeError, match="attrs must be provided as a"): data.draw(variables(attrs=dict())) # type: ignore[arg-type] with pytest.raises(TypeError, match="Callable"): data.draw(variables(array_strategy_fn=np.array([0]))) # type: ignore[arg-type] @given(st.data(), dimension_names()) def test_given_fixed_dim_names(self, data, fixed_dim_names): var = data.draw(variables(dims=st.just(fixed_dim_names))) assert list(var.dims) == fixed_dim_names @given(st.data(), dimension_sizes()) def test_given_fixed_dim_sizes(self, data, dim_sizes): var = data.draw(variables(dims=st.just(dim_sizes))) assert var.dims == tuple(dim_sizes.keys()) assert var.shape == tuple(dim_sizes.values()) @given(st.data(), supported_dtypes()) def test_given_fixed_dtype(self, data, dtype): var = data.draw(variables(dtype=st.just(dtype))) assert var.dtype == dtype @given(st.data(), npst.arrays(shape=npst.array_shapes(), dtype=supported_dtypes())) def test_given_fixed_data_dims_and_dtype(self, data, arr): def fixed_array_strategy_fn(*, shape=None, dtype=None): """The fact this ignores shape and dtype is only okay because compatible shape & dtype will be passed separately.""" return st.just(arr) dim_names = data.draw(dimension_names(min_dims=arr.ndim, max_dims=arr.ndim)) dim_sizes = dict(zip(dim_names, arr.shape, strict=True)) var = data.draw( variables( array_strategy_fn=fixed_array_strategy_fn, dims=st.just(dim_sizes), dtype=st.just(arr.dtype), ) ) npt.assert_equal(var.data, arr) assert var.dtype == arr.dtype @given(st.data(), st.integers(0, 3)) def test_given_array_strat_arbitrary_size_and_arbitrary_data(self, data, ndims): dim_names = data.draw(dimension_names(min_dims=ndims, max_dims=ndims)) def array_strategy_fn(*, shape=None, dtype=None): return npst.arrays(shape=shape, dtype=dtype) var = data.draw( variables( array_strategy_fn=array_strategy_fn, dims=st.just(dim_names), dtype=supported_dtypes(), ) ) assert var.ndim == ndims @given(st.data()) def test_catch_unruly_dtype_from_custom_array_strategy_fn(self, data): def dodgy_array_strategy_fn(*, shape=None, dtype=None): """Dodgy function which ignores the dtype it was passed""" return npst.arrays(shape=shape, dtype=npst.floating_dtypes()) with pytest.raises( ValueError, match="returned an array object with a different dtype" ): data.draw( variables( array_strategy_fn=dodgy_array_strategy_fn, dtype=st.just(np.dtype("int32")), ) ) @given(st.data()) def test_catch_unruly_shape_from_custom_array_strategy_fn(self, data): def dodgy_array_strategy_fn(*, shape=None, dtype=None): """Dodgy function which ignores the shape it was passed""" return npst.arrays(shape=(3, 2), dtype=dtype) with pytest.raises( ValueError, match="returned an array object with a different shape" ): data.draw( variables( array_strategy_fn=dodgy_array_strategy_fn, dims=st.just({"a": 2, "b": 1}), dtype=supported_dtypes(), ) ) @given(st.data()) def test_make_strategies_namespace(self, data): """ Test not causing a hypothesis.InvalidArgument by generating a dtype that's not in the array API. We still want to generate dtypes not in the array API by default, but this checks we don't accidentally override the user's choice of dtypes with non-API-compliant ones. 
""" if Version(np.__version__) >= Version("2.0.0.dev0"): nxp = np else: # requires numpy>=1.26.0, and we expect a UserWarning to be raised with warnings.catch_warnings(): warnings.filterwarnings( "ignore", category=UserWarning, message=".+See NEP 47." ) from numpy import ( # type: ignore[attr-defined,no-redef,unused-ignore] array_api as nxp, ) nxp_st = make_strategies_namespace(nxp) data.draw( variables( array_strategy_fn=nxp_st.arrays, dtype=nxp_st.scalar_dtypes(), ) ) class TestUniqueSubsetOf: @given(st.data()) def test_invalid(self, data): with pytest.raises(TypeError, match="must be an Iterable or a Mapping"): data.draw(unique_subset_of(0)) # type: ignore[call-overload] with pytest.raises(ValueError, match="length-zero object"): data.draw(unique_subset_of({})) @given(st.data(), dimension_sizes(min_dims=1)) def test_mapping(self, data, dim_sizes): subset_of_dim_sizes = data.draw(unique_subset_of(dim_sizes)) for dim, length in subset_of_dim_sizes.items(): assert dim in dim_sizes assert dim_sizes[dim] == length @given(st.data(), dimension_names(min_dims=1)) def test_iterable(self, data, dim_names): subset_of_dim_names = data.draw(unique_subset_of(dim_names)) for dim in subset_of_dim_names: assert dim in dim_names class TestReduction: """ These tests are for checking that the examples given in the docs page on testing actually work. """ @given(st.data(), variables(dims=dimension_names(min_dims=1))) def test_mean(self, data, var): """ Test that given a Variable of at least one dimension, the mean of the Variable is always equal to the mean of the underlying array. """ with set_options(use_numbagg=False): # specify arbitrary reduction along at least one dimension reduction_dims = data.draw(unique_subset_of(var.dims, min_size=1)) # create expected result (using nanmean because arrays with Nans will be generated) reduction_axes = tuple(var.get_axis_num(dim) for dim in reduction_dims) expected = np.nanmean(var.data, axis=reduction_axes) # assert property is always satisfied result = var.mean(dim=reduction_dims).data npt.assert_equal(expected, result) class TestBasicIndexers: @given(st.data(), dimension_sizes(min_dims=1)) def test_types(self, data, sizes): idxr = data.draw(basic_indexers(sizes=sizes)) assert idxr assert isinstance(idxr, dict) for key, value in idxr.items(): assert key in sizes assert isinstance(value, (int, slice)) @given(st.data(), dimension_sizes(min_dims=2)) def test_min_max_dims(self, data, sizes): min_dims = data.draw(st.integers(min_value=1, max_value=len(sizes))) max_dims = data.draw(st.integers(min_value=min_dims, max_value=len(sizes))) idxr = data.draw( basic_indexers(sizes=sizes, min_dims=min_dims, max_dims=max_dims) ) assert min_dims <= len(idxr) <= max_dims class TestOuterArrayIndexers: @given(st.data(), dimension_sizes(min_dims=1, min_side=1)) def test_types(self, data, sizes): idxr = data.draw(outer_array_indexers(sizes=sizes, min_dims=1)) assert idxr assert isinstance(idxr, dict) for key, value in idxr.items(): assert key in sizes assert isinstance(value, np.ndarray) assert value.dtype == np.int64 assert value.ndim == 1 # Check indices in bounds (negative indices valid) assert np.all((value >= -sizes[key]) & (value < sizes[key])) @given(st.data(), dimension_sizes(min_dims=2, min_side=1)) def test_min_max_dims(self, data, sizes): min_dims = data.draw(st.integers(min_value=1, max_value=len(sizes))) max_dims = data.draw(st.integers(min_value=min_dims, max_value=len(sizes))) idxr = data.draw( outer_array_indexers(sizes=sizes, min_dims=min_dims, max_dims=max_dims) ) 
assert min_dims <= len(idxr) <= max_dims class TestVectorizedIndexers: @given(st.data(), dimension_sizes(min_dims=2, min_side=1)) def test_types(self, data, sizes): idxr = data.draw(vectorized_indexers(sizes=sizes)) assert isinstance(idxr, dict) assert idxr # not empty # All DataArrays should be broadcastable together broadcast(*idxr.values()) for key, value in idxr.items(): assert key in sizes assert isinstance(value, xr.DataArray) assert value.dtype == np.int64 # Check indices in bounds (negative indices valid) assert np.all((value.values >= -sizes[key]) & (value.values < sizes[key])) @given(st.data(), dimension_sizes(min_dims=3, min_side=1)) def test_min_max_dims(self, data, sizes): min_dims = data.draw(st.integers(min_value=2, max_value=len(sizes))) max_dims = data.draw(st.integers(min_value=min_dims, max_value=len(sizes))) idxr = data.draw( vectorized_indexers(sizes=sizes, min_dims=min_dims, max_dims=max_dims) ) assert min_dims <= len(idxr) <= max_dims python-xarray-2026.01.0/xarray/tests/test_ufuncs.py0000664000175000017500000002240015136607163022425 0ustar alastairalastairfrom __future__ import annotations import pickle from unittest.mock import patch import numpy as np import numpy.typing as npt import pytest import xarray as xr import xarray.ufuncs as xu from xarray.tests import assert_allclose, assert_array_equal, mock, requires_dask from xarray.tests import assert_identical as assert_identical_ def assert_identical(a, b): assert type(a) is type(b) or float(a) == float(b) if isinstance(a, xr.DataArray | xr.Dataset | xr.Variable): assert_identical_(a, b) else: assert_array_equal(a, b) @pytest.mark.parametrize( "a", [ xr.Variable(["x"], [0, 0]), xr.DataArray([0, 0], dims="x"), xr.Dataset({"y": ("x", [0, 0])}), ], ) def test_unary(a): assert_allclose(a + 1, np.cos(a)) def test_binary(): args: list[int | float | npt.NDArray | xr.Variable | xr.DataArray | xr.Dataset] = [ 0, np.zeros(2), xr.Variable(["x"], [0, 0]), xr.DataArray([0, 0], dims="x"), xr.Dataset({"y": ("x", [0, 0])}), ] for n, t1 in enumerate(args): for t2 in args[n:]: assert_identical(t2 + 1, np.maximum(t1, t2 + 1)) assert_identical(t2 + 1, np.maximum(t2, t1 + 1)) assert_identical(t2 + 1, np.maximum(t1 + 1, t2)) assert_identical(t2 + 1, np.maximum(t2 + 1, t1)) def test_binary_out(): args: list[int | float | npt.NDArray | xr.Variable | xr.DataArray | xr.Dataset] = [ 1, np.ones(2), xr.Variable(["x"], [1, 1]), xr.DataArray([1, 1], dims="x"), xr.Dataset({"y": ("x", [1, 1])}), ] for arg in args: actual_mantissa, actual_exponent = np.frexp(arg) assert_identical(actual_mantissa, 0.5 * arg) assert_identical(actual_exponent, arg) def test_binary_coord_attrs(): t = xr.Variable("t", np.arange(2, 4), attrs={"units": "s"}) x = xr.DataArray(t.values**2, coords={"t": t}, attrs={"units": "s^2"}) y = xr.DataArray(t.values**3, coords={"t": t}, attrs={"units": "s^3"}) z1 = xr.apply_ufunc(np.add, x, y, keep_attrs=True) assert z1.coords["t"].attrs == {"units": "s"} z2 = xr.apply_ufunc(np.add, x, y, keep_attrs=False) assert z2.coords["t"].attrs == {} # Check also that input array's coordinate attributes weren't affected assert t.attrs == {"units": "s"} assert x.coords["t"].attrs == {"units": "s"} def test_groupby(): ds = xr.Dataset({"a": ("x", [0, 0, 0])}, {"c": ("x", [0, 0, 1])}) ds_grouped = ds.groupby("c") group_mean = ds_grouped.mean("x") arr_grouped = ds["a"].groupby("c") assert_identical(ds, np.maximum(ds_grouped, group_mean)) # type: ignore[call-overload] assert_identical(ds, np.maximum(group_mean, ds_grouped)) # type: 
ignore[call-overload] assert_identical(ds, np.maximum(arr_grouped, group_mean)) # type: ignore[call-overload] assert_identical(ds, np.maximum(group_mean, arr_grouped)) # type: ignore[call-overload] assert_identical(ds, np.maximum(ds_grouped, group_mean["a"])) # type: ignore[call-overload] assert_identical(ds, np.maximum(group_mean["a"], ds_grouped)) # type: ignore[call-overload] assert_identical(ds.a, np.maximum(arr_grouped, group_mean.a)) # type: ignore[call-overload] assert_identical(ds.a, np.maximum(group_mean.a, arr_grouped)) # type: ignore[call-overload] with pytest.raises(ValueError, match=r"mismatched lengths for dimension"): np.maximum(ds.a.variable, ds_grouped) # type: ignore[call-overload] def test_alignment(): ds1 = xr.Dataset({"a": ("x", [1, 2])}, {"x": [0, 1]}) ds2 = xr.Dataset({"a": ("x", [2, 3]), "b": 4}, {"x": [1, 2]}) actual = np.add(ds1, ds2) expected = xr.Dataset({"a": ("x", [4])}, {"x": [1]}) assert_identical_(actual, expected) with xr.set_options(arithmetic_join="outer"): actual = np.add(ds1, ds2) expected = xr.Dataset( {"a": ("x", [np.nan, 4, np.nan]), "b": np.nan}, coords={"x": [0, 1, 2]} ) assert_identical_(actual, expected) def test_kwargs(): x = xr.DataArray(0) result = np.add(x, 1, dtype=np.float64) assert result.dtype == np.float64 def test_xarray_defers_to_unrecognized_type(): class Other: def __array_ufunc__(self, *args, **kwargs): return "other" xarray_obj = xr.DataArray([1, 2, 3]) other = Other() assert np.maximum(xarray_obj, other) == "other" # type: ignore[call-overload] assert np.sin(xarray_obj, out=other) == "other" # type: ignore[call-overload] def test_xarray_handles_dask(): da = pytest.importorskip("dask.array") x = xr.DataArray(np.ones((2, 2)), dims=["x", "y"]) y = da.ones((2, 2), chunks=(2, 2)) result = np.add(x, y) assert result.chunks == ((2,), (2,)) assert isinstance(result, xr.DataArray) def test_dask_defers_to_xarray(): da = pytest.importorskip("dask.array") x = xr.DataArray(np.ones((2, 2)), dims=["x", "y"]) y = da.ones((2, 2), chunks=(2, 2)) result = np.add(y, x) assert result.chunks == ((2,), (2,)) assert isinstance(result, xr.DataArray) def test_gufunc_methods(): xarray_obj = xr.DataArray([1, 2, 3]) with pytest.raises(NotImplementedError, match=r"reduce method"): np.add.reduce(xarray_obj, 1) def test_out(): xarray_obj = xr.DataArray([1, 2, 3]) # xarray out arguments should raise with pytest.raises(NotImplementedError, match=r"`out` argument"): np.add(xarray_obj, 1, out=xarray_obj) # type: ignore[call-overload] # but non-xarray should be OK other = np.zeros((3,)) np.add(other, xarray_obj, out=other) assert_identical(other, np.array([1, 2, 3])) def test_gufuncs(): xarray_obj = xr.DataArray([1, 2, 3]) fake_gufunc = mock.Mock(signature="(n)->()", autospec=np.sin) with pytest.raises(NotImplementedError, match=r"generalized ufuncs"): xarray_obj.__array_ufunc__(fake_gufunc, "__call__", xarray_obj) class DuckArray(np.ndarray): # Minimal subclassed duck array with its own self-contained namespace, # which implements a few ufuncs def __new__(cls, array): obj = np.asarray(array).view(cls) return obj def __array_namespace__(self, *, api_version=None): return DuckArray @staticmethod def sin(x): return np.sin(x) @staticmethod def add(x, y): return x + y class DuckArray2(DuckArray): def __array_namespace__(self, *, api_version=None): return DuckArray2 class TestXarrayUfuncs: @pytest.fixture(autouse=True) def setUp(self): self.x = xr.DataArray([1, 2, 3]) self.xd = xr.DataArray(DuckArray([1, 2, 3])) self.xd2 = xr.DataArray(DuckArray2([1, 2, 3])) self.xt = 
xr.DataArray(np.datetime64("2021-01-01", "ns")) @pytest.mark.filterwarnings("ignore::RuntimeWarning") @pytest.mark.parametrize("name", xu.__all__) def test_ufuncs(self, name, request): xu_func = getattr(xu, name) np_func = getattr(np, name, None) if np_func is None and np.lib.NumpyVersion(np.__version__) < "2.0.0": pytest.skip(f"Ufunc {name} is not available in numpy {np.__version__}.") if name == "isnat": args = (self.xt,) elif hasattr(np_func, "nin") and np_func.nin == 2: # type: ignore[union-attr] args = (self.x, self.x) # type: ignore[assignment] else: args = (self.x,) expected = np_func(*args) # type: ignore[misc] actual = xu_func(*args) if name in ["angle", "iscomplex"]: np.testing.assert_equal(expected, actual.values) else: assert_identical(actual, expected) def test_ufunc_pickle(self): a = 1.0 cos_pickled = pickle.loads(pickle.dumps(xu.cos)) assert_identical(cos_pickled(a), xu.cos(a)) def test_ufunc_scalar(self): actual = xu.sin(1) assert isinstance(actual, float) def test_ufunc_duck_array_dataarray(self): actual = xu.sin(self.xd) assert isinstance(actual.data, DuckArray) def test_ufunc_duck_array_variable(self): actual = xu.sin(self.xd.variable) assert isinstance(actual.data, DuckArray) def test_ufunc_duck_array_dataset(self): ds = xr.Dataset({"a": self.xd}) actual = xu.sin(ds) assert isinstance(actual.a.data, DuckArray) @requires_dask def test_ufunc_duck_dask(self): import dask.array as da x = xr.DataArray(da.from_array(DuckArray(np.array([1, 2, 3])))) actual = xu.sin(x) assert isinstance(actual.data._meta, DuckArray) @requires_dask @pytest.mark.xfail(reason="dask ufuncs currently dispatch to numpy") def test_ufunc_duck_dask_no_array_ufunc(self): import dask.array as da # dask ufuncs currently only preserve duck arrays that implement __array_ufunc__ with patch.object(DuckArray, "__array_ufunc__", new=None, create=True): x = xr.DataArray(da.from_array(DuckArray(np.array([1, 2, 3])))) actual = xu.sin(x) assert isinstance(actual.data._meta, DuckArray) def test_ufunc_mixed_arrays_compatible(self): actual = xu.add(self.xd, self.x) assert isinstance(actual.data, DuckArray) def test_ufunc_mixed_arrays_incompatible(self): with pytest.raises(ValueError, match=r"Mixed array types"): xu.add(self.xd, self.xd2) python-xarray-2026.01.0/xarray/tests/test_missing.py0000664000175000017500000006254615136607163022612 0ustar alastairalastairfrom __future__ import annotations import itertools from typing import Any from unittest import mock import numpy as np import pandas as pd import pytest import xarray as xr from xarray.core import indexing from xarray.core.missing import ( NumpyInterpolator, ScipyInterpolator, SplineInterpolator, _get_nan_block_lengths, get_clean_interp_index, ) from xarray.namedarray.pycompat import array_type from xarray.tests import ( _CFTIME_CALENDARS, assert_allclose, assert_array_equal, assert_equal, raise_if_dask_computes, requires_bottleneck, requires_cftime, requires_dask, requires_numbagg, requires_numbagg_or_bottleneck, requires_scipy, ) dask_array_type = array_type("dask") @pytest.fixture def da(): return xr.DataArray([0, np.nan, 1, 2, np.nan, 3, 4, 5, np.nan, 6, 7], dims="time") @pytest.fixture def cf_da(): def _cf_da(calendar, freq="1D"): times = xr.date_range( start="1970-01-01", freq=freq, periods=10, calendar=calendar, use_cftime=True, ) values = np.arange(10) return xr.DataArray(values, dims=("time",), coords={"time": times}) return _cf_da @pytest.fixture def ds(): ds = xr.Dataset() ds["var1"] = xr.DataArray( [0, np.nan, 1, 2, np.nan, 3, 4, 5, np.nan, 6, 7], 
dims="time" ) ds["var2"] = xr.DataArray( [10, np.nan, 11, 12, np.nan, 13, 14, 15, np.nan, 16, 17], dims="x" ) return ds def make_interpolate_example_data(shape, frac_nan, seed=12345, non_uniform=False): rs = np.random.default_rng(seed) vals = rs.normal(size=shape) if frac_nan == 1: vals[:] = np.nan elif frac_nan == 0: pass else: n_missing = int(vals.size * frac_nan) ys = np.arange(shape[0]) xs = np.arange(shape[1]) if n_missing: np.random.shuffle(ys) ys = ys[:n_missing] np.random.shuffle(xs) xs = xs[:n_missing] vals[ys, xs] = np.nan if non_uniform: # construct a datetime index that has irregular spacing deltas = pd.to_timedelta(rs.normal(size=shape[0], scale=10), unit="D") coords = {"time": (pd.Timestamp("2000-01-01") + deltas).sort_values()} else: coords = {"time": pd.date_range("2000-01-01", freq="D", periods=shape[0])} da = xr.DataArray(vals, dims=("time", "x"), coords=coords) df = da.to_pandas() return da, df @pytest.mark.parametrize("fill_value", [None, np.nan, 47.11]) @pytest.mark.parametrize( "method", ["linear", "nearest", "zero", "slinear", "quadratic", "cubic"] ) @requires_scipy def test_interpolate_pd_compat(method, fill_value) -> None: shapes = [(8, 8), (1, 20), (20, 1), (100, 100)] frac_nans = [0, 0.5, 1] for shape, frac_nan in itertools.product(shapes, frac_nans): da, df = make_interpolate_example_data(shape, frac_nan) for dim in ["time", "x"]: actual = da.interpolate_na(method=method, dim=dim, fill_value=fill_value) # need limit_direction="both" here, to let pandas fill # in both directions instead of default forward direction only expected = df.interpolate( method=method, axis=da.get_axis_num(dim), limit_direction="both", fill_value=fill_value, ) if method == "linear": # Note, Pandas does not take left/right fill_value into account # for the numpy linear methods. # see https://github.com/pandas-dev/pandas/issues/55144 # This aligns the pandas output with the xarray output fixed = expected.values.copy() fixed[pd.isnull(actual.values)] = np.nan fixed[actual.values == fill_value] = fill_value else: fixed = expected.values np.testing.assert_allclose(actual.values, fixed) @requires_scipy @pytest.mark.parametrize("method", ["barycentric", "krogh", "pchip", "spline", "akima"]) def test_scipy_methods_function(method) -> None: # Note: Pandas does some wacky things with these methods and the full # integration tests won't work. da, _ = make_interpolate_example_data((25, 25), 0.4, non_uniform=True) if method == "spline": with pytest.warns(PendingDeprecationWarning): actual = da.interpolate_na(method=method, dim="time") else: actual = da.interpolate_na(method=method, dim="time") assert (da.count("time") <= actual.count("time")).all() @requires_scipy def test_interpolate_pd_compat_non_uniform_index(): shapes = [(8, 8), (1, 20), (20, 1), (100, 100)] frac_nans = [0, 0.5, 1] methods = ["time", "index", "values"] for shape, frac_nan, method in itertools.product(shapes, frac_nans, methods): da, df = make_interpolate_example_data(shape, frac_nan, non_uniform=True) for dim in ["time", "x"]: if method == "time" and dim != "time": continue actual = da.interpolate_na( method="linear", dim=dim, use_coordinate=True, fill_value=np.nan ) expected = df.interpolate( method=method, axis=da.get_axis_num(dim), ) # Note, Pandas does some odd things with the left/right fill_value # for the linear methods. This next line inforces the xarray # fill_value convention on the pandas output. 
Therefore, this test # only checks that interpolated values are the same (not nans) expected_values = expected.values.copy() expected_values[pd.isnull(actual.values)] = np.nan np.testing.assert_allclose(actual.values, expected_values) @requires_scipy def test_interpolate_pd_compat_polynomial(): shapes = [(8, 8), (1, 20), (20, 1), (100, 100)] frac_nans = [0, 0.5, 1] orders = [1, 2, 3] for shape, frac_nan, order in itertools.product(shapes, frac_nans, orders): da, df = make_interpolate_example_data(shape, frac_nan) for dim in ["time", "x"]: actual = da.interpolate_na( method="polynomial", order=order, dim=dim, use_coordinate=False ) expected = df.interpolate( method="polynomial", order=order, axis=da.get_axis_num(dim) ) np.testing.assert_allclose(actual.values, expected.values) @requires_scipy def test_interpolate_unsorted_index_raises(): vals = np.array([1, 2, 3], dtype=np.float64) expected = xr.DataArray(vals, dims="x", coords={"x": [2, 1, 3]}) with pytest.raises(ValueError, match=r"Index 'x' must be monotonically increasing"): expected.interpolate_na(dim="x", method="index") # type: ignore[arg-type] def test_interpolate_no_dim_raises(): da = xr.DataArray(np.array([1, 2, np.nan, 5], dtype=np.float64), dims="x") with pytest.raises(NotImplementedError, match=r"dim is a required argument"): da.interpolate_na(method="linear") def test_interpolate_invalid_interpolator_raises(): da = xr.DataArray(np.array([1, 2, np.nan, 5], dtype=np.float64), dims="x") with pytest.raises(ValueError, match=r"not a valid"): da.interpolate_na(dim="x", method="foo") # type: ignore[arg-type] def test_interpolate_duplicate_values_raises(): data = np.random.randn(2, 3) da = xr.DataArray(data, coords=[("x", ["a", "a"]), ("y", [0, 1, 2])]) with pytest.raises(ValueError, match=r"Index 'x' has duplicate values"): da.interpolate_na(dim="x", method="foo") # type: ignore[arg-type] def test_interpolate_multiindex_raises(): data = np.random.randn(2, 3) data[1, 1] = np.nan da = xr.DataArray(data, coords=[("x", ["a", "b"]), ("y", [0, 1, 2])]) das = da.stack(z=("x", "y")) with pytest.raises(TypeError, match=r"Index 'z' must be castable to float64"): das.interpolate_na(dim="z") def test_interpolate_2d_coord_raises(): coords = { "x": xr.Variable(("a", "b"), np.arange(6).reshape(2, 3)), "y": xr.Variable(("a", "b"), np.arange(6).reshape(2, 3)) * 2, } data = np.random.randn(2, 3) data[1, 1] = np.nan da = xr.DataArray(data, dims=("a", "b"), coords=coords) with pytest.raises(ValueError, match=r"interpolation must be 1D"): da.interpolate_na(dim="a", use_coordinate="x") @requires_scipy def test_interpolate_kwargs(): da = xr.DataArray(np.array([4, 5, np.nan], dtype=np.float64), dims="x") expected = xr.DataArray(np.array([4, 5, 6], dtype=np.float64), dims="x") actual = da.interpolate_na(dim="x", fill_value="extrapolate") assert_equal(actual, expected) expected = xr.DataArray(np.array([4, 5, -999], dtype=np.float64), dims="x") actual = da.interpolate_na(dim="x", fill_value=-999) assert_equal(actual, expected) def test_interpolate_keep_attrs(): vals = np.array([1, 2, 3, 4, 5, 6], dtype=np.float64) mvals = vals.copy() mvals[2] = np.nan missing = xr.DataArray(mvals, dims="x") missing.attrs = {"test": "value"} actual = missing.interpolate_na(dim="x", keep_attrs=True) assert actual.attrs == {"test": "value"} def test_interpolate(): vals = np.array([1, 2, 3, 4, 5, 6], dtype=np.float64) expected = xr.DataArray(vals, dims="x") mvals = vals.copy() mvals[2] = np.nan missing = xr.DataArray(mvals, dims="x") actual = missing.interpolate_na(dim="x") 
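# Illustrative sketch (added example, not part of the original test): with the
# default "linear" method and no coordinate, interior NaNs are filled from
# their neighbours using the integer index, e.g. [1, nan, 3] -> [1, 2, 3].
# The name `sketch` is illustrative only.
sketch = xr.DataArray(np.array([1.0, np.nan, 3.0]), dims="x").interpolate_na(dim="x")
np.testing.assert_allclose(sketch.values, [1.0, 2.0, 3.0])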
assert_equal(actual, expected) @requires_scipy @pytest.mark.parametrize( "method,vals", [ pytest.param(method, vals, id=f"{desc}:{method}") for method in [ "linear", "nearest", "zero", "slinear", "quadratic", "cubic", "polynomial", ] for (desc, vals) in [ ("no nans", np.array([1, 2, 3, 4, 5, 6], dtype=np.float64)), ("one nan", np.array([1, np.nan, np.nan], dtype=np.float64)), ("all nans", np.full(6, np.nan, dtype=np.float64)), ] ], ) def test_interp1d_fastrack(method, vals): expected = xr.DataArray(vals, dims="x") actual = expected.interpolate_na(dim="x", method=method) assert_equal(actual, expected) @requires_bottleneck def test_interpolate_limits(): da = xr.DataArray( np.array([1, 2, np.nan, np.nan, np.nan, 6], dtype=np.float64), dims="x" ) actual = da.interpolate_na(dim="x", limit=None) assert actual.isnull().sum() == 0 actual = da.interpolate_na(dim="x", limit=2) expected = xr.DataArray( np.array([1, 2, 3, 4, np.nan, 6], dtype=np.float64), dims="x" ) assert_equal(actual, expected) @requires_scipy def test_interpolate_methods(): for method in ["linear", "nearest", "zero", "slinear", "quadratic", "cubic"]: kwargs: dict[str, Any] = {} da = xr.DataArray( np.array([0, 1, 2, np.nan, np.nan, np.nan, 6, 7, 8], dtype=np.float64), dims="x", ) actual = da.interpolate_na("x", method=method, **kwargs) # type: ignore[arg-type] assert actual.isnull().sum() == 0 actual = da.interpolate_na("x", method=method, limit=2, **kwargs) # type: ignore[arg-type] assert actual.isnull().sum() == 1 @requires_scipy def test_interpolators(): for method, interpolator in [ ("linear", NumpyInterpolator), ("linear", ScipyInterpolator), ("spline", SplineInterpolator), ]: xi = np.array([-1, 0, 1, 2, 5], dtype=np.float64) yi = np.array([-10, 0, 10, 20, 50], dtype=np.float64) x = np.array([3, 4], dtype=np.float64) f = interpolator(xi, yi, method=method) out = f(x) assert pd.isnull(out).sum() == 0 def test_interpolate_use_coordinate(): xc = xr.Variable("x", [100, 200, 300, 400, 500, 600]) da = xr.DataArray( np.array([1, 2, np.nan, np.nan, np.nan, 6], dtype=np.float64), dims="x", coords={"xc": xc}, ) # use_coordinate == False is same as using the default index actual = da.interpolate_na(dim="x", use_coordinate=False) expected = da.interpolate_na(dim="x") assert_equal(actual, expected) # possible to specify non index coordinate actual = da.interpolate_na(dim="x", use_coordinate="xc") expected = da.interpolate_na(dim="x") assert_equal(actual, expected) # possible to specify index coordinate by name actual = da.interpolate_na(dim="x", use_coordinate="x") expected = da.interpolate_na(dim="x") assert_equal(actual, expected) @requires_dask def test_interpolate_dask(): da, _ = make_interpolate_example_data((40, 40), 0.5) da = da.chunk({"x": 5}) actual = da.interpolate_na("time") expected = da.load().interpolate_na("time") assert isinstance(actual.data, dask_array_type) assert_equal(actual.compute(), expected) # with limit da = da.chunk({"x": 5}) actual = da.interpolate_na("time", limit=3) expected = da.load().interpolate_na("time", limit=3) assert isinstance(actual.data, dask_array_type) assert_equal(actual, expected) @requires_dask def test_interpolate_dask_raises_for_invalid_chunk_dim(): da, _ = make_interpolate_example_data((40, 40), 0.5) da = da.chunk({"time": 5}) # this checks for ValueError in dask.array.apply_gufunc with pytest.raises(ValueError, match=r"consists of multiple chunks"): da.interpolate_na("time") @requires_dask @requires_scipy @pytest.mark.parametrize("dtype, method", [(int, "linear"), (int, "nearest")]) def 
test_interpolate_dask_expected_dtype(dtype, method): da = xr.DataArray( data=np.array([0, 1], dtype=dtype), dims=["time"], coords=dict(time=np.array([0, 1])), ).chunk(dict(time=2)) da = da.interp(time=np.array([0, 0.5, 1, 2]), method=method) assert da.dtype == da.compute().dtype @requires_numbagg_or_bottleneck def test_ffill(): da = xr.DataArray(np.array([4, 5, np.nan], dtype=np.float64), dims="x") expected = xr.DataArray(np.array([4, 5, 5], dtype=np.float64), dims="x") actual = da.ffill("x") assert_equal(actual, expected) @pytest.mark.parametrize("compute_backend", [None], indirect=True) @pytest.mark.parametrize("method", ["ffill", "bfill"]) def test_b_ffill_use_bottleneck_numbagg(method, compute_backend): """ bfill & ffill fail if both bottleneck and numba are disabled """ da = xr.DataArray(np.array([4, 5, np.nan], dtype=np.float64), dims="x") with pytest.raises(RuntimeError): getattr(da, method)("x") @requires_dask @pytest.mark.parametrize("compute_backend", [None], indirect=True) @pytest.mark.parametrize("method", ["ffill", "bfill"]) def test_b_ffill_use_bottleneck_dask(method, compute_backend): """ ffill fails if both bottleneck and numba are disabled, on dask arrays """ da = xr.DataArray(np.array([4, 5, np.nan], dtype=np.float64), dims="x") with pytest.raises(RuntimeError): getattr(da, method)("x") @requires_numbagg @requires_dask @pytest.mark.parametrize("compute_backend", ["numbagg"], indirect=True) def test_ffill_use_numbagg_dask(compute_backend): da = xr.DataArray(np.array([4, 5, np.nan], dtype=np.float64), dims="x") da = da.chunk(x=-1) # Succeeds with a single chunk: _ = da.ffill("x").compute() @requires_bottleneck @requires_dask @pytest.mark.parametrize("method", ["ffill", "bfill"]) def test_ffill_bfill_dask(method): da, _ = make_interpolate_example_data((40, 40), 0.5) da = da.chunk({"x": 5}) dask_method = getattr(da, method) numpy_method = getattr(da.compute(), method) # unchunked axis with raise_if_dask_computes(): actual = dask_method("time") expected = numpy_method("time") assert_equal(actual, expected) # chunked axis with raise_if_dask_computes(): actual = dask_method("x") expected = numpy_method("x") assert_equal(actual, expected) # with limit with raise_if_dask_computes(): actual = dask_method("time", limit=3) expected = numpy_method("time", limit=3) assert_equal(actual, expected) # limit < axis size with raise_if_dask_computes(): actual = dask_method("x", limit=2) expected = numpy_method("x", limit=2) assert_equal(actual, expected) # limit > axis size with raise_if_dask_computes(): actual = dask_method("x", limit=41) expected = numpy_method("x", limit=41) assert_equal(actual, expected) @requires_bottleneck def test_ffill_bfill_nonans(): vals = np.array([1, 2, 3, 4, 5, 6], dtype=np.float64) expected = xr.DataArray(vals, dims="x") actual = expected.ffill(dim="x") assert_equal(actual, expected) actual = expected.bfill(dim="x") assert_equal(actual, expected) @requires_bottleneck def test_ffill_bfill_allnans(): vals = np.full(6, np.nan, dtype=np.float64) expected = xr.DataArray(vals, dims="x") actual = expected.ffill(dim="x") assert_equal(actual, expected) actual = expected.bfill(dim="x") assert_equal(actual, expected) @requires_bottleneck def test_ffill_functions(da): result = da.ffill("time") assert result.isnull().sum() == 0 @requires_bottleneck def test_ffill_limit(): da = xr.DataArray( [0, np.nan, np.nan, np.nan, np.nan, 3, 4, 5, np.nan, 6, 7], dims="time" ) result = da.ffill("time") expected = xr.DataArray([0, 0, 0, 0, 0, 3, 4, 5, 5, 6, 7], dims="time") 
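# Illustrative sketch (added example; assumes bottleneck, which this test
# already requires): `limit` bounds how many consecutive NaNs a single valid
# value may fill, and bfill is the backward-filling mirror of ffill. The name
# `sketch_bfill` is illustrative only.
sketch_bfill = xr.DataArray([np.nan, np.nan, 2.0], dims="t").bfill("t", limit=1)
np.testing.assert_array_equal(sketch_bfill.values, [np.nan, 2.0, 2.0])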
assert_array_equal(result, expected) result = da.ffill("time", limit=1) expected = xr.DataArray( [0, 0, np.nan, np.nan, np.nan, 3, 4, 5, 5, 6, 7], dims="time" ) assert_array_equal(result, expected) def test_interpolate_dataset(ds): actual = ds.interpolate_na(dim="time") # no missing values in var1 assert actual["var1"].count("time") == actual.sizes["time"] # var2 should be the same as it was assert_array_equal(actual["var2"], ds["var2"]) @requires_bottleneck def test_ffill_dataset(ds): ds.ffill(dim="time") @requires_bottleneck def test_bfill_dataset(ds): ds.ffill(dim="time") @requires_bottleneck @pytest.mark.parametrize( "y, lengths_expected", [ [np.arange(9), [[1, 0, 7, 7, 7, 7, 7, 7, 0], [3, 3, 3, 0, 3, 3, 0, 2, 2]]], [ np.arange(9) * 3, [[3, 0, 21, 21, 21, 21, 21, 21, 0], [9, 9, 9, 0, 9, 9, 0, 6, 6]], ], [ [0, 2, 5, 6, 7, 8, 10, 12, 14], [[2, 0, 12, 12, 12, 12, 12, 12, 0], [6, 6, 6, 0, 4, 4, 0, 4, 4]], ], ], ) def test_interpolate_na_nan_block_lengths(y, lengths_expected): arr = [ [np.nan, 1, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, 4], [np.nan, np.nan, np.nan, 1, np.nan, np.nan, 4, np.nan, np.nan], ] da = xr.DataArray(arr, dims=["x", "y"], coords={"x": [0, 1], "y": y}) index = get_clean_interp_index(da, dim="y", use_coordinate=True) actual = _get_nan_block_lengths(da, dim="y", index=index) expected = da.copy(data=lengths_expected) assert_equal(actual, expected) @requires_cftime @pytest.mark.parametrize("calendar", _CFTIME_CALENDARS) def test_get_clean_interp_index_cf_calendar(cf_da, calendar): """The index for CFTimeIndex is in units of days. This means that if two series using a 360 and 365 days calendar each have a trend of .01C/year, the linear regression coefficients will be different because they have different number of days. Another option would be to have an index in units of years, but this would likely create other difficulties. 
""" i = get_clean_interp_index(cf_da(calendar), dim="time") np.testing.assert_array_equal(i, np.arange(10) * 1e9 * 86400) @requires_cftime @pytest.mark.parametrize("calendar", ["gregorian", "proleptic_gregorian"]) @pytest.mark.parametrize("freq", ["1D", "1ME", "1YE"]) def test_get_clean_interp_index_dt(cf_da, calendar, freq) -> None: """In the gregorian case, the index should be proportional to normal datetimes.""" g = cf_da(calendar, freq=freq) g["stime"] = xr.Variable( data=g.time.to_index().to_datetimeindex(time_unit="ns"), dims=("time",) ) gi = get_clean_interp_index(g, "time") si = get_clean_interp_index(g, "time", use_coordinate="stime") np.testing.assert_array_equal(gi, si) @requires_cftime def test_get_clean_interp_index_potential_overflow(): da = xr.DataArray( [0, 1, 2], dims=("time",), coords={ "time": xr.date_range( "0000-01-01", periods=3, calendar="360_day", use_cftime=True ) }, ) get_clean_interp_index(da, "time") @pytest.mark.parametrize("index", ([0, 2, 1], [0, 1, 1])) def test_get_clean_interp_index_strict(index): da = xr.DataArray([0, 1, 2], dims=("x",), coords={"x": index}) with pytest.raises(ValueError): get_clean_interp_index(da, "x") clean = get_clean_interp_index(da, "x", strict=False) np.testing.assert_array_equal(index, clean) assert clean.dtype == np.float64 @pytest.fixture def da_time(): return xr.DataArray( [np.nan, 1, 2, np.nan, np.nan, 5, np.nan, np.nan, np.nan, np.nan, 10], dims=["t"], ) def test_interpolate_na_max_gap_errors(da_time): with pytest.raises( NotImplementedError, match=r"max_gap not implemented for unlabeled coordinates" ): da_time.interpolate_na("t", max_gap=1) with pytest.raises(ValueError, match=r"max_gap must be a scalar."): da_time.interpolate_na("t", max_gap=(1,)) da_time["t"] = pd.date_range("2001-01-01", freq="h", periods=11) with pytest.raises(TypeError, match=r"Expected value of type str"): da_time.interpolate_na("t", max_gap=1) with pytest.raises(TypeError, match=r"Expected integer or floating point"): da_time.interpolate_na("t", max_gap="1h", use_coordinate=False) with pytest.raises(ValueError, match=r"Could not convert 'huh' to timedelta64"): da_time.interpolate_na("t", max_gap="huh") @requires_bottleneck @pytest.mark.parametrize( "use_cftime", [False, pytest.param(True, marks=requires_cftime)], ) @pytest.mark.parametrize("transform", [lambda x: x, lambda x: x.to_dataset(name="a")]) @pytest.mark.parametrize( "max_gap", ["3h", np.timedelta64(3, "h"), pd.to_timedelta("3h")] ) def test_interpolate_na_max_gap_time_specifier(da_time, max_gap, transform, use_cftime): da_time["t"] = xr.date_range( "2001-01-01", freq="h", periods=11, use_cftime=use_cftime ) expected = transform( da_time.copy(data=[np.nan, 1, 2, 3, 4, 5, np.nan, np.nan, np.nan, np.nan, 10]) ) actual = transform(da_time).interpolate_na("t", max_gap=max_gap) assert_allclose(actual, expected) @requires_bottleneck @pytest.mark.parametrize( "coords", [ pytest.param(None, marks=pytest.mark.xfail()), {"x": np.arange(4), "y": np.arange(12)}, ], ) def test_interpolate_na_2d(coords): n = np.nan da = xr.DataArray( [ [1, 2, 3, 4, n, 6, n, n, n, 10, 11, n], [n, n, 3, n, n, 6, n, n, n, 10, n, n], [n, n, 3, n, n, 6, n, n, n, 10, n, n], [n, 2, 3, 4, n, 6, n, n, n, 10, 11, n], ], dims=["x", "y"], coords=coords, ) actual = da.interpolate_na("y", max_gap=2) expected_y = da.copy( data=[ [1, 2, 3, 4, 5, 6, n, n, n, 10, 11, n], [n, n, 3, n, n, 6, n, n, n, 10, n, n], [n, n, 3, n, n, 6, n, n, n, 10, n, n], [n, 2, 3, 4, 5, 6, n, n, n, 10, 11, n], ] ) assert_equal(actual, expected_y) actual = 
da.interpolate_na("y", max_gap=1, fill_value="extrapolate") expected_y_extra = da.copy( data=[ [1, 2, 3, 4, n, 6, n, n, n, 10, 11, 12], [n, n, 3, n, n, 6, n, n, n, 10, n, n], [n, n, 3, n, n, 6, n, n, n, 10, n, n], [1, 2, 3, 4, n, 6, n, n, n, 10, 11, 12], ] ) assert_equal(actual, expected_y_extra) actual = da.interpolate_na("x", max_gap=3) expected_x = xr.DataArray( [ [1, 2, 3, 4, n, 6, n, n, n, 10, 11, n], [n, 2, 3, 4, n, 6, n, n, n, 10, 11, n], [n, 2, 3, 4, n, 6, n, n, n, 10, 11, n], [n, 2, 3, 4, n, 6, n, n, n, 10, 11, n], ], dims=["x", "y"], coords=coords, ) assert_equal(actual, expected_x) @requires_scipy def test_interpolators_complex_out_of_bounds(): """Ensure complex nans are used for complex data""" xi = np.array([-1, 0, 1, 2, 5], dtype=np.float64) yi = np.exp(1j * xi) x = np.array([-2, 1, 6], dtype=np.float64) expected = np.array( [np.nan + np.nan * 1j, np.exp(1j), np.nan + np.nan * 1j], dtype=yi.dtype ) for method, interpolator in [ ("linear", NumpyInterpolator), ("linear", ScipyInterpolator), ]: f = interpolator(xi, yi, method=method) actual = f(x) assert_array_equal(actual, expected) @requires_scipy def test_indexing_localize(): # regression test for GH10287 ds = xr.Dataset( { "sigma_a": xr.DataArray( data=np.ones((16, 8, 36811)), dims=["p", "t", "w"], coords={"w": np.linspace(0, 30000, 36811)}, ) } ) original_func = indexing.NumpyIndexingAdapter.__getitem__ def wrapper(self, indexer): return original_func(self, indexer) with mock.patch.object( indexing.NumpyIndexingAdapter, "__getitem__", side_effect=wrapper, autospec=True ) as mock_func: ds["sigma_a"].interp(w=15000.5) actual_indexer = mock_func.mock_calls[0].args[1]._key assert actual_indexer == (slice(None), slice(None), slice(18404, 18408)) python-xarray-2026.01.0/xarray/tests/test_coordinate_transform.py0000664000175000017500000002047415136607163025355 0ustar alastairalastairfrom collections.abc import Hashable from typing import Any import numpy as np import pytest import xarray as xr from xarray.core.coordinate_transform import CoordinateTransform from xarray.core.indexes import CoordinateTransformIndex from xarray.tests import assert_equal, assert_identical class SimpleCoordinateTransform(CoordinateTransform): """Simple uniform scale transform in a 2D space (x/y coordinates).""" def __init__(self, shape: tuple[int, int], scale: float, dtype: Any = None): super().__init__(("x", "y"), {"x": shape[1], "y": shape[0]}, dtype=dtype) self.scale = scale # array dimensions in reverse order (y = rows, x = cols) self.xy_dims = tuple(self.dims) self.dims = (self.dims[1], self.dims[0]) def forward(self, dim_positions: dict[str, Any]) -> dict[Hashable, Any]: assert set(dim_positions) == set(self.dims) return { name: dim_positions[dim] * self.scale for name, dim in zip(self.coord_names, self.xy_dims, strict=False) } def reverse(self, coord_labels: dict[Hashable, Any]) -> dict[str, Any]: return {dim: coord_labels[dim] / self.scale for dim in self.xy_dims} def equals( self, other: CoordinateTransform, exclude: frozenset[Hashable] | None = None ) -> bool: if not isinstance(other, SimpleCoordinateTransform): return False return self.scale == other.scale def __repr__(self) -> str: return f"Scale({self.scale})" def test_abstract_coordinate_transform() -> None: tr = CoordinateTransform(["x"], {"x": 5}) with pytest.raises(NotImplementedError): tr.forward({"x": [1, 2]}) with pytest.raises(NotImplementedError): tr.reverse({"x": [3.0, 4.0]}) with pytest.raises(NotImplementedError): tr.equals(CoordinateTransform(["x"], {"x": 5})) def 
test_coordinate_transform_init() -> None: tr = SimpleCoordinateTransform((4, 4), 2.0) assert tr.coord_names == ("x", "y") # array dimensions in reverse order (y = rows, x = cols) assert tr.dims == ("y", "x") assert tr.dim_size == {"x": 4, "y": 4} assert tr.dtype == np.dtype(np.float64) tr2 = SimpleCoordinateTransform((4, 4), 2.0, dtype=np.int64) assert tr2.dtype == np.dtype(np.int64) @pytest.mark.parametrize("dims", [None, ("y", "x")]) def test_coordinate_transform_generate_coords(dims) -> None: tr = SimpleCoordinateTransform((2, 2), 2.0) actual = tr.generate_coords(dims) expected = {"x": [[0.0, 2.0], [0.0, 2.0]], "y": [[0.0, 0.0], [2.0, 2.0]]} assert set(actual) == set(expected) np.testing.assert_array_equal(actual["x"], expected["x"]) np.testing.assert_array_equal(actual["y"], expected["y"]) def create_coords(scale: float, shape: tuple[int, int]) -> xr.Coordinates: """Create x/y Xarray coordinate variables from a simple coordinate transform.""" tr = SimpleCoordinateTransform(shape, scale) index = CoordinateTransformIndex(tr) return xr.Coordinates.from_xindex(index) def test_coordinate_transform_variable() -> None: coords = create_coords(scale=2.0, shape=(2, 2)) assert coords["x"].dtype == np.dtype(np.float64) assert coords["y"].dtype == np.dtype(np.float64) assert coords["x"].shape == (2, 2) assert coords["y"].shape == (2, 2) np.testing.assert_array_equal(np.array(coords["x"]), [[0.0, 2.0], [0.0, 2.0]]) np.testing.assert_array_equal(np.array(coords["y"]), [[0.0, 0.0], [2.0, 2.0]]) def assert_repr(var: xr.Variable): assert ( repr(var._data) == "CoordinateTransformIndexingAdapter(transform=Scale(2.0))" ) assert_repr(coords["x"].variable) assert_repr(coords["y"].variable) def test_coordinate_transform_variable_repr_inline() -> None: var = create_coords(scale=2.0, shape=(2, 2))["x"].variable actual = var._data._repr_inline_(70) # type: ignore[union-attr] assert actual == "0.0 2.0 0.0 2.0" # truncated inline repr var2 = create_coords(scale=2.0, shape=(10, 10))["x"].variable actual2 = var2._data._repr_inline_(70) # type: ignore[union-attr] assert ( actual2 == "0.0 2.0 4.0 6.0 8.0 10.0 12.0 ... 
6.0 8.0 10.0 12.0 14.0 16.0 18.0" ) def test_coordinate_transform_variable_repr() -> None: var = create_coords(scale=2.0, shape=(2, 2))["x"].variable actual = repr(var) expected = """ Size: 32B [4 values with dtype=float64] """.strip() assert actual == expected def test_coordinate_transform_variable_basic_outer_indexing() -> None: var = create_coords(scale=2.0, shape=(4, 4))["x"].variable assert var[0, 0] == 0.0 assert var[0, 1] == 2.0 assert var[0, -1] == 6.0 np.testing.assert_array_equal(var[:, 0:2], [[0.0, 2.0]] * 4) expected = var.values[[0], :][:, [0, -1]] actual = var.isel(y=[0], x=[0, -1]).values np.testing.assert_array_equal(actual, expected) with pytest.raises(IndexError, match="out of bounds index"): var[5] with pytest.raises(IndexError, match="out of bounds index"): var[-5] def test_coordinate_transform_variable_vectorized_indexing() -> None: var = create_coords(scale=2.0, shape=(4, 4))["x"].variable actual = var[{"x": xr.Variable("z", [0]), "y": xr.Variable("z", [0])}] expected = xr.Variable("z", [0.0]) assert_equal(actual, expected) with pytest.raises(IndexError, match="out of bounds index"): var[{"x": xr.Variable("z", [5]), "y": xr.Variable("z", [5])}] def test_coordinate_transform_setitem_error() -> None: var = create_coords(scale=2.0, shape=(4, 4))["x"].variable # basic indexing with pytest.raises(TypeError, match="setting values is not supported"): var[0, 0] = 1.0 # outer indexing with pytest.raises(TypeError, match="setting values is not supported"): var[[0, 2], 0] = [1.0, 2.0] # vectorized indexing with pytest.raises(TypeError, match="setting values is not supported"): var[{"x": xr.Variable("z", [0]), "y": xr.Variable("z", [0])}] = 1.0 def test_coordinate_transform_transpose() -> None: coords = create_coords(scale=2.0, shape=(2, 2)) actual = coords["x"].transpose().values expected = [[0.0, 0.0], [2.0, 2.0]] np.testing.assert_array_equal(actual, expected) def test_coordinate_transform_equals() -> None: ds1 = create_coords(scale=2.0, shape=(2, 2)).to_dataset() ds2 = create_coords(scale=2.0, shape=(2, 2)).to_dataset() ds3 = create_coords(scale=4.0, shape=(2, 2)).to_dataset() # cannot use `assert_equal()` test utility function here yet # (indexes invariant check are still based on IndexVariable, which # doesn't work with coordinate transform index coordinate variables) assert ds1.equals(ds2) assert not ds1.equals(ds3) def test_coordinate_transform_sel() -> None: ds = create_coords(scale=2.0, shape=(4, 4)).to_dataset() data = [ [0.0, 1.0, 2.0, 3.0], [4.0, 5.0, 6.0, 7.0], [8.0, 9.0, 10.0, 11.0], [12.0, 13.0, 14.0, 15.0], ] ds["data"] = (("y", "x"), data) actual = ds.sel( x=xr.Variable("z", [0.5, 5.5]), y=xr.Variable("z", [0.0, 0.5]), method="nearest" ) expected = ds.isel(x=xr.Variable("z", [0, 3]), y=xr.Variable("z", [0, 0])) # cannot use `assert_equal()` test utility function here yet # (indexes invariant check are still based on IndexVariable, which # doesn't work with coordinate transform index coordinate variables) assert actual.equals(expected) with pytest.raises(ValueError, match=r".*only supports selection.*nearest"): ds.sel(x=xr.Variable("z", [0.5, 5.5]), y=xr.Variable("z", [0.0, 0.5])) with pytest.raises(ValueError, match=r"missing labels for coordinate.*y"): ds.sel(x=[0.5, 5.5], method="nearest") with pytest.raises(TypeError, match=r".*only supports advanced.*indexing"): ds.sel(x=[0.5, 5.5], y=[0.0, 0.5], method="nearest") with pytest.raises(ValueError, match=r".*only supports advanced.*indexing"): ds.sel( x=xr.Variable("z", [0.5, 5.5]), y=xr.Variable("z", [0.0, 0.5, 
1.5]), method="nearest", ) def test_coordinate_transform_rename() -> None: ds = xr.Dataset(coords=create_coords(scale=2.0, shape=(2, 2))) roundtripped = ds.rename(x="u", y="v").rename(u="x", v="y") assert_identical(ds, roundtripped, check_default_indexes=False) python-xarray-2026.01.0/xarray/tests/test_units.py0000664000175000017500000056473215136607163022307 0ustar alastairalastairfrom __future__ import annotations import contextlib import functools import operator from typing import Any import numpy as np import pytest import xarray as xr from xarray.core import dtypes, duck_array_ops from xarray.tests import ( assert_allclose, assert_duckarray_allclose, assert_equal, assert_identical, requires_dask, requires_matplotlib, requires_numbagg, ) from xarray.tests.test_plot import PlotTestCase from xarray.tests.test_variable import _PAD_XR_NP_ARGS with contextlib.suppress(ImportError): import matplotlib.pyplot as plt pint = pytest.importorskip("pint") DimensionalityError = pint.errors.DimensionalityError def create_nan_array(values, dtype): """Create array with NaN values, handling cast warnings for int dtypes.""" import warnings # When casting float arrays with NaN to integer, NumPy raises a warning # This is expected behavior when dtype is int with warnings.catch_warnings(): if np.issubdtype(dtype, np.integer): warnings.filterwarnings("ignore", "invalid value encountered in cast") return np.array(values).astype(dtype) # make sure scalars are converted to 0d arrays so quantities can # always be treated like ndarrays unit_registry = pint.UnitRegistry(force_ndarray_like=True) Quantity = unit_registry.Quantity no_unit_values = ("none", None) pytestmark = [ pytest.mark.filterwarnings("error::pint.UnitStrippedWarning"), ] def is_compatible(unit1, unit2): def dimensionality(obj): if isinstance(obj, unit_registry.Quantity | unit_registry.Unit): unit_like = obj else: unit_like = unit_registry.dimensionless return unit_like.dimensionality return dimensionality(unit1) == dimensionality(unit2) def compatible_mappings(first, second): return { key: is_compatible(unit1, unit2) for key, (unit1, unit2) in zip_mappings(first, second) } def merge_mappings(base, *mappings): result = base.copy() for m in mappings: result.update(m) return result def zip_mappings(*mappings): for key in set(mappings[0]).intersection(*mappings[1:]): yield key, tuple(m[key] for m in mappings) def array_extract_units(obj): if isinstance(obj, xr.Variable | xr.DataArray | xr.Dataset): obj = obj.data try: return obj.units except AttributeError: return None def array_strip_units(array): try: return array.magnitude except AttributeError: return array def array_attach_units(data, unit): if isinstance(data, Quantity) and data.units != unit: raise ValueError(f"cannot attach unit {unit} to quantity {data}") if unit in no_unit_values or (isinstance(unit, int) and unit == 1): return data quantity = unit_registry.Quantity(data, unit) return quantity def extract_units(obj): if isinstance(obj, xr.Dataset): vars_units = { name: array_extract_units(value) for name, value in obj.data_vars.items() } coords_units = { name: array_extract_units(value) for name, value in obj.coords.items() } units = {**vars_units, **coords_units} elif isinstance(obj, xr.DataArray): vars_units = {obj.name: array_extract_units(obj)} coords_units = { name: array_extract_units(value) for name, value in obj.coords.items() } units = {**vars_units, **coords_units} elif isinstance(obj, xr.Variable): vars_units = {None: array_extract_units(obj.data)} units = {**vars_units} elif 
isinstance(obj, Quantity): vars_units = {None: array_extract_units(obj)} units = {**vars_units} else: units = {} return units def strip_units(obj): if isinstance(obj, xr.Dataset): data_vars = { strip_units(name): strip_units(value) for name, value in obj.data_vars.items() } coords = { strip_units(name): strip_units(value) for name, value in obj.coords.items() } new_obj = xr.Dataset(data_vars=data_vars, coords=coords) elif isinstance(obj, xr.DataArray): data = array_strip_units(obj.variable._data) coords = { strip_units(name): ( (value.dims, array_strip_units(value.variable._data)) if isinstance(value.data, Quantity) else value # to preserve multiindexes ) for name, value in obj.coords.items() } new_obj = xr.DataArray( # type: ignore[assignment] name=strip_units(obj.name), data=data, coords=coords, dims=obj.dims ) elif isinstance(obj, xr.Variable): data = array_strip_units(obj.data) new_obj = obj.copy(data=data) # type: ignore[assignment] elif isinstance(obj, unit_registry.Quantity): new_obj = obj.magnitude elif isinstance(obj, list | tuple): return type(obj)(strip_units(elem) for elem in obj) else: new_obj = obj return new_obj def attach_units(obj, units): if not isinstance(obj, xr.DataArray | xr.Dataset | xr.Variable): units = units.get("data", None) or units.get(None, None) or 1 return array_attach_units(obj, units) if isinstance(obj, xr.Dataset): data_vars = { name: attach_units(value, units) for name, value in obj.data_vars.items() } coords = { name: attach_units(value, units) for name, value in obj.coords.items() } new_obj = xr.Dataset(data_vars=data_vars, coords=coords, attrs=obj.attrs) elif isinstance(obj, xr.DataArray): # try the array name, "data" and None, then fall back to dimensionless data_units = units.get(obj.name, None) or units.get(None, None) or 1 data = array_attach_units(obj.data, data_units) coords = { name: ( (value.dims, array_attach_units(value.data, units.get(name) or 1)) if name in units else (value.dims, value.data) ) for name, value in obj.coords.items() } dims = obj.dims attrs = obj.attrs new_obj = xr.DataArray( # type: ignore[assignment] name=obj.name, data=data, coords=coords, attrs=attrs, dims=dims ) else: data_units = units.get("data", None) or units.get(None, None) or 1 data = array_attach_units(obj.data, data_units) new_obj = obj.copy(data=data) # type: ignore[assignment] return new_obj def convert_units(obj, to): # preprocess to = { key: None if not isinstance(value, unit_registry.Unit) else value for key, value in to.items() } if isinstance(obj, xr.Dataset): data_vars = { name: convert_units(array.variable, {None: to.get(name)}) for name, array in obj.data_vars.items() } coords = { name: convert_units(array.variable, {None: to.get(name)}) for name, array in obj.coords.items() } new_obj = xr.Dataset(data_vars=data_vars, coords=coords, attrs=obj.attrs) elif isinstance(obj, xr.DataArray): name = obj.name new_units = to.get(name) or to.get("data") or to.get(None) or None data = convert_units(obj.variable, {None: new_units}) coords = { name: (array.dims, convert_units(array.variable, {None: to.get(name)})) for name, array in obj.coords.items() if name != obj.name } new_obj = xr.DataArray( # type: ignore[assignment] name=name, data=data, coords=coords, attrs=obj.attrs, dims=obj.dims ) elif isinstance(obj, xr.Variable): new_data = convert_units(obj.data, to) new_obj = obj.copy(data=new_data) # type: ignore[assignment] elif isinstance(obj, unit_registry.Quantity): units = to.get(None) new_obj = obj.to(units) if units is not None else obj else: new_obj = obj 
return new_obj def assert_units_equal(a, b): __tracebackhide__ = True assert extract_units(a) == extract_units(b) @pytest.fixture(params=[np.dtype(float), np.dtype(int)], ids=str) def dtype(request): return request.param def merge_args(default_args, new_args): from itertools import zip_longest fill_value = object() return [ second if second is not fill_value else first for first, second in zip_longest(default_args, new_args, fillvalue=fill_value) ] class method: """wrapper class to help with passing methods via parametrize This is works a bit similar to using `partial(Class.method, arg, kwarg)` """ def __init__(self, name, *args, fallback_func=None, **kwargs): self.name = name self.fallback = fallback_func self.args = args self.kwargs = kwargs def __call__(self, obj, *args, **kwargs): from functools import partial all_args = merge_args(self.args, args) all_kwargs = {**self.kwargs, **kwargs} from xarray.core.groupby import GroupBy xarray_classes = ( xr.Variable, xr.DataArray, xr.Dataset, GroupBy, ) if not isinstance(obj, xarray_classes): # remove typical xarray args like "dim" exclude_kwargs = ("dim", "dims") # TODO: figure out a way to replace dim / dims with axis all_kwargs = { key: value for key, value in all_kwargs.items() if key not in exclude_kwargs } if self.fallback is not None: func = partial(self.fallback, obj) else: func_attr = getattr(obj, self.name, None) if func_attr is None or not callable(func_attr): # fall back to module level numpy functions numpy_func = getattr(np, self.name) func = partial(numpy_func, obj) else: func = func_attr else: func = getattr(obj, self.name) return func(*all_args, **all_kwargs) def __repr__(self): return f"method_{self.name}" class function: """wrapper class for numpy functions Same as method, but the name is used for referencing numpy functions """ def __init__(self, name_or_function, *args, function_label=None, **kwargs): if callable(name_or_function): self.name = ( function_label if function_label is not None else name_or_function.__name__ ) self.func = name_or_function else: self.name = name_or_function if function_label is None else function_label self.func = getattr(np, name_or_function) if self.func is None: raise AttributeError( f"module 'numpy' has no attribute named '{self.name}'" ) self.args = args self.kwargs = kwargs def __call__(self, *args, **kwargs): all_args = merge_args(self.args, args) all_kwargs = {**self.kwargs, **kwargs} return self.func(*all_args, **all_kwargs) def __repr__(self): return f"function_{self.name}" @pytest.mark.parametrize( "variant", ( "data", pytest.param( "dims", marks=pytest.mark.skip(reason="indexes don't support units") ), "coords", ), ) def test_apply_ufunc_dataarray(variant, dtype): variants = { "data": (unit_registry.m, 1, 1), "dims": (1, unit_registry.m, 1), "coords": (1, 1, unit_registry.m), } data_unit, dim_unit, coord_unit = variants[variant] func = functools.partial( xr.apply_ufunc, np.mean, input_core_dims=[["x"]], kwargs={"axis": -1} ) array = np.linspace(0, 10, 20).astype(dtype) * data_unit x = np.arange(20) * dim_unit u = np.linspace(-1, 1, 20) * coord_unit data_array = xr.DataArray(data=array, dims="x", coords={"x": x, "u": ("x", u)}) expected = attach_units(func(strip_units(data_array)), extract_units(data_array)) actual = func(data_array) assert_units_equal(expected, actual) assert_identical(expected, actual) @pytest.mark.parametrize( "variant", ( "data", pytest.param( "dims", marks=pytest.mark.skip(reason="indexes don't support units") ), "coords", ), ) def test_apply_ufunc_dataset(variant, 
dtype): variants = { "data": (unit_registry.m, 1, 1), "dims": (1, unit_registry.m, 1), "coords": (1, 1, unit_registry.s), } data_unit, dim_unit, coord_unit = variants[variant] func = functools.partial( xr.apply_ufunc, np.mean, input_core_dims=[["x"]], kwargs={"axis": -1} ) array1 = np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) * data_unit array2 = np.linspace(0, 10, 5).astype(dtype) * data_unit x = np.arange(5) * dim_unit y = np.arange(10) * dim_unit u = np.linspace(-1, 1, 10) * coord_unit ds = xr.Dataset( data_vars={"a": (("x", "y"), array1), "b": ("x", array2)}, coords={"x": x, "y": y, "u": ("y", u)}, ) expected = attach_units(func(strip_units(ds)), extract_units(ds)) actual = func(ds) assert_units_equal(expected, actual) assert_identical(expected, actual) @pytest.mark.parametrize( "unit,error", ( pytest.param(1, DimensionalityError, id="no_unit"), pytest.param( unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), pytest.param(unit_registry.mm, None, id="compatible_unit"), pytest.param(unit_registry.m, None, id="identical_unit"), ), ids=repr, ) @pytest.mark.parametrize( "variant", ( "data", pytest.param( "dims", marks=pytest.mark.skip(reason="indexes don't support units") ), "coords", ), ) @pytest.mark.parametrize("value", (10, dtypes.NA)) def test_align_dataarray(value, variant, unit, error, dtype): if variant == "coords" and ( value != dtypes.NA or isinstance(unit, unit_registry.Unit) ): pytest.xfail( reason=( "fill_value is used for both data variables and coords. " "See https://github.com/pydata/xarray/issues/4165" ) ) fill_value = dtypes.get_fill_value(dtype) if value == dtypes.NA else value original_unit = unit_registry.m variants = { "data": ((original_unit, unit), (1, 1), (1, 1)), "dims": ((1, 1), (original_unit, unit), (1, 1)), "coords": ((1, 1), (1, 1), (original_unit, unit)), } ( (data_unit1, data_unit2), (dim_unit1, dim_unit2), (coord_unit1, coord_unit2), ) = variants[variant] array1 = np.linspace(0, 10, 2 * 5).reshape(2, 5).astype(dtype) * data_unit1 array2 = np.linspace(0, 8, 2 * 5).reshape(2, 5).astype(dtype) * data_unit2 x = np.arange(2) * dim_unit1 y1 = np.arange(5) * dim_unit1 y2 = np.arange(2, 7) * dim_unit2 u1 = np.array([3, 5, 7, 8, 9]) * coord_unit1 u2 = np.array([7, 8, 9, 11, 13]) * coord_unit2 coords1 = {"x": x, "y": y1} coords2 = {"x": x, "y": y2} if variant == "coords": coords1["y_a"] = ("y", u1) coords2["y_a"] = ("y", u2) data_array1 = xr.DataArray(data=array1, coords=coords1, dims=("x", "y")) data_array2 = xr.DataArray(data=array2, coords=coords2, dims=("x", "y")) fill_value = fill_value * data_unit2 func = function(xr.align, join="outer", fill_value=fill_value) if error is not None and (value != dtypes.NA or isinstance(fill_value, Quantity)): with pytest.raises(error): func(data_array1, data_array2) return stripped_kwargs = { key: strip_units( convert_units(value, {None: data_unit1 if data_unit2 != 1 else None}) ) for key, value in func.kwargs.items() } units_a = extract_units(data_array1) units_b = extract_units(data_array2) expected_a, expected_b = func( strip_units(data_array1), strip_units(convert_units(data_array2, units_a)), **stripped_kwargs, ) expected_a = attach_units(expected_a, units_a) if isinstance(array2, Quantity): expected_b = convert_units(attach_units(expected_b, units_a), units_b) else: expected_b = attach_units(expected_b, units_b) actual_a, actual_b = func(data_array1, data_array2) assert_units_equal(expected_a, actual_a) 
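# Illustrative sketch (added example, not part of the original test): a pint
# quantity wrapped in a DataArray keeps its unit on the underlying data, which
# is exactly what the module-level extract_units/assert_units_equal helpers
# inspect. The name `sketch_q` is illustrative only.
sketch_q = xr.DataArray(np.arange(3, dtype=float) * unit_registry.m, dims="x")
assert extract_units(sketch_q) == {None: unit_registry.m}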
assert_allclose(expected_a, actual_a) assert_units_equal(expected_b, actual_b) assert_allclose(expected_b, actual_b) @pytest.mark.parametrize( "unit,error", ( pytest.param(1, DimensionalityError, id="no_unit"), pytest.param( unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), pytest.param(unit_registry.mm, None, id="compatible_unit"), pytest.param(unit_registry.m, None, id="identical_unit"), ), ids=repr, ) @pytest.mark.parametrize( "variant", ( "data", pytest.param( "dims", marks=pytest.mark.skip(reason="indexes don't support units") ), "coords", ), ) @pytest.mark.parametrize("value", (10, dtypes.NA)) def test_align_dataset(value, unit, variant, error, dtype): if variant == "coords" and ( value != dtypes.NA or isinstance(unit, unit_registry.Unit) ): pytest.xfail( reason=( "fill_value is used for both data variables and coords. " "See https://github.com/pydata/xarray/issues/4165" ) ) fill_value = dtypes.get_fill_value(dtype) if value == dtypes.NA else value original_unit = unit_registry.m variants = { "data": ((original_unit, unit), (1, 1), (1, 1)), "dims": ((1, 1), (original_unit, unit), (1, 1)), "coords": ((1, 1), (1, 1), (original_unit, unit)), } ( (data_unit1, data_unit2), (dim_unit1, dim_unit2), (coord_unit1, coord_unit2), ) = variants[variant] array1 = np.linspace(0, 10, 2 * 5).reshape(2, 5).astype(dtype) * data_unit1 array2 = np.linspace(0, 10, 2 * 5).reshape(2, 5).astype(dtype) * data_unit2 x = np.arange(2) * dim_unit1 y1 = np.arange(5) * dim_unit1 y2 = np.arange(2, 7) * dim_unit2 u1 = np.array([3, 5, 7, 8, 9]) * coord_unit1 u2 = np.array([7, 8, 9, 11, 13]) * coord_unit2 coords1 = {"x": x, "y": y1} coords2 = {"x": x, "y": y2} if variant == "coords": coords1["u"] = ("y", u1) coords2["u"] = ("y", u2) ds1 = xr.Dataset(data_vars={"a": (("x", "y"), array1)}, coords=coords1) ds2 = xr.Dataset(data_vars={"a": (("x", "y"), array2)}, coords=coords2) fill_value = fill_value * data_unit2 func = function(xr.align, join="outer", fill_value=fill_value) if error is not None and (value != dtypes.NA or isinstance(fill_value, Quantity)): with pytest.raises(error): func(ds1, ds2) return stripped_kwargs = { key: strip_units( convert_units(value, {None: data_unit1 if data_unit2 != 1 else None}) ) for key, value in func.kwargs.items() } units_a = extract_units(ds1) units_b = extract_units(ds2) expected_a, expected_b = func( strip_units(ds1), strip_units(convert_units(ds2, units_a)), **stripped_kwargs, ) expected_a = attach_units(expected_a, units_a) if isinstance(array2, Quantity): expected_b = convert_units(attach_units(expected_b, units_a), units_b) else: expected_b = attach_units(expected_b, units_b) actual_a, actual_b = func(ds1, ds2) assert_units_equal(expected_a, actual_a) assert_allclose(expected_a, actual_a) assert_units_equal(expected_b, actual_b) assert_allclose(expected_b, actual_b) def test_broadcast_dataarray(dtype): # uses align internally so more thorough tests are not needed array1 = np.linspace(0, 10, 2) * unit_registry.Pa array2 = np.linspace(0, 10, 3) * unit_registry.Pa a = xr.DataArray(data=array1, dims="x") b = xr.DataArray(data=array2, dims="y") units_a = extract_units(a) units_b = extract_units(b) expected_a, expected_b = xr.broadcast(strip_units(a), strip_units(b)) expected_a = attach_units(expected_a, units_a) expected_b = convert_units(attach_units(expected_b, units_a), units_b) actual_a, actual_b = xr.broadcast(a, b) assert_units_equal(expected_a, actual_a) 
assert_identical(expected_a, actual_a) assert_units_equal(expected_b, actual_b) assert_identical(expected_b, actual_b) def test_broadcast_dataset(dtype): # uses align internally so more thorough tests are not needed array1 = np.linspace(0, 10, 2) * unit_registry.Pa array2 = np.linspace(0, 10, 3) * unit_registry.Pa x1 = np.arange(2) y1 = np.arange(3) x2 = np.arange(2, 4) y2 = np.arange(3, 6) ds = xr.Dataset( data_vars={"a": ("x", array1), "b": ("y", array2)}, coords={"x": x1, "y": y1} ) other = xr.Dataset( data_vars={ "a": ("x", array1.to(unit_registry.hPa)), "b": ("y", array2.to(unit_registry.hPa)), }, coords={"x": x2, "y": y2}, ) units_a = extract_units(ds) units_b = extract_units(other) expected_a, expected_b = xr.broadcast(strip_units(ds), strip_units(other)) expected_a = attach_units(expected_a, units_a) expected_b = attach_units(expected_b, units_b) actual_a, actual_b = xr.broadcast(ds, other) assert_units_equal(expected_a, actual_a) assert_identical(expected_a, actual_a) assert_units_equal(expected_b, actual_b) assert_identical(expected_b, actual_b) @pytest.mark.parametrize( "unit,error", ( pytest.param(1, DimensionalityError, id="no_unit"), pytest.param( unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), pytest.param(unit_registry.mm, None, id="compatible_unit"), pytest.param(unit_registry.m, None, id="identical_unit"), ), ids=repr, ) @pytest.mark.parametrize( "variant", ( "data", pytest.param( "dims", marks=pytest.mark.skip(reason="indexes don't support units") ), "coords", ), ) @pytest.mark.filterwarnings( "ignore:.*the default value for coords will change:FutureWarning" ) def test_combine_by_coords(variant, unit, error, dtype): original_unit = unit_registry.m variants = { "data": ((original_unit, unit), (1, 1), (1, 1)), "dims": ((1, 1), (original_unit, unit), (1, 1)), "coords": ((1, 1), (1, 1), (original_unit, unit)), } ( (data_unit1, data_unit2), (dim_unit1, dim_unit2), (coord_unit1, coord_unit2), ) = variants[variant] array1 = np.zeros(shape=(2, 3), dtype=dtype) * data_unit1 array2 = np.zeros(shape=(2, 3), dtype=dtype) * data_unit1 x = np.arange(1, 4) * 10 * dim_unit1 y = np.arange(2) * dim_unit1 u = np.arange(3) * coord_unit1 other_array1 = np.ones_like(array1) * data_unit2 other_array2 = np.ones_like(array2) * data_unit2 other_x = np.arange(1, 4) * 10 * dim_unit2 other_y = np.arange(2, 4) * dim_unit2 other_u = np.arange(3, 6) * coord_unit2 ds = xr.Dataset( data_vars={"a": (("y", "x"), array1), "b": (("y", "x"), array2)}, coords={"x": x, "y": y, "u": ("x", u)}, ) other = xr.Dataset( data_vars={"a": (("y", "x"), other_array1), "b": (("y", "x"), other_array2)}, coords={"x": other_x, "y": other_y, "u": ("x", other_u)}, ) if error is not None: with pytest.raises(error): xr.combine_by_coords([ds, other], coords="different", compat="no_conflicts") return units = extract_units(ds) expected = attach_units( xr.combine_by_coords( [strip_units(ds), strip_units(convert_units(other, units))] ), units, ) actual = xr.combine_by_coords([ds, other]) assert_units_equal(expected, actual) assert_identical(expected, actual) @pytest.mark.parametrize( "unit,error", ( pytest.param(1, DimensionalityError, id="no_unit"), pytest.param( unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), pytest.param(unit_registry.mm, None, id="compatible_unit"), pytest.param(unit_registry.m, None, id="identical_unit"), ), 
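# This unit/error parametrization recurs throughout the module: plain numbers,
# dimensionless quantities and incompatible units are expected to raise a pint
# DimensionalityError, while compatible (mm) and identical (m) units are
# expected to succeed.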
ids=repr, ) @pytest.mark.parametrize( "variant", ( "data", pytest.param( "dims", marks=pytest.mark.skip(reason="indexes don't support units") ), "coords", ), ) def test_combine_nested(variant, unit, error, dtype): original_unit = unit_registry.m variants = { "data": ((original_unit, unit), (1, 1), (1, 1)), "dims": ((1, 1), (original_unit, unit), (1, 1)), "coords": ((1, 1), (1, 1), (original_unit, unit)), } ( (data_unit1, data_unit2), (dim_unit1, dim_unit2), (coord_unit1, coord_unit2), ) = variants[variant] array1 = np.zeros(shape=(2, 3), dtype=dtype) * data_unit1 array2 = np.zeros(shape=(2, 3), dtype=dtype) * data_unit1 x = np.arange(1, 4) * 10 * dim_unit1 y = np.arange(2) * dim_unit1 z = np.arange(3) * coord_unit1 ds1 = xr.Dataset( data_vars={"a": (("y", "x"), array1), "b": (("y", "x"), array2)}, coords={"x": x, "y": y, "z": ("x", z)}, ) ds2 = xr.Dataset( data_vars={ "a": (("y", "x"), np.ones_like(array1) * data_unit2), "b": (("y", "x"), np.ones_like(array2) * data_unit2), }, coords={ "x": np.arange(3) * dim_unit2, "y": np.arange(2, 4) * dim_unit2, "z": ("x", np.arange(-3, 0) * coord_unit2), }, ) ds3 = xr.Dataset( data_vars={ "a": (("y", "x"), np.full_like(array1, fill_value=np.nan) * data_unit2), "b": (("y", "x"), np.full_like(array2, fill_value=np.nan) * data_unit2), }, coords={ "x": np.arange(3, 6) * dim_unit2, "y": np.arange(4, 6) * dim_unit2, "z": ("x", np.arange(3, 6) * coord_unit2), }, ) ds4 = xr.Dataset( data_vars={ "a": (("y", "x"), -1 * np.ones_like(array1) * data_unit2), "b": (("y", "x"), -1 * np.ones_like(array2) * data_unit2), }, coords={ "x": np.arange(6, 9) * dim_unit2, "y": np.arange(6, 8) * dim_unit2, "z": ("x", np.arange(6, 9) * coord_unit2), }, ) func = function(xr.combine_nested, concat_dim=["x", "y"], join="outer") if error is not None: with pytest.raises(error): func([[ds1, ds2], [ds3, ds4]]) return units = extract_units(ds1) convert_and_strip = lambda ds: strip_units(convert_units(ds, units)) expected = attach_units( func( [ [strip_units(ds1), convert_and_strip(ds2)], [convert_and_strip(ds3), convert_and_strip(ds4)], ] ), units, ) actual = func([[ds1, ds2], [ds3, ds4]]) assert_units_equal(expected, actual) assert_identical(expected, actual) @pytest.mark.parametrize( "unit,error", ( pytest.param(1, DimensionalityError, id="no_unit"), pytest.param( unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), pytest.param(unit_registry.mm, None, id="compatible_unit"), pytest.param(unit_registry.m, None, id="identical_unit"), ), ids=repr, ) @pytest.mark.parametrize( "variant", ( "data", pytest.param( "dims", marks=pytest.mark.skip(reason="indexes don't support units") ), "coords", ), ) def test_concat_dataarray(variant, unit, error, dtype): original_unit = unit_registry.m variants = { "data": ((original_unit, unit), (1, 1), (1, 1)), "dims": ((1, 1), (original_unit, unit), (1, 1)), "coords": ((1, 1), (1, 1), (original_unit, unit)), } ( (data_unit1, data_unit2), (dim_unit1, dim_unit2), (coord_unit1, coord_unit2), ) = variants[variant] array1 = np.linspace(0, 5, 10).astype(dtype) * data_unit1 array2 = np.linspace(-5, 0, 5).astype(dtype) * data_unit2 x1 = np.arange(5, 15) * dim_unit1 x2 = np.arange(5) * dim_unit2 u1 = np.linspace(1, 2, 10).astype(dtype) * coord_unit1 u2 = np.linspace(0, 1, 5).astype(dtype) * coord_unit2 arr1 = xr.DataArray(data=array1, coords={"x": x1, "u": ("x", u1)}, dims="x") arr2 = xr.DataArray(data=array2, coords={"x": x2, "u": ("x", u2)}, dims="x") if error is not 
None: with pytest.raises(error): xr.concat([arr1, arr2], dim="x") return units = extract_units(arr1) expected = attach_units( xr.concat( [strip_units(arr1), strip_units(convert_units(arr2, units))], dim="x" ), units, ) actual = xr.concat([arr1, arr2], dim="x") assert_units_equal(expected, actual) assert_identical(expected, actual) @pytest.mark.parametrize( "unit,error", ( pytest.param(1, DimensionalityError, id="no_unit"), pytest.param( unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), pytest.param(unit_registry.mm, None, id="compatible_unit"), pytest.param(unit_registry.m, None, id="identical_unit"), ), ids=repr, ) @pytest.mark.parametrize( "variant", ( "data", pytest.param( "dims", marks=pytest.mark.skip(reason="indexes don't support units") ), "coords", ), ) def test_concat_dataset(variant, unit, error, dtype): original_unit = unit_registry.m variants = { "data": ((original_unit, unit), (1, 1), (1, 1)), "dims": ((1, 1), (original_unit, unit), (1, 1)), "coords": ((1, 1), (1, 1), (original_unit, unit)), } ( (data_unit1, data_unit2), (dim_unit1, dim_unit2), (coord_unit1, coord_unit2), ) = variants[variant] array1 = np.linspace(0, 5, 10).astype(dtype) * data_unit1 array2 = np.linspace(-5, 0, 5).astype(dtype) * data_unit2 x1 = np.arange(5, 15) * dim_unit1 x2 = np.arange(5) * dim_unit2 u1 = np.linspace(1, 2, 10).astype(dtype) * coord_unit1 u2 = np.linspace(0, 1, 5).astype(dtype) * coord_unit2 ds1 = xr.Dataset(data_vars={"a": ("x", array1)}, coords={"x": x1, "u": ("x", u1)}) ds2 = xr.Dataset(data_vars={"a": ("x", array2)}, coords={"x": x2, "u": ("x", u2)}) if error is not None: with pytest.raises(error): xr.concat([ds1, ds2], dim="x") return units = extract_units(ds1) expected = attach_units( xr.concat([strip_units(ds1), strip_units(convert_units(ds2, units))], dim="x"), units, ) actual = xr.concat([ds1, ds2], dim="x") assert_units_equal(expected, actual) assert_identical(expected, actual) @pytest.mark.parametrize( "unit,error", ( pytest.param(1, DimensionalityError, id="no_unit"), pytest.param( unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), pytest.param(unit_registry.mm, None, id="compatible_unit"), pytest.param(unit_registry.m, None, id="identical_unit"), ), ids=repr, ) @pytest.mark.parametrize( "variant", ( "data", pytest.param( "dims", marks=pytest.mark.skip(reason="indexes don't support units") ), "coords", ), ) def test_merge_dataarray(variant, unit, error, dtype): original_unit = unit_registry.m variants = { "data": ((original_unit, unit), (1, 1), (1, 1)), "dims": ((1, 1), (original_unit, unit), (1, 1)), "coords": ((1, 1), (1, 1), (original_unit, unit)), } ( (data_unit1, data_unit2), (dim_unit1, dim_unit2), (coord_unit1, coord_unit2), ) = variants[variant] array1 = np.linspace(0, 1, 2 * 3).reshape(2, 3).astype(dtype) * data_unit1 x1 = np.arange(2) * dim_unit1 y1 = np.arange(3) * dim_unit1 u1 = np.linspace(10, 20, 2) * coord_unit1 v1 = np.linspace(10, 20, 3) * coord_unit1 array2 = np.linspace(1, 2, 2 * 4).reshape(2, 4).astype(dtype) * data_unit2 x2 = np.arange(2, 4) * dim_unit2 z2 = np.arange(4) * dim_unit1 u2 = np.linspace(20, 30, 2) * coord_unit2 w2 = np.linspace(10, 20, 4) * coord_unit1 array3 = np.linspace(0, 2, 3 * 4).reshape(3, 4).astype(dtype) * data_unit2 y3 = np.arange(3, 6) * dim_unit2 z3 = np.arange(4, 8) * dim_unit2 v3 = np.linspace(10, 20, 3) * coord_unit2 w3 = np.linspace(10, 
20, 4) * coord_unit2 arr1 = xr.DataArray( name="a", data=array1, coords={"x": x1, "y": y1, "u": ("x", u1), "v": ("y", v1)}, dims=("x", "y"), ) arr2 = xr.DataArray( name="a", data=array2, coords={"x": x2, "z": z2, "u": ("x", u2), "w": ("z", w2)}, dims=("x", "z"), ) arr3 = xr.DataArray( name="a", data=array3, coords={"y": y3, "z": z3, "v": ("y", v3), "w": ("z", w3)}, dims=("y", "z"), ) func = function(xr.merge, compat="no_conflicts", join="outer") if error is not None: with pytest.raises(error): func([arr1, arr2, arr3]) return units = { "a": data_unit1, "u": coord_unit1, "v": coord_unit1, "w": coord_unit1, "x": dim_unit1, "y": dim_unit1, "z": dim_unit1, } convert_and_strip = lambda arr: strip_units(convert_units(arr, units)) expected = attach_units( func( [convert_and_strip(arr1), convert_and_strip(arr2), convert_and_strip(arr3)] ), units, ) actual = func([arr1, arr2, arr3]) assert_units_equal(expected, actual) assert_allclose(expected, actual) @pytest.mark.parametrize( "unit,error", ( pytest.param(1, DimensionalityError, id="no_unit"), pytest.param( unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), pytest.param(unit_registry.mm, None, id="compatible_unit"), pytest.param(unit_registry.m, None, id="identical_unit"), ), ids=repr, ) @pytest.mark.parametrize( "variant", ( "data", pytest.param( "dims", marks=pytest.mark.skip(reason="indexes don't support units") ), "coords", ), ) def test_merge_dataset(variant, unit, error, dtype): original_unit = unit_registry.m variants = { "data": ((original_unit, unit), (1, 1), (1, 1)), "dims": ((1, 1), (original_unit, unit), (1, 1)), "coords": ((1, 1), (1, 1), (original_unit, unit)), } ( (data_unit1, data_unit2), (dim_unit1, dim_unit2), (coord_unit1, coord_unit2), ) = variants[variant] array1 = np.zeros(shape=(2, 3), dtype=dtype) * data_unit1 array2 = np.zeros(shape=(2, 3), dtype=dtype) * data_unit1 x = np.arange(11, 14) * dim_unit1 y = np.arange(2) * dim_unit1 u = np.arange(3) * coord_unit1 ds1 = xr.Dataset( data_vars={"a": (("y", "x"), array1), "b": (("y", "x"), array2)}, coords={"x": x, "y": y, "u": ("x", u)}, ) ds2 = xr.Dataset( data_vars={ "a": (("y", "x"), np.ones_like(array1) * data_unit2), "b": (("y", "x"), np.ones_like(array2) * data_unit2), }, coords={ "x": np.arange(3) * dim_unit2, "y": np.arange(2, 4) * dim_unit2, "u": ("x", np.arange(-3, 0) * coord_unit2), }, ) ds3 = xr.Dataset( data_vars={ "a": (("y", "x"), np.full_like(array1, np.nan) * data_unit2), "b": (("y", "x"), np.full_like(array2, np.nan) * data_unit2), }, coords={ "x": np.arange(3, 6) * dim_unit2, "y": np.arange(4, 6) * dim_unit2, "u": ("x", np.arange(3, 6) * coord_unit2), }, ) func = function(xr.merge, compat="no_conflicts", join="outer") if error is not None: with pytest.raises(error): func([ds1, ds2, ds3]) return units = extract_units(ds1) convert_and_strip = lambda ds: strip_units(convert_units(ds, units)) expected = attach_units( func([convert_and_strip(ds1), convert_and_strip(ds2), convert_and_strip(ds3)]), units, ) actual = func([ds1, ds2, ds3]) assert_units_equal(expected, actual) assert_allclose(expected, actual) @pytest.mark.parametrize( "variant", ( "data", pytest.param( "dims", marks=pytest.mark.skip(reason="indexes don't support units") ), "coords", ), ) @pytest.mark.parametrize("func", (xr.zeros_like, xr.ones_like)) def test_replication_dataarray(func, variant, dtype): unit = unit_registry.m variants = { "data": (unit, 1, 1), "dims": (1, unit, 1), "coords": (1, 1, unit), } 
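# The `variants` mapping selects which part of the object carries units for
# the parametrized case -- the data itself, the dimension coordinates, or the
# non-dimension coordinates -- with `1` standing for "no unit attached".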
data_unit, dim_unit, coord_unit = variants[variant] array = np.linspace(0, 10, 20).astype(dtype) * data_unit x = np.arange(20) * dim_unit u = np.linspace(0, 1, 20) * coord_unit data_array = xr.DataArray(data=array, dims="x", coords={"x": x, "u": ("x", u)}) units = extract_units(data_array) units.pop(data_array.name) expected = attach_units(func(strip_units(data_array)), units) actual = func(data_array) assert_units_equal(expected, actual) assert_identical(expected, actual) @pytest.mark.parametrize( "variant", ( "data", pytest.param( "dims", marks=pytest.mark.skip(reason="indexes don't support units") ), "coords", ), ) @pytest.mark.parametrize("func", (xr.zeros_like, xr.ones_like)) def test_replication_dataset(func, variant, dtype): unit = unit_registry.m variants = { "data": ((unit_registry.m, unit_registry.Pa), 1, 1), "dims": ((1, 1), unit, 1), "coords": ((1, 1), 1, unit), } (data_unit1, data_unit2), dim_unit, coord_unit = variants[variant] array1 = np.linspace(0, 10, 20).astype(dtype) * data_unit1 array2 = np.linspace(5, 10, 10).astype(dtype) * data_unit2 x = np.arange(20).astype(dtype) * dim_unit y = np.arange(10).astype(dtype) * dim_unit u = np.linspace(0, 1, 10) * coord_unit ds = xr.Dataset( data_vars={"a": ("x", array1), "b": ("y", array2)}, coords={"x": x, "y": y, "u": ("y", u)}, ) units = { name: unit for name, unit in extract_units(ds).items() if name not in ds.data_vars } expected = attach_units(func(strip_units(ds)), units) actual = func(ds) assert_units_equal(expected, actual) assert_identical(expected, actual) @pytest.mark.parametrize( "variant", ( "data", pytest.param( "dims", marks=pytest.mark.skip(reason="indexes don't support units") ), pytest.param( "coords", marks=pytest.mark.xfail(reason="can't copy quantity into non-quantity"), ), ), ) def test_replication_full_like_dataarray(variant, dtype): # since full_like will strip units and then use the units of the # fill value, we don't need to try multiple units unit = unit_registry.m variants = { "data": (unit, 1, 1), "dims": (1, unit, 1), "coords": (1, 1, unit), } data_unit, dim_unit, coord_unit = variants[variant] array = np.linspace(0, 5, 10) * data_unit x = np.arange(10) * dim_unit u = np.linspace(0, 1, 10) * coord_unit data_array = xr.DataArray(data=array, dims="x", coords={"x": x, "u": ("x", u)}) fill_value = -1 * unit_registry.degK units = extract_units(data_array) units[data_array.name] = fill_value.units expected = attach_units( xr.full_like(strip_units(data_array), fill_value=strip_units(fill_value)), units ) actual = xr.full_like(data_array, fill_value=fill_value) assert_units_equal(expected, actual) assert_identical(expected, actual) @pytest.mark.parametrize( "variant", ( "data", pytest.param( "dims", marks=pytest.mark.skip(reason="indexes don't support units") ), pytest.param( "coords", marks=pytest.mark.xfail(reason="can't copy quantity into non-quantity"), ), ), ) def test_replication_full_like_dataset(variant, dtype): unit = unit_registry.m variants = { "data": ((unit_registry.s, unit_registry.Pa), 1, 1), "dims": ((1, 1), unit, 1), "coords": ((1, 1), 1, unit), } (data_unit1, data_unit2), dim_unit, coord_unit = variants[variant] array1 = np.linspace(0, 10, 20).astype(dtype) * data_unit1 array2 = np.linspace(5, 10, 10).astype(dtype) * data_unit2 x = np.arange(20).astype(dtype) * dim_unit y = np.arange(10).astype(dtype) * dim_unit u = np.linspace(0, 1, 10) * coord_unit ds = xr.Dataset( data_vars={"a": ("x", array1), "b": ("y", array2)}, coords={"x": x, "y": y, "u": ("y", u)}, ) fill_value = -1 * 
unit_registry.degK units = { **extract_units(ds), **dict.fromkeys(ds.data_vars, unit_registry.degK), } expected = attach_units( xr.full_like(strip_units(ds), fill_value=strip_units(fill_value)), units ) actual = xr.full_like(ds, fill_value=fill_value) assert_units_equal(expected, actual) assert_identical(expected, actual) @pytest.mark.parametrize( "unit,error", ( pytest.param(1, DimensionalityError, id="no_unit"), pytest.param( unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), pytest.param(unit_registry.mm, None, id="compatible_unit"), pytest.param(unit_registry.m, None, id="identical_unit"), ), ids=repr, ) @pytest.mark.parametrize("fill_value", (np.nan, 10.2)) def test_where_dataarray(fill_value, unit, error, dtype): array = np.linspace(0, 5, 10).astype(dtype) * unit_registry.m x = xr.DataArray(data=array, dims="x") cond = x < 5 * unit_registry.m fill_value = fill_value * unit if error is not None and not ( np.isnan(fill_value) and not isinstance(fill_value, Quantity) ): with pytest.raises(error): xr.where(cond, x, fill_value) return expected = attach_units( xr.where( cond, strip_units(x), strip_units(convert_units(fill_value, {None: unit_registry.m})), ), extract_units(x), ) actual = xr.where(cond, x, fill_value) assert_units_equal(expected, actual) assert_identical(expected, actual) @pytest.mark.parametrize( "unit,error", ( pytest.param(1, DimensionalityError, id="no_unit"), pytest.param( unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), pytest.param(unit_registry.mm, None, id="compatible_unit"), pytest.param(unit_registry.m, None, id="identical_unit"), ), ids=repr, ) @pytest.mark.parametrize("fill_value", (np.nan, 10.2)) def test_where_dataset(fill_value, unit, error, dtype): array1 = np.linspace(0, 5, 10).astype(dtype) * unit_registry.m array2 = np.linspace(-5, 0, 10).astype(dtype) * unit_registry.m ds = xr.Dataset(data_vars={"a": ("x", array1), "b": ("x", array2)}) cond = array1 < 2 * unit_registry.m fill_value = fill_value * unit if error is not None and not ( np.isnan(fill_value) and not isinstance(fill_value, Quantity) ): with pytest.raises(error): xr.where(cond, ds, fill_value) return expected = attach_units( xr.where( cond, strip_units(ds), strip_units(convert_units(fill_value, {None: unit_registry.m})), ), extract_units(ds), ) actual = xr.where(cond, ds, fill_value) assert_units_equal(expected, actual) assert_identical(expected, actual) def test_dot_dataarray(dtype): array1 = ( np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) * unit_registry.m / unit_registry.s ) array2 = ( np.linspace(10, 20, 10 * 20).reshape(10, 20).astype(dtype) * unit_registry.s ) data_array = xr.DataArray(data=array1, dims=("x", "y")) other = xr.DataArray(data=array2, dims=("y", "z")) with xr.set_options(use_opt_einsum=False): expected = attach_units( xr.dot(strip_units(data_array), strip_units(other)), {None: unit_registry.m} ) actual = xr.dot(data_array, other) assert_units_equal(expected, actual) assert_identical(expected, actual) class TestVariable: @pytest.mark.parametrize( "func", ( method("all"), method("any"), method("argmax", dim="x"), method("argmin", dim="x"), method("argsort"), method("cumprod"), method("cumsum"), method("max"), method("mean"), method("median"), method("min"), method("prod"), method("std"), method("sum"), method("var"), ), ids=repr, ) def test_aggregation(self, func, dtype): 
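# Aggregate a pint-quantified Variable and compare against the same
# aggregation applied to the bare quantity: the expected units are taken from
# the numpy result, and cumprod is exercised on dimensionless data because a
# cumulative product of a quantity with units is not well defined.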
array = np.linspace(0, 1, 10).astype(dtype) * ( unit_registry.m if func.name != "cumprod" else unit_registry.dimensionless ) variable = xr.Variable("x", array) numpy_kwargs = func.kwargs.copy() if "dim" in func.kwargs: numpy_kwargs["axis"] = variable.get_axis_num(numpy_kwargs.pop("dim")) units = extract_units(func(array, **numpy_kwargs)) expected = attach_units(func(strip_units(variable)), units) actual = func(variable) assert_units_equal(expected, actual) assert_allclose(expected, actual) def test_aggregate_complex(self): variable = xr.Variable("x", [1, 2j, np.nan] * unit_registry.m) expected = xr.Variable((), (0.5 + 1j) * unit_registry.m) actual = variable.mean() assert_units_equal(expected, actual) assert_allclose(expected, actual) @pytest.mark.parametrize( "func", ( method("astype", np.float32), method("conj"), method("conjugate"), method("clip", min=2, max=7), ), ids=repr, ) @pytest.mark.parametrize( "unit,error", ( pytest.param(1, DimensionalityError, id="no_unit"), pytest.param( unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), pytest.param(unit_registry.cm, None, id="compatible_unit"), pytest.param(unit_registry.m, None, id="identical_unit"), ), ) def test_numpy_methods(self, func, unit, error, dtype): array = np.linspace(0, 1, 10).astype(dtype) * unit_registry.m variable = xr.Variable("x", array) args = [ item * unit if isinstance(item, int | float | list) else item for item in func.args ] kwargs = { key: value * unit if isinstance(value, int | float | list) else value for key, value in func.kwargs.items() } if error is not None and func.name in ("searchsorted", "clip"): with pytest.raises(error): func(variable, *args, **kwargs) return converted_args = [ strip_units(convert_units(item, {None: unit_registry.m})) for item in args ] converted_kwargs = { key: strip_units(convert_units(value, {None: unit_registry.m})) for key, value in kwargs.items() } units = extract_units(func(array, *args, **kwargs)) expected = attach_units( func(strip_units(variable), *converted_args, **converted_kwargs), units ) actual = func(variable, *args, **kwargs) assert_units_equal(expected, actual) assert_allclose(expected, actual) @pytest.mark.parametrize( "func", (method("item", 5), method("searchsorted", 5)), ids=repr ) @pytest.mark.parametrize( "unit,error", ( pytest.param(1, DimensionalityError, id="no_unit"), pytest.param( unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), pytest.param(unit_registry.cm, None, id="compatible_unit"), pytest.param(unit_registry.m, None, id="identical_unit"), ), ) def test_raw_numpy_methods(self, func, unit, error, dtype): array = np.linspace(0, 1, 10).astype(dtype) * unit_registry.m variable = xr.Variable("x", array) args = [ ( item * unit if isinstance(item, int | float | list) and func.name != "item" else item ) for item in func.args ] kwargs = { key: ( value * unit if isinstance(value, int | float | list) and func.name != "item" else value ) for key, value in func.kwargs.items() } if error is not None and func.name != "item": with pytest.raises(error): func(variable, *args, **kwargs) return converted_args = [ ( strip_units(convert_units(item, {None: unit_registry.m})) if func.name != "item" else item ) for item in args ] converted_kwargs = { key: ( strip_units(convert_units(value, {None: unit_registry.m})) if func.name != "item" else value ) for key, value in kwargs.items() } 
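# For "item" the argument is a positional index rather than a value with
# units, so it is deliberately excluded from the unit attachment and
# conversion above.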
units = extract_units(func(array, *args, **kwargs)) expected = attach_units( func(strip_units(variable), *converted_args, **converted_kwargs), units ) actual = func(variable, *args, **kwargs) assert_units_equal(expected, actual) assert_duckarray_allclose(expected, actual) @pytest.mark.parametrize( "func", (method("isnull"), method("notnull"), method("count")), ids=repr ) def test_missing_value_detection(self, func): array = ( np.array( [ [1.4, 2.3, np.nan, 7.2], [np.nan, 9.7, np.nan, np.nan], [2.1, np.nan, np.nan, 4.6], [9.9, np.nan, 7.2, 9.1], ] ) * unit_registry.degK ) variable = xr.Variable(("x", "y"), array) expected = func(strip_units(variable)) actual = func(variable) assert_units_equal(expected, actual) assert_identical(expected, actual) @pytest.mark.parametrize( "unit,error", ( pytest.param(1, DimensionalityError, id="no_unit"), pytest.param( unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), pytest.param(unit_registry.cm, None, id="compatible_unit"), pytest.param(unit_registry.m, None, id="identical_unit"), ), ) def test_missing_value_fillna(self, unit, error): value = 10 array = ( np.array( [ [1.4, 2.3, np.nan, 7.2], [np.nan, 9.7, np.nan, np.nan], [2.1, np.nan, np.nan, 4.6], [9.9, np.nan, 7.2, 9.1], ] ) * unit_registry.m ) variable = xr.Variable(("x", "y"), array) fill_value = value * unit if error is not None: with pytest.raises(error): variable.fillna(value=fill_value) return expected = attach_units( strip_units(variable).fillna( value=fill_value.to(unit_registry.m).magnitude ), extract_units(variable), ) actual = variable.fillna(value=fill_value) assert_units_equal(expected, actual) assert_identical(expected, actual) @pytest.mark.parametrize( "unit", ( pytest.param(1, id="no_unit"), pytest.param(unit_registry.dimensionless, id="dimensionless"), pytest.param(unit_registry.s, id="incompatible_unit"), pytest.param( unit_registry.cm, id="compatible_unit", ), pytest.param(unit_registry.m, id="identical_unit"), ), ) @pytest.mark.parametrize( "convert_data", ( pytest.param(False, id="no_conversion"), pytest.param(True, id="with_conversion"), ), ) @pytest.mark.parametrize( "func", ( method("equals"), pytest.param( method("identical"), marks=pytest.mark.skip(reason="behavior of identical is undecided"), ), ), ids=repr, ) def test_comparisons(self, func, unit, convert_data, dtype): array = np.linspace(0, 1, 9).astype(dtype) quantity1 = array * unit_registry.m variable = xr.Variable("x", quantity1) if convert_data and is_compatible(unit_registry.m, unit): quantity2 = convert_units(array * unit_registry.m, {None: unit}) else: quantity2 = array * unit other = xr.Variable("x", quantity2) expected = func( strip_units(variable), strip_units( convert_units(other, extract_units(variable)) if is_compatible(unit_registry.m, unit) else other ), ) if func.name == "identical": expected &= extract_units(variable) == extract_units(other) else: expected &= all( compatible_mappings( extract_units(variable), extract_units(other) ).values() ) actual = func(variable, other) assert expected == actual @pytest.mark.parametrize( "unit", ( pytest.param(1, id="no_unit"), pytest.param(unit_registry.dimensionless, id="dimensionless"), pytest.param(unit_registry.s, id="incompatible_unit"), pytest.param(unit_registry.cm, id="compatible_unit"), pytest.param(unit_registry.m, id="identical_unit"), ), ) def test_broadcast_equals(self, unit, dtype): base_unit = unit_registry.m left_array = np.ones(shape=(2, 2), dtype=dtype) * 
base_unit value = ( (1 * base_unit).to(unit).magnitude if is_compatible(unit, base_unit) else 1 ) right_array = np.full(shape=(2,), fill_value=value, dtype=dtype) * unit left = xr.Variable(("x", "y"), left_array) right = xr.Variable("x", right_array) units = { **extract_units(left), **({} if is_compatible(unit, base_unit) else {None: None}), } expected = strip_units(left).broadcast_equals( strip_units(convert_units(right, units)) ) & is_compatible(unit, base_unit) actual = left.broadcast_equals(right) assert expected == actual @pytest.mark.parametrize("dask", [False, pytest.param(True, marks=[requires_dask])]) @pytest.mark.parametrize( ["variable", "indexers"], ( pytest.param( xr.Variable("x", np.linspace(0, 5, 10)), {"x": 4}, id="single value-single indexer", ), pytest.param( xr.Variable("x", np.linspace(0, 5, 10)), {"x": [5, 2, 9, 1]}, id="multiple values-single indexer", ), pytest.param( xr.Variable(("x", "y"), np.linspace(0, 5, 20).reshape(4, 5)), {"x": 1, "y": 4}, id="single value-multiple indexers", ), pytest.param( xr.Variable(("x", "y"), np.linspace(0, 5, 20).reshape(4, 5)), {"x": [0, 1, 2], "y": [0, 2, 4]}, id="multiple values-multiple indexers", ), ), ) def test_isel(self, variable, indexers, dask, dtype): if dask: variable = variable.chunk(dict.fromkeys(variable.dims, 2)) quantified = xr.Variable( variable.dims, variable.data.astype(dtype) * unit_registry.s ) expected = attach_units( strip_units(quantified).isel(indexers), extract_units(quantified) ) actual = quantified.isel(indexers) assert_units_equal(expected, actual) assert_identical(expected, actual) @pytest.mark.parametrize( "unit,error", ( pytest.param(1, DimensionalityError, id="no_unit"), pytest.param( unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), pytest.param(unit_registry.cm, None, id="compatible_unit"), pytest.param(unit_registry.m, None, id="identical_unit"), ), ) @pytest.mark.parametrize( "func", ( function(lambda x, *_: +x, function_label="unary_plus"), function(lambda x, *_: -x, function_label="unary_minus"), function(lambda x, *_: abs(x), function_label="absolute"), function(lambda x, y: x + y, function_label="sum"), function(lambda x, y: y + x, function_label="commutative_sum"), function(lambda x, y: x * y, function_label="product"), function(lambda x, y: y * x, function_label="commutative_product"), ), ids=repr, ) def test_1d_math(self, func, unit, error, dtype): base_unit = unit_registry.m array = np.arange(5).astype(dtype) * base_unit variable = xr.Variable("x", array) values = np.ones(5) y = values * unit if error is not None and func.name in ("sum", "commutative_sum"): with pytest.raises(error): func(variable, y) return units = extract_units(func(array, y)) if all(compatible_mappings(units, extract_units(y)).values()): converted_y = convert_units(y, units) else: converted_y = y if all(compatible_mappings(units, extract_units(variable)).values()): converted_variable = convert_units(variable, units) else: converted_variable = variable expected = attach_units( func(strip_units(converted_variable), strip_units(converted_y)), units ) actual = func(variable, y) assert_units_equal(expected, actual) assert_allclose(expected, actual) @pytest.mark.parametrize( "unit,error", ( pytest.param(1, DimensionalityError, id="no_unit"), pytest.param( unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), pytest.param(unit_registry.cm, 
None, id="compatible_unit"), pytest.param(unit_registry.m, None, id="identical_unit"), ), ) @pytest.mark.parametrize( "func", (method("where"), method("_getitem_with_mask")), ids=repr ) def test_masking(self, func, unit, error, dtype): base_unit = unit_registry.m array = np.linspace(0, 5, 10).astype(dtype) * base_unit variable = xr.Variable("x", array) cond = np.array([True, False] * 5) other = -1 * unit if error is not None: with pytest.raises(error): func(variable, cond, other) return expected = attach_units( func( strip_units(variable), cond, strip_units( convert_units( other, ( {None: base_unit} if is_compatible(base_unit, unit) else {None: None} ), ) ), ), extract_units(variable), ) actual = func(variable, cond, other) assert_units_equal(expected, actual) assert_identical(expected, actual) @pytest.mark.parametrize("dim", ("x", "y", "z", "t", "all")) def test_squeeze(self, dim, dtype): shape = (2, 1, 3, 1, 1, 2) names = list("abcdef") dim_lengths = dict(zip(names, shape, strict=True)) array = np.ones(shape=shape) * unit_registry.m variable = xr.Variable(names, array) kwargs = {"dim": dim} if dim != "all" and dim_lengths.get(dim, 0) == 1 else {} expected = attach_units( strip_units(variable).squeeze(**kwargs), extract_units(variable) ) actual = variable.squeeze(**kwargs) assert_units_equal(expected, actual) assert_identical(expected, actual) @pytest.mark.parametrize("compute_backend", ["numbagg", None], indirect=True) @pytest.mark.parametrize( "func", ( method("coarsen", windows={"y": 2}, func=np.mean), method("quantile", q=[0.25, 0.75]), pytest.param( method("rank", dim="x"), marks=pytest.mark.skip(reason="rank not implemented for non-ndarray"), ), method("roll", {"x": 2}), pytest.param( method("rolling_window", "x", 3, "window"), marks=pytest.mark.xfail(reason="converts to ndarray"), ), method("reduce", np.std, "x"), method("round", 2), method("shift", {"x": -2}), method("transpose", "y", "x"), ), ids=repr, ) def test_computation(self, func, dtype, compute_backend): base_unit = unit_registry.m array = np.linspace(0, 5, 5 * 10).reshape(5, 10).astype(dtype) * base_unit variable = xr.Variable(("x", "y"), array) expected = attach_units(func(strip_units(variable)), extract_units(variable)) actual = func(variable) assert_units_equal(expected, actual) assert_identical(expected, actual) @pytest.mark.parametrize( "unit,error", ( pytest.param(1, DimensionalityError, id="no_unit"), pytest.param( unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), pytest.param(unit_registry.cm, None, id="compatible_unit"), pytest.param(unit_registry.m, None, id="identical_unit"), ), ) def test_searchsorted(self, unit, error, dtype): base_unit = unit_registry.m array = np.linspace(0, 5, 10).astype(dtype) * base_unit variable = xr.Variable("x", array) value = 0 * unit if error is not None: with pytest.raises(error): variable.searchsorted(value) # type: ignore[attr-defined] return expected = strip_units(variable).searchsorted( strip_units(convert_units(value, {None: base_unit})) ) actual = variable.searchsorted(value) # type: ignore[attr-defined] assert_units_equal(expected, actual) np.testing.assert_allclose(expected, actual) def test_stack(self, dtype): array = np.linspace(0, 5, 3 * 10).reshape(3, 10).astype(dtype) * unit_registry.m variable = xr.Variable(("x", "y"), array) expected = attach_units( strip_units(variable).stack(z=("x", "y")), extract_units(variable) ) actual = variable.stack(z=("x", "y")) 
assert_units_equal(expected, actual) assert_identical(expected, actual) def test_unstack(self, dtype): array = np.linspace(0, 5, 3 * 10).astype(dtype) * unit_registry.m variable = xr.Variable("z", array) expected = attach_units( strip_units(variable).unstack(z={"x": 3, "y": 10}), extract_units(variable) ) actual = variable.unstack(z={"x": 3, "y": 10}) assert_units_equal(expected, actual) assert_identical(expected, actual) @pytest.mark.parametrize( "unit,error", ( pytest.param(1, DimensionalityError, id="no_unit"), pytest.param( unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), pytest.param(unit_registry.cm, None, id="compatible_unit"), pytest.param(unit_registry.m, None, id="identical_unit"), ), ) def test_concat(self, unit, error, dtype): array1 = ( np.linspace(0, 5, 9 * 10).reshape(3, 6, 5).astype(dtype) * unit_registry.m ) array2 = np.linspace(5, 10, 10 * 3).reshape(3, 2, 5).astype(dtype) * unit variable = xr.Variable(("x", "y", "z"), array1) other = xr.Variable(("x", "y", "z"), array2) if error is not None: with pytest.raises(error): xr.Variable.concat([variable, other], dim="y") return units = extract_units(variable) expected = attach_units( xr.Variable.concat( [strip_units(variable), strip_units(convert_units(other, units))], dim="y", ), units, ) actual = xr.Variable.concat([variable, other], dim="y") assert_units_equal(expected, actual) assert_identical(expected, actual) def test_set_dims(self, dtype): array = np.linspace(0, 5, 3 * 10).reshape(3, 10).astype(dtype) * unit_registry.m variable = xr.Variable(("x", "y"), array) dims = {"z": 6, "x": 3, "a": 1, "b": 4, "y": 10} expected = attach_units( strip_units(variable).set_dims(dims), extract_units(variable) ) actual = variable.set_dims(dims) assert_units_equal(expected, actual) assert_identical(expected, actual) def test_copy(self, dtype): array = np.linspace(0, 5, 10).astype(dtype) * unit_registry.m other = np.arange(10).astype(dtype) * unit_registry.s variable = xr.Variable("x", array) expected = attach_units( strip_units(variable).copy(data=strip_units(other)), extract_units(other) ) actual = variable.copy(data=other) assert_units_equal(expected, actual) assert_identical(expected, actual) @pytest.mark.parametrize( "unit", ( pytest.param(1, id="no_unit"), pytest.param(unit_registry.dimensionless, id="dimensionless"), pytest.param(unit_registry.s, id="incompatible_unit"), pytest.param(unit_registry.cm, id="compatible_unit"), pytest.param(unit_registry.m, id="identical_unit"), ), ) def test_no_conflicts(self, unit, dtype): base_unit = unit_registry.m array1 = ( np.array( [ [6.3, 0.3, 0.45], [np.nan, 0.3, 0.3], [3.7, np.nan, 0.2], [9.43, 0.3, 0.7], ] ) * base_unit ) array2 = np.array([np.nan, 0.3, np.nan]) * unit variable = xr.Variable(("x", "y"), array1) other = xr.Variable("y", array2) expected = strip_units(variable).no_conflicts( strip_units( convert_units( other, {None: base_unit if is_compatible(base_unit, unit) else None} ) ) ) & is_compatible(base_unit, unit) actual = variable.no_conflicts(other) assert expected == actual @pytest.mark.parametrize( "mode", [ "constant", "mean", "median", "reflect", "edge", "linear_ramp", "maximum", "minimum", "symmetric", "wrap", ], ) @pytest.mark.parametrize("xr_arg, np_arg", _PAD_XR_NP_ARGS) def test_pad(self, mode, xr_arg, np_arg): data = np.arange(4 * 3 * 2).reshape(4, 3, 2) * unit_registry.m v = xr.Variable(["x", "y", "z"], data) expected = attach_units( strip_units(v).pad(mode=mode, 
**xr_arg), extract_units(v), ) actual = v.pad(mode=mode, **xr_arg) assert_units_equal(expected, actual) assert_equal(actual, expected) @pytest.mark.parametrize( "unit,error", ( pytest.param(1, DimensionalityError, id="no_unit"), pytest.param( unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), pytest.param(unit_registry.cm, None, id="compatible_unit"), pytest.param(unit_registry.m, None, id="identical_unit"), ), ) def test_pad_unit_constant_value(self, unit, error, dtype): array = np.linspace(0, 5, 3 * 10).reshape(3, 10).astype(dtype) * unit_registry.m variable = xr.Variable(("x", "y"), array) fill_value = -100 * unit func = method("pad", mode="constant", x=(2, 3), y=(1, 4)) if error is not None: with pytest.raises(error): func(variable, constant_values=fill_value) return units = extract_units(variable) expected = attach_units( func( strip_units(variable), constant_values=strip_units(convert_units(fill_value, units)), ), units, ) actual = func(variable, constant_values=fill_value) assert_units_equal(expected, actual) assert_identical(expected, actual) class TestDataArray: @pytest.mark.parametrize( "variant", ( pytest.param( "with_dims", marks=pytest.mark.skip(reason="indexes don't support units"), ), "with_coords", "without_coords", ), ) def test_init(self, variant, dtype): array = np.linspace(1, 2, 10, dtype=dtype) * unit_registry.m x = np.arange(len(array)) * unit_registry.s y = x.to(unit_registry.ms) variants = { "with_dims": {"x": x}, "with_coords": {"y": ("x", y)}, "without_coords": {}, } kwargs = {"data": array, "dims": "x", "coords": variants[variant]} data_array = xr.DataArray(**kwargs) assert isinstance(data_array.data, Quantity) assert all( { name: isinstance(coord.data, Quantity) for name, coord in data_array.coords.items() }.values() ) @pytest.mark.parametrize( "func", (pytest.param(str, id="str"), pytest.param(repr, id="repr")) ) @pytest.mark.parametrize( "variant", ( pytest.param( "with_dims", marks=pytest.mark.skip(reason="indexes don't support units"), ), pytest.param("with_coords"), pytest.param("without_coords"), ), ) def test_repr(self, func, variant, dtype): array = np.linspace(1, 2, 10, dtype=dtype) * unit_registry.m x = np.arange(len(array)) * unit_registry.s y = x.to(unit_registry.ms) variants = { "with_dims": {"x": x}, "with_coords": {"y": ("x", y)}, "without_coords": {}, } kwargs = {"data": array, "dims": "x", "coords": variants[variant]} data_array = xr.DataArray(**kwargs) # FIXME: this just checks that the repr does not raise # warnings or errors, but does not check the result func(data_array) @pytest.mark.parametrize( "func", ( function("all"), function("any"), pytest.param( function("argmax"), marks=pytest.mark.skip( reason="calling np.argmax as a function on xarray objects is not " "supported" ), ), pytest.param( function("argmin"), marks=pytest.mark.skip( reason="calling np.argmin as a function on xarray objects is not " "supported" ), ), function("max"), function("mean"), pytest.param( function("median"), marks=pytest.mark.skip( reason="median does not work with dataarrays yet" ), ), function("min"), function("prod"), function("sum"), function("std"), function("var"), function("cumsum"), function("cumprod"), method("all"), method("any"), method("argmax", dim="x"), method("argmin", dim="x"), method("max"), method("mean"), method("median"), method("min"), method("prod"), method("sum"), method("std"), method("var"), method("cumsum"), method("cumprod"), ), ids=repr, ) 
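# Aggregations are exercised both as module-level numpy functions
# (e.g. function("mean") -> np.mean(da)) and as bound methods
# (e.g. method("mean") -> da.mean()); a few of the function variants are
# skipped where calling the numpy function on a DataArray is not supported.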
def test_aggregation(self, func, dtype): array = np.arange(10).astype(dtype) * ( unit_registry.m if func.name != "cumprod" else unit_registry.dimensionless ) data_array = xr.DataArray(data=array, dims="x") numpy_kwargs = func.kwargs.copy() if "dim" in numpy_kwargs: numpy_kwargs["axis"] = data_array.get_axis_num(numpy_kwargs.pop("dim")) # units differ based on the applied function, so we need to # first compute the units units = extract_units(func(array)) expected = attach_units(func(strip_units(data_array)), units) actual = func(data_array) assert_units_equal(expected, actual) assert_allclose(expected, actual) @pytest.mark.parametrize( "func", ( pytest.param(operator.neg, id="negate"), pytest.param(abs, id="absolute"), pytest.param(np.round, id="round"), ), ) def test_unary_operations(self, func, dtype): array = np.arange(10).astype(dtype) * unit_registry.m data_array = xr.DataArray(data=array) units = extract_units(func(array)) expected = attach_units(func(strip_units(data_array)), units) actual = func(data_array) assert_units_equal(expected, actual) assert_identical(expected, actual) @pytest.mark.parametrize( "func", ( pytest.param(lambda x: 2 * x, id="multiply"), pytest.param(lambda x: x + x, id="add"), pytest.param(lambda x: x[0] + x, id="add scalar"), pytest.param(lambda x: x.T @ x, id="matrix multiply"), ), ) def test_binary_operations(self, func, dtype): array = np.arange(10).astype(dtype) * unit_registry.m data_array = xr.DataArray(data=array) units = extract_units(func(array)) with xr.set_options(use_opt_einsum=False): expected = attach_units(func(strip_units(data_array)), units) actual = func(data_array) assert_units_equal(expected, actual) assert_identical(expected, actual) @pytest.mark.parametrize( "comparison", ( pytest.param(operator.lt, id="less_than"), pytest.param(operator.ge, id="greater_equal"), pytest.param(operator.eq, id="equal"), ), ) @pytest.mark.parametrize( "unit,error", ( pytest.param(1, ValueError, id="without_unit"), pytest.param( unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), pytest.param(unit_registry.mm, None, id="compatible_unit"), pytest.param(unit_registry.m, None, id="identical_unit"), ), ) def test_comparison_operations(self, comparison, unit, error, dtype): array = ( np.array([10.1, 5.2, 6.5, 8.0, 21.3, 7.1, 1.3]).astype(dtype) * unit_registry.m ) data_array = xr.DataArray(data=array) value = 8 to_compare_with = value * unit # incompatible units are all not equal if error is not None and comparison is not operator.eq: with pytest.raises(error): comparison(array, to_compare_with) with pytest.raises(error): comparison(data_array, to_compare_with) return actual = comparison(data_array, to_compare_with) expected_units = {None: unit_registry.m if array.check(unit) else None} expected = array.check(unit) & comparison( strip_units(data_array), strip_units(convert_units(to_compare_with, expected_units)), ) assert_units_equal(expected, actual) assert_identical(expected, actual) @pytest.mark.parametrize( "units,error", ( pytest.param(unit_registry.dimensionless, None, id="dimensionless"), pytest.param(unit_registry.m, DimensionalityError, id="incompatible_unit"), pytest.param(unit_registry.degree, None, id="compatible_unit"), ), ) def test_univariate_ufunc(self, units, error, dtype): array = np.arange(10).astype(dtype) * units data_array = xr.DataArray(data=array) func = function("sin") if error is not None: with pytest.raises(error): np.sin(data_array) return 
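# For angular units np.sin is expected to convert the input to radians first,
# so the reference result below is built from the data converted to radians
# and the output is expected to be dimensionless.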
expected = attach_units( func(strip_units(convert_units(data_array, {None: unit_registry.radians}))), {None: unit_registry.dimensionless}, ) actual = func(data_array) assert_units_equal(expected, actual) assert_identical(expected, actual) @pytest.mark.parametrize( "unit,error", ( pytest.param(1, DimensionalityError, id="without_unit"), pytest.param( unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), pytest.param( unit_registry.mm, None, id="compatible_unit", marks=pytest.mark.xfail(reason="pint converts to the wrong units"), ), pytest.param(unit_registry.m, None, id="identical_unit"), ), ) def test_bivariate_ufunc(self, unit, error, dtype): original_unit = unit_registry.m array = np.arange(10).astype(dtype) * original_unit data_array = xr.DataArray(data=array) if error is not None: with pytest.raises(error): np.maximum(data_array, 1 * unit) return expected_units = {None: original_unit} expected = attach_units( np.maximum( strip_units(data_array), strip_units(convert_units(1 * unit, expected_units)), ), expected_units, ) actual = np.maximum(data_array, 1 * unit) assert_units_equal(expected, actual) assert_identical(expected, actual) actual = np.maximum(1 * unit, data_array) assert_units_equal(expected, actual) assert_identical(expected, actual) @pytest.mark.parametrize("property", ("T", "imag", "real")) def test_numpy_properties(self, property, dtype): array = ( np.arange(5 * 10).astype(dtype) + 1j * np.linspace(-1, 0, 5 * 10).astype(dtype) ).reshape(5, 10) * unit_registry.s data_array = xr.DataArray(data=array, dims=("x", "y")) expected = attach_units( getattr(strip_units(data_array), property), extract_units(data_array) ) actual = getattr(data_array, property) assert_units_equal(expected, actual) assert_identical(expected, actual) @pytest.mark.parametrize( "func", (method("conj"), method("argsort"), method("conjugate"), method("round")), ids=repr, ) def test_numpy_methods(self, func, dtype): array = np.arange(10).astype(dtype) * unit_registry.m data_array = xr.DataArray(data=array, dims="x") units = extract_units(func(array)) expected = attach_units(strip_units(data_array), units) actual = func(data_array) assert_units_equal(expected, actual) assert_identical(expected, actual) def test_item(self, dtype): array = np.arange(10).astype(dtype) * unit_registry.m data_array = xr.DataArray(data=array) func = method("item", 2) expected = func(strip_units(data_array)) * unit_registry.m actual = func(data_array) assert_duckarray_allclose(expected, actual) @pytest.mark.parametrize( "unit,error", ( pytest.param(1, DimensionalityError, id="no_unit"), pytest.param( unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), pytest.param(unit_registry.cm, None, id="compatible_unit"), pytest.param(unit_registry.m, None, id="identical_unit"), ), ) @pytest.mark.parametrize( "func", ( method("searchsorted", 5), pytest.param( function("searchsorted", 5), marks=pytest.mark.xfail( reason="xarray does not implement __array_function__" ), ), ), ids=repr, ) def test_searchsorted(self, func, unit, error, dtype): array = np.arange(10).astype(dtype) * unit_registry.m data_array = xr.DataArray(data=array) scalar_types = (int, float) args = [value * unit for value in func.args] kwargs = { key: (value * unit if isinstance(value, scalar_types) else value) for key, value in func.kwargs.items() } if error is not None: with 
pytest.raises(error): func(data_array, *args, **kwargs) return units = extract_units(data_array) expected_units = extract_units(func(array, *args, **kwargs)) stripped_args = [strip_units(convert_units(value, units)) for value in args] stripped_kwargs = { key: strip_units(convert_units(value, units)) for key, value in kwargs.items() } expected = attach_units( func(strip_units(data_array), *stripped_args, **stripped_kwargs), expected_units, ) actual = func(data_array, *args, **kwargs) assert_units_equal(expected, actual) np.testing.assert_allclose(expected, actual) @pytest.mark.parametrize( "func", ( method("clip", min=3, max=8), pytest.param( function("clip", a_min=3, a_max=8), marks=pytest.mark.xfail( reason="xarray does not implement __array_function__" ), ), ), ids=repr, ) @pytest.mark.parametrize( "unit,error", ( pytest.param(1, DimensionalityError, id="no_unit"), pytest.param( unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), pytest.param(unit_registry.cm, None, id="compatible_unit"), pytest.param(unit_registry.m, None, id="identical_unit"), ), ) def test_numpy_methods_with_args(self, func, unit, error, dtype): array = np.arange(10).astype(dtype) * unit_registry.m data_array = xr.DataArray(data=array) scalar_types = (int, float) args = [value * unit for value in func.args] kwargs = { key: (value * unit if isinstance(value, scalar_types) else value) for key, value in func.kwargs.items() } if error is not None: with pytest.raises(error): func(data_array, *args, **kwargs) return units = extract_units(data_array) expected_units = extract_units(func(array, *args, **kwargs)) stripped_args = [strip_units(convert_units(value, units)) for value in args] stripped_kwargs = { key: strip_units(convert_units(value, units)) for key, value in kwargs.items() } expected = attach_units( func(strip_units(data_array), *stripped_args, **stripped_kwargs), expected_units, ) actual = func(data_array, *args, **kwargs) assert_units_equal(expected, actual) assert_identical(expected, actual) @pytest.mark.parametrize( "func", (method("isnull"), method("notnull"), method("count")), ids=repr ) def test_missing_value_detection(self, func, dtype): array = ( np.array( [ [1.4, 2.3, np.nan, 7.2], [np.nan, 9.7, np.nan, np.nan], [2.1, np.nan, np.nan, 4.6], [9.9, np.nan, 7.2, 9.1], ] ) * unit_registry.degK ) data_array = xr.DataArray(data=array) expected = func(strip_units(data_array)) actual = func(data_array) assert_units_equal(expected, actual) assert_identical(expected, actual) @pytest.mark.xfail(reason="ffill and bfill lose units in data") @pytest.mark.parametrize("func", (method("ffill"), method("bfill")), ids=repr) def test_missing_value_filling(self, func, dtype): array = ( create_nan_array([1.4, np.nan, 2.3, np.nan, np.nan, 9.1], dtype) * unit_registry.degK ) x = np.arange(len(array)) data_array = xr.DataArray(data=array, coords={"x": x}, dims="x") expected = attach_units( func(strip_units(data_array), dim="x"), extract_units(data_array) ) actual = func(data_array, dim="x") assert_units_equal(expected, actual) assert_identical(expected, actual) @pytest.mark.parametrize( "unit,error", ( pytest.param(1, DimensionalityError, id="no_unit"), pytest.param( unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), pytest.param(unit_registry.cm, None, id="compatible_unit"), pytest.param(unit_registry.m, None, id="identical_unit"), ), ) 
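# fillna is checked with a python scalar, a 0-d numpy array and a 1-element
# numpy array as fill values, each multiplied by the parametrized unit.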
@pytest.mark.parametrize( "fill_value", ( pytest.param(-1, id="python_scalar"), pytest.param(np.array(-1), id="numpy_scalar"), pytest.param(np.array([-1]), id="numpy_array"), ), ) def test_fillna(self, fill_value, unit, error, dtype): original_unit = unit_registry.m array = ( create_nan_array([1.4, np.nan, 2.3, np.nan, np.nan, 9.1], dtype) * original_unit ) data_array = xr.DataArray(data=array) func = method("fillna") value = fill_value * unit if error is not None: with pytest.raises(error): func(data_array, value=value) return units = extract_units(data_array) expected = attach_units( func( strip_units(data_array), value=strip_units(convert_units(value, units)) ), units, ) actual = func(data_array, value=value) assert_units_equal(expected, actual) assert_identical(expected, actual) def test_dropna(self, dtype): array = ( create_nan_array([1.4, np.nan, 2.3, np.nan, np.nan, 9.1], dtype) * unit_registry.m ) x = np.arange(len(array)) data_array = xr.DataArray(data=array, coords={"x": x}, dims=["x"]) units = extract_units(data_array) expected = attach_units(strip_units(data_array).dropna(dim="x"), units) actual = data_array.dropna(dim="x") assert_units_equal(expected, actual) assert_identical(expected, actual) @pytest.mark.parametrize( "unit", ( pytest.param(1, id="no_unit"), pytest.param(unit_registry.dimensionless, id="dimensionless"), pytest.param(unit_registry.s, id="incompatible_unit"), pytest.param(unit_registry.cm, id="compatible_unit"), pytest.param(unit_registry.m, id="identical_unit"), ), ) def test_isin(self, unit, dtype): array = ( create_nan_array([1.4, np.nan, 2.3, np.nan, np.nan, 9.1], dtype) * unit_registry.m ) data_array = xr.DataArray(data=array, dims="x") raw_values = create_nan_array([1.4, np.nan, 2.3], dtype) values = raw_values * unit units = {None: unit_registry.m if array.check(unit) else None} expected = strip_units(data_array).isin( strip_units(convert_units(values, units)) ) & array.check(unit) actual = data_array.isin(values) assert_units_equal(expected, actual) assert_identical(expected, actual) @pytest.mark.parametrize( "variant", ("masking", "replacing_scalar", "replacing_array", "dropping") ) @pytest.mark.parametrize( "unit,error", ( pytest.param(1, DimensionalityError, id="no_unit"), pytest.param( unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), pytest.param(unit_registry.cm, None, id="compatible_unit"), pytest.param(unit_registry.m, None, id="identical_unit"), ), ) def test_where(self, variant, unit, error, dtype): original_unit = unit_registry.m array = np.linspace(0, 1, 10).astype(dtype) * original_unit data_array = xr.DataArray(data=array) condition = data_array < 0.5 * original_unit other = np.linspace(-2, -1, 10).astype(dtype) * unit variant_kwargs = { "masking": {"cond": condition}, "replacing_scalar": {"cond": condition, "other": -1 * unit}, "replacing_array": {"cond": condition, "other": other}, "dropping": {"cond": condition, "drop": True}, } kwargs = variant_kwargs[variant] kwargs_without_units = { key: strip_units( convert_units( value, {None: original_unit if array.check(unit) else None} ) ) for key, value in kwargs.items() } if variant not in ("masking", "dropping") and error is not None: with pytest.raises(error): data_array.where(**kwargs) return expected = attach_units( strip_units(data_array).where(**kwargs_without_units), extract_units(data_array), ) actual = data_array.where(**kwargs) assert_units_equal(expected, actual) 
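# DataArray.where is exercised in four variants -- plain masking, replacing
# with a scalar, replacing with an array, and dropping -- and unit errors are
# only expected when an incompatible `other` value is supplied; masking and
# dropping never raise here.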
assert_identical(expected, actual) @pytest.mark.xfail(reason="uses numpy.vectorize") def test_interpolate_na(self): array = ( np.array([-1.03, 0.1, 1.4, np.nan, 2.3, np.nan, np.nan, 9.1]) * unit_registry.m ) x = np.arange(len(array)) data_array = xr.DataArray(data=array, coords={"x": x}, dims="x") units = extract_units(data_array) expected = attach_units(strip_units(data_array).interpolate_na(dim="x"), units) actual = data_array.interpolate_na(dim="x") assert_units_equal(expected, actual) assert_identical(expected, actual) @pytest.mark.parametrize( "unit,error", ( pytest.param(1, DimensionalityError, id="no_unit"), pytest.param( unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), pytest.param( unit_registry.cm, None, id="compatible_unit", ), pytest.param( unit_registry.m, None, id="identical_unit", ), ), ) def test_combine_first(self, unit, error, dtype): array = np.zeros(shape=(2, 2), dtype=dtype) * unit_registry.m other_array = np.ones_like(array) * unit data_array = xr.DataArray( data=array, coords={"x": ["a", "b"], "y": [-1, 0]}, dims=["x", "y"] ) other = xr.DataArray( data=other_array, coords={"x": ["b", "c"], "y": [0, 1]}, dims=["x", "y"] ) if error is not None: with pytest.raises(error): data_array.combine_first(other) return units = extract_units(data_array) expected = attach_units( strip_units(data_array).combine_first( strip_units(convert_units(other, units)) ), units, ) actual = data_array.combine_first(other) assert_units_equal(expected, actual) assert_identical(expected, actual) @pytest.mark.parametrize( "unit", ( pytest.param(1, id="no_unit"), pytest.param(unit_registry.dimensionless, id="dimensionless"), pytest.param(unit_registry.s, id="incompatible_unit"), pytest.param(unit_registry.cm, id="compatible_unit"), pytest.param(unit_registry.m, id="identical_unit"), ), ) @pytest.mark.parametrize( "variation", ( "data", pytest.param( "dims", marks=pytest.mark.skip(reason="indexes don't support units") ), "coords", ), ) @pytest.mark.parametrize( "func", ( method("equals"), pytest.param( method("identical"), marks=pytest.mark.skip(reason="the behavior of identical is undecided"), ), ), ids=repr, ) def test_comparisons(self, func, variation, unit, dtype): def is_compatible(a, b): a = a if a is not None else 1 b = b if b is not None else 1 quantity = np.arange(5) * a return a == b or quantity.check(b) data = np.linspace(0, 5, 10).astype(dtype) coord = np.arange(len(data)).astype(dtype) base_unit = unit_registry.m array = data * (base_unit if variation == "data" else 1) x = coord * (base_unit if variation == "dims" else 1) y = coord * (base_unit if variation == "coords" else 1) variations = { "data": (unit, 1, 1), "dims": (1, unit, 1), "coords": (1, 1, unit), } data_unit, dim_unit, coord_unit = variations[variation] data_array = xr.DataArray(data=array, coords={"x": x, "y": ("x", y)}, dims="x") other = attach_units( strip_units(data_array), {None: data_unit, "x": dim_unit, "y": coord_unit} ) units = extract_units(data_array) other_units = extract_units(other) equal_arrays = all( is_compatible(units[name], other_units[name]) for name in units.keys() ) and ( strip_units(data_array).equals( strip_units(convert_units(other, extract_units(data_array))) ) ) equal_units = units == other_units expected = equal_arrays and (func.name != "identical" or equal_units) actual = func(data_array, other) assert expected == actual @pytest.mark.parametrize( "unit", ( pytest.param(1, id="no_unit"), 
pytest.param(unit_registry.dimensionless, id="dimensionless"), pytest.param(unit_registry.s, id="incompatible_unit"), pytest.param(unit_registry.cm, id="compatible_unit"), pytest.param(unit_registry.m, id="identical_unit"), ), ) @pytest.mark.parametrize( "variant", ( "data", pytest.param( "dims", marks=pytest.mark.skip(reason="indexes don't support units") ), "coords", ), ) def test_broadcast_like(self, variant, unit, dtype): original_unit = unit_registry.m variants = { "data": ((original_unit, unit), (1, 1), (1, 1)), "dims": ((1, 1), (original_unit, unit), (1, 1)), "coords": ((1, 1), (1, 1), (original_unit, unit)), } ( (data_unit1, data_unit2), (dim_unit1, dim_unit2), (coord_unit1, coord_unit2), ) = variants[variant] array1 = np.linspace(1, 2, 2 * 1).reshape(2, 1).astype(dtype) * data_unit1 array2 = np.linspace(0, 1, 2 * 3).reshape(2, 3).astype(dtype) * data_unit2 x1 = np.arange(2) * dim_unit1 x2 = np.arange(2) * dim_unit2 y1 = np.array([0]) * dim_unit1 y2 = np.arange(3) * dim_unit2 u1 = np.linspace(0, 1, 2) * coord_unit1 u2 = np.linspace(0, 1, 2) * coord_unit2 arr1 = xr.DataArray( data=array1, coords={"x": x1, "y": y1, "u": ("x", u1)}, dims=("x", "y") ) arr2 = xr.DataArray( data=array2, coords={"x": x2, "y": y2, "u": ("x", u2)}, dims=("x", "y") ) expected = attach_units( strip_units(arr1).broadcast_like(strip_units(arr2)), extract_units(arr1) ) actual = arr1.broadcast_like(arr2) assert_units_equal(expected, actual) assert_identical(expected, actual) @pytest.mark.parametrize( "unit", ( pytest.param(1, id="no_unit"), pytest.param(unit_registry.dimensionless, id="dimensionless"), pytest.param(unit_registry.s, id="incompatible_unit"), pytest.param(unit_registry.cm, id="compatible_unit"), pytest.param(unit_registry.m, id="identical_unit"), ), ) def test_broadcast_equals(self, unit, dtype): left_array = np.ones(shape=(2, 2), dtype=dtype) * unit_registry.m right_array = np.ones(shape=(2,), dtype=dtype) * unit left = xr.DataArray(data=left_array, dims=("x", "y")) right = xr.DataArray(data=right_array, dims="x") units = { **extract_units(left), **({} if left_array.check(unit) else {None: None}), } expected = strip_units(left).broadcast_equals( strip_units(convert_units(right, units)) ) & left_array.check(unit) actual = left.broadcast_equals(right) assert expected == actual def test_pad(self, dtype): array = np.linspace(0, 5, 10).astype(dtype) * unit_registry.m data_array = xr.DataArray(data=array, dims="x") units = extract_units(data_array) expected = attach_units(strip_units(data_array).pad(x=(2, 3)), units) actual = data_array.pad(x=(2, 3)) assert_units_equal(expected, actual) assert_equal(expected, actual) @pytest.mark.parametrize( "variant", ( "data", pytest.param( "dims", marks=pytest.mark.skip(reason="indexes don't support units") ), "coords", ), ) @pytest.mark.parametrize( "func", ( method("pipe", lambda da: da * 10), method("assign_coords", w=("y", np.arange(10) * unit_registry.mm)), method("assign_attrs", attr1="value"), method("rename", u="v"), pytest.param( method("swap_dims", {"x": "u"}), marks=pytest.mark.skip(reason="indexes don't support units"), ), pytest.param( method( "expand_dims", dim={"z": np.linspace(10, 20, 12) * unit_registry.s}, axis=1, ), marks=pytest.mark.skip(reason="indexes don't support units"), ), method("drop_vars", "x"), method("reset_coords", names="u"), method("copy"), method("astype", np.float32), ), ids=repr, ) def test_content_manipulation(self, func, variant, dtype): unit = unit_registry.m variants = { "data": (unit, 1, 1), "dims": (1, unit, 1), "coords": 
(1, 1, unit), } data_unit, dim_unit, coord_unit = variants[variant] quantity = np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) * data_unit x = np.arange(quantity.shape[0]) * dim_unit y = np.arange(quantity.shape[1]) * dim_unit u = np.linspace(0, 1, quantity.shape[0]) * coord_unit data_array = xr.DataArray( name="a", data=quantity, coords={"x": x, "u": ("x", u), "y": y}, dims=("x", "y"), ) stripped_kwargs = { key: array_strip_units(value) for key, value in func.kwargs.items() } units = extract_units(data_array) units["u"] = getattr(u, "units", None) units["v"] = getattr(u, "units", None) expected = attach_units(func(strip_units(data_array), **stripped_kwargs), units) actual = func(data_array) assert_units_equal(expected, actual) assert_identical(expected, actual) @pytest.mark.parametrize( "unit", ( pytest.param(1, id="no_unit"), pytest.param(unit_registry.dimensionless, id="dimensionless"), pytest.param(unit_registry.degK, id="with_unit"), ), ) def test_copy(self, unit, dtype): quantity = np.linspace(0, 10, 20, dtype=dtype) * unit_registry.pascal new_data = np.arange(20) data_array = xr.DataArray(data=quantity, dims="x") expected = attach_units( strip_units(data_array).copy(data=new_data), {None: unit} ) actual = data_array.copy(data=new_data * unit) assert_units_equal(expected, actual) assert_identical(expected, actual) @pytest.mark.parametrize( "indices", ( pytest.param(4, id="single index"), pytest.param([5, 2, 9, 1], id="multiple indices"), ), ) def test_isel(self, indices, dtype): # TODO: maybe test for units in indexes? array = np.arange(10).astype(dtype) * unit_registry.s data_array = xr.DataArray(data=array, dims="x") expected = attach_units( strip_units(data_array).isel(x=indices), extract_units(data_array) ) actual = data_array.isel(x=indices) assert_units_equal(expected, actual) assert_identical(expected, actual) @pytest.mark.skip(reason="indexes don't support units") @pytest.mark.parametrize( "raw_values", ( pytest.param(10, id="single_value"), pytest.param([10, 5, 13], id="list_of_values"), pytest.param(np.array([9, 3, 7, 12]), id="array_of_values"), ), ) @pytest.mark.parametrize( "unit,error", ( pytest.param(1, KeyError, id="no_units"), pytest.param(unit_registry.dimensionless, KeyError, id="dimensionless"), pytest.param(unit_registry.degree, KeyError, id="incompatible_unit"), pytest.param(unit_registry.dm, KeyError, id="compatible_unit"), pytest.param(unit_registry.m, None, id="identical_unit"), ), ) def test_sel(self, raw_values, unit, error, dtype): array = np.linspace(5, 10, 20).astype(dtype) * unit_registry.m x = np.arange(len(array)) * unit_registry.m data_array = xr.DataArray(data=array, coords={"x": x}, dims="x") values = raw_values * unit if error is not None and not ( isinstance(raw_values, int | float) and x.check(unit) ): with pytest.raises(error): data_array.sel(x=values) return expected = attach_units( strip_units(data_array).sel( x=strip_units(convert_units(values, {None: array.units})) ), extract_units(data_array), ) actual = data_array.sel(x=values) assert_units_equal(expected, actual) assert_identical(expected, actual) @pytest.mark.skip(reason="indexes don't support units") @pytest.mark.parametrize( "raw_values", ( pytest.param(10, id="single_value"), pytest.param([10, 5, 13], id="list_of_values"), pytest.param(np.array([9, 3, 7, 12]), id="array_of_values"), ), ) @pytest.mark.parametrize( "unit,error", ( pytest.param(1, KeyError, id="no_units"), pytest.param(unit_registry.dimensionless, KeyError, id="dimensionless"), pytest.param(unit_registry.degree, 
KeyError, id="incompatible_unit"), pytest.param(unit_registry.dm, KeyError, id="compatible_unit"), pytest.param(unit_registry.m, None, id="identical_unit"), ), ) def test_loc(self, raw_values, unit, error, dtype): array = np.linspace(5, 10, 20).astype(dtype) * unit_registry.m x = np.arange(len(array)) * unit_registry.m data_array = xr.DataArray(data=array, coords={"x": x}, dims="x") values = raw_values * unit if error is not None and not ( isinstance(raw_values, int | float) and x.check(unit) ): with pytest.raises(error): data_array.loc[{"x": values}] return expected = attach_units( strip_units(data_array).loc[ {"x": strip_units(convert_units(values, {None: array.units}))} ], extract_units(data_array), ) actual = data_array.loc[{"x": values}] assert_units_equal(expected, actual) assert_identical(expected, actual) @pytest.mark.skip(reason="indexes don't support units") @pytest.mark.parametrize( "raw_values", ( pytest.param(10, id="single_value"), pytest.param([10, 5, 13], id="list_of_values"), pytest.param(np.array([9, 3, 7, 12]), id="array_of_values"), ), ) @pytest.mark.parametrize( "unit,error", ( pytest.param(1, KeyError, id="no_units"), pytest.param(unit_registry.dimensionless, KeyError, id="dimensionless"), pytest.param(unit_registry.degree, KeyError, id="incompatible_unit"), pytest.param(unit_registry.dm, KeyError, id="compatible_unit"), pytest.param(unit_registry.m, None, id="identical_unit"), ), ) def test_drop_sel(self, raw_values, unit, error, dtype): array = np.linspace(5, 10, 20).astype(dtype) * unit_registry.m x = np.arange(len(array)) * unit_registry.m data_array = xr.DataArray(data=array, coords={"x": x}, dims="x") values = raw_values * unit if error is not None and not ( isinstance(raw_values, int | float) and x.check(unit) ): with pytest.raises(error): data_array.drop_sel(x=values) return expected = attach_units( strip_units(data_array).drop_sel( x=strip_units(convert_units(values, {None: x.units})) ), extract_units(data_array), ) actual = data_array.drop_sel(x=values) assert_units_equal(expected, actual) assert_identical(expected, actual) @pytest.mark.parametrize("dim", ("x", "y", "z", "t", "all")) @pytest.mark.parametrize( "shape", ( pytest.param((10, 20), id="nothing_squeezable"), pytest.param((10, 20, 1), id="last_dimension_squeezable"), pytest.param((10, 1, 20), id="middle_dimension_squeezable"), pytest.param((1, 10, 20), id="first_dimension_squeezable"), pytest.param((1, 10, 1, 20), id="first_and_last_dimension_squeezable"), ), ) def test_squeeze(self, shape, dim, dtype): names = "xyzt" dim_lengths = dict(zip(names, shape, strict=False)) names = "xyzt" array = np.arange(10 * 20).astype(dtype).reshape(shape) * unit_registry.J data_array = xr.DataArray(data=array, dims=tuple(names[: len(shape)])) kwargs = {"dim": dim} if dim != "all" and dim_lengths.get(dim, 0) == 1 else {} expected = attach_units( strip_units(data_array).squeeze(**kwargs), extract_units(data_array) ) actual = data_array.squeeze(**kwargs) assert_units_equal(expected, actual) assert_identical(expected, actual) @pytest.mark.parametrize( "func", (method("head", x=7, y=3), method("tail", x=7, y=3), method("thin", x=7, y=3)), ids=repr, ) def test_head_tail_thin(self, func, dtype): # TODO: works like isel. Maybe also test units in indexes? 
array = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_registry.degK data_array = xr.DataArray(data=array, dims=("x", "y")) expected = attach_units( func(strip_units(data_array)), extract_units(data_array) ) actual = func(data_array) assert_units_equal(expected, actual) assert_identical(expected, actual) @pytest.mark.parametrize("variant", ("data", "coords")) @pytest.mark.parametrize( "func", ( pytest.param( method("interp"), marks=pytest.mark.xfail(reason="uses scipy") ), method("reindex"), ), ids=repr, ) def test_interp_reindex(self, variant, func, dtype): variants = { "data": (unit_registry.m, 1), "coords": (1, unit_registry.m), } data_unit, coord_unit = variants[variant] array = np.linspace(1, 2, 10).astype(dtype) * data_unit y = np.arange(10) * coord_unit x = np.arange(10) new_x = np.arange(10) + 0.5 data_array = xr.DataArray(array, coords={"x": x, "y": ("x", y)}, dims="x") units = extract_units(data_array) expected = attach_units(func(strip_units(data_array), x=new_x), units) actual = func(data_array, x=new_x) assert_units_equal(expected, actual) assert_allclose(expected, actual) @pytest.mark.skip(reason="indexes don't support units") @pytest.mark.parametrize( "unit,error", ( pytest.param(1, DimensionalityError, id="no_unit"), pytest.param( unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), pytest.param(unit_registry.cm, None, id="compatible_unit"), pytest.param(unit_registry.m, None, id="identical_unit"), ), ) @pytest.mark.parametrize( "func", (method("interp"), method("reindex")), ids=repr, ) def test_interp_reindex_indexing(self, func, unit, error, dtype): array = np.linspace(1, 2, 10).astype(dtype) x = np.arange(10) * unit_registry.m new_x = (np.arange(10) + 0.5) * unit data_array = xr.DataArray(array, coords={"x": x}, dims="x") if error is not None: with pytest.raises(error): func(data_array, x=new_x) return units = extract_units(data_array) expected = attach_units( func( strip_units(data_array), x=strip_units(convert_units(new_x, {None: unit_registry.m})), ), units, ) actual = func(data_array, x=new_x) assert_units_equal(expected, actual) assert_identical(expected, actual) @pytest.mark.parametrize("variant", ("data", "coords")) @pytest.mark.parametrize( "func", ( pytest.param( method("interp_like"), marks=pytest.mark.xfail(reason="uses scipy") ), method("reindex_like"), ), ids=repr, ) def test_interp_reindex_like(self, variant, func, dtype): variants = { "data": (unit_registry.m, 1), "coords": (1, unit_registry.m), } data_unit, coord_unit = variants[variant] array = np.linspace(1, 2, 10).astype(dtype) * data_unit coord = np.arange(10) * coord_unit x = np.arange(10) new_x = np.arange(-2, 2) + 0.5 data_array = xr.DataArray(array, coords={"x": x, "y": ("x", coord)}, dims="x") other = xr.DataArray(np.empty_like(new_x), coords={"x": new_x}, dims="x") units = extract_units(data_array) expected = attach_units(func(strip_units(data_array), other), units) actual = func(data_array, other) assert_units_equal(expected, actual) assert_allclose(expected, actual) @pytest.mark.skip(reason="indexes don't support units") @pytest.mark.parametrize( "unit,error", ( pytest.param(1, DimensionalityError, id="no_unit"), pytest.param( unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), pytest.param(unit_registry.cm, None, id="compatible_unit"), pytest.param(unit_registry.m, None, id="identical_unit"), ), ) 
@pytest.mark.parametrize( "func", (method("interp_like"), method("reindex_like")), ids=repr, ) def test_interp_reindex_like_indexing(self, func, unit, error, dtype): array = np.linspace(1, 2, 10).astype(dtype) x = np.arange(10) * unit_registry.m new_x = (np.arange(-2, 2) + 0.5) * unit data_array = xr.DataArray(array, coords={"x": x}, dims="x") other = xr.DataArray(np.empty_like(new_x), {"x": new_x}, dims="x") if error is not None: with pytest.raises(error): func(data_array, other) return units = extract_units(data_array) expected = attach_units( func( strip_units(data_array), strip_units(convert_units(other, {None: unit_registry.m})), ), units, ) actual = func(data_array, other) assert_units_equal(expected, actual) assert_identical(expected, actual) @pytest.mark.parametrize( "func", (method("unstack"), method("reset_index", "z"), method("reorder_levels")), ids=repr, ) def test_stacking_stacked(self, func, dtype): array = ( np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) * unit_registry.m ) x = np.arange(array.shape[0]) y = np.arange(array.shape[1]) data_array = xr.DataArray( name="data", data=array, coords={"x": x, "y": y}, dims=("x", "y") ) stacked = data_array.stack(z=("x", "y")) expected = attach_units(func(strip_units(stacked)), {"data": unit_registry.m}) actual = func(stacked) assert_units_equal(expected, actual) # TODO: strip_units/attach_units reconstruct DataArrays from scratch, # losing index structure (e.g., MultiIndex from stack becomes regular Index). # Fix these utilities to preserve indexes, then remove check_indexes=False. if func.name == "reset_index": assert_identical( expected, actual, check_default_indexes=False, check_indexes=False ) else: assert_identical(expected, actual, check_indexes=False) @pytest.mark.skip(reason="indexes don't support units") def test_to_unstacked_dataset(self, dtype): array = ( np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) * unit_registry.pascal ) x = np.arange(array.shape[0]) * unit_registry.m y = np.arange(array.shape[1]) * unit_registry.s data_array = xr.DataArray( data=array, coords={"x": x, "y": y}, dims=("x", "y") ).stack(z=("x", "y")) func = method("to_unstacked_dataset", dim="z") expected = attach_units( func(strip_units(data_array)), { "y": y.units, **dict(zip(x.magnitude, [array.units] * len(y), strict=True)), }, ).rename({elem.magnitude: elem for elem in x}) actual = func(data_array) assert_units_equal(expected, actual) assert_identical(expected, actual) @pytest.mark.parametrize( "func", ( method("transpose", "y", "x", "z"), method("stack", a=("x", "y")), method("set_index", x="x2"), method("shift", x=2), pytest.param( method("rank", dim="x"), marks=pytest.mark.skip(reason="rank not implemented for non-ndarray"), ), method("roll", x=2, roll_coords=False), method("sortby", "x2"), ), ids=repr, ) def test_stacking_reordering(self, func, dtype): array = ( np.linspace(0, 10, 2 * 5 * 10).reshape(2, 5, 10).astype(dtype) * unit_registry.m ) x = np.arange(array.shape[0]) y = np.arange(array.shape[1]) z = np.arange(array.shape[2]) x2 = np.linspace(0, 1, array.shape[0])[::-1] data_array = xr.DataArray( name="data", data=array, coords={"x": x, "y": y, "z": z, "x2": ("x", x2)}, dims=("x", "y", "z"), ) expected = attach_units(func(strip_units(data_array)), {None: unit_registry.m}) actual = func(data_array) assert_units_equal(expected, actual) # TODO: strip_units/attach_units reconstruct DataArrays from scratch, # losing index structure (e.g., MultiIndex from stack becomes regular Index). 
# Fix these utilities to preserve indexes, then remove check_indexes=False. assert_identical(expected, actual, check_indexes=False) @pytest.mark.parametrize( "variant", ( pytest.param( "dims", marks=pytest.mark.skip(reason="indexes don't support units") ), "coords", ), ) @pytest.mark.parametrize( "func", ( method("differentiate", fallback_func=np.gradient), method("integrate", fallback_func=duck_array_ops.cumulative_trapezoid), method("cumulative_integrate", fallback_func=duck_array_ops.trapz), ), ids=repr, ) def test_differentiate_integrate(self, func, variant, dtype): data_unit = unit_registry.m unit = unit_registry.s variants = { "dims": ("x", unit, 1), "coords": ("u", 1, unit), } coord, dim_unit, coord_unit = variants[variant] array = np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) * data_unit x = np.arange(array.shape[0]) * dim_unit y = np.arange(array.shape[1]) * dim_unit u = np.linspace(0, 1, array.shape[0]) * coord_unit data_array = xr.DataArray( data=array, coords={"x": x, "y": y, "u": ("x", u)}, dims=("x", "y") ) # we want to make sure the output unit is correct units = extract_units(data_array) units.update( extract_units( func( data_array.data, getattr(data_array, coord).data, axis=0, ) ) ) expected = attach_units( func(strip_units(data_array), coord=strip_units(coord)), units, ) actual = func(data_array, coord=coord) assert_units_equal(expected, actual) assert_identical(expected, actual) @pytest.mark.parametrize("compute_backend", ["numbagg", None], indirect=True) @pytest.mark.parametrize( "variant", ( "data", pytest.param( "dims", marks=pytest.mark.skip(reason="indexes don't support units") ), "coords", ), ) @pytest.mark.parametrize( "func", ( method("diff", dim="x"), method("quantile", q=[0.25, 0.75]), method("reduce", func=np.sum, dim="x"), pytest.param(lambda x: x.dot(x), id="method_dot"), ), ids=repr, ) def test_computation(self, func, variant, dtype, compute_backend): unit = unit_registry.m variants = { "data": (unit, 1, 1), "dims": (1, unit, 1), "coords": (1, 1, unit), } data_unit, dim_unit, coord_unit = variants[variant] array = np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) * data_unit x = np.arange(array.shape[0]) * dim_unit y = np.arange(array.shape[1]) * dim_unit u = np.linspace(0, 1, array.shape[0]) * coord_unit data_array = xr.DataArray( data=array, coords={"x": x, "y": y, "u": ("x", u)}, dims=("x", "y") ) # we want to make sure the output unit is correct units = extract_units(data_array) if not isinstance(func, function | method): units.update(extract_units(func(array.reshape(-1)))) with xr.set_options(use_opt_einsum=False): expected = attach_units(func(strip_units(data_array)), units) actual = func(data_array) assert_units_equal(expected, actual) assert_identical(expected, actual) @pytest.mark.parametrize( "variant", ( "data", pytest.param( "dims", marks=pytest.mark.skip(reason="indexes don't support units") ), "coords", ), ) @pytest.mark.parametrize( "func", ( method("groupby", "x"), method("groupby_bins", "y", bins=4), method("coarsen", y=2), method("rolling", y=3), pytest.param(method("rolling_exp", y=3), marks=requires_numbagg), method("weighted", xr.DataArray(data=np.linspace(0, 1, 10), dims="y")), ), ids=repr, ) def test_computation_objects(self, func, variant, dtype): if variant == "data": if func.name == "rolling_exp": pytest.xfail(reason="numbagg functions are not supported by pint") elif func.name == "rolling": pytest.xfail( reason="numpy.lib.stride_tricks.as_strided converts to ndarray" ) unit = unit_registry.m variants = { "data": 
(unit, 1, 1), "dims": (1, unit, 1), "coords": (1, 1, unit), } data_unit, dim_unit, coord_unit = variants[variant] array = np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) * data_unit x = np.array([0, 0, 1, 2, 2]) * dim_unit y = np.arange(array.shape[1]) * 3 * dim_unit u = np.linspace(0, 1, 5) * coord_unit data_array = xr.DataArray( data=array, coords={"x": x, "y": y, "u": ("x", u)}, dims=("x", "y") ) units = extract_units(data_array) expected = attach_units(func(strip_units(data_array)).mean(), units) actual = func(data_array).mean() assert_units_equal(expected, actual) assert_allclose(expected, actual) def test_resample(self, dtype): array = np.linspace(0, 5, 10).astype(dtype) * unit_registry.m time = xr.date_range("10-09-2010", periods=len(array), freq="YE") data_array = xr.DataArray(data=array, coords={"time": time}, dims="time") units = extract_units(data_array) func = method("resample", time="6ME") expected = attach_units(func(strip_units(data_array)).mean(), units) actual = func(data_array).mean() assert_units_equal(expected, actual) assert_identical(expected, actual) @pytest.mark.parametrize("compute_backend", ["numbagg", None], indirect=True) @pytest.mark.parametrize( "variant", ( "data", pytest.param( "dims", marks=pytest.mark.skip(reason="indexes don't support units") ), "coords", ), ) @pytest.mark.parametrize( "func", ( method("assign_coords", z=("x", np.arange(5) * unit_registry.s)), method("first"), method("last"), method("quantile", q=[0.25, 0.5, 0.75], dim="x"), ), ids=repr, ) def test_grouped_operations(self, func, variant, dtype, compute_backend): unit = unit_registry.m variants = { "data": (unit, 1, 1), "dims": (1, unit, 1), "coords": (1, 1, unit), } data_unit, dim_unit, coord_unit = variants[variant] array = np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) * data_unit x = np.arange(array.shape[0]) * dim_unit y = np.arange(array.shape[1]) * 3 * dim_unit u = np.linspace(0, 1, array.shape[0]) * coord_unit data_array = xr.DataArray( data=array, coords={"x": x, "y": y, "u": ("x", u)}, dims=("x", "y") ) units = {**extract_units(data_array), "z": unit_registry.s, "q": None} stripped_kwargs = { key: ( strip_units(value) if not isinstance(value, tuple) else tuple(strip_units(elem) for elem in value) ) for key, value in func.kwargs.items() } expected = attach_units( func( strip_units(data_array).groupby("y", squeeze=False), **stripped_kwargs ), units, ) actual = func(data_array.groupby("y", squeeze=False)) assert_units_equal(expected, actual) assert_identical(expected, actual) class TestDataset: @pytest.mark.parametrize( "unit,error", ( pytest.param(1, xr.MergeError, id="no_unit"), pytest.param( unit_registry.dimensionless, xr.MergeError, id="dimensionless" ), pytest.param(unit_registry.s, xr.MergeError, id="incompatible_unit"), pytest.param(unit_registry.mm, None, id="compatible_unit"), pytest.param(unit_registry.m, None, id="same_unit"), ), ) @pytest.mark.parametrize( "shared", ( "nothing", pytest.param( "dims", marks=pytest.mark.skip(reason="indexes don't support units") ), "coords", ), ) def test_init(self, shared, unit, error, dtype): original_unit = unit_registry.m scaled_unit = unit_registry.mm a = np.linspace(0, 1, 10).astype(dtype) * unit_registry.Pa b = np.linspace(-1, 0, 10).astype(dtype) * unit_registry.degK values_a = np.arange(a.shape[0]) dim_a = values_a * original_unit coord_a = dim_a.to(scaled_unit) values_b = np.arange(b.shape[0]) dim_b = values_b * unit coord_b = ( dim_b.to(scaled_unit) if unit_registry.is_compatible_with(dim_b, scaled_unit) and 
unit != scaled_unit else dim_b * 1000 ) variants = { "nothing": ({}, {}), "dims": ({"x": dim_a}, {"x": dim_b}), "coords": ( {"x": values_a, "y": ("x", coord_a)}, {"x": values_b, "y": ("x", coord_b)}, ), } coords_a, coords_b = variants[shared] dims_a, dims_b = ("x", "y") if shared == "nothing" else ("x", "x") a = xr.DataArray(data=a, coords=coords_a, dims=dims_a) b = xr.DataArray(data=b, coords=coords_b, dims=dims_b) if error is not None and shared != "nothing": with pytest.raises(error): xr.Dataset(data_vars={"a": a, "b": b}) return actual = xr.Dataset(data_vars={"a": a, "b": b}) units = merge_mappings( extract_units(a.rename("a")), extract_units(b.rename("b")) ) expected = attach_units( xr.Dataset(data_vars={"a": strip_units(a), "b": strip_units(b)}), units ) assert_units_equal(expected, actual) assert_equal(expected, actual) @pytest.mark.parametrize( "func", (pytest.param(str, id="str"), pytest.param(repr, id="repr")) ) @pytest.mark.parametrize( "variant", ( "data", pytest.param( "dims", marks=pytest.mark.skip(reason="indexes don't support units"), ), "coords", ), ) def test_repr(self, func, variant, dtype): unit1, unit2 = ( (unit_registry.Pa, unit_registry.degK) if variant == "data" else (1, 1) ) array1 = np.linspace(1, 2, 10, dtype=dtype) * unit1 array2 = np.linspace(0, 1, 10, dtype=dtype) * unit2 x = np.arange(len(array1)) * unit_registry.s y = x.to(unit_registry.ms) variants = { "dims": {"x": x}, "coords": {"y": ("x", y)}, "data": {}, } ds = xr.Dataset( data_vars={"a": ("x", array1), "b": ("x", array2)}, coords=variants[variant], ) # FIXME: this just checks that the repr does not raise # warnings or errors, but does not check the result func(ds) @pytest.mark.parametrize( "func", ( method("all"), method("any"), method("argmax", dim="x"), method("argmin", dim="x"), method("max"), method("min"), method("mean"), method("median"), method("sum"), method("prod"), method("std"), method("var"), method("cumsum"), method("cumprod"), ), ids=repr, ) def test_aggregation(self, func, dtype): unit_a, unit_b = ( (unit_registry.Pa, unit_registry.degK) if func.name != "cumprod" else (unit_registry.dimensionless, unit_registry.dimensionless) ) a = np.linspace(0, 1, 10).astype(dtype) * unit_a b = np.linspace(-1, 0, 10).astype(dtype) * unit_b ds = xr.Dataset({"a": ("x", a), "b": ("x", b)}) if "dim" in func.kwargs: numpy_kwargs = func.kwargs.copy() dim = numpy_kwargs.pop("dim") axis_a = ds.a.get_axis_num(dim) axis_b = ds.b.get_axis_num(dim) numpy_kwargs_a = numpy_kwargs.copy() numpy_kwargs_a["axis"] = axis_a numpy_kwargs_b = numpy_kwargs.copy() numpy_kwargs_b["axis"] = axis_b else: numpy_kwargs_a = {} numpy_kwargs_b = {} units_a = array_extract_units(func(a, **numpy_kwargs_a)) units_b = array_extract_units(func(b, **numpy_kwargs_b)) units = {"a": units_a, "b": units_b} actual = func(ds) expected = attach_units(func(strip_units(ds)), units) assert_units_equal(expected, actual) assert_allclose(expected, actual) @pytest.mark.parametrize("property", ("imag", "real")) def test_numpy_properties(self, property, dtype): a = np.linspace(0, 1, 10) * unit_registry.Pa b = np.linspace(-1, 0, 15) * unit_registry.degK ds = xr.Dataset({"a": ("x", a), "b": ("y", b)}) units = extract_units(ds) actual = getattr(ds, property) expected = attach_units(getattr(strip_units(ds), property), units) assert_units_equal(expected, actual) assert_equal(expected, actual) @pytest.mark.parametrize( "func", ( method("astype", float), method("conj"), method("argsort"), method("conjugate"), method("round"), ), ids=repr, ) def 
test_numpy_methods(self, func, dtype): a = np.linspace(1, -1, 10) * unit_registry.Pa b = np.linspace(-1, 1, 15) * unit_registry.degK ds = xr.Dataset({"a": ("x", a), "b": ("y", b)}) units_a = array_extract_units(func(a)) units_b = array_extract_units(func(b)) units = {"a": units_a, "b": units_b} actual = func(ds) expected = attach_units(func(strip_units(ds)), units) assert_units_equal(expected, actual) assert_equal(expected, actual) @pytest.mark.parametrize("func", (method("clip", min=3, max=8),), ids=repr) @pytest.mark.parametrize( "unit,error", ( pytest.param(1, DimensionalityError, id="no_unit"), pytest.param( unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), pytest.param(unit_registry.cm, None, id="compatible_unit"), pytest.param(unit_registry.m, None, id="identical_unit"), ), ) def test_numpy_methods_with_args(self, func, unit, error, dtype): data_unit = unit_registry.m a = np.linspace(0, 10, 15) * unit_registry.m b = np.linspace(-2, 12, 20) * unit_registry.m ds = xr.Dataset({"a": ("x", a), "b": ("y", b)}) units = extract_units(ds) kwargs = { key: array_attach_units(value, unit) for key, value in func.kwargs.items() } if error is not None: with pytest.raises(error): func(ds, **kwargs) return stripped_kwargs = { key: strip_units(convert_units(value, {None: data_unit})) for key, value in kwargs.items() } actual = func(ds, **kwargs) expected = attach_units(func(strip_units(ds), **stripped_kwargs), units) assert_units_equal(expected, actual) assert_equal(expected, actual) @pytest.mark.parametrize( "func", (method("isnull"), method("notnull"), method("count")), ids=repr ) def test_missing_value_detection(self, func, dtype): array1 = ( np.array( [ [1.4, 2.3, np.nan, 7.2], [np.nan, 9.7, np.nan, np.nan], [2.1, np.nan, np.nan, 4.6], [9.9, np.nan, 7.2, 9.1], ] ) * unit_registry.degK ) array2 = ( np.array( [ [np.nan, 5.7, 12.0, 7.2], [np.nan, 12.4, np.nan, 4.2], [9.8, np.nan, 4.6, 1.4], [7.2, np.nan, 6.3, np.nan], [8.4, 3.9, np.nan, np.nan], ] ) * unit_registry.Pa ) ds = xr.Dataset({"a": (("x", "y"), array1), "b": (("z", "x"), array2)}) expected = func(strip_units(ds)) actual = func(ds) assert_units_equal(expected, actual) assert_equal(expected, actual) @pytest.mark.xfail(reason="ffill and bfill lose the unit") @pytest.mark.parametrize("func", (method("ffill"), method("bfill")), ids=repr) def test_missing_value_filling(self, func, dtype): array1 = ( create_nan_array([1.4, np.nan, 2.3, np.nan, np.nan, 9.1], dtype) * unit_registry.degK ) array2 = ( create_nan_array([4.3, 9.8, 7.5, np.nan, 8.2, np.nan], dtype) * unit_registry.Pa ) ds = xr.Dataset({"a": ("x", array1), "b": ("y", array2)}) units = extract_units(ds) expected = attach_units(func(strip_units(ds), dim="x"), units) actual = func(ds, dim="x") assert_units_equal(expected, actual) assert_equal(expected, actual) @pytest.mark.parametrize( "unit,error", ( pytest.param(1, DimensionalityError, id="no_unit"), pytest.param( unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), pytest.param( unit_registry.cm, None, id="compatible_unit", ), pytest.param(unit_registry.m, None, id="identical_unit"), ), ) @pytest.mark.parametrize( "fill_value", ( pytest.param(-1, id="python_scalar"), pytest.param(np.array(-1), id="numpy_scalar"), pytest.param(np.array([-1]), id="numpy_array"), ), ) def test_fillna(self, fill_value, unit, error, dtype): array1 = ( 
create_nan_array([1.4, np.nan, 2.3, np.nan, np.nan, 9.1], dtype) * unit_registry.m ) array2 = ( create_nan_array([4.3, 9.8, 7.5, np.nan, 8.2, np.nan], dtype) * unit_registry.m ) ds = xr.Dataset({"a": ("x", array1), "b": ("x", array2)}) value = fill_value * unit units = extract_units(ds) if error is not None: with pytest.raises(error): ds.fillna(value=value) return actual = ds.fillna(value=value) expected = attach_units( strip_units(ds).fillna( value=strip_units(convert_units(value, {None: unit_registry.m})) ), units, ) assert_units_equal(expected, actual) assert_equal(expected, actual) def test_dropna(self, dtype): array1 = ( create_nan_array([1.4, np.nan, 2.3, np.nan, np.nan, 9.1], dtype) * unit_registry.degK ) array2 = ( create_nan_array([4.3, 9.8, 7.5, np.nan, 8.2, np.nan], dtype) * unit_registry.Pa ) ds = xr.Dataset({"a": ("x", array1), "b": ("x", array2)}) units = extract_units(ds) expected = attach_units(strip_units(ds).dropna(dim="x"), units) actual = ds.dropna(dim="x") assert_units_equal(expected, actual) assert_equal(expected, actual) @pytest.mark.parametrize( "unit", ( pytest.param(1, id="no_unit"), pytest.param(unit_registry.dimensionless, id="dimensionless"), pytest.param(unit_registry.s, id="incompatible_unit"), pytest.param(unit_registry.cm, id="compatible_unit"), pytest.param(unit_registry.m, id="same_unit"), ), ) def test_isin(self, unit, dtype): array1 = ( create_nan_array([1.4, np.nan, 2.3, np.nan, np.nan, 9.1], dtype) * unit_registry.m ) array2 = ( create_nan_array([4.3, 9.8, 7.5, np.nan, 8.2, np.nan], dtype) * unit_registry.m ) ds = xr.Dataset({"a": ("x", array1), "b": ("x", array2)}) raw_values = create_nan_array([1.4, np.nan, 2.3], dtype) values = raw_values * unit converted_values = ( convert_units(values, {None: unit_registry.m}) if is_compatible(unit, unit_registry.m) else values ) expected = strip_units(ds).isin(strip_units(converted_values)) # TODO: use `unit_registry.is_compatible_with(unit, unit_registry.m)` instead. # Needs `pint>=0.12.1`, though, so we probably should wait until that is released. 
if not is_compatible(unit, unit_registry.m): expected.a[:] = False expected.b[:] = False actual = ds.isin(values) assert_units_equal(expected, actual) assert_equal(expected, actual) @pytest.mark.parametrize( "variant", ("masking", "replacing_scalar", "replacing_array", "dropping") ) @pytest.mark.parametrize( "unit,error", ( pytest.param(1, DimensionalityError, id="no_unit"), pytest.param( unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), pytest.param(unit_registry.cm, None, id="compatible_unit"), pytest.param(unit_registry.m, None, id="same_unit"), ), ) def test_where(self, variant, unit, error, dtype): original_unit = unit_registry.m array1 = np.linspace(0, 1, 10).astype(dtype) * original_unit array2 = np.linspace(-1, 0, 10).astype(dtype) * original_unit ds = xr.Dataset({"a": ("x", array1), "b": ("x", array2)}) units = extract_units(ds) condition = ds < 0.5 * original_unit other = np.linspace(-2, -1, 10).astype(dtype) * unit variant_kwargs = { "masking": {"cond": condition}, "replacing_scalar": {"cond": condition, "other": -1 * unit}, "replacing_array": {"cond": condition, "other": other}, "dropping": {"cond": condition, "drop": True}, } kwargs = variant_kwargs[variant] if variant not in ("masking", "dropping") and error is not None: with pytest.raises(error): ds.where(**kwargs) return kwargs_without_units = { key: strip_units(convert_units(value, {None: original_unit})) for key, value in kwargs.items() } expected = attach_units( strip_units(ds).where(**kwargs_without_units), units, ) actual = ds.where(**kwargs) assert_units_equal(expected, actual) assert_equal(expected, actual) @pytest.mark.xfail(reason="interpolate_na uses numpy.vectorize") def test_interpolate_na(self, dtype): array1 = ( create_nan_array([1.4, np.nan, 2.3, np.nan, np.nan, 9.1], dtype) * unit_registry.degK ) array2 = ( create_nan_array([4.3, 9.8, 7.5, np.nan, 8.2, np.nan], dtype) * unit_registry.Pa ) ds = xr.Dataset({"a": ("x", array1), "b": ("x", array2)}) units = extract_units(ds) expected = attach_units( strip_units(ds).interpolate_na(dim="x"), units, ) actual = ds.interpolate_na(dim="x") assert_units_equal(expected, actual) assert_equal(expected, actual) @pytest.mark.parametrize( "unit,error", ( pytest.param(1, DimensionalityError, id="no_unit"), pytest.param( unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), pytest.param(unit_registry.cm, None, id="compatible_unit"), pytest.param(unit_registry.m, None, id="same_unit"), ), ) @pytest.mark.parametrize( "variant", ( "data", pytest.param( "dims", marks=pytest.mark.skip(reason="indexes don't support units"), ), ), ) def test_combine_first(self, variant, unit, error, dtype): variants = { "data": (unit_registry.m, unit, 1, 1), "dims": (1, 1, unit_registry.m, unit), } data_unit, other_data_unit, dims_unit, other_dims_unit = variants[variant] array1 = ( create_nan_array([1.4, np.nan, 2.3, np.nan, np.nan, 9.1], dtype) * data_unit ) array2 = ( create_nan_array([4.3, 9.8, 7.5, np.nan, 8.2, np.nan], dtype) * data_unit ) x = np.arange(len(array1)) * dims_unit ds = xr.Dataset( data_vars={"a": ("x", array1), "b": ("x", array2)}, coords={"x": x}, ) units = extract_units(ds) other_array1 = np.ones_like(array1) * other_data_unit other_array2 = np.full_like(array2, fill_value=-1) * other_data_unit other_x = (np.arange(array1.shape[0]) + 5) * other_dims_unit other = xr.Dataset( data_vars={"a": 
("x", other_array1), "b": ("x", other_array2)}, coords={"x": other_x}, ) if error is not None: with pytest.raises(error): ds.combine_first(other) return expected = attach_units( strip_units(ds).combine_first(strip_units(convert_units(other, units))), units, ) actual = ds.combine_first(other) assert_units_equal(expected, actual) assert_equal(expected, actual) @pytest.mark.parametrize( "unit", ( pytest.param(1, id="no_unit"), pytest.param(unit_registry.dimensionless, id="dimensionless"), pytest.param(unit_registry.s, id="incompatible_unit"), pytest.param(unit_registry.cm, id="compatible_unit"), pytest.param(unit_registry.m, id="identical_unit"), ), ) @pytest.mark.parametrize( "variant", ( "data", pytest.param( "dims", marks=pytest.mark.skip(reason="indexes don't support units") ), "coords", ), ) @pytest.mark.parametrize( "func", ( method("equals"), pytest.param( method("identical"), marks=pytest.mark.skip("behaviour of identical is unclear"), ), ), ids=repr, ) def test_comparisons(self, func, variant, unit, dtype): array1 = np.linspace(0, 5, 10).astype(dtype) array2 = np.linspace(-5, 0, 10).astype(dtype) coord = np.arange(len(array1)).astype(dtype) variants = { "data": (unit_registry.m, 1, 1), "dims": (1, unit_registry.m, 1), "coords": (1, 1, unit_registry.m), } data_unit, dim_unit, coord_unit = variants[variant] a = array1 * data_unit b = array2 * data_unit x = coord * dim_unit y = coord * coord_unit ds = xr.Dataset( data_vars={"a": ("x", a), "b": ("x", b)}, coords={"x": x, "y": ("x", y)}, ) units = extract_units(ds) other_variants = { "data": (unit, 1, 1), "dims": (1, unit, 1), "coords": (1, 1, unit), } other_data_unit, other_dim_unit, other_coord_unit = other_variants[variant] other_units = { "a": other_data_unit, "b": other_data_unit, "x": other_dim_unit, "y": other_coord_unit, } to_convert = { key: unit if is_compatible(unit, reference) else None for key, (unit, reference) in zip_mappings(units, other_units) } # convert units where possible, then attach all units to the converted dataset other = attach_units(strip_units(convert_units(ds, to_convert)), other_units) other_units = extract_units(other) # make sure all units are compatible and only then try to # convert and compare values equal_ds = all( is_compatible(unit, other_unit) for _, (unit, other_unit) in zip_mappings(units, other_units) ) and (strip_units(ds).equals(strip_units(convert_units(other, units)))) equal_units = units == other_units expected = equal_ds and (func.name != "identical" or equal_units) actual = func(ds, other) assert expected == actual # TODO: eventually use another decorator / wrapper function that # applies a filter to the parametrize combinations: # we only need a single test for data @pytest.mark.parametrize( "unit", ( pytest.param(1, id="no_unit"), pytest.param(unit_registry.dimensionless, id="dimensionless"), pytest.param(unit_registry.s, id="incompatible_unit"), pytest.param(unit_registry.cm, id="compatible_unit"), pytest.param(unit_registry.m, id="identical_unit"), ), ) @pytest.mark.parametrize( "variant", ( "data", pytest.param( "dims", marks=pytest.mark.skip(reason="indexes don't support units"), ), ), ) def test_broadcast_like(self, variant, unit, dtype): variants = { "data": ((unit_registry.m, unit), (1, 1)), "dims": ((1, 1), (unit_registry.m, unit)), } (data_unit1, data_unit2), (dim_unit1, dim_unit2) = variants[variant] array1 = np.linspace(1, 2, 2 * 1).reshape(2, 1).astype(dtype) * data_unit1 array2 = np.linspace(0, 1, 2 * 3).reshape(2, 3).astype(dtype) * data_unit2 x1 = np.arange(2) * dim_unit1 
x2 = np.arange(2) * dim_unit2 y1 = np.array([0]) * dim_unit1 y2 = np.arange(3) * dim_unit2 ds1 = xr.Dataset( data_vars={"a": (("x", "y"), array1)}, coords={"x": x1, "y": y1} ) ds2 = xr.Dataset( data_vars={"a": (("x", "y"), array2)}, coords={"x": x2, "y": y2} ) expected = attach_units( strip_units(ds1).broadcast_like(strip_units(ds2)), extract_units(ds1) ) actual = ds1.broadcast_like(ds2) assert_units_equal(expected, actual) assert_equal(expected, actual) @pytest.mark.parametrize( "unit", ( pytest.param(1, id="no_unit"), pytest.param(unit_registry.dimensionless, id="dimensionless"), pytest.param(unit_registry.s, id="incompatible_unit"), pytest.param(unit_registry.cm, id="compatible_unit"), pytest.param(unit_registry.m, id="identical_unit"), ), ) def test_broadcast_equals(self, unit, dtype): # TODO: does this use indexes? left_array1 = np.ones(shape=(2, 3), dtype=dtype) * unit_registry.m left_array2 = np.zeros(shape=(3, 6), dtype=dtype) * unit_registry.m right_array1 = np.ones(shape=(2,)) * unit right_array2 = np.zeros(shape=(3,)) * unit left = xr.Dataset( {"a": (("x", "y"), left_array1), "b": (("y", "z"), left_array2)}, ) right = xr.Dataset({"a": ("x", right_array1), "b": ("y", right_array2)}) units = merge_mappings( extract_units(left), {} if is_compatible(left_array1, unit) else {"a": None, "b": None}, ) expected = is_compatible(left_array1, unit) and strip_units( left ).broadcast_equals(strip_units(convert_units(right, units))) actual = left.broadcast_equals(right) assert expected == actual def test_pad(self, dtype): a = np.linspace(0, 5, 10).astype(dtype) * unit_registry.Pa b = np.linspace(-5, 0, 10).astype(dtype) * unit_registry.degK ds = xr.Dataset({"a": ("x", a), "b": ("x", b)}) units = extract_units(ds) expected = attach_units(strip_units(ds).pad(x=(2, 3)), units) actual = ds.pad(x=(2, 3)) assert_units_equal(expected, actual) assert_equal(expected, actual) @pytest.mark.parametrize( "func", (method("unstack"), method("reset_index", "v"), method("reorder_levels")), ids=repr, ) @pytest.mark.parametrize( "variant", ( "data", pytest.param( "dims", marks=pytest.mark.skip(reason="indexes don't support units"), ), ), ) def test_stacking_stacked(self, variant, func, dtype): variants = { "data": (unit_registry.m, 1), "dims": (1, unit_registry.m), } data_unit, dim_unit = variants[variant] array1 = np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) * data_unit array2 = ( np.linspace(-10, 0, 5 * 10 * 15).reshape(5, 10, 15).astype(dtype) * data_unit ) x = np.arange(array1.shape[0]) * dim_unit y = np.arange(array1.shape[1]) * dim_unit z = np.arange(array2.shape[2]) * dim_unit ds = xr.Dataset( data_vars={"a": (("x", "y"), array1), "b": (("x", "y", "z"), array2)}, coords={"x": x, "y": y, "z": z}, ) units = extract_units(ds) stacked = ds.stack(v=("x", "y")) expected = attach_units(func(strip_units(stacked)), units) actual = func(stacked) assert_units_equal(expected, actual) if func.name == "reset_index": assert_equal(expected, actual, check_default_indexes=False) else: assert_equal(expected, actual) @pytest.mark.xfail( reason="stacked dimension's labels have to be hashable, but is a numpy.array" ) def test_to_stacked_array(self, dtype): labels = range(5) * unit_registry.s arrays = { name: np.linspace(0, 1, 10).astype(dtype) * unit_registry.m for name in labels } ds = xr.Dataset({name: ("x", array) for name, array in arrays.items()}) units = {None: unit_registry.m, "y": unit_registry.s} func = method("to_stacked_array", "z", variable_dim="y", sample_dims=["x"]) actual = func(ds).rename(None) 
expected = attach_units( func(strip_units(ds)).rename(None), units, ) assert_units_equal(expected, actual) assert_equal(expected, actual) @pytest.mark.parametrize( "func", ( method("transpose", "y", "x", "z1", "z2"), method("stack", u=("x", "y")), method("set_index", x="x2"), method("shift", x=2), pytest.param( method("rank", dim="x"), marks=pytest.mark.skip(reason="rank not implemented for non-ndarray"), ), method("roll", x=2, roll_coords=False), method("sortby", "x2"), ), ids=repr, ) def test_stacking_reordering(self, func, dtype): array1 = ( np.linspace(0, 10, 2 * 5 * 10).reshape(2, 5, 10).astype(dtype) * unit_registry.Pa ) array2 = ( np.linspace(0, 10, 2 * 5 * 15).reshape(2, 5, 15).astype(dtype) * unit_registry.degK ) x = np.arange(array1.shape[0]) y = np.arange(array1.shape[1]) z1 = np.arange(array1.shape[2]) z2 = np.arange(array2.shape[2]) x2 = np.linspace(0, 1, array1.shape[0])[::-1] ds = xr.Dataset( data_vars={ "a": (("x", "y", "z1"), array1), "b": (("x", "y", "z2"), array2), }, coords={"x": x, "y": y, "z1": z1, "z2": z2, "x2": ("x", x2)}, ) units = extract_units(ds) expected = attach_units(func(strip_units(ds)), units) actual = func(ds) assert_units_equal(expected, actual) assert_equal(expected, actual) @pytest.mark.parametrize( "indices", ( pytest.param(4, id="single index"), pytest.param([5, 2, 9, 1], id="multiple indices"), ), ) def test_isel(self, indices, dtype): array1 = np.arange(10).astype(dtype) * unit_registry.s array2 = np.linspace(0, 1, 10).astype(dtype) * unit_registry.Pa ds = xr.Dataset(data_vars={"a": ("x", array1), "b": ("x", array2)}) units = extract_units(ds) expected = attach_units(strip_units(ds).isel(x=indices), units) actual = ds.isel(x=indices) assert_units_equal(expected, actual) assert_equal(expected, actual) @pytest.mark.skip(reason="indexes don't support units") @pytest.mark.parametrize( "raw_values", ( pytest.param(10, id="single_value"), pytest.param([10, 5, 13], id="list_of_values"), pytest.param(np.array([9, 3, 7, 12]), id="array_of_values"), ), ) @pytest.mark.parametrize( "unit,error", ( pytest.param(1, KeyError, id="no_units"), pytest.param(unit_registry.dimensionless, KeyError, id="dimensionless"), pytest.param(unit_registry.degree, KeyError, id="incompatible_unit"), pytest.param(unit_registry.mm, KeyError, id="compatible_unit"), pytest.param(unit_registry.m, None, id="identical_unit"), ), ) def test_sel(self, raw_values, unit, error, dtype): array1 = np.linspace(5, 10, 20).astype(dtype) * unit_registry.degK array2 = np.linspace(0, 5, 20).astype(dtype) * unit_registry.Pa x = np.arange(len(array1)) * unit_registry.m ds = xr.Dataset( data_vars={ "a": xr.DataArray(data=array1, dims="x"), "b": xr.DataArray(data=array2, dims="x"), }, coords={"x": x}, ) values = raw_values * unit # TODO: if we choose dm as compatible unit, single value keys # can be found. Should we check that? 
if error is not None: with pytest.raises(error): ds.sel(x=values) return expected = attach_units( strip_units(ds).sel( x=strip_units(convert_units(values, {None: unit_registry.m})) ), extract_units(ds), ) actual = ds.sel(x=values) assert_units_equal(expected, actual) assert_equal(expected, actual) @pytest.mark.skip(reason="indexes don't support units") @pytest.mark.parametrize( "raw_values", ( pytest.param(10, id="single_value"), pytest.param([10, 5, 13], id="list_of_values"), pytest.param(np.array([9, 3, 7, 12]), id="array_of_values"), ), ) @pytest.mark.parametrize( "unit,error", ( pytest.param(1, KeyError, id="no_units"), pytest.param(unit_registry.dimensionless, KeyError, id="dimensionless"), pytest.param(unit_registry.degree, KeyError, id="incompatible_unit"), pytest.param(unit_registry.mm, KeyError, id="compatible_unit"), pytest.param(unit_registry.m, None, id="identical_unit"), ), ) def test_drop_sel(self, raw_values, unit, error, dtype): array1 = np.linspace(5, 10, 20).astype(dtype) * unit_registry.degK array2 = np.linspace(0, 5, 20).astype(dtype) * unit_registry.Pa x = np.arange(len(array1)) * unit_registry.m ds = xr.Dataset( data_vars={ "a": xr.DataArray(data=array1, dims="x"), "b": xr.DataArray(data=array2, dims="x"), }, coords={"x": x}, ) values = raw_values * unit # TODO: if we choose dm as compatible unit, single value keys # can be found. Should we check that? if error is not None: with pytest.raises(error): ds.drop_sel(x=values) return expected = attach_units( strip_units(ds).drop_sel( x=strip_units(convert_units(values, {None: unit_registry.m})) ), extract_units(ds), ) actual = ds.drop_sel(x=values) assert_units_equal(expected, actual) assert_equal(expected, actual) @pytest.mark.skip(reason="indexes don't support units") @pytest.mark.parametrize( "raw_values", ( pytest.param(10, id="single_value"), pytest.param([10, 5, 13], id="list_of_values"), pytest.param(np.array([9, 3, 7, 12]), id="array_of_values"), ), ) @pytest.mark.parametrize( "unit,error", ( pytest.param(1, KeyError, id="no_units"), pytest.param(unit_registry.dimensionless, KeyError, id="dimensionless"), pytest.param(unit_registry.degree, KeyError, id="incompatible_unit"), pytest.param(unit_registry.mm, KeyError, id="compatible_unit"), pytest.param(unit_registry.m, None, id="identical_unit"), ), ) def test_loc(self, raw_values, unit, error, dtype): array1 = np.linspace(5, 10, 20).astype(dtype) * unit_registry.degK array2 = np.linspace(0, 5, 20).astype(dtype) * unit_registry.Pa x = np.arange(len(array1)) * unit_registry.m ds = xr.Dataset( data_vars={ "a": xr.DataArray(data=array1, dims="x"), "b": xr.DataArray(data=array2, dims="x"), }, coords={"x": x}, ) values = raw_values * unit # TODO: if we choose dm as compatible unit, single value keys # can be found. Should we check that? 
if error is not None: with pytest.raises(error): ds.loc[{"x": values}] return expected = attach_units( strip_units(ds).loc[ {"x": strip_units(convert_units(values, {None: unit_registry.m}))} ], extract_units(ds), ) actual = ds.loc[{"x": values}] assert_units_equal(expected, actual) assert_equal(expected, actual) @pytest.mark.parametrize( "func", ( method("head", x=7, y=3, z=6), method("tail", x=7, y=3, z=6), method("thin", x=7, y=3, z=6), ), ids=repr, ) @pytest.mark.parametrize( "variant", ( "data", pytest.param( "dims", marks=pytest.mark.skip(reason="indexes don't support units") ), "coords", ), ) def test_head_tail_thin(self, func, variant, dtype): variants = { "data": ((unit_registry.degK, unit_registry.Pa), 1, 1), "dims": ((1, 1), unit_registry.m, 1), "coords": ((1, 1), 1, unit_registry.m), } (unit_a, unit_b), dim_unit, coord_unit = variants[variant] array1 = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_a array2 = np.linspace(1, 2, 10 * 8).reshape(10, 8) * unit_b coords = { "x": np.arange(10) * dim_unit, "y": np.arange(5) * dim_unit, "z": np.arange(8) * dim_unit, "u": ("x", np.linspace(0, 1, 10) * coord_unit), "v": ("y", np.linspace(1, 2, 5) * coord_unit), "w": ("z", np.linspace(-1, 0, 8) * coord_unit), } ds = xr.Dataset( data_vars={ "a": xr.DataArray(data=array1, dims=("x", "y")), "b": xr.DataArray(data=array2, dims=("x", "z")), }, coords=coords, ) expected = attach_units(func(strip_units(ds)), extract_units(ds)) actual = func(ds) assert_units_equal(expected, actual) assert_equal(expected, actual) @pytest.mark.parametrize("dim", ("x", "y", "z", "t", "all")) @pytest.mark.parametrize( "shape", ( pytest.param((10, 20), id="nothing squeezable"), pytest.param((10, 20, 1), id="last dimension squeezable"), pytest.param((10, 1, 20), id="middle dimension squeezable"), pytest.param((1, 10, 20), id="first dimension squeezable"), pytest.param((1, 10, 1, 20), id="first and last dimension squeezable"), ), ) def test_squeeze(self, shape, dim, dtype): names = "xyzt" dim_lengths = dict(zip(names, shape, strict=False)) array1 = ( np.linspace(0, 1, 10 * 20).astype(dtype).reshape(shape) * unit_registry.degK ) array2 = ( np.linspace(1, 2, 10 * 20).astype(dtype).reshape(shape) * unit_registry.Pa ) ds = xr.Dataset( data_vars={ "a": (tuple(names[: len(shape)]), array1), "b": (tuple(names[: len(shape)]), array2), }, ) units = extract_units(ds) kwargs = {"dim": dim} if dim != "all" and dim_lengths.get(dim, 0) == 1 else {} expected = attach_units(strip_units(ds).squeeze(**kwargs), units) actual = ds.squeeze(**kwargs) assert_units_equal(expected, actual) assert_equal(expected, actual) @pytest.mark.parametrize("variant", ("data", "coords")) @pytest.mark.parametrize( "func", ( pytest.param( method("interp"), marks=pytest.mark.xfail(reason="uses scipy") ), method("reindex"), ), ids=repr, ) def test_interp_reindex(self, func, variant, dtype): variants = { "data": (unit_registry.m, 1), "coords": (1, unit_registry.m), } data_unit, coord_unit = variants[variant] array1 = np.linspace(-1, 0, 10).astype(dtype) * data_unit array2 = np.linspace(0, 1, 10).astype(dtype) * data_unit y = np.arange(10) * coord_unit x = np.arange(10) new_x = np.arange(8) + 0.5 ds = xr.Dataset( {"a": ("x", array1), "b": ("x", array2)}, coords={"x": x, "y": ("x", y)} ) units = extract_units(ds) expected = attach_units(func(strip_units(ds), x=new_x), units) actual = func(ds, x=new_x) assert_units_equal(expected, actual) assert_equal(expected, actual) @pytest.mark.skip(reason="indexes don't support units") @pytest.mark.parametrize( "unit,error", ( 
pytest.param(1, DimensionalityError, id="no_unit"), pytest.param( unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), pytest.param(unit_registry.cm, None, id="compatible_unit"), pytest.param(unit_registry.m, None, id="identical_unit"), ), ) @pytest.mark.parametrize("func", (method("interp"), method("reindex")), ids=repr) def test_interp_reindex_indexing(self, func, unit, error, dtype): array1 = np.linspace(-1, 0, 10).astype(dtype) array2 = np.linspace(0, 1, 10).astype(dtype) x = np.arange(10) * unit_registry.m new_x = (np.arange(8) + 0.5) * unit ds = xr.Dataset({"a": ("x", array1), "b": ("x", array2)}, coords={"x": x}) units = extract_units(ds) if error is not None: with pytest.raises(error): func(ds, x=new_x) return expected = attach_units(func(strip_units(ds), x=new_x), units) actual = func(ds, x=new_x) assert_units_equal(expected, actual) assert_equal(expected, actual) @pytest.mark.parametrize("variant", ("data", "coords")) @pytest.mark.parametrize( "func", ( pytest.param( method("interp_like"), marks=pytest.mark.xfail(reason="uses scipy") ), method("reindex_like"), ), ids=repr, ) def test_interp_reindex_like(self, func, variant, dtype): variants = { "data": (unit_registry.m, 1), "coords": (1, unit_registry.m), } data_unit, coord_unit = variants[variant] array1 = np.linspace(-1, 0, 10).astype(dtype) * data_unit array2 = np.linspace(0, 1, 10).astype(dtype) * data_unit y = np.arange(10) * coord_unit x = np.arange(10) new_x = np.arange(8) + 0.5 ds = xr.Dataset( {"a": ("x", array1), "b": ("x", array2)}, coords={"x": x, "y": ("x", y)} ) units = extract_units(ds) other = xr.Dataset({"a": ("x", np.empty_like(new_x))}, coords={"x": new_x}) expected = attach_units(func(strip_units(ds), other), units) actual = func(ds, other) assert_units_equal(expected, actual) assert_equal(expected, actual) @pytest.mark.skip(reason="indexes don't support units") @pytest.mark.parametrize( "unit,error", ( pytest.param(1, DimensionalityError, id="no_unit"), pytest.param( unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), pytest.param(unit_registry.cm, None, id="compatible_unit"), pytest.param(unit_registry.m, None, id="identical_unit"), ), ) @pytest.mark.parametrize( "func", (method("interp_like"), method("reindex_like")), ids=repr ) def test_interp_reindex_like_indexing(self, func, unit, error, dtype): array1 = np.linspace(-1, 0, 10).astype(dtype) array2 = np.linspace(0, 1, 10).astype(dtype) x = np.arange(10) * unit_registry.m new_x = (np.arange(8) + 0.5) * unit ds = xr.Dataset({"a": ("x", array1), "b": ("x", array2)}, coords={"x": x}) units = extract_units(ds) other = xr.Dataset({"a": ("x", np.empty_like(new_x))}, coords={"x": new_x}) if error is not None: with pytest.raises(error): func(ds, other) return expected = attach_units(func(strip_units(ds), other), units) actual = func(ds, other) assert_units_equal(expected, actual) assert_equal(expected, actual) @pytest.mark.parametrize("compute_backend", ["numbagg", None], indirect=True) @pytest.mark.parametrize( "func", ( method("diff", dim="x"), method("differentiate", coord="x"), method("integrate", coord="x"), method("quantile", q=[0.25, 0.75]), method("reduce", func=np.sum, dim="x"), method("map", np.fabs), ), ids=repr, ) @pytest.mark.parametrize( "variant", ( "data", pytest.param( "dims", marks=pytest.mark.skip(reason="indexes don't support units") ), "coords", ), ) def 
test_computation(self, func, variant, dtype, compute_backend): variants = { "data": ((unit_registry.degK, unit_registry.Pa), 1, 1), "dims": ((1, 1), unit_registry.m, 1), "coords": ((1, 1), 1, unit_registry.m), } (unit1, unit2), dim_unit, coord_unit = variants[variant] array1 = np.linspace(-5, 5, 4 * 5).reshape(4, 5).astype(dtype) * unit1 array2 = np.linspace(10, 20, 4 * 3).reshape(4, 3).astype(dtype) * unit2 x = np.arange(4) * dim_unit y = np.arange(5) * dim_unit z = np.arange(3) * dim_unit ds = xr.Dataset( data_vars={ "a": xr.DataArray(data=array1, dims=("x", "y")), "b": xr.DataArray(data=array2, dims=("x", "z")), }, coords={"x": x, "y": y, "z": z, "y2": ("y", np.arange(5) * coord_unit)}, ) units = extract_units(ds) expected = attach_units(func(strip_units(ds)), units) actual = func(ds) assert_units_equal(expected, actual) assert_equal(expected, actual) @pytest.mark.parametrize( "func", ( method("groupby", "x"), method("groupby_bins", "x", bins=2), method("coarsen", x=2), pytest.param( method("rolling", x=3), marks=pytest.mark.xfail(reason="strips units") ), pytest.param( method("rolling_exp", x=3), marks=pytest.mark.xfail( reason="numbagg functions are not supported by pint" ), ), method("weighted", xr.DataArray(data=np.linspace(0, 1, 5), dims="y")), ), ids=repr, ) @pytest.mark.parametrize( "variant", ( "data", pytest.param( "dims", marks=pytest.mark.skip(reason="indexes don't support units") ), "coords", ), ) def test_computation_objects(self, func, variant, dtype): variants = { "data": ((unit_registry.degK, unit_registry.Pa), 1, 1), "dims": ((1, 1), unit_registry.m, 1), "coords": ((1, 1), 1, unit_registry.m), } (unit1, unit2), dim_unit, coord_unit = variants[variant] array1 = np.linspace(-5, 5, 4 * 5).reshape(4, 5).astype(dtype) * unit1 array2 = np.linspace(10, 20, 4 * 3).reshape(4, 3).astype(dtype) * unit2 x = np.arange(4) * dim_unit y = np.arange(5) * dim_unit z = np.arange(3) * dim_unit ds = xr.Dataset( data_vars={"a": (("x", "y"), array1), "b": (("x", "z"), array2)}, coords={"x": x, "y": y, "z": z, "y2": ("y", np.arange(5) * coord_unit)}, ) units = extract_units(ds) args = [] if func.name != "groupby" else ["y"] # Doesn't work with flox because pint doesn't implement # ufunc.reduceat or np.bincount # kwargs = {"engine": "numpy"} if "groupby" in func.name else {} kwargs: dict[str, Any] = {} expected = attach_units(func(strip_units(ds)).mean(*args, **kwargs), units) actual = func(ds).mean(*args, **kwargs) assert_units_equal(expected, actual) assert_allclose(expected, actual) @pytest.mark.parametrize( "variant", ( "data", pytest.param( "dims", marks=pytest.mark.skip(reason="indexes don't support units") ), "coords", ), ) def test_resample(self, variant, dtype): # TODO: move this to test_computation_objects variants = { "data": ((unit_registry.degK, unit_registry.Pa), 1, 1), "dims": ((1, 1), unit_registry.m, 1), "coords": ((1, 1), 1, unit_registry.m), } (unit1, unit2), dim_unit, coord_unit = variants[variant] array1 = np.linspace(-5, 5, 10 * 5).reshape(10, 5).astype(dtype) * unit1 array2 = np.linspace(10, 20, 10 * 8).reshape(10, 8).astype(dtype) * unit2 t = xr.date_range("10-09-2010", periods=array1.shape[0], freq="YE") y = np.arange(5) * dim_unit z = np.arange(8) * dim_unit u = np.linspace(-1, 0, 5) * coord_unit ds = xr.Dataset( data_vars={"a": (("time", "y"), array1), "b": (("time", "z"), array2)}, coords={"time": t, "y": y, "z": z, "u": ("y", u)}, ) units = extract_units(ds) func = method("resample", time="6ME") expected = attach_units(func(strip_units(ds)).mean(), units) actual = 
func(ds).mean() assert_units_equal(expected, actual) assert_equal(expected, actual) @pytest.mark.parametrize("compute_backend", ["numbagg", None], indirect=True) @pytest.mark.parametrize( "func", ( method("assign", c=lambda ds: 10 * ds.b), method("assign_coords", v=("x", np.arange(5) * unit_registry.s)), method("first"), method("last"), method("quantile", q=[0.25, 0.5, 0.75], dim="x"), ), ids=repr, ) @pytest.mark.parametrize( "variant", ( "data", pytest.param( "dims", marks=pytest.mark.skip(reason="indexes don't support units") ), "coords", ), ) def test_grouped_operations(self, func, variant, dtype, compute_backend): variants = { "data": ((unit_registry.degK, unit_registry.Pa), 1, 1), "dims": ((1, 1), unit_registry.m, 1), "coords": ((1, 1), 1, unit_registry.m), } (unit1, unit2), dim_unit, coord_unit = variants[variant] array1 = np.linspace(-5, 5, 5 * 4).reshape(5, 4).astype(dtype) * unit1 array2 = np.linspace(10, 20, 5 * 4 * 3).reshape(5, 4, 3).astype(dtype) * unit2 x = np.arange(5) * dim_unit y = np.arange(4) * dim_unit z = np.arange(3) * dim_unit u = np.linspace(-1, 0, 4) * coord_unit ds = xr.Dataset( data_vars={"a": (("x", "y"), array1), "b": (("x", "y", "z"), array2)}, coords={"x": x, "y": y, "z": z, "u": ("y", u)}, ) assigned_units = {"c": unit2, "v": unit_registry.s} units = merge_mappings(extract_units(ds), assigned_units) stripped_kwargs = { name: strip_units(value) for name, value in func.kwargs.items() } expected = attach_units( func(strip_units(ds).groupby("y", squeeze=False), **stripped_kwargs), units ) actual = func(ds.groupby("y", squeeze=False)) assert_units_equal(expected, actual) assert_equal(expected, actual) @pytest.mark.parametrize( "func", ( method("pipe", lambda ds: ds * 10), method("assign", d=lambda ds: ds.b * 10), method("assign_coords", y2=("y", np.arange(4) * unit_registry.mm)), method("assign_attrs", attr1="value"), method("rename", x2="x_mm"), method("rename_vars", c="temperature"), method("rename_dims", x="offset_x"), method("swap_dims", {"x": "u"}), pytest.param( method( "expand_dims", v=np.linspace(10, 20, 12) * unit_registry.s, axis=1 ), marks=pytest.mark.skip(reason="indexes don't support units"), ), method("drop_vars", "x"), method("drop_dims", "z"), method("set_coords", names="c"), method("reset_coords", names="x2"), method("copy"), ), ids=repr, ) @pytest.mark.parametrize( "variant", ( "data", pytest.param( "dims", marks=pytest.mark.skip(reason="indexes don't support units") ), "coords", ), ) def test_content_manipulation(self, func, variant, dtype): variants = { "data": ( (unit_registry.m**3, unit_registry.Pa, unit_registry.degK), 1, 1, ), "dims": ((1, 1, 1), unit_registry.m, 1), "coords": ((1, 1, 1), 1, unit_registry.m), } (unit1, unit2, unit3), dim_unit, coord_unit = variants[variant] array1 = np.linspace(-5, 5, 5 * 4).reshape(5, 4).astype(dtype) * unit1 array2 = np.linspace(10, 20, 5 * 4 * 3).reshape(5, 4, 3).astype(dtype) * unit2 array3 = np.linspace(0, 10, 5).astype(dtype) * unit3 x = np.arange(5) * dim_unit y = np.arange(4) * dim_unit z = np.arange(3) * dim_unit x2 = np.linspace(-1, 0, 5) * coord_unit ds = xr.Dataset( data_vars={ "a": (("x", "y"), array1), "b": (("x", "y", "z"), array2), "c": ("x", array3), }, coords={"x": x, "y": y, "z": z, "x2": ("x", x2)}, ) new_units = { "y2": unit_registry.mm, "x_mm": coord_unit, "offset_x": unit_registry.m, "d": unit2, "temperature": unit3, } units = merge_mappings(extract_units(ds), new_units) stripped_kwargs = { key: strip_units(value) for key, value in func.kwargs.items() } expected = 
attach_units(func(strip_units(ds), **stripped_kwargs), units) actual = func(ds) assert_units_equal(expected, actual) if func.name == "rename_dims": assert_equal(expected, actual, check_default_indexes=False) else: assert_equal(expected, actual) @pytest.mark.parametrize( "unit,error", ( pytest.param(1, xr.MergeError, id="no_unit"), pytest.param( unit_registry.dimensionless, xr.MergeError, id="dimensionless" ), pytest.param(unit_registry.s, xr.MergeError, id="incompatible_unit"), pytest.param(unit_registry.cm, xr.MergeError, id="compatible_unit"), pytest.param(unit_registry.m, None, id="identical_unit"), ), ) @pytest.mark.parametrize( "variant", ( "data", pytest.param( "dims", marks=pytest.mark.skip(reason="indexes don't support units") ), "coords", ), ) @pytest.mark.filterwarnings( "ignore:.*the default value for compat will change:FutureWarning" ) def test_merge(self, variant, unit, error, dtype): left_variants = { "data": (unit_registry.m, 1, 1), "dims": (1, unit_registry.m, 1), "coords": (1, 1, unit_registry.m), } left_data_unit, left_dim_unit, left_coord_unit = left_variants[variant] right_variants = { "data": (unit, 1, 1), "dims": (1, unit, 1), "coords": (1, 1, unit), } right_data_unit, right_dim_unit, right_coord_unit = right_variants[variant] left_array = np.arange(10).astype(dtype) * left_data_unit right_array = np.arange(-5, 5).astype(dtype) * right_data_unit left_dim = np.arange(10, 20) * left_dim_unit right_dim = np.arange(5, 15) * right_dim_unit left_coord = np.arange(-10, 0) * left_coord_unit right_coord = np.arange(-15, -5) * right_coord_unit left = xr.Dataset( data_vars={"a": ("x", left_array)}, coords={"x": left_dim, "y": ("x", left_coord)}, ) right = xr.Dataset( data_vars={"a": ("x", right_array)}, coords={"x": right_dim, "y": ("x", right_coord)}, ) units = extract_units(left) if error is not None: with pytest.raises(error): left.merge(right, compat="no_conflicts", join="outer") return converted = convert_units(right, units) expected = attach_units( strip_units(left).merge(strip_units(converted), join="outer"), units ) actual = left.merge(right, join="outer") assert_units_equal(expected, actual) assert_equal(expected, actual) @requires_dask class TestPintWrappingDask: def test_duck_array_ops(self): import dask.array d = dask.array.array([1, 2, 3]) q = unit_registry.Quantity(d, units="m") da = xr.DataArray(q, dims="x") actual = da.mean().compute() actual.name = None expected = xr.DataArray(unit_registry.Quantity(np.array(2.0), units="m")) assert_units_equal(expected, actual) # Don't use isinstance b/c we don't want to allow subclasses through assert type(expected.data) is type(actual.data) @requires_matplotlib class TestPlots(PlotTestCase): @pytest.mark.parametrize( "coord_unit, coord_attrs", [ (1, {"units": "meter"}), pytest.param( unit_registry.m, {}, marks=pytest.mark.xfail(reason="indexes don't support units"), ), ], ) def test_units_in_line_plot_labels(self, coord_unit, coord_attrs): arr = np.linspace(1, 10, 3) * unit_registry.Pa coord_arr = np.linspace(1, 3, 3) * coord_unit x_coord = xr.DataArray(coord_arr, dims="x", attrs=coord_attrs) da = xr.DataArray(data=arr, dims="x", coords={"x": x_coord}, name="pressure") da.plot.line() ax = plt.gca() assert ax.get_ylabel() == "pressure [pascal]" assert ax.get_xlabel() == "x [meter]" @pytest.mark.parametrize( "coord_unit, coord_attrs", [ (1, {"units": "meter"}), pytest.param( unit_registry.m, {}, marks=pytest.mark.xfail(reason="indexes don't support units"), ), ], ) def test_units_in_slice_line_plot_labels_sel(self, coord_unit, 
coord_attrs): arr = xr.DataArray( name="var_a", data=np.array([[1, 2], [3, 4]]), coords=dict( a=("a", np.array([5, 6]) * coord_unit, coord_attrs), b=("b", np.array([7, 8]) * coord_unit, coord_attrs), ), dims=("a", "b"), ) arr.sel(a=5).plot(marker="o") # type: ignore[call-arg] assert plt.gca().get_title() == "a = 5 [meter]" @pytest.mark.parametrize( "coord_unit, coord_attrs", [ (1, {"units": "meter"}), pytest.param( unit_registry.m, {}, marks=pytest.mark.xfail(reason="pint.errors.UnitStrippedWarning"), ), ], ) def test_units_in_slice_line_plot_labels_isel(self, coord_unit, coord_attrs): arr = xr.DataArray( name="var_a", data=np.array([[1, 2], [3, 4]]), coords=dict( a=("x", np.array([5, 6]) * coord_unit, coord_attrs), b=("y", np.array([7, 8])), ), dims=("x", "y"), ) arr.isel(x=0).plot(marker="o") # type: ignore[call-arg] assert plt.gca().get_title() == "a = 5 [meter]" def test_units_in_2d_plot_colorbar_label(self): arr = np.ones((2, 3)) * unit_registry.Pa da = xr.DataArray(data=arr, dims=["x", "y"], name="pressure") _fig, (ax, cax) = plt.subplots(1, 2) ax = da.plot.contourf(ax=ax, cbar_ax=cax, add_colorbar=True) assert cax.get_ylabel() == "pressure [pascal]" def test_units_facetgrid_plot_labels(self): arr = np.ones((2, 3)) * unit_registry.Pa da = xr.DataArray(data=arr, dims=["x", "y"], name="pressure") _fig, (_ax, _cax) = plt.subplots(1, 2) fgrid = da.plot.line(x="x", col="y") assert fgrid.axs[0, 0].get_ylabel() == "pressure [pascal]" def test_units_facetgrid_2d_imshow_plot_colorbar_labels(self): arr = np.ones((2, 3, 4, 5)) * unit_registry.Pa da = xr.DataArray(data=arr, dims=["x", "y", "z", "w"], name="pressure") da.plot.imshow(x="x", y="y", col="w") # no colorbar to check labels of def test_units_facetgrid_2d_contourf_plot_colorbar_labels(self): arr = np.ones((2, 3, 4)) * unit_registry.Pa da = xr.DataArray(data=arr, dims=["x", "y", "z"], name="pressure") _fig, (_ax1, _ax2, _ax3, _cax) = plt.subplots(1, 4) fgrid = da.plot.contourf(x="x", y="y", col="z") assert fgrid.cbar.ax.get_ylabel() == "pressure [pascal]" # type: ignore[union-attr] python-xarray-2026.01.0/xarray/tests/test_weighted.py0000664000175000017500000006057015136607163022734 0ustar alastairalastairfrom __future__ import annotations from collections.abc import Iterable from typing import Any import numpy as np import pytest import xarray as xr from xarray import DataArray, Dataset from xarray.tests import ( assert_allclose, assert_equal, raise_if_dask_computes, requires_cftime, requires_dask, ) @pytest.mark.parametrize("as_dataset", (True, False)) def test_weighted_non_DataArray_weights(as_dataset: bool) -> None: data: DataArray | Dataset = DataArray([1, 2]) if as_dataset: data = data.to_dataset(name="data") with pytest.raises(ValueError, match=r"`weights` must be a DataArray"): data.weighted([1, 2]) # type: ignore[arg-type] @pytest.mark.parametrize("as_dataset", (True, False)) @pytest.mark.parametrize("weights", ([np.nan, 2], [np.nan, np.nan])) def test_weighted_weights_nan_raises(as_dataset: bool, weights: list[float]) -> None: data: DataArray | Dataset = DataArray([1, 2]) if as_dataset: data = data.to_dataset(name="data") with pytest.raises(ValueError, match=r"`weights` cannot contain missing values."): data.weighted(DataArray(weights)) @requires_dask @pytest.mark.parametrize("as_dataset", (True, False)) @pytest.mark.parametrize("weights", ([np.nan, 2], [np.nan, np.nan])) def test_weighted_weights_nan_raises_dask(as_dataset, weights): data: DataArray | Dataset = DataArray([1, 2]).chunk({"dim_0": -1}) if as_dataset: data = 
data.to_dataset(name="data") weights = DataArray(weights).chunk({"dim_0": -1}) with raise_if_dask_computes(): weighted = data.weighted(weights) with pytest.raises(ValueError, match=r"`weights` cannot contain missing values."): weighted.sum().load() @requires_cftime @requires_dask @pytest.mark.parametrize("time_chunks", (1, 5)) @pytest.mark.parametrize("resample_spec", ("1YS", "5YS", "10YS")) def test_weighted_lazy_resample(time_chunks, resample_spec): # https://github.com/pydata/xarray/issues/4625 # simple customized weighted mean function def mean_func(ds): return ds.weighted(ds.weights).mean("time") # example dataset t = xr.date_range(start="2000", periods=20, freq="1YS", use_cftime=True) weights = xr.DataArray(np.random.rand(len(t)), dims=["time"], coords={"time": t}) data = xr.DataArray( np.random.rand(len(t)), dims=["time"], coords={"time": t, "weights": weights} ) ds = xr.Dataset({"data": data}).chunk({"time": time_chunks}) with raise_if_dask_computes(): ds.resample(time=resample_spec).map(mean_func) @pytest.mark.parametrize( ("weights", "expected"), (([1, 2], 3), ([2, 0], 2), ([0, 0], np.nan), ([-1, 1], np.nan)), ) def test_weighted_sum_of_weights_no_nan(weights, expected): da = DataArray([1, 2]) weights = DataArray(weights) result = da.weighted(weights).sum_of_weights() expected = DataArray(expected) assert_equal(expected, result) @pytest.mark.parametrize( ("weights", "expected"), (([1, 2], 2), ([2, 0], np.nan), ([0, 0], np.nan), ([-1, 1], 1)), ) def test_weighted_sum_of_weights_nan(weights, expected): da = DataArray([np.nan, 2]) weights = DataArray(weights) result = da.weighted(weights).sum_of_weights() expected = DataArray(expected) assert_equal(expected, result) def test_weighted_sum_of_weights_bool(): # https://github.com/pydata/xarray/issues/4074 da = DataArray([1, 2]) weights = DataArray([True, True]) result = da.weighted(weights).sum_of_weights() expected = DataArray(2) assert_equal(expected, result) @pytest.mark.parametrize("da", ([1.0, 2], [1, np.nan], [np.nan, np.nan])) @pytest.mark.parametrize("factor", [0, 1, 3.14]) @pytest.mark.parametrize("skipna", (True, False)) def test_weighted_sum_equal_weights(da, factor, skipna): # if all weights are 'f'; weighted sum is f times the ordinary sum da = DataArray(da) weights = xr.full_like(da, factor) expected = da.sum(skipna=skipna) * factor result = da.weighted(weights).sum(skipna=skipna) assert_equal(expected, result) @pytest.mark.parametrize( ("weights", "expected"), (([1, 2], 5), ([0, 2], 4), ([0, 0], 0)) ) def test_weighted_sum_no_nan(weights, expected): da = DataArray([1, 2]) weights = DataArray(weights) result = da.weighted(weights).sum() expected = DataArray(expected) assert_equal(expected, result) @pytest.mark.parametrize( ("weights", "expected"), (([1, 2], 4), ([0, 2], 4), ([1, 0], 0), ([0, 0], 0)) ) @pytest.mark.parametrize("skipna", (True, False)) def test_weighted_sum_nan(weights, expected, skipna): da = DataArray([np.nan, 2]) weights = DataArray(weights) result = da.weighted(weights).sum(skipna=skipna) if skipna: expected = DataArray(expected) else: expected = DataArray(np.nan) assert_equal(expected, result) @pytest.mark.filterwarnings("error") @pytest.mark.parametrize("da", ([1.0, 2], [1, np.nan], [np.nan, np.nan])) @pytest.mark.parametrize("skipna", (True, False)) @pytest.mark.parametrize("factor", [1, 2, 3.14]) def test_weighted_mean_equal_weights(da, skipna, factor): # if all weights are equal (!= 0), should yield the same result as mean da = DataArray(da) # all weights as 1. 
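# Descriptive note (not part of the original test): despite the "all weights as 1."
# comment above, the weights are filled with `factor`; the assertion still holds
# because a constant non-zero factor cancels in the weighted mean:
#     sum(factor * x) / sum(factor) == factor * sum(x) / (factor * n) == mean(x)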
weights = xr.full_like(da, factor) expected = da.mean(skipna=skipna) result = da.weighted(weights).mean(skipna=skipna) assert_equal(expected, result) @pytest.mark.parametrize( ("weights", "expected"), (([4, 6], 1.6), ([1, 0], 1.0), ([0, 0], np.nan)) ) def test_weighted_mean_no_nan(weights, expected): da = DataArray([1, 2]) weights = DataArray(weights) expected = DataArray(expected) result = da.weighted(weights).mean() assert_equal(expected, result) @pytest.mark.parametrize( ("weights", "expected"), ( ( [0.25, 0.05, 0.15, 0.25, 0.15, 0.1, 0.05], [1.554595, 2.463784, 3.000000, 3.518378], ), ( [0.05, 0.05, 0.1, 0.15, 0.15, 0.25, 0.25], [2.840000, 3.632973, 4.076216, 4.523243], ), ), ) def test_weighted_quantile_no_nan(weights, expected): # Expected values were calculated by running the reference implementation # proposed in https://aakinshin.net/posts/weighted-quantiles/ da = DataArray([1, 1.9, 2.2, 3, 3.7, 4.1, 5]) q = [0.2, 0.4, 0.6, 0.8] weights = DataArray(weights) expected = DataArray(expected, coords={"quantile": q}) result = da.weighted(weights).quantile(q) assert_allclose(expected, result) def test_weighted_quantile_zero_weights(): da = DataArray([0, 1, 2, 3]) weights = DataArray([1, 0, 1, 0]) q = 0.75 result = da.weighted(weights).quantile(q) expected = DataArray([0, 2]).quantile(0.75) assert_allclose(expected, result) def test_weighted_quantile_simple(): # Check that weighted quantiles return the same value as numpy quantiles da = DataArray([0, 1, 2, 3]) w = DataArray([1, 0, 1, 0]) w_eps = DataArray([1, 0.0001, 1, 0.0001]) q = 0.75 expected = DataArray(np.quantile([0, 2], q), coords={"quantile": q}) # 1.5 assert_equal(expected, da.weighted(w).quantile(q)) assert_allclose(expected, da.weighted(w_eps).quantile(q), rtol=0.001) @pytest.mark.parametrize("skipna", (True, False)) def test_weighted_quantile_nan(skipna): # Check skipna behavior da = DataArray([0, 1, 2, 3, np.nan]) w = DataArray([1, 0, 1, 0, 1]) q = [0.5, 0.75] result = da.weighted(w).quantile(q, skipna=skipna) if skipna: expected = DataArray(np.quantile([0, 2], q), coords={"quantile": q}) else: expected = DataArray(np.full(len(q), np.nan), coords={"quantile": q}) assert_allclose(expected, result) @pytest.mark.parametrize( "da", ( pytest.param([1, 1.9, 2.2, 3, 3.7, 4.1, 5], id="nonan"), pytest.param([1, 1.9, 2.2, 3, 3.7, 4.1, np.nan], id="singlenan"), pytest.param( [np.nan, np.nan, np.nan], id="allnan", marks=pytest.mark.filterwarnings( "ignore:All-NaN slice encountered:RuntimeWarning" ), ), ), ) @pytest.mark.parametrize("q", (0.5, (0.2, 0.8))) @pytest.mark.parametrize("skipna", (True, False)) @pytest.mark.parametrize("factor", [1, 3.14]) def test_weighted_quantile_equal_weights( da: list[float], q: float | tuple[float, ...], skipna: bool, factor: float ) -> None: # if all weights are equal (!= 0), should yield the same result as quantile data = DataArray(da) weights = xr.full_like(data, factor) expected = data.quantile(q, skipna=skipna) result = data.weighted(weights).quantile(q, skipna=skipna) assert_allclose(expected, result) @pytest.mark.skip(reason="`method` argument is not currently exposed") @pytest.mark.parametrize( "da", ( [1, 1.9, 2.2, 3, 3.7, 4.1, 5], [1, 1.9, 2.2, 3, 3.7, 4.1, np.nan], [np.nan, np.nan, np.nan], ), ) @pytest.mark.parametrize("q", (0.5, (0.2, 0.8))) @pytest.mark.parametrize("skipna", (True, False)) @pytest.mark.parametrize( "method", [ "linear", "interpolated_inverted_cdf", "hazen", "weibull", "median_unbiased", "normal_unbiased2", ], ) def test_weighted_quantile_equal_weights_all_methods(da, q, 
skipna, factor, method): # If all weights are equal (!= 0), should yield the same result as numpy quantile da = DataArray(da) weights = xr.full_like(da, 3.14) expected = da.quantile(q, skipna=skipna, method=method) result = da.weighted(weights).quantile(q, skipna=skipna, method=method) assert_allclose(expected, result) def test_weighted_quantile_bool(): # https://github.com/pydata/xarray/issues/4074 da = DataArray([1, 1]) weights = DataArray([True, True]) q = 0.5 expected = DataArray([1], coords={"quantile": [q]}).squeeze() result = da.weighted(weights).quantile(q) assert_equal(expected, result) @pytest.mark.parametrize("q", (-1, 1.1, (0.5, 1.1), ((0.2, 0.4), (0.6, 0.8)))) def test_weighted_quantile_with_invalid_q(q): da = DataArray([1, 1.9, 2.2, 3, 3.7, 4.1, 5]) q = np.asarray(q) weights = xr.ones_like(da) if q.ndim <= 1: with pytest.raises(ValueError, match="q values must be between 0 and 1"): da.weighted(weights).quantile(q) else: with pytest.raises(ValueError, match="q must be a scalar or 1d"): da.weighted(weights).quantile(q) @pytest.mark.parametrize( ("weights", "expected"), (([4, 6], 2.0), ([1, 0], np.nan), ([0, 0], np.nan)) ) @pytest.mark.parametrize("skipna", (True, False)) def test_weighted_mean_nan(weights, expected, skipna): da = DataArray([np.nan, 2]) weights = DataArray(weights) if skipna: expected = DataArray(expected) else: expected = DataArray(np.nan) result = da.weighted(weights).mean(skipna=skipna) assert_equal(expected, result) def test_weighted_mean_bool(): # https://github.com/pydata/xarray/issues/4074 da = DataArray([1, 1]) weights = DataArray([True, True]) expected = DataArray(1) result = da.weighted(weights).mean() assert_equal(expected, result) @pytest.mark.parametrize( ("weights", "expected"), (([1, 2], 2 / 3), ([2, 0], 0), ([0, 0], 0), ([-1, 1], 0)), ) def test_weighted_sum_of_squares_no_nan(weights, expected): da = DataArray([1, 2]) weights = DataArray(weights) result = da.weighted(weights).sum_of_squares() expected = DataArray(expected) assert_equal(expected, result) @pytest.mark.parametrize( ("weights", "expected"), (([1, 2], 0), ([2, 0], 0), ([0, 0], 0), ([-1, 1], 0)), ) def test_weighted_sum_of_squares_nan(weights, expected): da = DataArray([np.nan, 2]) weights = DataArray(weights) result = da.weighted(weights).sum_of_squares() expected = DataArray(expected) assert_equal(expected, result) @pytest.mark.filterwarnings("error") @pytest.mark.parametrize("da", ([1.0, 2], [1, np.nan])) @pytest.mark.parametrize("skipna", (True, False)) @pytest.mark.parametrize("factor", [1, 2, 3.14]) def test_weighted_var_equal_weights(da, skipna, factor): # if all weights are equal (!= 0), should yield the same result as var da = DataArray(da) # all weights as 1. 
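# Descriptive note (not part of the original test): the weights here are all equal
# to `factor` rather than 1; the weighted (population) variance
#     sum(factor * (x - mean)**2) / sum(factor)
# reduces to the unweighted ``da.var()`` (ddof=0) because the constant factor cancels.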
weights = xr.full_like(da, factor) expected = da.var(skipna=skipna) result = da.weighted(weights).var(skipna=skipna) assert_equal(expected, result) @pytest.mark.parametrize( ("weights", "expected"), (([4, 6], 0.24), ([1, 0], 0.0), ([0, 0], np.nan)) ) def test_weighted_var_no_nan(weights, expected): da = DataArray([1, 2]) weights = DataArray(weights) expected = DataArray(expected) result = da.weighted(weights).var() assert_equal(expected, result) @pytest.mark.parametrize( ("weights", "expected"), (([4, 6], 0), ([1, 0], np.nan), ([0, 0], np.nan)) ) def test_weighted_var_nan(weights, expected): da = DataArray([np.nan, 2]) weights = DataArray(weights) expected = DataArray(expected) result = da.weighted(weights).var() assert_equal(expected, result) def test_weighted_var_bool(): # https://github.com/pydata/xarray/issues/4074 da = DataArray([1, 1]) weights = DataArray([True, True]) expected = DataArray(0) result = da.weighted(weights).var() assert_equal(expected, result) @pytest.mark.filterwarnings("error") @pytest.mark.parametrize("da", ([1.0, 2], [1, np.nan])) @pytest.mark.parametrize("skipna", (True, False)) @pytest.mark.parametrize("factor", [1, 2, 3.14]) def test_weighted_std_equal_weights(da, skipna, factor): # if all weights are equal (!= 0), should yield the same result as std da = DataArray(da) # all weights as 1. weights = xr.full_like(da, factor) expected = da.std(skipna=skipna) result = da.weighted(weights).std(skipna=skipna) assert_equal(expected, result) @pytest.mark.parametrize( ("weights", "expected"), (([4, 6], np.sqrt(0.24)), ([1, 0], 0.0), ([0, 0], np.nan)) ) def test_weighted_std_no_nan(weights, expected): da = DataArray([1, 2]) weights = DataArray(weights) expected = DataArray(expected) result = da.weighted(weights).std() assert_equal(expected, result) @pytest.mark.parametrize( ("weights", "expected"), (([4, 6], 0), ([1, 0], np.nan), ([0, 0], np.nan)) ) def test_weighted_std_nan(weights, expected): da = DataArray([np.nan, 2]) weights = DataArray(weights) expected = DataArray(expected) result = da.weighted(weights).std() assert_equal(expected, result) def test_weighted_std_bool(): # https://github.com/pydata/xarray/issues/4074 da = DataArray([1, 1]) weights = DataArray([True, True]) expected = DataArray(0) result = da.weighted(weights).std() assert_equal(expected, result) def expected_weighted(da, weights, dim, skipna, operation): """ Generate expected result using ``*`` and ``sum``. 
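For example, the weighted mean is built as ``(da * weights).sum(dim, skipna=skipna)``
divided by the sum of the weights masked to the non-missing entries of ``da``,
with a total weight of zero replaced by NaN (see the body below).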
This is checked against the result of da.weighted which uses ``dot`` """ weighted_sum = (da * weights).sum(dim=dim, skipna=skipna) if operation == "sum": return weighted_sum masked_weights = weights.where(da.notnull()) sum_of_weights = masked_weights.sum(dim=dim, skipna=True) valid_weights = sum_of_weights != 0 sum_of_weights = sum_of_weights.where(valid_weights) if operation == "sum_of_weights": return sum_of_weights weighted_mean = weighted_sum / sum_of_weights if operation == "mean": return weighted_mean demeaned = da - weighted_mean sum_of_squares = ((demeaned**2) * weights).sum(dim=dim, skipna=skipna) if operation == "sum_of_squares": return sum_of_squares var = sum_of_squares / sum_of_weights if operation == "var": return var if operation == "std": return np.sqrt(var) def check_weighted_operations(data, weights, dim, skipna): # check sum of weights result = data.weighted(weights).sum_of_weights(dim) expected = expected_weighted(data, weights, dim, skipna, "sum_of_weights") assert_allclose(expected, result) # check weighted sum result = data.weighted(weights).sum(dim, skipna=skipna) expected = expected_weighted(data, weights, dim, skipna, "sum") assert_allclose(expected, result) # check weighted mean result = data.weighted(weights).mean(dim, skipna=skipna) expected = expected_weighted(data, weights, dim, skipna, "mean") assert_allclose(expected, result) # check weighted sum of squares result = data.weighted(weights).sum_of_squares(dim, skipna=skipna) expected = expected_weighted(data, weights, dim, skipna, "sum_of_squares") assert_allclose(expected, result) # check weighted var result = data.weighted(weights).var(dim, skipna=skipna) expected = expected_weighted(data, weights, dim, skipna, "var") assert_allclose(expected, result) # check weighted std result = data.weighted(weights).std(dim, skipna=skipna) expected = expected_weighted(data, weights, dim, skipna, "std") assert_allclose(expected, result) @pytest.mark.parametrize("dim", ("a", "b", "c", ("a", "b"), ("a", "b", "c"), None)) @pytest.mark.parametrize("add_nans", (True, False)) @pytest.mark.parametrize("skipna", (None, True, False)) @pytest.mark.filterwarnings("ignore:invalid value encountered in sqrt") def test_weighted_operations_3D(dim, add_nans, skipna): dims = ("a", "b", "c") coords = dict(a=[0, 1, 2, 3], b=[0, 1, 2, 3], c=[0, 1, 2, 3]) weights = DataArray(np.random.randn(4, 4, 4), dims=dims, coords=coords) data_values = np.random.randn(4, 4, 4) # add approximately 25 % NaNs (https://stackoverflow.com/a/32182680/3010700) if add_nans: c = int(data_values.size * 0.25) data_values.ravel()[np.random.choice(data_values.size, c, replace=False)] = ( np.nan ) data = DataArray(data_values, dims=dims, coords=coords) check_weighted_operations(data, weights, dim, skipna) ds = data.to_dataset(name="data") check_weighted_operations(ds, weights, dim, skipna) @pytest.mark.parametrize("dim", ("a", "b", "c", ("a", "b"), ("a", "b", "c"), None)) @pytest.mark.parametrize("q", (0.5, (0.1, 0.9), (0.2, 0.4, 0.6, 0.8))) @pytest.mark.parametrize("add_nans", (True, False)) @pytest.mark.parametrize("skipna", (None, True, False)) def test_weighted_quantile_3D(dim, q, add_nans, skipna): dims = ("a", "b", "c") coords = dict(a=[0, 1, 2], b=[0, 1, 2, 3], c=[0, 1, 2, 3, 4]) data = np.arange(60).reshape(3, 4, 5).astype(float) # add approximately 25 % NaNs (https://stackoverflow.com/a/32182680/3010700) if add_nans: c = int(data.size * 0.25) data.ravel()[np.random.choice(data.size, c, replace=False)] = np.nan da = DataArray(data, dims=dims, coords=coords) # 
Weights are all ones, because we will compare against DataArray.quantile (non-weighted) weights = xr.ones_like(da) result = da.weighted(weights).quantile(q, dim=dim, skipna=skipna) expected = da.quantile(q, dim=dim, skipna=skipna) assert_allclose(expected, result) ds = da.to_dataset(name="data") result2 = ds.weighted(weights).quantile(q, dim=dim, skipna=skipna) assert_allclose(expected, result2.data) @pytest.mark.parametrize( "coords_weights, coords_data, expected_value_at_weighted_quantile", [ ([0, 1, 2, 3], [1, 2, 3, 4], 2.5), # no weights for coord a == 4 ([0, 1, 2, 3], [2, 3, 4, 5], 1.8), # no weights for coord a == 4 or 5 ([2, 3, 4, 5], [0, 1, 2, 3], 3.8), # no weights for coord a == 0 or 1 ], ) def test_weighted_operations_nonequal_coords( coords_weights: Iterable[Any], coords_data: Iterable[Any], expected_value_at_weighted_quantile: float, ) -> None: """Check that weighted operations work with unequal coords. Parameters ---------- coords_weights : Iterable[Any] The coords for the weights. coords_data : Iterable[Any] The coords for the data. expected_value_at_weighted_quantile : float The expected value for the quantile of the weighted data. """ da_weights = DataArray( [0.5, 1.0, 1.0, 2.0], dims=("a",), coords=dict(a=coords_weights) ) da_data = DataArray([1, 2, 3, 4], dims=("a",), coords=dict(a=coords_data)) check_weighted_operations(da_data, da_weights, dim="a", skipna=None) quantile = 0.5 da_actual = da_data.weighted(da_weights).quantile(quantile, dim="a") da_expected = DataArray( [expected_value_at_weighted_quantile], coords={"quantile": [quantile]} ).squeeze() assert_allclose(da_actual, da_expected) ds_data = da_data.to_dataset(name="data") check_weighted_operations(ds_data, da_weights, dim="a", skipna=None) ds_actual = ds_data.weighted(da_weights).quantile(quantile, dim="a") assert_allclose(ds_actual, da_expected.to_dataset(name="data")) @pytest.mark.parametrize("shape_data", ((4,), (4, 4), (4, 4, 4))) @pytest.mark.parametrize("shape_weights", ((4,), (4, 4), (4, 4, 4))) @pytest.mark.parametrize("add_nans", (True, False)) @pytest.mark.parametrize("skipna", (None, True, False)) @pytest.mark.filterwarnings("ignore:invalid value encountered in sqrt") def test_weighted_operations_different_shapes( shape_data, shape_weights, add_nans, skipna ): weights = DataArray(np.random.randn(*shape_weights)) data_values = np.random.randn(*shape_data) # add approximately 25 % NaNs if add_nans: c = int(data_values.size * 0.25) data_values.ravel()[np.random.choice(data_values.size, c, replace=False)] = ( np.nan ) data = DataArray(data_values) check_weighted_operations(data, weights, "dim_0", skipna) check_weighted_operations(data, weights, None, skipna) ds = data.to_dataset(name="data") check_weighted_operations(ds, weights, "dim_0", skipna) check_weighted_operations(ds, weights, None, skipna) @pytest.mark.parametrize( "operation", ("sum_of_weights", "sum", "mean", "sum_of_squares", "var", "std", "quantile"), ) @pytest.mark.parametrize("as_dataset", (True, False)) @pytest.mark.parametrize("keep_attrs", (True, False, None)) def test_weighted_operations_keep_attr(operation, as_dataset, keep_attrs): weights = DataArray(np.random.randn(2, 2), attrs=dict(attr="weights")) data: DataArray | Dataset = DataArray(np.random.randn(2, 2)) if as_dataset: data = data.to_dataset(name="data") data.attrs = dict(attr="weights") kwargs = {"keep_attrs": keep_attrs} if operation == "quantile": kwargs["q"] = 0.5 result = getattr(data.weighted(weights), operation)(**kwargs) # When keep_attrs is None, it defaults to True 
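# Descriptive note: ``sum_of_weights`` is expected to carry the attrs of the weights,
# while every other reduction carries the attrs of the data (checked by the
# assertions below).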
expected_keep = keep_attrs if keep_attrs is not None else True if operation == "sum_of_weights": assert result.attrs == (weights.attrs if expected_keep else {}) else: assert result.attrs == (data.attrs if expected_keep else {}) @pytest.mark.parametrize( "operation", ("sum_of_weights", "sum", "mean", "sum_of_squares", "var", "std", "quantile"), ) def test_weighted_operations_keep_attr_da_in_ds(operation): # GH #3595 weights = DataArray(np.random.randn(2, 2)) da = DataArray(np.random.randn(2, 2), attrs=dict(attr="data")) data = da.to_dataset(name="a") kwargs: dict[str, Any] = {"keep_attrs": True} if operation == "quantile": kwargs["q"] = 0.5 result = getattr(data.weighted(weights), operation)(**kwargs) assert data.a.attrs == result.a.attrs def test_weighted_mean_keep_attrs_ds(): weights = DataArray(np.random.randn(2)) data = Dataset( {"a": (["dim_0", "dim_1"], np.random.randn(2, 2), dict(attr="data"))}, coords={"dim_1": ("dim_1", ["a", "b"], {"attr1": "value1"})}, ) result = data.weighted(weights).mean(dim="dim_0", keep_attrs=True) assert data.coords["dim_1"].attrs == result.coords["dim_1"].attrs @pytest.mark.parametrize("operation", ("sum_of_weights", "sum", "mean", "quantile")) @pytest.mark.parametrize("as_dataset", (True, False)) def test_weighted_bad_dim(operation, as_dataset): data_array = DataArray(np.random.randn(2, 2)) weights = xr.ones_like(data_array) data: DataArray | Dataset = data_array if as_dataset: data = data_array.to_dataset(name="data") kwargs: dict[str, Any] = {"dim": "bad_dim"} if operation == "quantile": kwargs["q"] = 0.5 with pytest.raises( ValueError, match=( f"Dimensions \\('bad_dim',\\) not found in {data.__class__.__name__}Weighted " # the order of (dim_0, dim_1) varies "dimensions \\(('dim_0', 'dim_1'|'dim_1', 'dim_0')\\)" ), ): getattr(data.weighted(weights), operation)(**kwargs) python-xarray-2026.01.0/xarray/tests/test_plugins.py0000664000175000017500000002430715136607163022613 0ustar alastairalastairfrom __future__ import annotations import sys from importlib.metadata import EntryPoint, EntryPoints from itertools import starmap from unittest import mock import pytest from xarray.backends import common, plugins from xarray.core.options import OPTIONS from xarray.tests import ( has_h5netcdf, has_netCDF4, has_pydap, has_scipy, has_zarr, ) # Do not import list_engines here, this will break the lazy tests importlib_metadata_mock = "importlib.metadata" class DummyBackendEntrypointArgs(common.BackendEntrypoint): def open_dataset(filename_or_obj, *args): # type: ignore[override] pass class DummyBackendEntrypointKwargs(common.BackendEntrypoint): def open_dataset(filename_or_obj, **kwargs): # type: ignore[override] pass class DummyBackendEntrypoint1(common.BackendEntrypoint): def open_dataset(self, filename_or_obj, *, decoder): # type: ignore[override] pass class DummyBackendEntrypoint2(common.BackendEntrypoint): def open_dataset(self, filename_or_obj, *, decoder): # type: ignore[override] pass @pytest.fixture def dummy_duplicated_entrypoints(): specs = [ ["engine1", "xarray.tests.test_plugins:backend_1", "xarray.backends"], ["engine1", "xarray.tests.test_plugins:backend_2", "xarray.backends"], ["engine2", "xarray.tests.test_plugins:backend_1", "xarray.backends"], ["engine2", "xarray.tests.test_plugins:backend_2", "xarray.backends"], ] eps = list(starmap(EntryPoint, specs)) return eps @pytest.mark.filterwarnings("ignore:Found") def test_remove_duplicates(dummy_duplicated_entrypoints) -> None: with pytest.warns(RuntimeWarning): entrypoints = 
plugins.remove_duplicates(dummy_duplicated_entrypoints) assert len(entrypoints) == 2 def test_broken_plugin() -> None: broken_backend = EntryPoint( "broken_backend", "xarray.tests.test_plugins:backend_1", "xarray.backends", ) with pytest.warns(RuntimeWarning) as record: _ = plugins.build_engines(EntryPoints([broken_backend])) assert len(record) == 1 message = str(record[0].message) assert "Engine 'broken_backend'" in message def test_remove_duplicates_warnings(dummy_duplicated_entrypoints) -> None: with pytest.warns(RuntimeWarning) as record: _ = plugins.remove_duplicates(dummy_duplicated_entrypoints) assert len(record) == 2 message0 = str(record[0].message) message1 = str(record[1].message) assert "entrypoints" in message0 assert "entrypoints" in message1 @mock.patch( f"{importlib_metadata_mock}.EntryPoint.load", mock.MagicMock(return_value=None) ) def test_backends_dict_from_pkg() -> None: specs = [ ["engine1", "xarray.tests.test_plugins:backend_1", "xarray.backends"], ["engine2", "xarray.tests.test_plugins:backend_2", "xarray.backends"], ] entrypoints = list(starmap(EntryPoint, specs)) engines = plugins.backends_dict_from_pkg(entrypoints) assert len(engines) == 2 assert engines.keys() == {"engine1", "engine2"} def test_set_missing_parameters() -> None: backend_1 = DummyBackendEntrypoint1 backend_2 = DummyBackendEntrypoint2 backend_2.open_dataset_parameters = ("filename_or_obj",) engines = {"engine_1": backend_1, "engine_2": backend_2} plugins.set_missing_parameters(engines) assert len(engines) == 2 assert backend_1.open_dataset_parameters == ("filename_or_obj", "decoder") assert backend_2.open_dataset_parameters == ("filename_or_obj",) backend_kwargs = DummyBackendEntrypointKwargs backend_kwargs.open_dataset_parameters = ("filename_or_obj", "decoder") plugins.set_missing_parameters({"engine": backend_kwargs}) assert backend_kwargs.open_dataset_parameters == ("filename_or_obj", "decoder") backend_args = DummyBackendEntrypointArgs backend_args.open_dataset_parameters = ("filename_or_obj", "decoder") plugins.set_missing_parameters({"engine": backend_args}) assert backend_args.open_dataset_parameters == ("filename_or_obj", "decoder") # reset backend_1.open_dataset_parameters = None backend_1.open_dataset_parameters = None backend_kwargs.open_dataset_parameters = None backend_args.open_dataset_parameters = None def test_set_missing_parameters_raise_error() -> None: backend = DummyBackendEntrypointKwargs with pytest.raises(TypeError): plugins.set_missing_parameters({"engine": backend}) backend_args = DummyBackendEntrypointArgs with pytest.raises(TypeError): plugins.set_missing_parameters({"engine": backend_args}) @mock.patch( f"{importlib_metadata_mock}.EntryPoint.load", mock.MagicMock(return_value=DummyBackendEntrypoint1), ) def test_build_engines() -> None: dummy_pkg_entrypoint = EntryPoint( "dummy", "xarray.tests.test_plugins:backend_1", "xarray_backends" ) backend_entrypoints = plugins.build_engines(EntryPoints([dummy_pkg_entrypoint])) assert isinstance(backend_entrypoints["dummy"], DummyBackendEntrypoint1) assert backend_entrypoints["dummy"].open_dataset_parameters == ( "filename_or_obj", "decoder", ) @mock.patch( f"{importlib_metadata_mock}.EntryPoint.load", mock.MagicMock(return_value=DummyBackendEntrypoint1), ) def test_build_engines_sorted() -> None: dummy_pkg_entrypoints = EntryPoints( [ EntryPoint( "dummy2", "xarray.tests.test_plugins:backend_1", "xarray.backends" ), EntryPoint( "dummy1", "xarray.tests.test_plugins:backend_1", "xarray.backends" ), ] ) backend_entrypoints = 
list(plugins.build_engines(dummy_pkg_entrypoints)) indices = [] for be in OPTIONS["netcdf_engine_order"]: try: index = backend_entrypoints.index(be) backend_entrypoints.pop(index) indices.append(index) except ValueError: pass assert set(indices) < {0, -1} assert list(backend_entrypoints) == sorted(backend_entrypoints) @mock.patch( "xarray.backends.plugins.list_engines", mock.MagicMock(return_value={"dummy": DummyBackendEntrypointArgs()}), ) def test_no_matching_engine_found() -> None: with pytest.raises(ValueError, match=r"did not find a match in any"): plugins.guess_engine("not-valid") with pytest.raises(ValueError, match=r"found the following matches with the input"): plugins.guess_engine("foo.nc") @mock.patch( "xarray.backends.plugins.list_engines", mock.MagicMock(return_value={}), ) def test_engines_not_installed() -> None: with pytest.raises(ValueError, match=r"xarray is unable to open"): plugins.guess_engine("not-valid") with pytest.raises(ValueError, match=r"found the following matches with the input"): plugins.guess_engine("foo.nc") @pytest.mark.parametrize("engine", common.BACKEND_ENTRYPOINTS.keys()) def test_get_backend_fastpath_skips_list_engines(engine: str) -> None: """Test that built-in engines skip list_engines (fastpath).""" plugins.list_engines.cache_clear() initial_misses = plugins.list_engines.cache_info().misses plugins.get_backend(engine) assert plugins.list_engines.cache_info().misses == initial_misses def test_lazy_import() -> None: """Test that some modules are imported in a lazy manner. When importing xarray these should not be imported as well. Only when running code for the first time that requires them. """ deny_list = [ "cubed", "cupy", # "dask", # TODO: backends.locks is not lazy yet :( "dask.array", "dask.distributed", "flox", "h5netcdf", "matplotlib", "nc_time_axis", "netCDF4", "numbagg", "pint", "pydap", # "scipy", # TODO: xarray.backends.scipy_ is currently not lazy "sparse", "zarr", ] # ensure that none of the above modules has been imported before modules_backup = {} for pkg in list(sys.modules.keys()): for mod in deny_list + ["xarray"]: if pkg.startswith(mod): modules_backup[pkg] = sys.modules[pkg] del sys.modules[pkg] break try: import xarray # noqa: F401 from xarray.backends import list_engines list_engines() # ensure that none of the modules that are supposed to be # lazy loaded are loaded when importing xarray is_imported = set() for pkg in sys.modules: for mod in deny_list: if pkg.startswith(mod): is_imported.add(mod) break assert len(is_imported) == 0, ( f"{is_imported} have been imported but should be lazy" ) finally: # restore original sys.modules.update(modules_backup) def test_list_engines() -> None: from xarray.backends import list_engines engines = list_engines() assert list_engines.cache_info().currsize == 1 assert ("scipy" in engines) == has_scipy assert ("h5netcdf" in engines) == has_h5netcdf assert ("netcdf4" in engines) == has_netCDF4 assert ("pydap" in engines) == has_pydap assert ("zarr" in engines) == has_zarr assert "store" in engines def test_refresh_engines() -> None: from xarray.backends import list_engines, refresh_engines EntryPointMock1 = mock.MagicMock() EntryPointMock1.name = "test1" EntryPointMock1.load.return_value = DummyBackendEntrypoint1 return_value = EntryPoints([EntryPointMock1]) with mock.patch("xarray.backends.plugins.entry_points", return_value=return_value): list_engines.cache_clear() engines = list_engines() assert "test1" in engines assert isinstance(engines["test1"], DummyBackendEntrypoint1) EntryPointMock2 = 
mock.MagicMock() EntryPointMock2.name = "test2" EntryPointMock2.load.return_value = DummyBackendEntrypoint2 return_value2 = EntryPoints([EntryPointMock2]) with mock.patch("xarray.backends.plugins.entry_points", return_value=return_value2): refresh_engines() engines = list_engines() assert "test1" not in engines assert "test2" in engines assert isinstance(engines["test2"], DummyBackendEntrypoint2) # reset to original refresh_engines() python-xarray-2026.01.0/xarray/tests/data/0000775000175000017500000000000015136607163020424 5ustar alastairalastairpython-xarray-2026.01.0/xarray/tests/data/example.ict0000664000175000017500000000141715136607163022563 0ustar alastairalastair29, 1001 Henderson, Barron U.S. EPA Example file with artificial data JUST_A_TEST 1, 1 2018, 04, 27 2018, 04, 27 0 Start_UTC 5 1, 1, 1, 1, 1 -9999, -9999, -9999, -9999, -9999 lat, degrees_north lon, degrees_east elev, meters TEST_ppbv, ppbv TESTM_ppbv, ppbv 0 9 INDEPENDENT_VARIABLE_DEFINITION: Start_UTC INDEPENDENT_VARIABLE_UNITS: Start_UTC ULOD_FLAG: -7777 ULOD_VALUE: N/A LLOD_FLAG: -8888 LLOD_VALUE: N/A, N/A, N/A, N/A, 0.025 OTHER_COMMENTS: www-air.larc.nasa.gov/missions/etc/IcarttDataFormat.htm REVISION: R0 R0: No comments for this revision. Start_UTC, lat, lon, elev, TEST_ppbv, TESTM_ppbv 43200, 41.00000, -71.00000, 5, 1.2345, 2.220 46800, 42.00000, -72.00000, 15, 2.3456, -9999 50400, 42.00000, -73.00000, 20, 3.4567, -7777 50400, 42.00000, -74.00000, 25, 4.5678, -8888 python-xarray-2026.01.0/xarray/tests/data/bears.nc0000664000175000017500000000224015136607163022040 0ustar alastairalastairCDF ij bears_lenl historyThis is an example of a multi-line global\012attribute. It could be used for representing the\012processing history of the data, for example. 2017-12-12 15:55:12 GMT Hyrax-1.14.0 http://test.opendap.org/opendap/hyrax/data/nc/bears.nc.nc?DODS_EXTRA.Unlimited_Dimensionk i attr11attr21 2 3 4 i_1.attr3_117 i_1.attr3_2@3@7@;j bears acttext string\012\011123acsaclBhacf?acd? string_lengthorder ,shot8aloanPcross0hl @@@indistinguishable@@@@@@Shԥ@@?0@@ B_ python-xarray-2026.01.0/xarray/tests/data/example_1.nc.gz0000664000175000017500000000072615136607163023245 0ustar alastairalastair!Texample_2.nc픿K@/VP(dtP B6Ijl5)wN qrYGGܙXh}^%?hX! R0=c"uI]Z>=X+Z]6%>ۘǁ^+y6=$ky3WLfǗ[rp,)_d&Ou+d^'m"ΣX~Rn2*u6Q?r>̝# ߉Z3 c|&zqߋN9y>].6ng29>>ǵA(D`(6'^ juT.>Dχ7LN\7+|;p!U? ! 
hl2lNRQLIԌO)MӑԒOWNFW~;kDžpython-xarray-2026.01.0/xarray/tests/data/example_1.nc0000664000175000017500000000331015136607163022616 0ustar alastairalastairCDF latlon leveltime sourceFictional Model Output temp  long_name temperatureunitscelsius rh  long_namerelative humidity valid_range?lat units degrees_northlon units degrees_east(level units millibarstime unitshours since 1996-1-1(2<`tR||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||?>L>>L>>L>???333=>====??333?L?L=>L>L>L>L??333?L?fff?fff=>L>>>>?333?L?fff?fff=>L>>>>?333?fff?fff python-xarray-2026.01.0/xarray/tests/data/example.grib0000664000175000017500000001216015136607163022724 0ustar alastairalastairGRIBH ]J0]J߀]J]J%d  FE )`P`P`P`Pyj{7777GRIB H ]J0]J߀]J]J%d  FG )@@@@ `7777GRIBH ]J0]J߀]J]J%d  FJ )] ] ] ] Ӏ@7777GRIBH ]J0]J߀]J]J%dLK@ GF ) x x x x(͐87777GRIB H ]J0]J߀]J]J%dLK@ GG" )XXXXXŨʠȰ7777GRIBH ]J0]J߀]J]J%dLK@ GE )Ոx8    7777GRIBH ]J0]J߀]J]J%d  FFD )____Pwiy@7777GRIB H ]J0]J߀]J]J%d  FH$ ) @@7777GRIBH ]J0]J߀]J]J%d  FJ| )\\\\```@7777GRIBH ]J0]J߀]J]J%dLK@ GF ) Ѱp̀7777GRIB H ]J0]J߀]J]J%dLK@ GG' )88888ňXȨ7777GRIBH ]J0]J߀]J]J%dLK@ GE )HѰ7777GRIBH ]J0]J߀]J]J%d  C|)7777GRIB H ]J0]J߀]J]J%d  C{)7777GRIBH ]J0]J߀]J]J%d  C{)7777GRIBH ]J0]J߀]J]J%dLK@ Ci)7777GRIB H ]J0]J߀]J]J%dLK@ Ci)7777GRIBH ]J0]J߀]J]J%dLK@ Cg)$$$$7777GRIBH ]J0]J߀]J]J%d  C|)7777GRIB H ]J0]J߀]J]J%d  C{)7777GRIBH ]J0]J߀]J]J%d  Cz) 7777GRIBH ]J0]J߀]J]J%dLK@ Ci)7777GRIB H ]J0]J߀]J]J%dLK@ Ci)7777GRIBH ]J0]J߀]J]J%dLK@ Cg)$$$$7777python-xarray-2026.01.0/xarray/tests/data/example.uamiv0000664000175000017500000000114015136607163023116 0ustar alastairalastair0A V E R A G E C A M x 5 . 4 0 T e s t P r o b l e m - - M e c h 6 C F C B 0 5 v 5 . 4 0 . m i d w e s t . 3 6 . 1 2 . 
jj?0<A\&G G <(O 3 (jj?|O 3 ?@@@@@@@AAA A0A@APA`ApAAAA|python-xarray-2026.01.0/xarray/tests/test_nputils.py0000664000175000017500000000173515136607163022630 0ustar alastairalastairfrom __future__ import annotations import numpy as np from numpy.testing import assert_array_equal from xarray.core.nputils import NumpyVIndexAdapter, _is_contiguous def test_is_contiguous() -> None: assert _is_contiguous([1]) assert _is_contiguous([1, 2, 3]) assert not _is_contiguous([1, 3]) def test_vindex() -> None: x = np.arange(3 * 4 * 5).reshape((3, 4, 5)) vindex = NumpyVIndexAdapter(x) # getitem assert_array_equal(vindex[0], x[0]) assert_array_equal(vindex[[1, 2], [1, 2]], x[([1, 2], [1, 2])]) assert vindex[[0, 1], [0, 1], :].shape == (2, 5) assert vindex[[0, 1], :, [0, 1]].shape == (2, 4) assert vindex[:, [0, 1], [0, 1]].shape == (2, 3) # setitem vindex[:] = 0 assert_array_equal(x, np.zeros_like(x)) # assignment should not raise vindex[[0, 1], [0, 1], :] = vindex[[0, 1], [0, 1], :] vindex[[0, 1], :, [0, 1]] = vindex[[0, 1], :, [0, 1]] vindex[:, [0, 1], [0, 1]] = vindex[:, [0, 1], [0, 1]] python-xarray-2026.01.0/xarray/tests/test_calendar_ops.py0000664000175000017500000002463415136607163023567 0ustar alastairalastairfrom __future__ import annotations import numpy as np import pandas as pd import pytest from xarray import CFTimeIndex, DataArray, Dataset, infer_freq from xarray.coding.calendar_ops import convert_calendar, interp_calendar from xarray.coding.cftime_offsets import date_range from xarray.testing import assert_identical from xarray.tests import requires_cftime cftime = pytest.importorskip("cftime") @pytest.mark.parametrize( "source, target, use_cftime, freq", [ ("standard", "noleap", None, "D"), ("noleap", "proleptic_gregorian", True, "D"), ("noleap", "all_leap", None, "D"), ("all_leap", "proleptic_gregorian", False, "4h"), ], ) def test_convert_calendar(source, target, use_cftime, freq): src = DataArray( date_range("2004-01-01", "2004-12-31", freq=freq, calendar=source), dims=("time",), name="time", ) da_src = DataArray( np.linspace(0, 1, src.size), dims=("time",), coords={"time": src} ) conv = convert_calendar(da_src, target, use_cftime=use_cftime) assert conv.time.dt.calendar == target if source != "noleap": expected_times = date_range( "2004-01-01", "2004-12-31", freq=freq, use_cftime=use_cftime, calendar=target, ) else: expected_times_pre_leap = date_range( "2004-01-01", "2004-02-28", freq=freq, use_cftime=use_cftime, calendar=target, ) expected_times_post_leap = date_range( "2004-03-01", "2004-12-31", freq=freq, use_cftime=use_cftime, calendar=target, ) expected_times = expected_times_pre_leap.append(expected_times_post_leap) np.testing.assert_array_equal(conv.time, expected_times) def test_convert_calendar_dataset(): # Check that variables without a time dimension are not modified src = DataArray( date_range("2004-01-01", "2004-12-31", freq="D", calendar="standard"), dims=("time",), name="time", ) da_src = DataArray( np.linspace(0, 1, src.size), dims=("time",), coords={"time": src} ).expand_dims(lat=[0, 1]) ds_src = Dataset({"hastime": da_src, "notime": (("lat",), [0, 1])}) conv = convert_calendar(ds_src, "360_day", align_on="date") assert conv.time.dt.calendar == "360_day" assert_identical(ds_src.notime, conv.notime) @pytest.mark.parametrize( "source,target,freq", [ ("standard", "360_day", "D"), ("360_day", "proleptic_gregorian", "D"), ("proleptic_gregorian", "360_day", "4h"), ], ) @pytest.mark.parametrize("align_on", ["date", "year"]) def test_convert_calendar_360_days(source, target, 
freq, align_on): src = DataArray( date_range("2004-01-01", "2004-12-30", freq=freq, calendar=source), dims=("time",), name="time", ) da_src = DataArray( np.linspace(0, 1, src.size), dims=("time",), coords={"time": src} ) conv = convert_calendar(da_src, target, align_on=align_on) assert conv.time.dt.calendar == target if align_on == "date": np.testing.assert_array_equal( conv.time.resample(time="ME").last().dt.day, [30, 29, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30], ) elif target == "360_day": np.testing.assert_array_equal( conv.time.resample(time="ME").last().dt.day, [30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 29], ) else: np.testing.assert_array_equal( conv.time.resample(time="ME").last().dt.day, [30, 29, 30, 30, 31, 30, 30, 31, 30, 31, 29, 31], ) if source == "360_day" and align_on == "year": assert conv.size == 360 if freq == "D" else 360 * 4 else: assert conv.size == 359 if freq == "D" else 359 * 4 def test_convert_calendar_360_days_random(): da_std = DataArray( np.linspace(0, 1, 366), dims=("time",), coords={ "time": date_range( "2004-01-01", "2004-12-31", freq="D", calendar="standard", use_cftime=False, ) }, ) da_360 = DataArray( np.linspace(0, 1, 360), dims=("time",), coords={ "time": date_range("2004-01-01", "2004-12-30", freq="D", calendar="360_day") }, ) conv = convert_calendar(da_std, "360_day", align_on="random") conv2 = convert_calendar(da_std, "360_day", align_on="random") assert (conv != conv2).any() conv = convert_calendar(da_360, "standard", use_cftime=False, align_on="random") assert np.datetime64("2004-02-29") not in conv.time conv2 = convert_calendar(da_360, "standard", use_cftime=False, align_on="random") assert (conv2 != conv).any() # Ensure that added days are evenly distributed in the 5 fifths of each year conv = convert_calendar(da_360, "noleap", align_on="random", missing=np.nan) conv = conv.where(conv.isnull(), drop=True) nandoys = conv.time.dt.dayofyear[:366] assert all(nandoys < np.array([74, 147, 220, 293, 366])) assert all(nandoys > np.array([0, 73, 146, 219, 292])) @requires_cftime @pytest.mark.parametrize( "source,target,freq", [ ("standard", "noleap", "D"), ("noleap", "proleptic_gregorian", "4h"), ("noleap", "all_leap", "ME"), ("360_day", "noleap", "D"), ("noleap", "360_day", "D"), ], ) def test_convert_calendar_missing(source, target, freq): src = DataArray( date_range( "2004-01-01", "2004-12-31" if source != "360_day" else "2004-12-30", freq=freq, calendar=source, ), dims=("time",), name="time", ) da_src = DataArray( np.linspace(0, 1, src.size), dims=("time",), coords={"time": src} ) out = convert_calendar(da_src, target, missing=np.nan, align_on="date") expected_freq = freq assert infer_freq(out.time) == expected_freq expected = date_range( "2004-01-01", "2004-12-31" if target != "360_day" else "2004-12-30", freq=freq, calendar=target, ) np.testing.assert_array_equal(out.time, expected) if freq != "ME": out_without_missing = convert_calendar(da_src, target, align_on="date") expected_nan = out.isel(time=~out.time.isin(out_without_missing.time)) assert expected_nan.isnull().all() expected_not_nan = out.sel(time=out_without_missing.time) assert_identical(expected_not_nan, out_without_missing) @requires_cftime def test_convert_calendar_errors(): src_nl = DataArray( date_range("0000-01-01", "0000-12-31", freq="D", calendar="noleap"), dims=("time",), name="time", ) # no align_on for conversion to 360_day with pytest.raises(ValueError, match="Argument `align_on` must be specified"): convert_calendar(src_nl, "360_day") # Standard doesn't support year 0 with 
pytest.raises( ValueError, match="Source time coordinate contains dates with year 0" ): convert_calendar(src_nl, "standard") # no align_on for conversion from 360 day src_360 = convert_calendar(src_nl, "360_day", align_on="year") with pytest.raises(ValueError, match="Argument `align_on` must be specified"): convert_calendar(src_360, "noleap") # Datetime objects da = DataArray([0, 1, 2], dims=("x",), name="x") with pytest.raises( ValueError, match=r"Coordinate x must contain datetime objects." ): convert_calendar(da, "standard", dim="x") def test_convert_calendar_dimension_name(): src = DataArray( date_range("2004-01-01", "2004-01-31", freq="D", calendar="noleap"), dims=("date",), name="date", ) out = convert_calendar(src, "proleptic_gregorian", dim="date") np.testing.assert_array_equal(src, out) def test_convert_calendar_same_calendar(): src = DataArray( date_range("2000-01-01", periods=12, freq="6h", use_cftime=False), dims=("time",), name="time", ) out = convert_calendar(src, "proleptic_gregorian") assert src is out @pytest.mark.parametrize( "source,target", [ ("standard", "noleap"), ("noleap", "proleptic_gregorian"), ("standard", "360_day"), ("360_day", "proleptic_gregorian"), ("noleap", "all_leap"), ("360_day", "noleap"), ], ) def test_interp_calendar(source, target): src = DataArray( date_range("2004-01-01", "2004-07-30", freq="D", calendar=source), dims=("time",), name="time", ) tgt = DataArray( date_range("2004-01-01", "2004-07-30", freq="D", calendar=target), dims=("time",), name="time", ) da_src = DataArray( np.linspace(0, 1, src.size), dims=("time",), coords={"time": src} ) conv = interp_calendar(da_src, tgt) assert_identical(tgt.time, conv.time) np.testing.assert_almost_equal(conv.max(), 1, 2) assert conv.min() == 0 @requires_cftime def test_interp_calendar_errors(): src_nl = DataArray( [1] * 100, dims=("time",), coords={ "time": date_range("0000-01-01", periods=100, freq="MS", calendar="noleap") }, ) tgt_360 = date_range("0001-01-01", "0001-12-30", freq="MS", calendar="standard") with pytest.raises( ValueError, match="Source time coordinate contains dates with year 0" ): interp_calendar(src_nl, tgt_360) da1 = DataArray([0, 1, 2], dims=("x",), name="x") da2 = da1 + 1 with pytest.raises( ValueError, match=r"Both 'source.x' and 'target' must contain datetime objects." 
): interp_calendar(da1, da2, dim="x") @requires_cftime @pytest.mark.parametrize( ("source_calendar", "target_calendar", "expected_index"), [("standard", "noleap", CFTimeIndex), ("all_leap", "standard", pd.DatetimeIndex)], ) def test_convert_calendar_produces_time_index( source_calendar, target_calendar, expected_index ): # https://github.com/pydata/xarray/issues/9138 time = date_range("2000-01-01", "2002-01-01", freq="D", calendar=source_calendar) temperature = np.ones(len(time)) da = DataArray( data=temperature, dims=["time"], coords=dict( time=time, ), ) converted = da.convert_calendar(target_calendar) assert isinstance(converted.indexes["time"], expected_index) python-xarray-2026.01.0/xarray/tests/test_dataset.py0000664000175000017500000113542215136607163022561 0ustar alastairalastairfrom __future__ import annotations import pickle import re import sys import warnings from collections.abc import Hashable from copy import copy, deepcopy from io import StringIO from textwrap import dedent from typing import Any, Literal, cast import numpy as np import pandas as pd import pytest from packaging.version import Version from pandas.core.indexes.datetimes import DatetimeIndex # remove once numpy 2.0 is the oldest supported version try: from numpy.exceptions import RankWarning except ImportError: from numpy import RankWarning # type: ignore[no-redef,attr-defined,unused-ignore] import contextlib from pandas.errors import UndefinedVariableError import xarray as xr from xarray import ( AlignmentError, DataArray, Dataset, IndexVariable, MergeError, Variable, align, backends, broadcast, open_dataset, set_options, ) from xarray.coding.cftimeindex import CFTimeIndex from xarray.core import dtypes, indexing, utils from xarray.core.common import duck_array_ops, full_like from xarray.core.coordinates import Coordinates, DatasetCoordinates from xarray.core.indexes import Index, PandasIndex from xarray.core.types import ArrayLike from xarray.core.utils import is_scalar from xarray.groupers import SeasonResampler, TimeResampler from xarray.namedarray.pycompat import array_type, integer_types from xarray.testing import _assert_internal_invariants from xarray.tests import ( DuckArrayWrapper, InaccessibleArray, UnexpectedDataAccess, assert_allclose, assert_array_equal, assert_equal, assert_identical, assert_no_warnings, assert_writeable, create_test_data, has_cftime, has_dask, has_pyarrow, raise_if_dask_computes, requires_bottleneck, requires_cftime, requires_cupy, requires_dask, requires_numexpr, requires_pint, requires_pyarrow, requires_scipy, requires_sparse, source_ndarray, ) from xarray.tests.indexes import ScalarIndex, XYIndex with contextlib.suppress(ImportError): import dask.array as da # from numpy version 2.0 trapz is deprecated and renamed to trapezoid # remove once numpy 2.0 is the oldest supported version try: from numpy import trapezoid # type: ignore[attr-defined,unused-ignore] except ImportError: from numpy import ( # type: ignore[arg-type,no-redef,attr-defined,unused-ignore] trapz as trapezoid, ) sparse_array_type = array_type("sparse") pytestmark = [ pytest.mark.filterwarnings("error:Mean of empty slice"), pytest.mark.filterwarnings("error:All-NaN (slice|axis) encountered"), ] def create_append_test_data(seed=None) -> tuple[Dataset, Dataset, Dataset]: rs = np.random.default_rng(seed) lat = [2, 1, 0] lon = [0, 1, 2] nt1 = 3 nt2 = 2 time1 = pd.date_range("2000-01-01", periods=nt1).as_unit("ns") time2 = pd.date_range("2000-02-01", periods=nt2).as_unit("ns") string_var = np.array(["a", "bc", 
"def"], dtype=object) string_var_to_append = np.array(["asdf", "asdfg"], dtype=object) string_var_fixed_length = np.array(["aa", "bb", "cc"], dtype="|S2") string_var_fixed_length_to_append = np.array(["dd", "ee"], dtype="|S2") unicode_var = np.array(["áó", "áó", "áó"]) datetime_var = np.array( ["2019-01-01", "2019-01-02", "2019-01-03"], dtype="datetime64[ns]" ) datetime_var_to_append = np.array( ["2019-01-04", "2019-01-05"], dtype="datetime64[ns]" ) bool_var = np.array([True, False, True], dtype=bool) bool_var_to_append = np.array([False, True], dtype=bool) with warnings.catch_warnings(): warnings.filterwarnings("ignore", "Converting non-default") ds = xr.Dataset( data_vars={ "da": xr.DataArray( rs.random((3, 3, nt1)), coords=[lat, lon, time1], dims=["lat", "lon", "time"], ), "string_var": ("time", string_var), "string_var_fixed_length": ("time", string_var_fixed_length), "unicode_var": ("time", unicode_var), "datetime_var": ("time", datetime_var), "bool_var": ("time", bool_var), } ) ds_to_append = xr.Dataset( data_vars={ "da": xr.DataArray( rs.random((3, 3, nt2)), coords=[lat, lon, time2], dims=["lat", "lon", "time"], ), "string_var": ("time", string_var_to_append), "string_var_fixed_length": ("time", string_var_fixed_length_to_append), "unicode_var": ("time", unicode_var[:nt2]), "datetime_var": ("time", datetime_var_to_append), "bool_var": ("time", bool_var_to_append), } ) ds_with_new_var = xr.Dataset( data_vars={ "new_var": xr.DataArray( rs.random((3, 3, nt1 + nt2)), coords=[lat, lon, time1.append(time2)], dims=["lat", "lon", "time"], ) } ) assert_writeable(ds) assert_writeable(ds_to_append) assert_writeable(ds_with_new_var) return ds, ds_to_append, ds_with_new_var def create_append_string_length_mismatch_test_data(dtype) -> tuple[Dataset, Dataset]: def make_datasets(data, data_to_append) -> tuple[Dataset, Dataset]: ds = xr.Dataset( {"temperature": (["time"], data)}, coords={"time": [0, 1, 2]}, ) ds_to_append = xr.Dataset( {"temperature": (["time"], data_to_append)}, coords={"time": [0, 1, 2]} ) assert_writeable(ds) assert_writeable(ds_to_append) return ds, ds_to_append u2_strings = ["ab", "cd", "ef"] u5_strings = ["abc", "def", "ghijk"] s2_strings = np.array(["aa", "bb", "cc"], dtype="|S2") s3_strings = np.array(["aaa", "bbb", "ccc"], dtype="|S3") if dtype == "U": return make_datasets(u2_strings, u5_strings) elif dtype == "S": return make_datasets(s2_strings, s3_strings) else: raise ValueError(f"unsupported dtype {dtype}.") def create_test_multiindex() -> Dataset: mindex = pd.MultiIndex.from_product( [["a", "b"], [1, 2]], names=("level_1", "level_2") ) return Dataset({}, Coordinates.from_pandas_multiindex(mindex, "x")) def create_test_stacked_array() -> tuple[DataArray, DataArray]: x = DataArray(pd.Index(np.r_[:10], name="x")) y = DataArray(pd.Index(np.r_[:20], name="y")) a = x * y b = x * y * y return a, b class InaccessibleVariableDataStore(backends.InMemoryDataStore): """ Store that does not allow any data access. 
""" def __init__(self): super().__init__() self._indexvars = set() def store(self, variables, *args, **kwargs) -> None: super().store(variables, *args, **kwargs) for k, v in variables.items(): if isinstance(v, IndexVariable): self._indexvars.add(k) def get_variables(self): def lazy_inaccessible(k, v): if k in self._indexvars: return v data = indexing.LazilyIndexedArray(InaccessibleArray(v.values)) return Variable(v.dims, data, v.attrs) return {k: lazy_inaccessible(k, v) for k, v in self._variables.items()} class DuckBackendArrayWrapper(backends.common.BackendArray): """Mimic a BackendArray wrapper around DuckArrayWrapper""" def __init__(self, array): self.array = DuckArrayWrapper(array) self.shape = array.shape self.dtype = array.dtype def get_array(self): return self.array def __getitem__(self, key): return self.array[key.tuple] class AccessibleAsDuckArrayDataStore(backends.InMemoryDataStore): """ Store that returns a duck array, not convertible to numpy array, on read. Modeled after nVIDIA's kvikio. """ def __init__(self): super().__init__() self._indexvars = set() def store(self, variables, *args, **kwargs) -> None: super().store(variables, *args, **kwargs) for k, v in variables.items(): if isinstance(v, IndexVariable): self._indexvars.add(k) def get_variables(self) -> dict[Any, xr.Variable]: def lazy_accessible(k, v) -> xr.Variable: if k in self._indexvars: return v data = indexing.LazilyIndexedArray(DuckBackendArrayWrapper(v.values)) return Variable(v.dims, data, v.attrs) return {k: lazy_accessible(k, v) for k, v in self._variables.items()} class TestDataset: def test_repr(self) -> None: data = create_test_data(seed=123, use_extension_array=True) data.attrs["foo"] = "bar" # need to insert str dtype at runtime to handle different endianness var5 = ( "\n var5 (dim1) int64[pyarrow] 64B 5 9 7 2 6 2 8 1" if has_pyarrow else "" ) expected = dedent( f"""\ Size: 2kB Dimensions: (dim2: 9, dim3: 10, time: 20, dim1: 8) Coordinates: * dim2 (dim2) float64 72B 0.0 0.5 1.0 1.5 2.0 2.5 3.0 3.5 4.0 * dim3 (dim3) {data["dim3"].dtype} 40B 'a' 'b' 'c' 'd' 'e' 'f' 'g' 'h' 'i' 'j' numbers (dim3) int64 80B 0 1 2 0 0 1 1 2 2 3 * time (time) datetime64[ns] 160B 2000-01-01 2000-01-02 ... 2000-01-20 Dimensions without coordinates: dim1 Data variables: var1 (dim1, dim2) float64 576B -0.9891 -0.3678 1.288 ... -0.2116 0.364 var2 (dim1, dim2) float64 576B 0.953 1.52 1.704 ... 0.1347 -0.6423 var3 (dim3, dim1) float64 640B 0.4107 0.9941 0.1665 ... 0.716 1.555 var4 (dim1) category 3{6 if Version(pd.__version__) >= Version("3.0.0dev0") else 2}B b c b a c a c a{var5} Attributes: foo: bar""" ) actual = "\n".join(x.rstrip() for x in repr(data).split("\n")) assert expected == actual with set_options(display_width=100): max_len = max(map(len, repr(data).split("\n"))) assert 90 < max_len < 100 expected = dedent( """\ Size: 0B Dimensions: () Data variables: *empty*""" ) actual = "\n".join(x.rstrip() for x in repr(Dataset()).split("\n")) print(actual) assert expected == actual # verify that ... 
doesn't appear for scalar coordinates data = Dataset({"foo": ("x", np.ones(10))}).mean() expected = dedent( """\ Size: 8B Dimensions: () Data variables: foo float64 8B 1.0""" ) actual = "\n".join(x.rstrip() for x in repr(data).split("\n")) print(actual) assert expected == actual # verify long attributes are truncated data = Dataset(attrs={"foo": "bar" * 1000}) assert len(repr(data)) < 1000 def test_repr_multiindex(self) -> None: data = create_test_multiindex() obj_size = np.dtype("O").itemsize expected = dedent( f"""\ Size: {8 * obj_size + 32}B Dimensions: (x: 4) Coordinates: * x (x) object {4 * obj_size}B MultiIndex * level_1 (x) object {4 * obj_size}B 'a' 'a' 'b' 'b' * level_2 (x) int64 32B 1 2 1 2 Data variables: *empty*""" ) actual = "\n".join(x.rstrip() for x in repr(data).split("\n")) print(actual) assert expected == actual # verify that long level names are not truncated midx = pd.MultiIndex.from_product( [["a", "b"], [1, 2]], names=("a_quite_long_level_name", "level_2") ) midx_coords = Coordinates.from_pandas_multiindex(midx, "x") data = Dataset({}, midx_coords) expected = dedent( f"""\ Size: {8 * obj_size + 32}B Dimensions: (x: 4) Coordinates: * x (x) object {4 * obj_size}B MultiIndex * a_quite_long_level_name (x) object {4 * obj_size}B 'a' 'a' 'b' 'b' * level_2 (x) int64 32B 1 2 1 2 Data variables: *empty*""" ) actual = "\n".join(x.rstrip() for x in repr(data).split("\n")) print(actual) assert expected == actual def test_repr_period_index(self) -> None: data = create_test_data(seed=456) data.coords["time"] = pd.period_range("2000-01-01", periods=20, freq="D") # check that creating the repr doesn't raise an error #GH645 repr(data) def test_unicode_data(self) -> None: # regression test for GH834 data = Dataset({"foø": ["ba®"]}, attrs={"å": "∑"}) repr(data) # should not raise byteorder = "<" if sys.byteorder == "little" else ">" expected = dedent( f"""\ Size: 12B Dimensions: (foø: 1) Coordinates: * foø (foø) {byteorder}U3 12B {"ba®"!r} Data variables: *empty* Attributes: å: ∑""" ) actual = str(data) assert expected == actual def test_repr_nep18(self) -> None: class Array: def __init__(self): self.shape = (2,) self.ndim = 1 self.dtype = np.dtype(np.float64) def __array_function__(self, *args, **kwargs): return NotImplemented def __array_ufunc__(self, *args, **kwargs): return NotImplemented def __repr__(self): return "Custom\nArray" dataset = Dataset({"foo": ("x", Array())}) expected = dedent( """\ Size: 16B Dimensions: (x: 2) Dimensions without coordinates: x Data variables: foo (x) float64 16B Custom Array""" ) assert expected == repr(dataset) def test_info(self) -> None: ds = create_test_data(seed=123) ds = ds.drop_vars("dim3") # string type prints differently in PY2 vs PY3 ds.attrs["unicode_attr"] = "ba®" ds.attrs["string_attr"] = "bar" buf = StringIO() ds.info(buf=buf) expected = dedent( """\ xarray.Dataset { dimensions: \tdim2 = 9 ; \ttime = 20 ; \tdim1 = 8 ; \tdim3 = 10 ; variables: \tfloat64 dim2(dim2) ; \tdatetime64[ns] time(time) ; \tfloat64 var1(dim1, dim2) ; \t\tvar1:foo = variable ; \tfloat64 var2(dim1, dim2) ; \t\tvar2:foo = variable ; \tfloat64 var3(dim3, dim1) ; \t\tvar3:foo = variable ; \tint64 numbers(dim3) ; // global attributes: \t:unicode_attr = ba® ; \t:string_attr = bar ; }""" ) actual = buf.getvalue() assert expected == actual buf.close() def test_constructor(self) -> None: x1 = ("x", 2 * np.arange(100)) x2 = ("x", np.arange(1000)) z = (["x", "y"], np.arange(1000).reshape(100, 10)) with pytest.raises(ValueError, match=r"conflicting sizes"): Dataset({"a": x1, 
"b": x2}) with pytest.raises(TypeError, match=r"tuple of form"): Dataset({"x": (1, 2, 3, 4, 5, 6, 7)}) with pytest.raises(ValueError, match=r"already exists as a scalar"): Dataset({"x": 0, "y": ("x", [1, 2, 3])}) # nD coordinate variable "x" sharing name with dimension actual = Dataset({"a": x1, "x": z}) assert "x" not in actual.xindexes _assert_internal_invariants(actual, check_default_indexes=True) # verify handling of DataArrays expected = Dataset({"x": x1, "z": z}) actual = Dataset({"z": expected["z"]}) assert_identical(expected, actual) def test_constructor_1d(self) -> None: expected = Dataset({"x": (["x"], 5.0 + np.arange(5))}) actual = Dataset({"x": 5.0 + np.arange(5)}) assert_identical(expected, actual) actual = Dataset({"x": [5, 6, 7, 8, 9]}) assert_identical(expected, actual) def test_constructor_0d(self) -> None: expected = Dataset({"x": ([], 1)}) for arg in [1, np.array(1), expected["x"]]: actual = Dataset({"x": arg}) assert_identical(expected, actual) class Arbitrary: pass d = pd.Timestamp("2000-01-01T12") args = [ True, None, 3.4, np.nan, "hello", b"raw", np.datetime64("2000-01-01"), d, d.to_pydatetime(), Arbitrary(), ] for arg in args: print(arg) expected = Dataset({"x": ([], arg)}) actual = Dataset({"x": arg}) assert_identical(expected, actual) def test_constructor_auto_align(self) -> None: a = DataArray([1, 2], [("x", [0, 1])]) b = DataArray([3, 4], [("x", [1, 2])]) # verify align uses outer join expected = Dataset( {"a": ("x", [1, 2, np.nan]), "b": ("x", [np.nan, 3, 4])}, {"x": [0, 1, 2]} ) actual = Dataset({"a": a, "b": b}) assert_identical(expected, actual) # regression test for GH346 assert isinstance(actual.variables["x"], IndexVariable) # variable with different dimensions c = ("y", [3, 4]) expected2 = expected.merge({"c": c}) actual = Dataset({"a": a, "b": b, "c": c}) assert_identical(expected2, actual) # variable that is only aligned against the aligned variables d = ("x", [3, 2, 1]) expected3 = expected.merge({"d": d}) actual = Dataset({"a": a, "b": b, "d": d}) assert_identical(expected3, actual) e = ("x", [0, 0]) with pytest.raises(ValueError, match=r"conflicting sizes"): Dataset({"a": a, "b": b, "e": e}) def test_constructor_pandas_sequence(self) -> None: ds = self.make_example_math_dataset() pandas_objs = { var_name: ds[var_name].to_pandas() for var_name in ["foo", "bar"] } ds_based_on_pandas = Dataset(pandas_objs, ds.coords, attrs=ds.attrs) del ds_based_on_pandas["x"] assert_equal(ds, ds_based_on_pandas) # reindex pandas obj, check align works rearranged_index = reversed(pandas_objs["foo"].index) pandas_objs["foo"] = pandas_objs["foo"].reindex(rearranged_index) ds_based_on_pandas = Dataset(pandas_objs, ds.coords, attrs=ds.attrs) del ds_based_on_pandas["x"] assert_equal(ds, ds_based_on_pandas) def test_constructor_pandas_single(self) -> None: das = [ DataArray(np.random.rand(4), dims=["a"]), # series DataArray(np.random.rand(4, 3), dims=["a", "b"]), # df ] for a in das: pandas_obj = a.to_pandas() ds_based_on_pandas = Dataset(pandas_obj) # type: ignore[arg-type] # TODO: improve typing of __init__ for dim in ds_based_on_pandas.data_vars: assert isinstance(dim, int) assert_array_equal(ds_based_on_pandas[dim], pandas_obj[dim]) def test_constructor_compat(self) -> None: data = {"x": DataArray(0, coords={"y": 1}), "y": ("z", [1, 1, 1])} expected = Dataset({"x": 0}, {"y": ("z", [1, 1, 1])}) actual = Dataset(data) assert_identical(expected, actual) data = {"y": ("z", [1, 1, 1]), "x": DataArray(0, coords={"y": 1})} actual = Dataset(data) assert_identical(expected, 
actual) original = Dataset( {"a": (("x", "y"), np.ones((2, 3)))}, {"c": (("x", "y"), np.zeros((2, 3))), "x": [0, 1]}, ) expected = Dataset( {"a": ("x", np.ones(2)), "b": ("y", np.ones(3))}, {"c": (("x", "y"), np.zeros((2, 3))), "x": [0, 1]}, ) actual = Dataset( {"a": original["a"][:, 0], "b": original["a"][0].drop_vars("x")} ) assert_identical(expected, actual) data = {"x": DataArray(0, coords={"y": 3}), "y": ("z", [1, 1, 1])} with pytest.raises(MergeError): Dataset(data) data = {"x": DataArray(0, coords={"y": 1}), "y": [1, 1]} actual = Dataset(data) expected = Dataset({"x": 0}, {"y": [1, 1]}) assert_identical(expected, actual) def test_constructor_with_coords(self) -> None: with pytest.raises(ValueError, match=r"found in both data_vars and"): Dataset({"a": ("x", [1])}, {"a": ("x", [1])}) ds = Dataset({}, {"a": ("x", [1])}) assert not ds.data_vars assert list(ds.coords.keys()) == ["a"] mindex = pd.MultiIndex.from_product( [["a", "b"], [1, 2]], names=("level_1", "level_2") ) with pytest.raises(ValueError, match=r"conflicting MultiIndex"): with pytest.warns( FutureWarning, match=r".*`pandas.MultiIndex`.*no longer be implicitly promoted.*", ): Dataset({}, {"x": mindex, "y": mindex}) Dataset({}, {"x": mindex, "level_1": range(4)}) def test_constructor_no_default_index(self) -> None: # explicitly passing a Coordinates object skips the creation of default index ds = Dataset(coords=Coordinates({"x": [1, 2, 3]}, indexes={})) assert "x" in ds assert "x" not in ds.xindexes def test_constructor_multiindex(self) -> None: midx = pd.MultiIndex.from_product([["a", "b"], [1, 2]], names=("one", "two")) coords = Coordinates.from_pandas_multiindex(midx, "x") ds = Dataset(coords=coords) assert_identical(ds, coords.to_dataset()) with pytest.warns( FutureWarning, match=r".*`pandas.MultiIndex`.*no longer be implicitly promoted.*", ): Dataset(data_vars={"x": midx}) with pytest.warns( FutureWarning, match=r".*`pandas.MultiIndex`.*no longer be implicitly promoted.*", ): Dataset(coords={"x": midx}) def test_constructor_custom_index(self) -> None: class CustomIndex(Index): ... 
coords = Coordinates( coords={"x": ("x", [1, 2, 3])}, indexes={"x": CustomIndex()} ) ds = Dataset(coords=coords) assert isinstance(ds.xindexes["x"], CustomIndex) # test coordinate variables copied assert ds.variables["x"] is not coords.variables["x"] @pytest.mark.filterwarnings("ignore:return type") def test_properties(self) -> None: ds = create_test_data() # dims / sizes # These exact types aren't public API, but this makes sure we don't # change them inadvertently: assert isinstance(ds.dims, utils.Frozen) # TODO change after deprecation cycle in GH #8500 is complete assert isinstance(ds.dims.mapping, dict) assert type(ds.dims.mapping) is dict with pytest.warns( FutureWarning, match=r" To access a mapping from dimension names to lengths, please use `Dataset.sizes`", ): assert ds.dims == ds.sizes assert ds.sizes == {"dim1": 8, "dim2": 9, "dim3": 10, "time": 20} # dtypes assert isinstance(ds.dtypes, utils.Frozen) assert isinstance(ds.dtypes.mapping, dict) assert ds.dtypes == { "var1": np.dtype("float64"), "var2": np.dtype("float64"), "var3": np.dtype("float64"), } # data_vars assert list(ds) == list(ds.data_vars) assert list(ds.keys()) == list(ds.data_vars) assert "aasldfjalskdfj" not in ds.variables assert "dim1" in repr(ds.variables) assert len(ds) == 3 assert bool(ds) assert list(ds.data_vars) == ["var1", "var2", "var3"] assert list(ds.data_vars.keys()) == ["var1", "var2", "var3"] assert "var1" in ds.data_vars assert "dim1" not in ds.data_vars assert "numbers" not in ds.data_vars assert len(ds.data_vars) == 3 # xindexes assert set(ds.xindexes) == {"dim2", "dim3", "time"} assert len(ds.xindexes) == 3 assert "dim2" in repr(ds.xindexes) assert all(isinstance(idx, Index) for idx in ds.xindexes.values()) # indexes assert set(ds.indexes) == {"dim2", "dim3", "time"} assert len(ds.indexes) == 3 assert "dim2" in repr(ds.indexes) assert all(isinstance(idx, pd.Index) for idx in ds.indexes.values()) # coords assert list(ds.coords) == ["dim2", "dim3", "time", "numbers"] assert "dim2" in ds.coords assert "numbers" in ds.coords assert "var1" not in ds.coords assert "dim1" not in ds.coords assert len(ds.coords) == 4 # nbytes assert ( Dataset({"x": np.int64(1), "y": np.array([1, 2], dtype=np.float32)}).nbytes == 16 ) def test_warn_ds_dims_deprecation(self) -> None: # TODO remove after deprecation cycle in GH #8500 is complete ds = create_test_data() with pytest.warns(FutureWarning, match="return type"): ds.dims["dim1"] with pytest.warns(FutureWarning, match="return type"): ds.dims.keys() with pytest.warns(FutureWarning, match="return type"): ds.dims.values() with pytest.warns(FutureWarning, match="return type"): ds.dims.items() with assert_no_warnings(): len(ds.dims) ds.dims.__iter__() _ = "dim1" in ds.dims def test_asarray(self) -> None: ds = Dataset({"x": 0}) with pytest.raises(TypeError, match=r"cannot directly convert"): np.asarray(ds) def test_get_index(self) -> None: ds = Dataset({"foo": (("x", "y"), np.zeros((2, 3)))}, coords={"x": ["a", "b"]}) assert ds.get_index("x").equals(pd.Index(["a", "b"])) assert ds.get_index("y").equals(pd.Index([0, 1, 2])) with pytest.raises(KeyError): ds.get_index("z") def test_attr_access(self) -> None: ds = Dataset( {"tmin": ("x", [42], {"units": "Celsius"})}, attrs={"title": "My test data"} ) assert_identical(ds.tmin, ds["tmin"]) assert_identical(ds.tmin.x, ds.x) assert ds.title == ds.attrs["title"] assert ds.tmin.units == ds["tmin"].attrs["units"] assert {"tmin", "title"} <= set(dir(ds)) assert "units" in set(dir(ds.tmin)) # should defer to variable of same name 
ds.attrs["tmin"] = -999 assert ds.attrs["tmin"] == -999 assert_identical(ds.tmin, ds["tmin"]) def test_variable(self) -> None: a = Dataset() d = np.random.random((10, 3)) a["foo"] = (("time", "x"), d) assert "foo" in a.variables assert "foo" in a a["bar"] = (("time", "x"), d) # order of creation is preserved assert list(a.variables) == ["foo", "bar"] assert_array_equal(a["foo"].values, d) # try to add variable with dim (10,3) with data that's (3,10) with pytest.raises(ValueError): a["qux"] = (("time", "x"), d.T) def test_modify_inplace(self) -> None: a = Dataset() vec = np.random.random((10,)) attributes = {"foo": "bar"} a["x"] = ("x", vec, attributes) assert "x" in a.coords assert isinstance(a.coords["x"].to_index(), pd.Index) assert_identical(a.coords["x"].variable, a.variables["x"]) b = Dataset() b["x"] = ("x", vec, attributes) assert_identical(a["x"], b["x"]) assert a.sizes == b.sizes # this should work a["x"] = ("x", vec[:5]) a["z"] = ("x", np.arange(5)) with pytest.raises(ValueError): # now it shouldn't, since there is a conflicting length a["x"] = ("x", vec[:4]) arr = np.random.random((10, 1)) scal = np.array(0) with pytest.raises(ValueError): a["y"] = ("y", arr) with pytest.raises(ValueError): a["y"] = ("y", scal) assert "y" not in a.dims def test_coords_properties(self) -> None: # use int64 for repr consistency on windows data = Dataset( { "x": ("x", np.array([-1, -2], "int64")), "y": ("y", np.array([0, 1, 2], "int64")), "foo": (["x", "y"], np.random.randn(2, 3)), }, {"a": ("x", np.array([4, 5], "int64")), "b": np.int64(-10)}, ) coords = data.coords assert isinstance(coords, DatasetCoordinates) # len assert len(coords) == 4 # iter assert list(coords) == ["x", "y", "a", "b"] assert_identical(coords["x"].variable, data["x"].variable) assert_identical(coords["y"].variable, data["y"].variable) assert "x" in coords assert "a" in coords assert 0 not in coords assert "foo" not in coords with pytest.raises(KeyError): coords["foo"] with pytest.raises(KeyError): coords[0] # repr expected = dedent( """\ Coordinates: * x (x) int64 16B -1 -2 a (x) int64 16B 4 5 * y (y) int64 24B 0 1 2 b int64 8B -10""" ) actual = repr(coords) assert expected == actual # dims assert coords.sizes == {"x": 2, "y": 3} # dtypes assert coords.dtypes == { "x": np.dtype("int64"), "y": np.dtype("int64"), "a": np.dtype("int64"), "b": np.dtype("int64"), } def test_coords_modify(self) -> None: data = Dataset( { "x": ("x", [-1, -2]), "y": ("y", [0, 1, 2]), "foo": (["x", "y"], np.random.randn(2, 3)), }, {"a": ("x", [4, 5]), "b": -10}, ) actual = data.copy(deep=True) actual.coords["x"] = ("x", ["a", "b"]) assert_array_equal(actual["x"], ["a", "b"]) actual = data.copy(deep=True) actual.coords["z"] = ("z", ["a", "b"]) assert_array_equal(actual["z"], ["a", "b"]) actual = data.copy(deep=True) with pytest.raises(ValueError, match=r"conflicting dimension sizes"): actual.coords["x"] = ("x", [-1]) assert_identical(actual, data) # should not be modified actual = data.copy() del actual.coords["b"] expected = data.reset_coords("b", drop=True) assert_identical(expected, actual) with pytest.raises(KeyError): del data.coords["not_found"] with pytest.raises(KeyError): del data.coords["foo"] actual = data.copy(deep=True) actual.coords.update({"c": 11}) expected = data.merge({"c": 11}).set_coords("c") assert_identical(expected, actual) # regression test for GH3746 del actual.coords["x"] assert "x" not in actual.xindexes def test_update_index(self) -> None: actual = Dataset(coords={"x": [1, 2, 3]}) actual["x"] = ["a", "b", "c"] assert 
actual.xindexes["x"].to_pandas_index().equals(pd.Index(["a", "b", "c"])) def test_coords_setitem_with_new_dimension(self) -> None: actual = Dataset() actual.coords["foo"] = ("x", [1, 2, 3]) expected = Dataset(coords={"foo": ("x", [1, 2, 3])}) assert_identical(expected, actual) def test_coords_setitem_multiindex(self) -> None: data = create_test_multiindex() with pytest.raises(ValueError, match=r"cannot drop or update.*corrupt.*index "): data.coords["level_1"] = range(4) def test_coords_set(self) -> None: one_coord = Dataset({"x": ("x", [0]), "yy": ("x", [1]), "zzz": ("x", [2])}) two_coords = Dataset({"zzz": ("x", [2])}, {"x": ("x", [0]), "yy": ("x", [1])}) all_coords = Dataset( coords={"x": ("x", [0]), "yy": ("x", [1]), "zzz": ("x", [2])} ) actual = one_coord.set_coords("x") assert_identical(one_coord, actual) actual = one_coord.set_coords(["x"]) assert_identical(one_coord, actual) actual = one_coord.set_coords("yy") assert_identical(two_coords, actual) actual = one_coord.set_coords(["yy", "zzz"]) assert_identical(all_coords, actual) actual = one_coord.reset_coords() assert_identical(one_coord, actual) actual = two_coords.reset_coords() assert_identical(one_coord, actual) actual = all_coords.reset_coords() assert_identical(one_coord, actual) actual = all_coords.reset_coords(["yy", "zzz"]) assert_identical(one_coord, actual) actual = all_coords.reset_coords("zzz") assert_identical(two_coords, actual) with pytest.raises(ValueError, match=r"cannot remove index"): one_coord.reset_coords("x") actual = all_coords.reset_coords("zzz", drop=True) expected = all_coords.drop_vars("zzz") assert_identical(expected, actual) expected = two_coords.drop_vars("zzz") assert_identical(expected, actual) def test_coords_to_dataset(self) -> None: orig = Dataset({"foo": ("y", [-1, 0, 1])}, {"x": 10, "y": [2, 3, 4]}) expected = Dataset(coords={"x": 10, "y": [2, 3, 4]}) actual = orig.coords.to_dataset() assert_identical(expected, actual) def test_coords_merge(self) -> None: orig_coords = Dataset(coords={"a": ("x", [1, 2]), "x": [0, 1]}).coords other_coords = Dataset(coords={"b": ("x", ["a", "b"]), "x": [0, 1]}).coords expected = Dataset( coords={"a": ("x", [1, 2]), "b": ("x", ["a", "b"]), "x": [0, 1]} ) actual = orig_coords.merge(other_coords) assert_identical(expected, actual) actual = other_coords.merge(orig_coords) assert_identical(expected, actual) other_coords = Dataset(coords={"x": ("x", ["a"])}).coords with pytest.raises(MergeError): orig_coords.merge(other_coords) other_coords = Dataset(coords={"x": ("x", ["a", "b"])}).coords with pytest.raises(MergeError): orig_coords.merge(other_coords) other_coords = Dataset(coords={"x": ("x", ["a", "b", "c"])}).coords with pytest.raises(MergeError): orig_coords.merge(other_coords) other_coords = Dataset(coords={"a": ("x", [8, 9])}).coords expected = Dataset(coords={"x": range(2)}) actual = orig_coords.merge(other_coords) assert_identical(expected, actual) actual = other_coords.merge(orig_coords) assert_identical(expected, actual) other_coords = Dataset(coords={"x": np.nan}).coords actual = orig_coords.merge(other_coords) assert_identical(orig_coords.to_dataset(), actual) actual = other_coords.merge(orig_coords) assert_identical(orig_coords.to_dataset(), actual) def test_coords_merge_mismatched_shape(self) -> None: orig_coords = Dataset(coords={"a": ("x", [1, 1])}).coords other_coords = Dataset(coords={"a": 1}).coords expected = orig_coords.to_dataset() actual = orig_coords.merge(other_coords) assert_identical(expected, actual) other_coords = Dataset(coords={"a": ("y", 
[1])}).coords expected = Dataset(coords={"a": (["x", "y"], [[1], [1]])}) actual = orig_coords.merge(other_coords) assert_identical(expected, actual) actual = other_coords.merge(orig_coords) assert_identical(expected.transpose(), actual) orig_coords = Dataset(coords={"a": ("x", [np.nan])}).coords other_coords = Dataset(coords={"a": np.nan}).coords expected = orig_coords.to_dataset() actual = orig_coords.merge(other_coords) assert_identical(expected, actual) def test_data_vars_properties(self) -> None: ds = Dataset() ds["foo"] = (("x",), [1.0]) ds["bar"] = 2.0 # iter assert set(ds.data_vars) == {"foo", "bar"} assert "foo" in ds.data_vars assert "x" not in ds.data_vars assert_identical(ds["foo"], ds.data_vars["foo"]) # repr expected = dedent( """\ Data variables: foo (x) float64 8B 1.0 bar float64 8B 2.0""" ) actual = repr(ds.data_vars) assert expected == actual # dtypes assert ds.data_vars.dtypes == { "foo": np.dtype("float64"), "bar": np.dtype("float64"), } # len ds.coords["x"] = [1] assert len(ds.data_vars) == 2 # https://github.com/pydata/xarray/issues/7588 with pytest.raises( AssertionError, match=r"something is wrong with Dataset._coord_names" ): ds._coord_names = {"w", "x", "y", "z"} len(ds.data_vars) def test_equals_and_identical(self) -> None: data = create_test_data(seed=42) assert data.equals(data) assert data.identical(data) data2 = create_test_data(seed=42) data2.attrs["foobar"] = "baz" assert data.equals(data2) assert not data.identical(data2) del data2["time"] assert not data.equals(data2) data = create_test_data(seed=42).rename({"var1": None}) assert data.equals(data) assert data.identical(data) data2 = data.reset_coords() assert not data2.equals(data) assert not data2.identical(data) def test_equals_failures(self) -> None: data = create_test_data() assert not data.equals("foo") # type: ignore[arg-type] assert not data.identical(123) # type: ignore[arg-type] assert not data.broadcast_equals({1: 2}) # type: ignore[arg-type] def test_broadcast_equals(self) -> None: data1 = Dataset(coords={"x": 0}) data2 = Dataset(coords={"x": [0]}) assert data1.broadcast_equals(data2) assert not data1.equals(data2) assert not data1.identical(data2) def test_attrs(self) -> None: data = create_test_data(seed=42) data.attrs = {"foobar": "baz"} assert data.attrs["foobar"], "baz" assert isinstance(data.attrs, dict) def test_chunks_does_not_load_data(self) -> None: # regression test for GH6538 store = InaccessibleVariableDataStore() create_test_data().dump_to_store(store) ds = open_dataset(store) assert ds.chunks == {} @requires_dask @pytest.mark.parametrize( "use_cftime,calendar", [ (False, "standard"), (pytest.param(True, marks=pytest.mark.skipif(not has_cftime)), "standard"), (pytest.param(True, marks=pytest.mark.skipif(not has_cftime)), "noleap"), (pytest.param(True, marks=pytest.mark.skipif(not has_cftime)), "360_day"), ], ) def test_chunk_by_season_resampler(self, use_cftime: bool, calendar: str) -> None: import dask.array N = 365 + 365 # 2 years - 1 day time = xr.date_range( "2000-01-01", periods=N, freq="D", use_cftime=use_cftime, calendar=calendar ) ds = Dataset( { "pr": ("time", dask.array.random.random((N), chunks=(20))), "pr2d": (("x", "time"), dask.array.random.random((10, N), chunks=(20))), "ones": ("time", np.ones((N,))), }, coords={"time": time}, ) # Standard seasons rechunked = ds.chunk( {"x": 2, "time": SeasonResampler(["DJF", "MAM", "JJA", "SON"])} ) assert rechunked.chunksizes["x"] == (2,) * 5 assert len(rechunked.chunksizes["time"]) == 9 assert rechunked.chunksizes["x"] == (2,) * 
5 assert sum(rechunked.chunksizes["time"]) == ds.sizes["time"] if calendar == "standard": assert rechunked.chunksizes["time"] == (60, 92, 92, 91, 90, 92, 92, 91, 30) elif calendar == "noleap": assert rechunked.chunksizes["time"] == (59, 92, 92, 91, 90, 92, 92, 91, 31) elif calendar == "360_day": assert rechunked.chunksizes["time"] == (60, 90, 90, 90, 90, 90, 90, 90, 40) else: raise AssertionError("unreachable") # Custom seasons rechunked = ds.chunk( {"x": 2, "time": SeasonResampler(["DJFM", "AM", "JJA", "SON"])} ) assert len(rechunked.chunksizes["time"]) == 9 assert sum(rechunked.chunksizes["time"]) == ds.sizes["time"] assert rechunked.chunksizes["x"] == (2,) * 5 if calendar == "standard": assert rechunked.chunksizes["time"] == (91, 61, 92, 91, 121, 61, 92, 91, 30) elif calendar == "noleap": assert rechunked.chunksizes["time"] == (90, 61, 92, 91, 121, 61, 92, 91, 31) elif calendar == "360_day": assert rechunked.chunksizes["time"] == (90, 60, 90, 90, 120, 60, 90, 90, 40) else: raise AssertionError("unreachable") # Test that drop_incomplete doesn't affect chunking rechunked_drop_true = ds.chunk( time=SeasonResampler(["DJF", "MAM", "JJA", "SON"], drop_incomplete=True) ) rechunked_drop_false = ds.chunk( time=SeasonResampler(["DJF", "MAM", "JJA", "SON"], drop_incomplete=False) ) assert ( rechunked_drop_true.chunksizes["time"] == rechunked_drop_false.chunksizes["time"] ) @requires_dask def test_chunk_by_season_resampler_errors(self): """Test error handling for SeasonResampler chunking.""" # Test error on missing season (should fail with incomplete seasons) ds = Dataset( {"x": ("time", np.arange(12))}, coords={"time": pd.date_range("2000-01-01", periods=12, freq="MS")}, ) with pytest.raises(ValueError, match="does not cover all 12 months"): ds.chunk(time=SeasonResampler(["DJF", "MAM", "SON"])) ds = Dataset({"foo": ("x", [1, 2, 3])}) # Test error on virtual variable with pytest.raises(ValueError, match="virtual variable"): ds.chunk(x=SeasonResampler(["DJF", "MAM", "JJA", "SON"])) # Test error on non-datetime variable ds["x"] = ("x", [1, 2, 3]) with pytest.raises(ValueError, match="datetime variables"): ds.chunk(x=SeasonResampler(["DJF", "MAM", "JJA", "SON"])) # Test successful case with 1D datetime variable ds["x"] = ("x", xr.date_range("2001-01-01", periods=3, freq="D")) # This should work result = ds.chunk(x=SeasonResampler(["DJF", "MAM", "JJA", "SON"])) assert result.chunks is not None # Test error on missing season (should fail with incomplete seasons) with pytest.raises(ValueError): ds.chunk(x=SeasonResampler(["DJF", "MAM", "SON"])) @requires_dask def test_chunk(self) -> None: data = create_test_data() for v in data.variables.values(): assert isinstance(v.data, np.ndarray) assert data.chunks == {} reblocked = data.chunk() for k, v in reblocked.variables.items(): if k in reblocked.dims: assert isinstance(v.data, np.ndarray) else: assert isinstance(v.data, da.Array) expected_chunks: dict[Hashable, tuple[int, ...]] = { "dim1": (8,), "dim2": (9,), "dim3": (10,), } assert reblocked.chunks == expected_chunks # test kwargs form of chunks assert data.chunk(expected_chunks).chunks == expected_chunks def get_dask_names(ds): return {k: v.data.name for k, v in ds.items()} orig_dask_names = get_dask_names(reblocked) reblocked = data.chunk({"time": 5, "dim1": 5, "dim2": 5, "dim3": 5}) # time is not a dim in any of the data_vars, so it # doesn't get chunked expected_chunks = {"dim1": (5, 3), "dim2": (5, 4), "dim3": (5, 5)} assert reblocked.chunks == expected_chunks # make sure dask names change when 
rechunking by different amounts # regression test for GH3350 new_dask_names = get_dask_names(reblocked) for k, v in new_dask_names.items(): assert v != orig_dask_names[k] reblocked = data.chunk(expected_chunks) assert reblocked.chunks == expected_chunks # reblock on already blocked data orig_dask_names = get_dask_names(reblocked) reblocked = reblocked.chunk(expected_chunks) new_dask_names = get_dask_names(reblocked) assert reblocked.chunks == expected_chunks assert_identical(reblocked, data) # rechunking with same chunk sizes should not change names for k, v in new_dask_names.items(): assert v == orig_dask_names[k] with pytest.raises( ValueError, match=re.escape( "chunks keys ('foo',) not found in data dimensions ('dim2', 'dim3', 'time', 'dim1')" ), ): data.chunk({"foo": 10}) @requires_dask @pytest.mark.parametrize( "calendar", ( "standard", pytest.param( "gregorian", marks=pytest.mark.skipif(not has_cftime, reason="needs cftime"), ), ), ) @pytest.mark.parametrize("freq", ["D", "W", "5ME", "YE"]) @pytest.mark.parametrize("add_gap", [True, False]) def test_chunk_by_frequency(self, freq: str, calendar: str, add_gap: bool) -> None: import dask.array N = 365 * 2 ΔN = 28 # noqa: PLC2401 time = xr.date_range( "2001-01-01", periods=N + ΔN, freq="D", calendar=calendar ).to_numpy(copy=True) if add_gap: # introduce an empty bin time[31 : 31 + ΔN] = np.datetime64("NaT") time = time[~np.isnat(time)] else: time = time[:N] ds = Dataset( { "pr": ("time", dask.array.random.random((N), chunks=(20))), "pr2d": (("x", "time"), dask.array.random.random((10, N), chunks=(20))), "ones": ("time", np.ones((N,))), }, coords={"time": time}, ) rechunked = ds.chunk(x=2, time=TimeResampler(freq)) expected = tuple( ds.ones.resample(time=freq).sum().dropna("time").astype(int).data.tolist() ) assert rechunked.chunksizes["time"] == expected assert rechunked.chunksizes["x"] == (2,) * 5 rechunked = ds.chunk({"x": 2, "time": TimeResampler(freq)}) assert rechunked.chunksizes["time"] == expected assert rechunked.chunksizes["x"] == (2,) * 5 def test_chunk_by_frequency_errors(self): ds = Dataset({"foo": ("x", [1, 2, 3])}) with pytest.raises(ValueError, match="virtual variable"): ds.chunk(x=TimeResampler("YE")) ds["x"] = ("x", [1, 2, 3]) with pytest.raises(ValueError, match="datetime variables"): ds.chunk(x=TimeResampler("YE")) ds["x"] = ("x", xr.date_range("2001-01-01", periods=3, freq="D")) with pytest.raises(ValueError, match="Invalid frequency"): ds.chunk(x=TimeResampler("foo")) @requires_dask def test_dask_is_lazy(self) -> None: store = InaccessibleVariableDataStore() create_test_data().dump_to_store(store) ds = open_dataset(store).chunk() with pytest.raises(UnexpectedDataAccess): ds.load() with pytest.raises(UnexpectedDataAccess): _ = ds["var1"].values # these should not raise UnexpectedDataAccess: _ = ds.var1.data ds.isel(time=10) ds.isel(time=slice(10), dim1=[0]).isel(dim1=0, dim2=-1) ds.transpose() ds.mean() ds.fillna(0) ds.rename({"dim1": "foobar"}) ds.set_coords("var1") ds.drop_vars("var1") def test_isel(self) -> None: data = create_test_data() slicers: dict[Hashable, slice] = { "dim1": slice(None, None, 2), "dim2": slice(0, 2), } ret = data.isel(slicers) # Verify that only the specified dimension was altered assert list(data.dims) == list(ret.dims) for d in data.dims: if d in slicers: assert ret.sizes[d] == np.arange(data.sizes[d])[slicers[d]].size else: assert data.sizes[d] == ret.sizes[d] # Verify that the data is what we expect for v in data.variables: assert data[v].dims == ret[v].dims assert data[v].attrs == 
ret[v].attrs slice_list = [slice(None)] * data[v].values.ndim for d, s in slicers.items(): if d in data[v].dims: inds = np.nonzero(np.array(data[v].dims) == d)[0] for ind in inds: slice_list[ind] = s expected = data[v].values[tuple(slice_list)] actual = ret[v].values np.testing.assert_array_equal(expected, actual) with pytest.raises(ValueError): data.isel(not_a_dim=slice(0, 2)) with pytest.raises( ValueError, match=r"Dimensions {'not_a_dim'} do not exist. Expected " r"one or more of " r"[\w\W]*'dim\d'[\w\W]*'dim\d'[\w\W]*'time'[\w\W]*'dim\d'[\w\W]*", ): data.isel(not_a_dim=slice(0, 2)) with pytest.warns( UserWarning, match=r"Dimensions {'not_a_dim'} do not exist. " r"Expected one or more of " r"[\w\W]*'dim\d'[\w\W]*'dim\d'[\w\W]*'time'[\w\W]*'dim\d'[\w\W]*", ): data.isel(not_a_dim=slice(0, 2), missing_dims="warn") assert_identical(data, data.isel(not_a_dim=slice(0, 2), missing_dims="ignore")) ret = data.isel(dim1=0) assert {"time": 20, "dim2": 9, "dim3": 10} == ret.sizes assert set(data.data_vars) == set(ret.data_vars) assert set(data.coords) == set(ret.coords) assert set(data.xindexes) == set(ret.xindexes) ret = data.isel(time=slice(2), dim1=0, dim2=slice(5)) assert {"time": 2, "dim2": 5, "dim3": 10} == ret.sizes assert set(data.data_vars) == set(ret.data_vars) assert set(data.coords) == set(ret.coords) assert set(data.xindexes) == set(ret.xindexes) ret = data.isel(time=0, dim1=0, dim2=slice(5)) assert {"dim2": 5, "dim3": 10} == ret.sizes assert set(data.data_vars) == set(ret.data_vars) assert set(data.coords) == set(ret.coords) assert set(data.xindexes) == set(list(ret.xindexes) + ["time"]) def test_isel_fancy(self) -> None: # isel with fancy indexing. data = create_test_data() pdim1 = [1, 2, 3] pdim2 = [4, 5, 1] pdim3 = [1, 2, 3] actual = data.isel( dim1=(("test_coord",), pdim1), dim2=(("test_coord",), pdim2), dim3=(("test_coord",), pdim3), ) assert "test_coord" in actual.dims assert actual.coords["test_coord"].shape == (len(pdim1),) # Should work with DataArray actual = data.isel( dim1=DataArray(pdim1, dims="test_coord"), dim2=(("test_coord",), pdim2), dim3=(("test_coord",), pdim3), ) assert "test_coord" in actual.dims assert actual.coords["test_coord"].shape == (len(pdim1),) expected = data.isel( dim1=(("test_coord",), pdim1), dim2=(("test_coord",), pdim2), dim3=(("test_coord",), pdim3), ) assert_identical(actual, expected) # DataArray with coordinate idx1 = DataArray(pdim1, dims=["a"], coords={"a": np.random.randn(3)}) idx2 = DataArray(pdim2, dims=["b"], coords={"b": np.random.randn(3)}) idx3 = DataArray(pdim3, dims=["c"], coords={"c": np.random.randn(3)}) # Should work with DataArray actual = data.isel(dim1=idx1, dim2=idx2, dim3=idx3) assert "a" in actual.dims assert "b" in actual.dims assert "c" in actual.dims assert "time" in actual.coords assert "dim2" in actual.coords assert "dim3" in actual.coords expected = data.isel( dim1=(("a",), pdim1), dim2=(("b",), pdim2), dim3=(("c",), pdim3) ) expected = expected.assign_coords(a=idx1["a"], b=idx2["b"], c=idx3["c"]) assert_identical(actual, expected) idx1 = DataArray(pdim1, dims=["a"], coords={"a": np.random.randn(3)}) idx2 = DataArray(pdim2, dims=["a"]) idx3 = DataArray(pdim3, dims=["a"]) # Should work with DataArray actual = data.isel(dim1=idx1, dim2=idx2, dim3=idx3) assert "a" in actual.dims assert "time" in actual.coords assert "dim2" in actual.coords assert "dim3" in actual.coords expected = data.isel( dim1=(("a",), pdim1), dim2=(("a",), pdim2), dim3=(("a",), pdim3) ) expected = expected.assign_coords(a=idx1["a"]) 
assert_identical(actual, expected) actual = data.isel(dim1=(("points",), pdim1), dim2=(("points",), pdim2)) assert "points" in actual.dims assert "dim3" in actual.dims assert "dim3" not in actual.data_vars np.testing.assert_array_equal(data["dim2"][pdim2], actual["dim2"]) # test that the order of the indexers doesn't matter assert_identical( data.isel(dim1=(("points",), pdim1), dim2=(("points",), pdim2)), data.isel(dim2=(("points",), pdim2), dim1=(("points",), pdim1)), ) # make sure we're raising errors in the right places with pytest.raises(IndexError, match=r"Dimensions of indexers mismatch"): data.isel(dim1=(("points",), [1, 2]), dim2=(("points",), [1, 2, 3])) with pytest.raises(TypeError, match=r"cannot use a Dataset"): data.isel(dim1=Dataset({"points": [1, 2]})) # test to be sure we keep around variables that were not indexed ds = Dataset({"x": [1, 2, 3, 4], "y": 0}) actual = ds.isel(x=(("points",), [0, 1, 2])) assert_identical(ds["y"], actual["y"]) # tests using index or DataArray as indexers stations = Dataset() stations["station"] = (("station",), ["A", "B", "C"]) stations["dim1s"] = (("station",), [1, 2, 3]) stations["dim2s"] = (("station",), [4, 5, 1]) actual = data.isel(dim1=stations["dim1s"], dim2=stations["dim2s"]) assert "station" in actual.coords assert "station" in actual.dims assert_identical(actual["station"].drop_vars(["dim2"]), stations["station"]) with pytest.raises(ValueError, match=r"conflicting values/indexes on "): data.isel( dim1=DataArray( [0, 1, 2], dims="station", coords={"station": [0, 1, 2]} ), dim2=DataArray( [0, 1, 2], dims="station", coords={"station": [0, 1, 3]} ), ) # multi-dimensional selection stations = Dataset() stations["a"] = (("a",), ["A", "B", "C"]) stations["b"] = (("b",), [0, 1]) stations["dim1s"] = (("a", "b"), [[1, 2], [2, 3], [3, 4]]) stations["dim2s"] = (("a",), [4, 5, 1]) actual = data.isel(dim1=stations["dim1s"], dim2=stations["dim2s"]) assert "a" in actual.coords assert "a" in actual.dims assert "b" in actual.coords assert "b" in actual.dims assert "dim2" in actual.coords assert "a" in actual["dim2"].dims assert_identical(actual["a"].drop_vars(["dim2"]), stations["a"]) assert_identical(actual["b"], stations["b"]) expected_var1 = data["var1"].variable[ stations["dim1s"].variable, stations["dim2s"].variable ] expected_var2 = data["var2"].variable[ stations["dim1s"].variable, stations["dim2s"].variable ] expected_var3 = data["var3"].variable[slice(None), stations["dim1s"].variable] assert_equal(actual["a"].drop_vars("dim2"), stations["a"]) assert_array_equal(actual["var1"], expected_var1) assert_array_equal(actual["var2"], expected_var2) assert_array_equal(actual["var3"], expected_var3) # test that drop works ds = xr.Dataset({"a": (("x",), [1, 2, 3])}, coords={"b": (("x",), [5, 6, 7])}) actual = ds.isel({"x": 1}, drop=False) expected = xr.Dataset({"a": 2}, coords={"b": 6}) assert_identical(actual, expected) actual = ds.isel({"x": 1}, drop=True) expected = xr.Dataset({"a": 2}) assert_identical(actual, expected) actual = ds.isel({"x": DataArray(1)}, drop=False) expected = xr.Dataset({"a": 2}, coords={"b": 6}) assert_identical(actual, expected) actual = ds.isel({"x": DataArray(1)}, drop=True) expected = xr.Dataset({"a": 2}) assert_identical(actual, expected) def test_isel_dataarray(self) -> None: """Test for indexing by DataArray""" data = create_test_data() # indexing with DataArray with same-name coordinates. 
indexing_da = DataArray( np.arange(1, 4), dims=["dim1"], coords={"dim1": np.random.randn(3)} ) actual = data.isel(dim1=indexing_da) assert_identical(indexing_da["dim1"], actual["dim1"]) assert_identical(data["dim2"], actual["dim2"]) # Conflict in the dimension coordinate indexing_da = DataArray( np.arange(1, 4), dims=["dim2"], coords={"dim2": np.random.randn(3)} ) with pytest.raises(IndexError, match=r"dimension coordinate 'dim2'"): data.isel(dim2=indexing_da) # Also the case for DataArray with pytest.raises(IndexError, match=r"dimension coordinate 'dim2'"): data["var2"].isel(dim2=indexing_da) with pytest.raises(IndexError, match=r"dimension coordinate 'dim2'"): data["dim2"].isel(dim2=indexing_da) # same name coordinate which does not conflict indexing_da = DataArray( np.arange(1, 4), dims=["dim2"], coords={"dim2": data["dim2"].values[1:4]} ) actual = data.isel(dim2=indexing_da) assert_identical(actual["dim2"], indexing_da["dim2"]) # Silently drop conflicted (non-dimensional) coordinate of indexer indexing_da = DataArray( np.arange(1, 4), dims=["dim2"], coords={ "dim2": data["dim2"].values[1:4], "numbers": ("dim2", np.arange(2, 5)), }, ) actual = data.isel(dim2=indexing_da) assert_identical(actual["numbers"], data["numbers"]) # boolean data array with coordinate with the same name indexing_da = DataArray( np.arange(1, 10), dims=["dim2"], coords={"dim2": data["dim2"].values} ) indexing_da = indexing_da < 3 actual = data.isel(dim2=indexing_da) assert_identical(actual["dim2"], data["dim2"][:2]) # boolean data array with non-dimensioncoordinate indexing_da = DataArray( np.arange(1, 10), dims=["dim2"], coords={ "dim2": data["dim2"].values, "non_dim": (("dim2",), np.random.randn(9)), "non_dim2": 0, }, ) indexing_da = indexing_da < 3 actual = data.isel(dim2=indexing_da) assert_identical( actual["dim2"].drop_vars("non_dim").drop_vars("non_dim2"), data["dim2"][:2] ) assert_identical(actual["non_dim"], indexing_da["non_dim"][:2]) assert_identical(actual["non_dim2"], indexing_da["non_dim2"]) # non-dimension coordinate will be also attached indexing_da = DataArray( np.arange(1, 4), dims=["dim2"], coords={"non_dim": (("dim2",), np.random.randn(3))}, ) actual = data.isel(dim2=indexing_da) assert "non_dim" in actual assert "non_dim" in actual.coords # Index by a scalar DataArray indexing_da = DataArray(3, dims=[], coords={"station": 2}) actual = data.isel(dim2=indexing_da) assert "station" in actual actual = data.isel(dim2=indexing_da["station"]) assert "station" in actual # indexer generated from coordinates indexing_ds = Dataset({}, coords={"dim2": [0, 1, 2]}) with pytest.raises(IndexError, match=r"dimension coordinate 'dim2'"): actual = data.isel(dim2=indexing_ds["dim2"]) def test_isel_fancy_convert_index_variable(self) -> None: # select index variable "x" with a DataArray of dim "z" # -> drop index and convert index variable to base variable ds = xr.Dataset({"foo": ("x", [1, 2, 3])}, coords={"x": [0, 1, 2]}) idxr = xr.DataArray([1], dims="z", name="x") actual = ds.isel(x=idxr) assert "x" not in actual.xindexes assert not isinstance(actual.x.variable, IndexVariable) def test_isel_multicoord_index(self) -> None: # regression test https://github.com/pydata/xarray/issues/10063 # isel on a multi-coordinate index should return a unique index associated # to each coordinate coords = xr.Coordinates(coords={"x": [0, 1], "y": [1, 2]}, indexes={}) ds = xr.Dataset(coords=coords).set_xindex(["x", "y"], XYIndex) ds2 = ds.isel(x=slice(None), y=slice(None)) assert ds2.xindexes["x"] is ds2.xindexes["y"] def 
test_sel(self) -> None: data = create_test_data() int_slicers = {"dim1": slice(None, None, 2), "dim2": slice(2), "dim3": slice(3)} loc_slicers = { "dim1": slice(None, None, 2), "dim2": slice(0, 0.5), "dim3": slice("a", "c"), } assert_equal(data.isel(int_slicers), data.sel(loc_slicers)) data["time"] = ("time", pd.date_range("2000-01-01", periods=20)) assert_equal(data.isel(time=0), data.sel(time="2000-01-01")) assert_equal( data.isel(time=slice(10)), data.sel(time=slice("2000-01-01", "2000-01-10")) ) assert_equal(data, data.sel(time=slice("1999", "2005"))) times = pd.date_range("2000-01-01", periods=3) assert_equal(data.isel(time=slice(3)), data.sel(time=times)) assert_equal( data.isel(time=slice(3)), data.sel(time=(data["time.dayofyear"] <= 3)) ) td = pd.to_timedelta(np.arange(3), unit="days") data = Dataset({"x": ("td", np.arange(3)), "td": td}) assert_equal(data, data.sel(td=td)) assert_equal(data, data.sel(td=slice("3 days"))) assert_equal(data.isel(td=0), data.sel(td=pd.Timedelta("0 days"))) assert_equal(data.isel(td=0), data.sel(td=pd.Timedelta("0h"))) assert_equal(data.isel(td=slice(1, 3)), data.sel(td=slice("1 days", "2 days"))) def test_sel_dataarray(self) -> None: data = create_test_data() ind = DataArray([0.0, 0.5, 1.0], dims=["dim2"]) actual = data.sel(dim2=ind) assert_equal(actual, data.isel(dim2=[0, 1, 2])) # with different dimension ind = DataArray([0.0, 0.5, 1.0], dims=["new_dim"]) actual = data.sel(dim2=ind) expected = data.isel(dim2=Variable("new_dim", [0, 1, 2])) assert "new_dim" in actual.dims assert_equal(actual, expected) # Multi-dimensional ind = DataArray([[0.0], [0.5], [1.0]], dims=["new_dim", "new_dim2"]) actual = data.sel(dim2=ind) expected = data.isel(dim2=Variable(("new_dim", "new_dim2"), [[0], [1], [2]])) assert "new_dim" in actual.dims assert "new_dim2" in actual.dims assert_equal(actual, expected) # with coordinate ind = DataArray( [0.0, 0.5, 1.0], dims=["new_dim"], coords={"new_dim": ["a", "b", "c"]} ) actual = data.sel(dim2=ind) expected = data.isel(dim2=[0, 1, 2]).rename({"dim2": "new_dim"}) assert "new_dim" in actual.dims assert "new_dim" in actual.coords assert_equal( actual.drop_vars("new_dim").drop_vars("dim2"), expected.drop_vars("new_dim") ) assert_equal(actual["new_dim"].drop_vars("dim2"), ind["new_dim"]) # with conflicted coordinate (silently ignored) ind = DataArray( [0.0, 0.5, 1.0], dims=["dim2"], coords={"dim2": ["a", "b", "c"]} ) actual = data.sel(dim2=ind) expected = data.isel(dim2=[0, 1, 2]) assert_equal(actual, expected) # with conflicted coordinate (silently ignored) ind = DataArray( [0.0, 0.5, 1.0], dims=["new_dim"], coords={"new_dim": ["a", "b", "c"], "dim2": 3}, ) actual = data.sel(dim2=ind) assert_equal( actual["new_dim"].drop_vars("dim2"), ind["new_dim"].drop_vars("dim2") ) expected = data.isel(dim2=[0, 1, 2]) expected["dim2"] = (("new_dim"), expected["dim2"].values) assert_equal(actual["dim2"].drop_vars("new_dim"), expected["dim2"]) assert actual["var1"].dims == ("dim1", "new_dim") # with non-dimensional coordinate ind = DataArray( [0.0, 0.5, 1.0], dims=["dim2"], coords={ "dim2": ["a", "b", "c"], "numbers": ("dim2", [0, 1, 2]), "new_dim": ("dim2", [1.1, 1.2, 1.3]), }, ) actual = data.sel(dim2=ind) expected = data.isel(dim2=[0, 1, 2]) assert_equal(actual.drop_vars("new_dim"), expected) assert np.allclose(actual["new_dim"].values, ind["new_dim"].values) def test_sel_dataarray_mindex(self) -> None: midx = pd.MultiIndex.from_product([list("abc"), [0, 1]], names=("one", "two")) midx_coords = Coordinates.from_pandas_multiindex(midx, "x") 
midx_coords["y"] = range(3) mds = xr.Dataset( {"var": (("x", "y"), np.random.rand(6, 3))}, coords=midx_coords ) actual_isel = mds.isel(x=xr.DataArray(np.arange(3), dims="x")) actual_sel = mds.sel(x=DataArray(midx[:3], dims="x")) assert actual_isel["x"].dims == ("x",) assert actual_sel["x"].dims == ("x",) assert_identical(actual_isel, actual_sel) actual_isel = mds.isel(x=xr.DataArray(np.arange(3), dims="z")) actual_sel = mds.sel(x=Variable("z", midx[:3])) assert actual_isel["x"].dims == ("z",) assert actual_sel["x"].dims == ("z",) assert_identical(actual_isel, actual_sel) # with coordinate actual_isel = mds.isel( x=xr.DataArray(np.arange(3), dims="z", coords={"z": [0, 1, 2]}) ) actual_sel = mds.sel( x=xr.DataArray(midx[:3], dims="z", coords={"z": [0, 1, 2]}) ) assert actual_isel["x"].dims == ("z",) assert actual_sel["x"].dims == ("z",) assert_identical(actual_isel, actual_sel) # Vectorized indexing with level-variables raises an error with pytest.raises(ValueError, match=r"Vectorized selection is "): mds.sel(one=["a", "b"]) with pytest.raises( ValueError, match=r"Vectorized selection is not available along coordinate 'x' with a multi-index", ): mds.sel( x=xr.DataArray( [np.array(midx[:2]), np.array(midx[-2:])], dims=["a", "b"] ) ) def test_sel_categorical(self) -> None: ind = pd.Series(["foo", "bar"], dtype="category") df = pd.DataFrame({"ind": ind, "values": [1, 2]}) ds = df.set_index("ind").to_xarray() actual = ds.sel(ind="bar") expected = ds.isel(ind=1) assert_identical(expected, actual) def test_sel_categorical_error(self) -> None: ind = pd.Series(["foo", "bar"], dtype="category") df = pd.DataFrame({"ind": ind, "values": [1, 2]}) ds = df.set_index("ind").to_xarray() with pytest.raises(ValueError): ds.sel(ind="bar", method="nearest") with pytest.raises(ValueError): ds.sel(ind="bar", tolerance="nearest") # type: ignore[arg-type] def test_categorical_index(self) -> None: cat = pd.CategoricalIndex( ["foo", "bar", "foo"], categories=["foo", "bar", "baz", "qux", "quux", "corge"], ) ds = xr.Dataset( {"var": ("cat", np.arange(3))}, coords={"cat": ("cat", cat), "c": ("cat", [0, 1, 1])}, ) # test slice actual1 = ds.sel(cat="foo") expected1 = ds.isel(cat=[0, 2]) assert_identical(expected1, actual1) # make sure the conversion to the array works actual2 = ds.sel(cat="foo")["cat"].values assert (actual2 == np.array(["foo", "foo"])).all() ds = ds.set_index(index=["cat", "c"]) actual3 = ds.unstack("index") assert actual3["var"].shape == (2, 2) def test_categorical_index_reindex(self) -> None: cat = pd.CategoricalIndex( ["foo", "bar", "baz"], categories=["foo", "bar", "baz", "qux", "quux", "corge"], ) ds = xr.Dataset( {"var": ("cat", np.arange(3))}, coords={"cat": ("cat", cat), "c": ("cat", [0, 1, 2])}, ) actual = ds.reindex(cat=["foo"])["cat"].values assert (actual == np.array(["foo"])).all() @pytest.mark.parametrize("fill_value", [np.nan, pd.NA, None]) @pytest.mark.parametrize( "extension_array", [ pytest.param( pd.Categorical( ["foo", "bar", "baz"], categories=["foo", "bar", "baz", "qux"], ), id="categorical", ), ] + ( [ pytest.param( pd.array([1, 1, None], dtype="int64[pyarrow]"), id="int64[pyarrow]" ) ] if has_pyarrow else [] ), ) def test_extensionarray_negative_reindex(self, fill_value, extension_array) -> None: ds = xr.Dataset( {"arr": ("index", extension_array)}, coords={"index": ("index", np.arange(3))}, ) kwargs = {} if fill_value is not None: kwargs["fill_value"] = fill_value reindexed_cat = cast( pd.api.extensions.ExtensionArray, (ds.reindex(index=[-1, 1, 1], 
**kwargs)["arr"].to_pandas().values), ) assert reindexed_cat.equals( # type: ignore[attr-defined] pd.array( [pd.NA, extension_array[1], extension_array[1]], dtype=extension_array.dtype, ) ) @requires_pyarrow def test_extension_array_reindex_same(self) -> None: series = pd.Series([1, 2, pd.NA, 3], dtype="int32[pyarrow]") test = xr.Dataset({"test": series}) res = test.reindex(dim_0=series.index) align(res, test, join="exact") def test_categorical_multiindex(self) -> None: i1 = pd.Series([0, 0]) cat = pd.CategoricalDtype(categories=["foo", "baz", "bar"]) i2 = pd.Series(["baz", "bar"], dtype=cat) df = pd.DataFrame({"i1": i1, "i2": i2, "values": [1, 2]}).set_index( ["i1", "i2"] ) actual = df.to_xarray() assert actual["values"].shape == (1, 2) def test_sel_drop(self) -> None: data = Dataset({"foo": ("x", [1, 2, 3])}, {"x": [0, 1, 2]}) expected = Dataset({"foo": 1}) selected = data.sel(x=0, drop=True) assert_identical(expected, selected) expected = Dataset({"foo": 1}, {"x": 0}) selected = data.sel(x=0, drop=False) assert_identical(expected, selected) data = Dataset({"foo": ("x", [1, 2, 3])}) expected = Dataset({"foo": 1}) selected = data.sel(x=0, drop=True) assert_identical(expected, selected) def test_sel_drop_mindex(self) -> None: midx = pd.MultiIndex.from_arrays([["a", "a"], [1, 2]], names=("foo", "bar")) midx_coords = Coordinates.from_pandas_multiindex(midx, "x") data = Dataset(coords=midx_coords) actual = data.sel(foo="a", drop=True) assert "foo" not in actual.coords actual = data.sel(foo="a", drop=False) assert_equal(actual.foo, DataArray("a", coords={"foo": "a"})) def test_isel_drop(self) -> None: data = Dataset({"foo": ("x", [1, 2, 3])}, {"x": [0, 1, 2]}) expected = Dataset({"foo": 1}) selected = data.isel(x=0, drop=True) assert_identical(expected, selected) expected = Dataset({"foo": 1}, {"x": 0}) selected = data.isel(x=0, drop=False) assert_identical(expected, selected) def test_head(self) -> None: data = create_test_data() expected = data.isel(time=slice(5), dim2=slice(6)) actual = data.head(time=5, dim2=6) assert_equal(expected, actual) expected = data.isel(time=slice(0)) actual = data.head(time=0) assert_equal(expected, actual) expected = data.isel({dim: slice(6) for dim in data.dims}) actual = data.head(6) assert_equal(expected, actual) expected = data.isel({dim: slice(5) for dim in data.dims}) actual = data.head() assert_equal(expected, actual) with pytest.raises(TypeError, match=r"either dict-like or a single int"): data.head([3]) # type: ignore[arg-type] with pytest.raises(TypeError, match=r"expected integer type"): data.head(dim2=3.1) with pytest.raises(ValueError, match=r"expected positive int"): data.head(time=-3) def test_tail(self) -> None: data = create_test_data() expected = data.isel(time=slice(-5, None), dim2=slice(-6, None)) actual = data.tail(time=5, dim2=6) assert_equal(expected, actual) expected = data.isel(dim1=slice(0)) actual = data.tail(dim1=0) assert_equal(expected, actual) expected = data.isel({dim: slice(-6, None) for dim in data.dims}) actual = data.tail(6) assert_equal(expected, actual) expected = data.isel({dim: slice(-5, None) for dim in data.dims}) actual = data.tail() assert_equal(expected, actual) with pytest.raises(TypeError, match=r"either dict-like or a single int"): data.tail([3]) # type: ignore[arg-type] with pytest.raises(TypeError, match=r"expected integer type"): data.tail(dim2=3.1) with pytest.raises(ValueError, match=r"expected positive int"): data.tail(time=-3) def test_thin(self) -> None: data = create_test_data() expected = 
data.isel(time=slice(None, None, 5), dim2=slice(None, None, 6)) actual = data.thin(time=5, dim2=6) assert_equal(expected, actual) expected = data.isel({dim: slice(None, None, 6) for dim in data.dims}) actual = data.thin(6) assert_equal(expected, actual) with pytest.raises(TypeError, match=r"either dict-like or a single int"): data.thin([3]) # type: ignore[arg-type] with pytest.raises(TypeError, match=r"expected integer type"): data.thin(dim2=3.1) with pytest.raises(ValueError, match=r"cannot be zero"): data.thin(time=0) with pytest.raises(ValueError, match=r"expected positive int"): data.thin(time=-3) @pytest.mark.filterwarnings("ignore::DeprecationWarning") def test_sel_fancy(self) -> None: data = create_test_data() # add in a range() index data["dim1"] = data.dim1 pdim1 = [1, 2, 3] pdim2 = [4, 5, 1] pdim3 = [1, 2, 3] expected = data.isel( dim1=Variable(("test_coord",), pdim1), dim2=Variable(("test_coord",), pdim2), dim3=Variable(("test_coord"), pdim3), ) actual = data.sel( dim1=Variable(("test_coord",), data.dim1[pdim1]), dim2=Variable(("test_coord",), data.dim2[pdim2]), dim3=Variable(("test_coord",), data.dim3[pdim3]), ) assert_identical(expected, actual) # DataArray Indexer idx_t = DataArray( data["time"][[3, 2, 1]].values, dims=["a"], coords={"a": ["a", "b", "c"]} ) idx_2 = DataArray( data["dim2"][[3, 2, 1]].values, dims=["a"], coords={"a": ["a", "b", "c"]} ) idx_3 = DataArray( data["dim3"][[3, 2, 1]].values, dims=["a"], coords={"a": ["a", "b", "c"]} ) actual = data.sel(time=idx_t, dim2=idx_2, dim3=idx_3) expected = data.isel( time=Variable(("a",), [3, 2, 1]), dim2=Variable(("a",), [3, 2, 1]), dim3=Variable(("a",), [3, 2, 1]), ) expected = expected.assign_coords(a=idx_t["a"]) assert_identical(expected, actual) idx_t = DataArray( data["time"][[3, 2, 1]].values, dims=["a"], coords={"a": ["a", "b", "c"]} ) idx_2 = DataArray( data["dim2"][[2, 1, 3]].values, dims=["b"], coords={"b": [0, 1, 2]} ) idx_3 = DataArray( data["dim3"][[1, 2, 1]].values, dims=["c"], coords={"c": [0.0, 1.1, 2.2]} ) actual = data.sel(time=idx_t, dim2=idx_2, dim3=idx_3) expected = data.isel( time=Variable(("a",), [3, 2, 1]), dim2=Variable(("b",), [2, 1, 3]), dim3=Variable(("c",), [1, 2, 1]), ) expected = expected.assign_coords(a=idx_t["a"], b=idx_2["b"], c=idx_3["c"]) assert_identical(expected, actual) # test from sel_points data = Dataset({"foo": (("x", "y"), np.arange(9).reshape(3, 3))}) data.coords.update({"x": [0, 1, 2], "y": [0, 1, 2]}) expected = Dataset( {"foo": ("points", [0, 4, 8])}, coords={ "x": Variable(("points",), [0, 1, 2]), "y": Variable(("points",), [0, 1, 2]), }, ) actual = data.sel( x=Variable(("points",), [0, 1, 2]), y=Variable(("points",), [0, 1, 2]) ) assert_identical(expected, actual) expected.coords.update({"x": ("points", [0, 1, 2]), "y": ("points", [0, 1, 2])}) actual = data.sel( x=Variable(("points",), [0.1, 1.1, 2.5]), y=Variable(("points",), [0, 1.2, 2.0]), method="pad", ) assert_identical(expected, actual) idx_x = DataArray([0, 1, 2], dims=["a"], coords={"a": ["a", "b", "c"]}) idx_y = DataArray([0, 2, 1], dims=["b"], coords={"b": [0, 3, 6]}) expected_ary = data["foo"][[0, 1, 2], [0, 2, 1]] actual = data.sel(x=idx_x, y=idx_y) assert_array_equal(expected_ary, actual["foo"]) assert_identical(actual["a"].drop_vars("x"), idx_x["a"]) assert_identical(actual["b"].drop_vars("y"), idx_y["b"]) with pytest.raises(KeyError): data.sel(x=[2.5], y=[2.0], method="pad", tolerance=1e-3) def test_sel_method(self) -> None: data = create_test_data() expected = data.sel(dim2=1) actual = data.sel(dim2=0.95, 
method="nearest") assert_identical(expected, actual) actual = data.sel(dim2=0.95, method="nearest", tolerance=1) assert_identical(expected, actual) with pytest.raises(KeyError): actual = data.sel(dim2=np.pi, method="nearest", tolerance=0) expected = data.sel(dim2=[1.5]) actual = data.sel(dim2=[1.45], method="backfill") assert_identical(expected, actual) with pytest.raises(NotImplementedError, match=r"slice objects"): data.sel(dim2=slice(1, 3), method="ffill") with pytest.raises(TypeError, match=r"``method``"): # this should not pass silently data.sel(dim2=1, method=data) # type: ignore[arg-type] # cannot pass method if there is no associated coordinate with pytest.raises(ValueError, match=r"cannot supply"): data.sel(dim1=0, method="nearest") def test_loc(self) -> None: data = create_test_data() expected = data.sel(dim3="a") actual = data.loc[dict(dim3="a")] assert_identical(expected, actual) with pytest.raises(TypeError, match=r"can only lookup dict"): data.loc["a"] # type: ignore[index] def test_selection_multiindex(self) -> None: midx = pd.MultiIndex.from_product( [["a", "b"], [1, 2], [-1, -2]], names=("one", "two", "three") ) midx_coords = Coordinates.from_pandas_multiindex(midx, "x") mdata = Dataset(data_vars={"var": ("x", range(8))}, coords=midx_coords) def test_sel( lab_indexer, pos_indexer, replaced_idx=False, renamed_dim=None ) -> None: ds = mdata.sel(x=lab_indexer) expected_ds = mdata.isel(x=pos_indexer) if not replaced_idx: assert_identical(ds, expected_ds) else: if renamed_dim: assert ds["var"].dims[0] == renamed_dim ds = ds.rename({renamed_dim: "x"}) assert_identical(ds["var"].variable, expected_ds["var"].variable) assert not ds["x"].equals(expected_ds["x"]) test_sel(("a", 1, -1), 0) test_sel(("b", 2, -2), -1) test_sel(("a", 1), [0, 1], replaced_idx=True, renamed_dim="three") test_sel(("a",), range(4), replaced_idx=True) test_sel("a", range(4), replaced_idx=True) test_sel([("a", 1, -1), ("b", 2, -2)], [0, 7]) test_sel(slice("a", "b"), range(8)) test_sel(slice(("a", 1), ("b", 1)), range(6)) test_sel({"one": "a", "two": 1, "three": -1}, 0) test_sel({"one": "a", "two": 1}, [0, 1], replaced_idx=True, renamed_dim="three") test_sel({"one": "a"}, range(4), replaced_idx=True) assert_identical(mdata.loc[{"x": {"one": "a"}}], mdata.sel(x={"one": "a"})) assert_identical(mdata.loc[{"x": "a"}], mdata.sel(x="a")) assert_identical(mdata.loc[{"x": ("a", 1)}], mdata.sel(x=("a", 1))) assert_identical(mdata.loc[{"x": ("a", 1, -1)}], mdata.sel(x=("a", 1, -1))) assert_identical(mdata.sel(x={"one": "a", "two": 1}), mdata.sel(one="a", two=1)) def test_broadcast_like(self) -> None: original1 = DataArray( np.random.randn(5), [("x", range(5))], name="a" ).to_dataset() original2 = DataArray(np.random.randn(6), [("y", range(6))], name="b") expected1, expected2 = broadcast(original1, original2) assert_identical( original1.broadcast_like(original2), expected1.transpose("y", "x") ) assert_identical(original2.broadcast_like(original1), expected2) def test_to_pandas(self) -> None: # 0D -> series actual = Dataset({"a": 1, "b": 2}).to_pandas() expected = pd.Series([1, 2], ["a", "b"]) assert_array_equal(actual, expected) # 1D -> dataframe x = np.random.randn(10) y = np.random.randn(10) t = list("abcdefghij") ds = Dataset({"a": ("t", x), "b": ("t", y), "t": ("t", t)}) actual_df = ds.to_pandas() expected_df = ds.to_dataframe() assert expected_df.equals(actual_df), (expected_df, actual_df) # 2D -> error x2d = np.random.randn(10, 10) y2d = np.random.randn(10, 10) with pytest.raises(ValueError, match=r"cannot convert 
Datasets"): Dataset({"a": (["t", "r"], x2d), "b": (["t", "r"], y2d)}).to_pandas() def test_reindex_like(self) -> None: data = create_test_data() data["letters"] = ("dim3", 10 * ["a"]) expected = data.isel(dim1=slice(10), time=slice(13)) actual = data.reindex_like(expected) assert_identical(actual, expected) expected = data.copy(deep=True) expected["dim3"] = ("dim3", list("cdefghijkl")) expected["var3"][:-2] = expected["var3"][2:].values expected["var3"][-2:] = np.nan expected["letters"] = expected["letters"].astype(object) expected["letters"][-2:] = np.nan expected["numbers"] = expected["numbers"].astype(float) expected["numbers"][:-2] = expected["numbers"][2:].values expected["numbers"][-2:] = np.nan actual = data.reindex_like(expected) assert_identical(actual, expected) def test_reindex(self) -> None: data = create_test_data() assert_identical(data, data.reindex()) expected = data.assign_coords(dim1=data["dim1"]) actual = data.reindex(dim1=data["dim1"]) assert_identical(actual, expected) actual = data.reindex(dim1=data["dim1"].values) assert_identical(actual, expected) actual = data.reindex(dim1=data["dim1"].to_index()) assert_identical(actual, expected) with pytest.raises( ValueError, match=r"cannot reindex or align along dimension" ): data.reindex(dim1=data["dim1"][:5]) expected = data.isel(dim2=slice(5)) actual = data.reindex(dim2=data["dim2"][:5]) assert_identical(actual, expected) # test dict-like argument actual = data.reindex({"dim2": data["dim2"]}) expected = data assert_identical(actual, expected) with pytest.raises(ValueError, match=r"cannot specify both"): data.reindex({"x": 0}, x=0) with pytest.raises(ValueError, match=r"dictionary"): data.reindex("foo") # type: ignore[arg-type] # invalid dimension # TODO: (benbovy - explicit indexes): uncomment? 
# --> from reindex docstrings: "any mismatched dimension is simply ignored" # with pytest.raises(ValueError, match=r"indexer keys.*not correspond.*"): # data.reindex(invalid=0) # out of order expected = data.sel(dim2=data["dim2"][:5:-1]) actual = data.reindex(dim2=data["dim2"][:5:-1]) assert_identical(actual, expected) # multiple fill values expected = data.reindex(dim2=[0.1, 2.1, 3.1, 4.1]).assign( var1=lambda ds: ds.var1.copy(data=[[-10, -10, -10, -10]] * len(ds.dim1)), var2=lambda ds: ds.var2.copy(data=[[-20, -20, -20, -20]] * len(ds.dim1)), ) actual = data.reindex( dim2=[0.1, 2.1, 3.1, 4.1], fill_value={"var1": -10, "var2": -20} ) assert_identical(actual, expected) # use the default value expected = data.reindex(dim2=[0.1, 2.1, 3.1, 4.1]).assign( var1=lambda ds: ds.var1.copy(data=[[-10, -10, -10, -10]] * len(ds.dim1)), var2=lambda ds: ds.var2.copy( data=[[np.nan, np.nan, np.nan, np.nan]] * len(ds.dim1) ), ) actual = data.reindex(dim2=[0.1, 2.1, 3.1, 4.1], fill_value={"var1": -10}) assert_identical(actual, expected) # regression test for #279 expected = Dataset({"x": ("time", np.random.randn(5))}, {"time": range(5)}) time2 = DataArray(np.arange(5), dims="time2") with pytest.raises(ValueError): actual = expected.reindex(time=time2) # another regression test ds = Dataset( {"foo": (["x", "y"], np.zeros((3, 4)))}, {"x": range(3), "y": range(4)} ) expected = Dataset( {"foo": (["x", "y"], np.zeros((3, 2)))}, {"x": [0, 1, 3], "y": [0, 1]} ) expected["foo"][-1] = np.nan actual = ds.reindex(x=[0, 1, 3], y=[0, 1]) assert_identical(expected, actual) def test_reindex_attrs_encoding(self) -> None: ds = Dataset( {"data": ("x", [1, 2, 3])}, {"x": ("x", [0, 1, 2], {"foo": "bar"}, {"bar": "baz"})}, ) actual = ds.reindex(x=[0, 1]) expected = Dataset( {"data": ("x", [1, 2])}, {"x": ("x", [0, 1], {"foo": "bar"}, {"bar": "baz"})}, ) assert_identical(actual, expected) assert actual.x.encoding == expected.x.encoding def test_reindex_warning(self) -> None: data = create_test_data() with pytest.raises(ValueError): # DataArray with different dimension raises Future warning ind = xr.DataArray([0.0, 1.0], dims=["new_dim"], name="ind") data.reindex(dim2=ind) # Should not warn ind = xr.DataArray([0.0, 1.0], dims=["dim2"], name="ind") with warnings.catch_warnings(record=True) as ws: data.reindex(dim2=ind) assert len(ws) == 0 def test_reindex_variables_copied(self) -> None: data = create_test_data() reindexed_data = data.reindex(copy=False) for k in data.variables: assert reindexed_data.variables[k] is not data.variables[k] def test_reindex_method(self) -> None: ds = Dataset({"x": ("y", [10, 20]), "y": [0, 1]}) y = [-0.5, 0.5, 1.5] actual = ds.reindex(y=y, method="backfill") expected = Dataset({"x": ("y", [10, 20, np.nan]), "y": y}) assert_identical(expected, actual) actual = ds.reindex(y=y, method="backfill", tolerance=0.1) expected = Dataset({"x": ("y", 3 * [np.nan]), "y": y}) assert_identical(expected, actual) actual = ds.reindex(y=y, method="backfill", tolerance=[0.1, 0.5, 0.1]) expected = Dataset({"x": ("y", [np.nan, 20, np.nan]), "y": y}) assert_identical(expected, actual) actual = ds.reindex(y=[0.1, 0.1, 1], tolerance=[0, 0.1, 0], method="nearest") expected = Dataset({"x": ("y", [np.nan, 10, 20]), "y": [0.1, 0.1, 1]}) assert_identical(expected, actual) actual = ds.reindex(y=y, method="pad") expected = Dataset({"x": ("y", [np.nan, 10, 20]), "y": y}) assert_identical(expected, actual) alt = Dataset({"y": y}) actual = ds.reindex_like(alt, method="pad") assert_identical(expected, actual) 
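# --- Editor's illustrative sketch (not part of the upstream xarray test suite) ---
# The reindex tests above cover two behaviours worth keeping in mind together:
# labels absent from the original index are filled (NaN by default, or a
# per-variable ``fill_value``), and ``method``/``tolerance`` control inexact
# matching. The helper and the names ("v", "y") below are arbitrary.
def _sketch_reindex_fill_and_method():
    import numpy as np
    import xarray as xr

    ds = xr.Dataset({"v": ("y", [10, 20])}, coords={"y": [0, 1]})
    filled = ds.reindex(y=[0, 1, 2])  # label 2 is new -> filled with NaN
    assert np.isnan(filled["v"].values[-1])
    back = ds.reindex(y=[-0.5, 0.5], method="backfill")  # match the next existing label
    assert list(back["v"].values) == [10, 20]
    return filled, back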
@pytest.mark.parametrize("fill_value", [dtypes.NA, 2, 2.0, {"x": 2, "z": 1}]) def test_reindex_fill_value(self, fill_value) -> None: ds = Dataset({"x": ("y", [10, 20]), "z": ("y", [-20, -10]), "y": [0, 1]}) y = [0, 1, 2] actual = ds.reindex(y=y, fill_value=fill_value) if fill_value == dtypes.NA: # if we supply the default, we expect the missing value for a # float array fill_value_x = fill_value_z = np.nan elif isinstance(fill_value, dict): fill_value_x = fill_value["x"] fill_value_z = fill_value["z"] else: fill_value_x = fill_value_z = fill_value expected = Dataset( { "x": ("y", [10, 20, fill_value_x]), "z": ("y", [-20, -10, fill_value_z]), "y": y, } ) assert_identical(expected, actual) @pytest.mark.parametrize("fill_value", [dtypes.NA, 2, 2.0, {"x": 2, "z": 1}]) def test_reindex_like_fill_value(self, fill_value) -> None: ds = Dataset({"x": ("y", [10, 20]), "z": ("y", [-20, -10]), "y": [0, 1]}) y = [0, 1, 2] alt = Dataset({"y": y}) actual = ds.reindex_like(alt, fill_value=fill_value) if fill_value == dtypes.NA: # if we supply the default, we expect the missing value for a # float array fill_value_x = fill_value_z = np.nan elif isinstance(fill_value, dict): fill_value_x = fill_value["x"] fill_value_z = fill_value["z"] else: fill_value_x = fill_value_z = fill_value expected = Dataset( { "x": ("y", [10, 20, fill_value_x]), "z": ("y", [-20, -10, fill_value_z]), "y": y, } ) assert_identical(expected, actual) @pytest.mark.parametrize("dtype", [str, bytes]) def test_reindex_str_dtype(self, dtype) -> None: data = Dataset({"data": ("x", [1, 2]), "x": np.array(["a", "b"], dtype=dtype)}) actual = data.reindex(x=data.x) expected = data assert_identical(expected, actual) assert actual.x.dtype == expected.x.dtype def test_reindex_with_multiindex_level(self) -> None: # test for https://github.com/pydata/xarray/issues/10347 mindex = pd.MultiIndex.from_product( [[100, 200, 300], [1, 2, 3, 4]], names=["x", "y"] ) y_idx = PandasIndex(mindex.levels[1], "y") ds1 = xr.Dataset(coords={"y": [1, 2, 3]}) ds2 = xr.Dataset(coords=xr.Coordinates.from_xindex(y_idx)) actual = ds1.reindex(y=ds2.y) assert_identical(actual, ds2) @pytest.mark.parametrize("fill_value", [dtypes.NA, 2, 2.0, {"foo": 2, "bar": 1}]) def test_align_fill_value(self, fill_value) -> None: x = Dataset({"foo": DataArray([1, 2], dims=["x"], coords={"x": [1, 2]})}) y = Dataset({"bar": DataArray([1, 2], dims=["x"], coords={"x": [1, 3]})}) x2, y2 = align(x, y, join="outer", fill_value=fill_value) if fill_value == dtypes.NA: # if we supply the default, we expect the missing value for a # float array fill_value_foo = fill_value_bar = np.nan elif isinstance(fill_value, dict): fill_value_foo = fill_value["foo"] fill_value_bar = fill_value["bar"] else: fill_value_foo = fill_value_bar = fill_value expected_x2 = Dataset( { "foo": DataArray( [1, 2, fill_value_foo], dims=["x"], coords={"x": [1, 2, 3]} ) } ) expected_y2 = Dataset( { "bar": DataArray( [1, fill_value_bar, 2], dims=["x"], coords={"x": [1, 2, 3]} ) } ) assert_identical(expected_x2, x2) assert_identical(expected_y2, y2) def test_align(self) -> None: left = create_test_data() right = left.copy(deep=True) right["dim3"] = ("dim3", list("cdefghijkl")) right["var3"][:-2] = right["var3"][2:].values right["var3"][-2:] = np.random.randn(*right["var3"][-2:].shape) right["numbers"][:-2] = right["numbers"][2:].values right["numbers"][-2:] = -10 intersection = list("cdefghij") union = list("abcdefghijkl") left2, right2 = align(left, right, join="inner") assert_array_equal(left2["dim3"], intersection) 
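# --- Editor's illustrative sketch (not part of the upstream xarray test suite) ---
# A stand-alone illustration of the join semantics tested in this block:
# ``join="inner"`` keeps only shared labels, ``join="outer"`` takes the union
# and fills the gaps with NaN. Dataset and variable names are arbitrary.
def _sketch_align_joins():
    import numpy as np
    import xarray as xr

    a = xr.Dataset({"a": ("x", [1, 2])}, coords={"x": [0, 1]})
    b = xr.Dataset({"b": ("x", [3, 4])}, coords={"x": [1, 2]})
    a_inner, _ = xr.align(a, b, join="inner")
    assert list(a_inner["x"].values) == [1]
    a_outer, b_outer = xr.align(a, b, join="outer")
    assert list(a_outer["x"].values) == [0, 1, 2]
    assert np.isnan(a_outer["a"].values[-1])  # label 2 does not exist in ``a``
    return a_outer, b_outer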
assert_identical(left2, right2) left2, right2 = align(left, right, join="outer") assert_array_equal(left2["dim3"], union) assert_equal(left2["dim3"].variable, right2["dim3"].variable) assert_identical(left2.sel(dim3=intersection), right2.sel(dim3=intersection)) assert np.isnan(left2["var3"][-2:]).all() assert np.isnan(right2["var3"][:2]).all() left2, right2 = align(left, right, join="left") assert_equal(left2["dim3"].variable, right2["dim3"].variable) assert_equal(left2["dim3"].variable, left["dim3"].variable) assert_identical(left2.sel(dim3=intersection), right2.sel(dim3=intersection)) assert np.isnan(right2["var3"][:2]).all() left2, right2 = align(left, right, join="right") assert_equal(left2["dim3"].variable, right2["dim3"].variable) assert_equal(left2["dim3"].variable, right["dim3"].variable) assert_identical(left2.sel(dim3=intersection), right2.sel(dim3=intersection)) assert np.isnan(left2["var3"][-2:]).all() with pytest.raises(ValueError, match=r"invalid value for join"): align(left, right, join="foobar") # type: ignore[call-overload] with pytest.raises(TypeError): align(left, right, foo="bar") # type: ignore[call-overload] def test_align_exact(self) -> None: left = xr.Dataset(coords={"x": [0, 1]}) right = xr.Dataset(coords={"x": [1, 2]}) left1, left2 = xr.align(left, left, join="exact") assert_identical(left1, left) assert_identical(left2, left) with pytest.raises(ValueError, match=r"cannot align.*join.*exact.*not equal.*"): xr.align(left, right, join="exact") def test_align_override(self) -> None: left = xr.Dataset(coords={"x": [0, 1, 2]}) right = xr.Dataset(coords={"x": [0.1, 1.1, 2.1], "y": [1, 2, 3]}) expected_right = xr.Dataset(coords={"x": [0, 1, 2], "y": [1, 2, 3]}) new_left, new_right = xr.align(left, right, join="override") assert_identical(left, new_left) assert_identical(new_right, expected_right) new_left, new_right = xr.align(left, right, exclude="x", join="override") assert_identical(left, new_left) assert_identical(right, new_right) new_left, new_right = xr.align( left.isel(x=0, drop=True), right, exclude="x", join="override" ) assert_identical(left.isel(x=0, drop=True), new_left) assert_identical(right, new_right) with pytest.raises( ValueError, match=r"cannot align.*join.*override.*same size" ): xr.align(left.isel(x=0).expand_dims("x"), right, join="override") def test_align_exclude(self) -> None: x = Dataset( { "foo": DataArray( [[1, 2], [3, 4]], dims=["x", "y"], coords={"x": [1, 2], "y": [3, 4]} ) } ) y = Dataset( { "bar": DataArray( [[1, 2], [3, 4]], dims=["x", "y"], coords={"x": [1, 3], "y": [5, 6]} ) } ) x2, y2 = align(x, y, exclude=["y"], join="outer") expected_x2 = Dataset( { "foo": DataArray( [[1, 2], [3, 4], [np.nan, np.nan]], dims=["x", "y"], coords={"x": [1, 2, 3], "y": [3, 4]}, ) } ) expected_y2 = Dataset( { "bar": DataArray( [[1, 2], [np.nan, np.nan], [3, 4]], dims=["x", "y"], coords={"x": [1, 2, 3], "y": [5, 6]}, ) } ) assert_identical(expected_x2, x2) assert_identical(expected_y2, y2) def test_align_nocopy(self) -> None: x = Dataset({"foo": DataArray([1, 2, 3], coords=[("x", [1, 2, 3])])}) y = Dataset({"foo": DataArray([1, 2], coords=[("x", [1, 2])])}) expected_x2 = x expected_y2 = Dataset( {"foo": DataArray([1, 2, np.nan], coords=[("x", [1, 2, 3])])} ) x2, y2 = align(x, y, copy=False, join="outer") assert_identical(expected_x2, x2) assert_identical(expected_y2, y2) assert source_ndarray(x["foo"].data) is source_ndarray(x2["foo"].data) x2, y2 = align(x, y, copy=True, join="outer") assert source_ndarray(x["foo"].data) is not 
source_ndarray(x2["foo"].data) assert_identical(expected_x2, x2) assert_identical(expected_y2, y2) def test_align_indexes(self) -> None: x = Dataset({"foo": DataArray([1, 2, 3], dims="x", coords=[("x", [1, 2, 3])])}) (x2,) = align(x, indexes={"x": [2, 3, 1]}) expected_x2 = Dataset( {"foo": DataArray([2, 3, 1], dims="x", coords={"x": [2, 3, 1]})} ) assert_identical(expected_x2, x2) def test_align_multiple_indexes_common_dim(self) -> None: a = Dataset(coords={"x": [1, 2], "xb": ("x", [3, 4])}).set_xindex("xb") b = Dataset(coords={"x": [1], "xb": ("x", [3])}).set_xindex("xb") (a2, b2) = align(a, b, join="inner") assert_identical(a2, b, check_default_indexes=False) assert_identical(b2, b, check_default_indexes=False) c = Dataset(coords={"x": [1, 3], "xb": ("x", [2, 4])}).set_xindex("xb") with pytest.raises(AlignmentError, match=r".*conflicting re-indexers"): align(a, c) def test_align_conflicting_indexes(self) -> None: class CustomIndex(PandasIndex): ... a = Dataset(coords={"xb": ("x", [3, 4])}).set_xindex("xb") b = Dataset(coords={"xb": ("x", [3])}).set_xindex("xb", CustomIndex) with pytest.raises(AlignmentError, match=r"cannot align.*conflicting indexes"): align(a, b) def test_align_non_unique(self) -> None: x = Dataset({"foo": ("x", [3, 4, 5]), "x": [0, 0, 1]}) x1, x2 = align(x, x) assert_identical(x1, x) assert_identical(x2, x) y = Dataset({"bar": ("x", [6, 7]), "x": [0, 1]}) with pytest.raises(ValueError, match=r"cannot reindex or align"): align(x, y) def test_align_str_dtype(self) -> None: a = Dataset({"foo": ("x", [0, 1])}, coords={"x": ["a", "b"]}) b = Dataset({"foo": ("x", [1, 2])}, coords={"x": ["b", "c"]}) expected_a = Dataset( {"foo": ("x", [0, 1, np.nan])}, coords={"x": ["a", "b", "c"]} ) expected_b = Dataset( {"foo": ("x", [np.nan, 1, 2])}, coords={"x": ["a", "b", "c"]} ) actual_a, actual_b = xr.align(a, b, join="outer") assert_identical(expected_a, actual_a) assert expected_a.x.dtype == actual_a.x.dtype assert_identical(expected_b, actual_b) assert expected_b.x.dtype == actual_b.x.dtype @pytest.mark.parametrize("join", ["left", "override"]) def test_align_index_var_attrs(self, join) -> None: # regression test https://github.com/pydata/xarray/issues/6852 # aligning two objects should have no side effect on their index variable # metadata. 
ds = Dataset(coords={"x": ("x", [1, 2, 3], {"units": "m"})}) ds_noattr = Dataset(coords={"x": ("x", [1, 2, 3])}) xr.align(ds_noattr, ds, join=join) assert ds.x.attrs == {"units": "m"} assert ds_noattr.x.attrs == {} def test_align_scalar_index(self) -> None: # ensure that indexes associated with scalar coordinates are not ignored # during alignment ds1 = Dataset(coords={"x": 0}).set_xindex("x", ScalarIndex) ds2 = Dataset(coords={"x": 0}).set_xindex("x", ScalarIndex) actual = xr.align(ds1, ds2, join="exact") assert_identical(actual[0], ds1, check_default_indexes=False) assert_identical(actual[1], ds2, check_default_indexes=False) ds3 = Dataset(coords={"x": 1}).set_xindex("x", ScalarIndex) with pytest.raises(AlignmentError, match="cannot align objects"): xr.align(ds1, ds3, join="exact") def test_align_multi_dim_index_exclude_dims(self) -> None: ds1 = ( Dataset(coords={"x": [1, 2], "y": [3, 4]}) .drop_indexes(["x", "y"]) .set_xindex(["x", "y"], XYIndex) ) ds2 = ( Dataset(coords={"x": [1, 2], "y": [5, 6]}) .drop_indexes(["x", "y"]) .set_xindex(["x", "y"], XYIndex) ) for join in ("outer", "exact"): actual = xr.align(ds1, ds2, join=join, exclude="y") assert_identical(actual[0], ds1, check_default_indexes=False) assert_identical(actual[1], ds2, check_default_indexes=False) with pytest.raises( AlignmentError, match=r"cannot align objects.*index.*not equal" ): xr.align(ds1, ds2, join="exact") with pytest.raises(AlignmentError, match="cannot exclude dimension"): xr.align(ds1, ds2, join="override", exclude="y") def test_align_index_equals_future_warning(self) -> None: # TODO: remove this test once the deprecation cycle is completed class DeprecatedEqualsSignatureIndex(PandasIndex): def equals(self, other: Index) -> bool: # type: ignore[override] return super().equals(other, exclude=None) ds = ( Dataset(coords={"x": [1, 2]}) .drop_indexes("x") .set_xindex("x", DeprecatedEqualsSignatureIndex) ) with pytest.warns(FutureWarning, match=r"signature.*deprecated"): xr.align(ds, ds.copy(), join="exact") def test_broadcast(self) -> None: ds = Dataset( {"foo": 0, "bar": ("x", [1]), "baz": ("y", [2, 3])}, {"c": ("x", [4])} ) expected = Dataset( { "foo": (("x", "y"), [[0, 0]]), "bar": (("x", "y"), [[1, 1]]), "baz": (("x", "y"), [[2, 3]]), }, {"c": ("x", [4])}, ) (actual,) = broadcast(ds) assert_identical(expected, actual) ds_x = Dataset({"foo": ("x", [1])}) ds_y = Dataset({"bar": ("y", [2, 3])}) expected_x = Dataset({"foo": (("x", "y"), [[1, 1]])}) expected_y = Dataset({"bar": (("x", "y"), [[2, 3]])}) actual_x, actual_y = broadcast(ds_x, ds_y) assert_identical(expected_x, actual_x) assert_identical(expected_y, actual_y) array_y = ds_y["bar"] expected_y2 = expected_y["bar"] actual_x2, actual_y2 = broadcast(ds_x, array_y) assert_identical(expected_x, actual_x2) assert_identical(expected_y2, actual_y2) def test_broadcast_nocopy(self) -> None: # Test that data is not copied if not needed x = Dataset({"foo": (("x", "y"), [[1, 1]])}) y = Dataset({"bar": ("y", [2, 3])}) (actual_x,) = broadcast(x) assert_identical(x, actual_x) assert source_ndarray(actual_x["foo"].data) is source_ndarray(x["foo"].data) actual_x, _actual_y = broadcast(x, y) assert_identical(x, actual_x) assert source_ndarray(actual_x["foo"].data) is source_ndarray(x["foo"].data) def test_broadcast_exclude(self) -> None: x = Dataset( { "foo": DataArray( [[1, 2], [3, 4]], dims=["x", "y"], coords={"x": [1, 2], "y": [3, 4]} ), "bar": DataArray(5), } ) y = Dataset( { "foo": DataArray( [[1, 2]], dims=["z", "y"], coords={"z": [1], "y": [5, 6]} ) } ) x2, y2 = 
broadcast(x, y, exclude=["y"]) expected_x2 = Dataset( { "foo": DataArray( [[[1, 2]], [[3, 4]]], dims=["x", "z", "y"], coords={"z": [1], "x": [1, 2], "y": [3, 4]}, ), "bar": DataArray( [[5], [5]], dims=["x", "z"], coords={"x": [1, 2], "z": [1]} ), } ) expected_y2 = Dataset( { "foo": DataArray( [[[1, 2]], [[1, 2]]], dims=["x", "z", "y"], coords={"z": [1], "x": [1, 2], "y": [5, 6]}, ) } ) assert_identical(expected_x2, x2) assert_identical(expected_y2, y2) def test_broadcast_misaligned(self) -> None: x = Dataset({"foo": DataArray([1, 2, 3], coords=[("x", [-1, -2, -3])])}) y = Dataset( { "bar": DataArray( [[1, 2], [3, 4]], dims=["y", "x"], coords={"y": [1, 2], "x": [10, -3]}, ) } ) x2, y2 = broadcast(x, y) expected_x2 = Dataset( { "foo": DataArray( [[3, 3], [2, 2], [1, 1], [np.nan, np.nan]], dims=["x", "y"], coords={"y": [1, 2], "x": [-3, -2, -1, 10]}, ) } ) expected_y2 = Dataset( { "bar": DataArray( [[2, 4], [np.nan, np.nan], [np.nan, np.nan], [1, 3]], dims=["x", "y"], coords={"y": [1, 2], "x": [-3, -2, -1, 10]}, ) } ) assert_identical(expected_x2, x2) assert_identical(expected_y2, y2) def test_broadcast_multi_index(self) -> None: # GH6430 ds = Dataset( {"foo": (("x", "y", "z"), np.ones((3, 4, 2)))}, {"x": ["a", "b", "c"], "y": [1, 2, 3, 4]}, ) stacked = ds.stack(space=["x", "y"]) broadcasted, _ = broadcast(stacked, stacked.space) assert broadcasted.xindexes["x"] is broadcasted.xindexes["space"] assert broadcasted.xindexes["y"] is broadcasted.xindexes["space"] def test_variable_indexing(self) -> None: data = create_test_data() v = data["var1"] d1 = data["dim1"] d2 = data["dim2"] assert_equal(v, v[d1.values]) assert_equal(v, v[d1]) assert_equal(v[:3], v[d1 < 3]) assert_equal(v[:, 3:], v[:, d2 >= 1.5]) assert_equal(v[:3, 3:], v[d1 < 3, d2 >= 1.5]) assert_equal(v[:3, :2], v[range(3), range(2)]) assert_equal(v[:3, :2], v.loc[d1[:3], d2[:2]]) def test_drop_variables(self) -> None: data = create_test_data() assert_identical(data, data.drop_vars([])) expected = Dataset({k: data[k] for k in data.variables if k != "time"}) actual = data.drop_vars("time") assert_identical(expected, actual) actual = data.drop_vars(["time"]) assert_identical(expected, actual) with pytest.raises( ValueError, match=re.escape( "These variables cannot be found in this dataset: ['not_found_here']" ), ): data.drop_vars("not_found_here") actual = data.drop_vars("not_found_here", errors="ignore") assert_identical(data, actual) actual = data.drop_vars(["not_found_here"], errors="ignore") assert_identical(data, actual) actual = data.drop_vars(["time", "not_found_here"], errors="ignore") assert_identical(expected, actual) # deprecated approach with `drop` works (straight copy paste from above) with pytest.warns(DeprecationWarning): actual = data.drop("not_found_here", errors="ignore") assert_identical(data, actual) with pytest.warns(DeprecationWarning): actual = data.drop(["not_found_here"], errors="ignore") assert_identical(data, actual) with pytest.warns(DeprecationWarning): actual = data.drop(["time", "not_found_here"], errors="ignore") assert_identical(expected, actual) with pytest.warns(DeprecationWarning): actual = data.drop({"time", "not_found_here"}, errors="ignore") assert_identical(expected, actual) def test_drop_multiindex_level(self) -> None: data = create_test_multiindex() expected = data.drop_vars(["x", "level_1", "level_2"]) with pytest.warns(DeprecationWarning): actual = data.drop_vars("level_1") assert_identical(expected, actual) def test_drop_multiindex_labels(self) -> None: data = create_test_multiindex() mindex = 
pd.MultiIndex.from_tuples( [ ("a", 2), ("b", 1), ("b", 2), ], names=("level_1", "level_2"), ) expected = Dataset({}, Coordinates.from_pandas_multiindex(mindex, "x")) actual = data.drop_sel(x=("a", 1)) assert_identical(expected, actual) def test_drop_index_labels(self) -> None: data = Dataset({"A": (["x", "y"], np.random.randn(2, 3)), "x": ["a", "b"]}) with pytest.warns(DeprecationWarning): actual = data.drop(["a"], dim="x") expected = data.isel(x=[1]) assert_identical(expected, actual) with pytest.warns(DeprecationWarning): actual = data.drop(["a", "b"], dim="x") expected = data.isel(x=slice(0, 0)) assert_identical(expected, actual) with pytest.raises(KeyError): # not contained in axis with pytest.warns(DeprecationWarning): data.drop(["c"], dim="x") with pytest.warns(DeprecationWarning): actual = data.drop(["c"], dim="x", errors="ignore") assert_identical(data, actual) with pytest.raises(ValueError): data.drop(["c"], dim="x", errors="wrong_value") # type: ignore[arg-type] with pytest.warns(DeprecationWarning): actual = data.drop(["a", "b", "c"], "x", errors="ignore") expected = data.isel(x=slice(0, 0)) assert_identical(expected, actual) # DataArrays as labels are a nasty corner case as they are not # Iterable[Hashable] - DataArray.__iter__ yields scalar DataArrays. actual = data.drop_sel(x=DataArray(["a", "b", "c"]), errors="ignore") expected = data.isel(x=slice(0, 0)) assert_identical(expected, actual) with pytest.warns(DeprecationWarning): data.drop(DataArray(["a", "b", "c"]), dim="x", errors="ignore") assert_identical(expected, actual) actual = data.drop_sel(y=[1]) expected = data.isel(y=[0, 2]) assert_identical(expected, actual) with pytest.raises(KeyError, match=r"not found in axis"): data.drop_sel(x=0) def test_drop_labels_by_keyword(self) -> None: data = Dataset( {"A": (["x", "y"], np.random.randn(2, 6)), "x": ["a", "b"], "y": range(6)} ) # Basic functionality. assert len(data.coords["x"]) == 2 with pytest.warns(DeprecationWarning): ds1 = data.drop(["a"], dim="x") ds2 = data.drop_sel(x="a") ds3 = data.drop_sel(x=["a"]) ds4 = data.drop_sel(x=["a", "b"]) ds5 = data.drop_sel(x=["a", "b"], y=range(0, 6, 2)) arr = DataArray(range(3), dims=["c"]) with pytest.warns(DeprecationWarning): data.drop(arr.coords) with pytest.warns(DeprecationWarning): data.drop(arr.xindexes) assert_array_equal(ds1.coords["x"], ["b"]) assert_array_equal(ds2.coords["x"], ["b"]) assert_array_equal(ds3.coords["x"], ["b"]) assert ds4.coords["x"].size == 0 assert ds5.coords["x"].size == 0 assert_array_equal(ds5.coords["y"], [1, 3, 5]) # Error handling if user tries both approaches. with pytest.raises(ValueError): data.drop(labels=["a"], x="a") with pytest.raises(ValueError): data.drop(labels=["a"], dim="x", x="a") warnings.filterwarnings("ignore", r"\W*drop") with pytest.raises(ValueError): data.drop(dim="x", x="a") def test_drop_labels_by_position(self) -> None: data = Dataset( {"A": (["x", "y"], np.random.randn(2, 6)), "x": ["a", "b"], "y": range(6)} ) # Basic functionality. 
assert len(data.coords["x"]) == 2 actual = data.drop_isel(x=0) expected = data.drop_sel(x="a") assert_identical(expected, actual) actual = data.drop_isel(x=[0]) expected = data.drop_sel(x=["a"]) assert_identical(expected, actual) actual = data.drop_isel(x=[0, 1]) expected = data.drop_sel(x=["a", "b"]) assert_identical(expected, actual) assert actual.coords["x"].size == 0 actual = data.drop_isel(x=[0, 1], y=range(0, 6, 2)) expected = data.drop_sel(x=["a", "b"], y=range(0, 6, 2)) assert_identical(expected, actual) assert actual.coords["x"].size == 0 with pytest.raises(KeyError): data.drop_isel(z=1) def test_drop_indexes(self) -> None: ds = Dataset( coords={ "x": ("x", [0, 1, 2]), "y": ("y", [3, 4, 5]), "foo": ("x", ["a", "a", "b"]), } ) actual = ds.drop_indexes("x") assert "x" not in actual.xindexes assert type(actual.x.variable) is Variable actual = ds.drop_indexes(["x", "y"]) assert "x" not in actual.xindexes assert "y" not in actual.xindexes assert type(actual.x.variable) is Variable assert type(actual.y.variable) is Variable with pytest.raises( ValueError, match=r"The coordinates \('not_a_coord',\) are not found in the dataset coordinates", ): ds.drop_indexes("not_a_coord") with pytest.raises(ValueError, match="those coordinates do not have an index"): ds.drop_indexes("foo") actual = ds.drop_indexes(["foo", "not_a_coord"], errors="ignore") assert_identical(actual, ds) # test index corrupted midx = pd.MultiIndex.from_tuples([(1, 2), (3, 4)], names=["a", "b"]) midx_coords = Coordinates.from_pandas_multiindex(midx, "x") ds = Dataset(coords=midx_coords) with pytest.raises(ValueError, match=r".*would corrupt the following index.*"): ds.drop_indexes("a") def test_sel_on_unindexed_coordinate(self) -> None: # Test that .sel() works on coordinates without an index by creating # a PandasIndex on the fly ds = Dataset( {"data": (["x", "y"], np.arange(6).reshape(2, 3))}, coords={"x": [0, 1], "y": [10, 20, 30], "y_meta": ("y", ["a", "b", "c"])}, ) # Drop the index on y to create an unindexed dim coord # also check that coord y_meta works despite not being a dim coord ds = ds.drop_indexes("y") assert "y" not in ds.xindexes assert "y_meta" not in ds.xindexes assert "y" in ds.coords # .sel() should still work by creating a PandasIndex on the fly result = ds.sel(y=20) expected = ds.isel(y=1) assert_identical(result, expected, check_default_indexes=False) result = ds.sel(y_meta="b") expected = ds.isel(y=1) assert_identical(result, expected, check_default_indexes=False) # check that our auto-created indexes are ephemeral assert "y" not in ds.xindexes assert "y_meta" not in ds.xindexes assert "y" in ds.coords result_slice = ds.sel(y=slice(10, 20)) expected_slice = ds.isel(y=slice(0, 2)) assert_identical( result_slice["data"], expected_slice["data"], check_default_indexes=False ) assert_identical( result_slice["y"], expected_slice["y"], check_default_indexes=False ) def test_drop_dims(self) -> None: data = xr.Dataset( { "A": (["x", "y"], np.random.randn(2, 3)), "B": ("x", np.random.randn(2)), "x": ["a", "b"], "z": np.pi, } ) actual = data.drop_dims("x") expected = data.drop_vars(["A", "B", "x"]) assert_identical(expected, actual) actual = data.drop_dims("y") expected = data.drop_vars("A") assert_identical(expected, actual) actual = data.drop_dims(["x", "y"]) expected = data.drop_vars(["A", "B", "x"]) assert_identical(expected, actual) with pytest.raises((ValueError, KeyError)): data.drop_dims("z") # not a dimension with pytest.raises((ValueError, KeyError)): data.drop_dims(None) # type:ignore[arg-type] actual 
= data.drop_dims("z", errors="ignore") assert_identical(data, actual) # should this be allowed? actual = data.drop_dims(None, errors="ignore") # type:ignore[arg-type] assert_identical(data, actual) with pytest.raises(ValueError): actual = data.drop_dims("z", errors="wrong_value") # type: ignore[arg-type] actual = data.drop_dims(["x", "y", "z"], errors="ignore") expected = data.drop_vars(["A", "B", "x"]) assert_identical(expected, actual) def test_copy(self) -> None: data = create_test_data() data.attrs["Test"] = [1, 2, 3] for copied in [data.copy(deep=False), copy(data)]: assert_identical(data, copied) assert data.encoding == copied.encoding # Note: IndexVariable objects with string dtype are always # copied because of xarray.core.indexes.safe_cast_to_index. # Limiting the test to data variables. for k in data.data_vars: v0 = data.variables[k] v1 = copied.variables[k] assert source_ndarray(v0.data) is source_ndarray(v1.data) copied["foo"] = ("z", np.arange(5)) assert "foo" not in data copied.attrs["foo"] = "bar" assert "foo" not in data.attrs assert data.attrs["Test"] is copied.attrs["Test"] for copied in [data.copy(deep=True), deepcopy(data)]: assert_identical(data, copied) for k, v0 in data.variables.items(): v1 = copied.variables[k] assert v0 is not v1 assert data.attrs["Test"] is not copied.attrs["Test"] def test_copy_with_data(self) -> None: orig = create_test_data() new_data = {k: np.random.randn(*v.shape) for k, v in orig.data_vars.items()} actual = orig.copy(data=new_data) expected = orig.copy() for k, v in new_data.items(): expected[k].data = v assert_identical(expected, actual) @pytest.mark.xfail(raises=AssertionError) @pytest.mark.parametrize( "deep, expected_orig", [ [ True, xr.DataArray( xr.IndexVariable("a", np.array([1, 2])), coords={"a": [1, 2]}, dims=["a"], ), ], [ False, xr.DataArray( xr.IndexVariable("a", np.array([999, 2])), coords={"a": [999, 2]}, dims=["a"], ), ], ], ) def test_copy_coords(self, deep, expected_orig) -> None: """The test fails for the shallow copy, and apparently only on Windows for some reason. 
In windows coords seem to be immutable unless it's one dataset deep copied from another.""" ds = xr.DataArray( np.ones([2, 2, 2]), coords={"a": [1, 2], "b": ["x", "y"], "c": [0, 1]}, dims=["a", "b", "c"], name="value", ).to_dataset() ds_cp = ds.copy(deep=deep) new_a = np.array([999, 2]) ds_cp.coords["a"] = ds_cp.a.copy(data=new_a) expected_cp = xr.DataArray( xr.IndexVariable("a", new_a), coords={"a": [999, 2]}, dims=["a"], ) assert_identical(ds_cp.coords["a"], expected_cp) assert_identical(ds.coords["a"], expected_orig) def test_copy_with_data_errors(self) -> None: orig = create_test_data() new_var1 = np.arange(orig["var1"].size).reshape(orig["var1"].shape) with pytest.raises(ValueError, match=r"Data must be dict-like"): orig.copy(data=new_var1) # type: ignore[arg-type] with pytest.raises(ValueError, match=r"only contain variables in original"): orig.copy(data={"not_in_original": new_var1}) with pytest.raises(ValueError, match=r"contain all variables in original"): orig.copy(data={"var1": new_var1}) def test_drop_encoding(self) -> None: orig = create_test_data() vencoding = {"scale_factor": 10} orig.encoding = {"foo": "bar"} for k in orig.variables.keys(): orig[k].encoding = vencoding actual = orig.drop_encoding() assert actual.encoding == {} for v in actual.variables.values(): assert v.encoding == {} assert_equal(actual, orig) def test_rename(self) -> None: data = create_test_data() newnames = { "var1": "renamed_var1", "dim2": "renamed_dim2", } renamed = data.rename(newnames) variables = dict(data.variables) for nk, nv in newnames.items(): variables[nv] = variables.pop(nk) for k, v in variables.items(): dims = list(v.dims) for name, newname in newnames.items(): if name in dims: dims[dims.index(name)] = newname assert_equal( Variable(dims, v.values, v.attrs), renamed[k].variable.to_base_variable(), ) assert v.encoding == renamed[k].encoding assert type(v) is type(renamed.variables[k]) assert "var1" not in renamed assert "dim2" not in renamed with pytest.raises(ValueError, match=r"cannot rename 'not_a_var'"): data.rename({"not_a_var": "nada"}) with pytest.raises(ValueError, match=r"'var1' conflicts"): data.rename({"var2": "var1"}) # verify that we can rename a variable without accessing the data var1 = data["var1"] data["var1"] = (var1.dims, InaccessibleArray(var1.values)) renamed = data.rename(newnames) with pytest.raises(UnexpectedDataAccess): _ = renamed["renamed_var1"].values # https://github.com/python/mypy/issues/10008 renamed_kwargs = data.rename(**newnames) # type: ignore[arg-type] assert_identical(renamed, renamed_kwargs) def test_rename_old_name(self) -> None: # regtest for GH1477 data = create_test_data() with pytest.raises(ValueError, match=r"'samecol' conflicts"): data.rename({"var1": "samecol", "var2": "samecol"}) # This shouldn't cause any problems. 
data.rename({"var1": "var2", "var2": "var1"}) def test_rename_same_name(self) -> None: data = create_test_data() newnames = {"var1": "var1", "dim2": "dim2"} renamed = data.rename(newnames) assert_identical(renamed, data) def test_rename_dims(self) -> None: original = Dataset({"x": ("x", [0, 1, 2]), "y": ("x", [10, 11, 12]), "z": 42}) expected = Dataset( {"x": ("x_new", [0, 1, 2]), "y": ("x_new", [10, 11, 12]), "z": 42} ) # TODO: (benbovy - explicit indexes) update when set_index supports # setting index for non-dimension variables expected = expected.set_coords("x") actual = original.rename_dims({"x": "x_new"}) assert_identical(expected, actual, check_default_indexes=False) actual_2 = original.rename_dims(x="x_new") assert_identical(expected, actual_2, check_default_indexes=False) # Test to raise ValueError dims_dict_bad = {"x_bad": "x_new"} with pytest.raises(ValueError): original.rename_dims(dims_dict_bad) with pytest.raises(ValueError): original.rename_dims({"x": "z"}) def test_rename_vars(self) -> None: original = Dataset({"x": ("x", [0, 1, 2]), "y": ("x", [10, 11, 12]), "z": 42}) expected = Dataset( {"x_new": ("x", [0, 1, 2]), "y": ("x", [10, 11, 12]), "z": 42} ) # TODO: (benbovy - explicit indexes) update when set_index supports # setting index for non-dimension variables expected = expected.set_coords("x_new") actual = original.rename_vars({"x": "x_new"}) assert_identical(expected, actual, check_default_indexes=False) actual_2 = original.rename_vars(x="x_new") assert_identical(expected, actual_2, check_default_indexes=False) # Test to raise ValueError names_dict_bad = {"x_bad": "x_new"} with pytest.raises(ValueError): original.rename_vars(names_dict_bad) def test_rename_dimension_coord(self) -> None: # rename a dimension coordinate to a non-dimension coordinate # should preserve index original = Dataset(coords={"x": ("x", [0, 1, 2])}) actual = original.rename_vars({"x": "x_new"}) assert "x_new" in actual.xindexes actual_2 = original.rename_dims({"x": "x_new"}) assert "x" in actual_2.xindexes def test_rename_dimension_coord_warnings(self) -> None: # create a dimension coordinate by renaming a dimension or coordinate # should raise a warning (no index created) ds = Dataset(coords={"x": ("y", [0, 1])}) with pytest.warns( UserWarning, match=r"rename 'x' to 'y' does not create an index.*" ): ds.rename(x="y") ds = Dataset(coords={"y": ("x", [0, 1])}) with pytest.warns( UserWarning, match=r"rename 'x' to 'y' does not create an index.*" ): ds.rename(x="y") # A no-op rename should not raise a warning ds = Dataset( data_vars={"data": (("x", "y"), np.ones((2, 3)))}, coords={"x": range(2), "y": range(3), "a": ("x", [3, 4])}, ) with warnings.catch_warnings(): warnings.simplefilter("error") ds.rename(x="x") def test_rename_multiindex(self) -> None: midx = pd.MultiIndex.from_tuples([(1, 2), (3, 4)], names=["a", "b"]) midx_coords = Coordinates.from_pandas_multiindex(midx, "x") original = Dataset({}, midx_coords) # pandas-stubs expects Hashable for rename, but list of names works for MultiIndex midx_renamed = midx.rename(["a", "c"]) # type: ignore[call-overload] midx_coords_renamed = Coordinates.from_pandas_multiindex(midx_renamed, "x") expected = Dataset({}, midx_coords_renamed) actual = original.rename({"b": "c"}) assert_identical(expected, actual) with pytest.raises(ValueError, match=r"'a' conflicts"): with pytest.warns(UserWarning, match="does not create an index anymore"): original.rename({"x": "a"}) with pytest.raises(ValueError, match=r"'x' conflicts"): with pytest.warns(UserWarning,
match="does not create an index anymore"): original.rename({"a": "x"}) with pytest.raises(ValueError, match=r"'b' conflicts"): original.rename({"a": "b"}) def test_rename_preserve_attrs_encoding(self) -> None: # test propagate attrs/encoding to new variable(s) created from Index object original = Dataset(coords={"x": ("x", [0, 1, 2])}) expected = Dataset(coords={"y": ("y", [0, 1, 2])}) for ds, dim in zip([original, expected], ["x", "y"], strict=True): ds[dim].attrs = {"foo": "bar"} ds[dim].encoding = {"foo": "bar"} actual = original.rename({"x": "y"}) assert_identical(actual, expected) @requires_cftime def test_rename_does_not_change_CFTimeIndex_type(self) -> None: # make sure CFTimeIndex is not converted to DatetimeIndex #3522 time = xr.date_range( start="2000", periods=6, freq="2MS", calendar="noleap", use_cftime=True ) orig = Dataset(coords={"time": time}) renamed = orig.rename(time="time_new") assert "time_new" in renamed.xindexes # TODO: benbovy - flexible indexes: update when CFTimeIndex # inherits from xarray.Index assert isinstance(renamed.xindexes["time_new"].to_pandas_index(), CFTimeIndex) assert renamed.xindexes["time_new"].to_pandas_index().name == "time_new" # check original has not changed assert "time" in orig.xindexes assert isinstance(orig.xindexes["time"].to_pandas_index(), CFTimeIndex) assert orig.xindexes["time"].to_pandas_index().name == "time" # note: rename_dims(time="time_new") drops "ds.indexes" renamed = orig.rename_dims() assert isinstance(renamed.xindexes["time"].to_pandas_index(), CFTimeIndex) renamed = orig.rename_vars() assert isinstance(renamed.xindexes["time"].to_pandas_index(), CFTimeIndex) def test_rename_does_not_change_DatetimeIndex_type(self) -> None: # make sure DatetimeIndex is preserved on rename time = pd.date_range(start="2000", periods=6, freq="2MS") orig = Dataset(coords={"time": time}) renamed = orig.rename(time="time_new") assert "time_new" in renamed.xindexes # TODO: benbovy - flexible indexes: update when DatetimeIndex # inherits from xarray.Index?
assert isinstance(renamed.xindexes["time_new"].to_pandas_index(), DatetimeIndex) assert renamed.xindexes["time_new"].to_pandas_index().name == "time_new" # check original has not changed assert "time" in orig.xindexes assert isinstance(orig.xindexes["time"].to_pandas_index(), DatetimeIndex) assert orig.xindexes["time"].to_pandas_index().name == "time" # note: rename_dims(time="time_new") drops "ds.indexes" renamed = orig.rename_dims() assert isinstance(renamed.xindexes["time"].to_pandas_index(), DatetimeIndex) renamed = orig.rename_vars() assert isinstance(renamed.xindexes["time"].to_pandas_index(), DatetimeIndex) def test_swap_dims(self) -> None: original = Dataset({"x": [1, 2, 3], "y": ("x", list("abc")), "z": 42}) expected = Dataset({"z": 42}, {"x": ("y", [1, 2, 3]), "y": list("abc")}) actual = original.swap_dims({"x": "y"}) assert_identical(expected, actual) assert isinstance(actual.variables["y"], IndexVariable) assert isinstance(actual.variables["x"], Variable) assert actual.xindexes["y"].equals(expected.xindexes["y"]) roundtripped = actual.swap_dims({"y": "x"}) assert_identical(original.set_coords("y"), roundtripped) with pytest.raises(ValueError, match=r"cannot swap"): original.swap_dims({"y": "x"}) with pytest.raises(ValueError, match=r"replacement dimension"): original.swap_dims({"x": "z"}) expected = Dataset( {"y": ("u", list("abc")), "z": 42}, coords={"x": ("u", [1, 2, 3])} ) actual = original.swap_dims({"x": "u"}) assert_identical(expected, actual) # as kwargs expected = Dataset( {"y": ("u", list("abc")), "z": 42}, coords={"x": ("u", [1, 2, 3])} ) actual = original.swap_dims(x="u") assert_identical(expected, actual) # handle multiindex case midx = pd.MultiIndex.from_arrays([list("aab"), list("yzz")], names=["y1", "y2"]) original = Dataset({"x": [1, 2, 3], "y": ("x", midx), "z": 42}) midx_coords = Coordinates.from_pandas_multiindex(midx, "y") midx_coords["x"] = ("y", [1, 2, 3]) expected = Dataset({"z": 42}, midx_coords) actual = original.swap_dims({"x": "y"}) assert_identical(expected, actual) assert isinstance(actual.variables["y"], IndexVariable) assert isinstance(actual.variables["x"], Variable) assert actual.xindexes["y"].equals(expected.xindexes["y"]) def test_expand_dims_error(self) -> None: original = Dataset( { "x": ("a", np.random.randn(3)), "y": (["b", "a"], np.random.randn(4, 3)), "z": ("a", np.random.randn(3)), }, coords={ "a": np.linspace(0, 1, 3), "b": np.linspace(0, 1, 4), "c": np.linspace(0, 1, 5), }, attrs={"key": "entry"}, ) with pytest.raises(ValueError, match=r"already exists"): original.expand_dims(dim=["x"]) # Make sure it raises true error also for non-dimensional coordinates # which has dimension. 
original = original.set_coords("z") with pytest.raises(ValueError, match=r"already exists"): original.expand_dims(dim=["z"]) original = Dataset( { "x": ("a", np.random.randn(3)), "y": (["b", "a"], np.random.randn(4, 3)), "z": ("a", np.random.randn(3)), }, coords={ "a": np.linspace(0, 1, 3), "b": np.linspace(0, 1, 4), "c": np.linspace(0, 1, 5), }, attrs={"key": "entry"}, ) with pytest.raises(TypeError, match=r"value of new dimension"): original.expand_dims({"d": 3.2}) with pytest.raises(ValueError, match=r"both keyword and positional"): original.expand_dims({"d": 4}, e=4) def test_expand_dims_int(self) -> None: original = Dataset( {"x": ("a", np.random.randn(3)), "y": (["b", "a"], np.random.randn(4, 3))}, coords={ "a": np.linspace(0, 1, 3), "b": np.linspace(0, 1, 4), "c": np.linspace(0, 1, 5), }, attrs={"key": "entry"}, ) actual = original.expand_dims(["z"], [1]) expected = Dataset( { "x": original["x"].expand_dims("z", 1), "y": original["y"].expand_dims("z", 1), }, coords={ "a": np.linspace(0, 1, 3), "b": np.linspace(0, 1, 4), "c": np.linspace(0, 1, 5), }, attrs={"key": "entry"}, ) assert_identical(expected, actual) # make sure squeeze restores the original data set. roundtripped = actual.squeeze("z") assert_identical(original, roundtripped) # another test with a negative axis actual = original.expand_dims(["z"], [-1]) expected = Dataset( { "x": original["x"].expand_dims("z", -1), "y": original["y"].expand_dims("z", -1), }, coords={ "a": np.linspace(0, 1, 3), "b": np.linspace(0, 1, 4), "c": np.linspace(0, 1, 5), }, attrs={"key": "entry"}, ) assert_identical(expected, actual) # make sure squeeze restores the original data set. roundtripped = actual.squeeze("z") assert_identical(original, roundtripped) def test_expand_dims_coords(self) -> None: original = Dataset({"x": ("a", np.array([1, 2, 3]))}) expected = Dataset( {"x": (("b", "a"), np.array([[1, 2, 3], [1, 2, 3]]))}, coords={"b": [1, 2]} ) actual = original.expand_dims(dict(b=[1, 2])) assert_identical(expected, actual) assert "b" not in original._coord_names def test_expand_dims_existing_scalar_coord(self) -> None: original = Dataset({"x": 1}, {"a": 2}) expected = Dataset({"x": (("a",), [1])}, {"a": [2]}) actual = original.expand_dims("a") assert_identical(expected, actual) def test_isel_expand_dims_roundtrip(self) -> None: original = Dataset({"x": (("a",), [1])}, {"a": [2]}) actual = original.isel(a=0).expand_dims("a") assert_identical(actual, original) def test_expand_dims_mixed_int_and_coords(self) -> None: # Test expanding one dimension to have size > 1 that doesn't have # coordinates, and also expanding another dimension to have size > 1 # that DOES have coordinates. 
original = Dataset( {"x": ("a", np.random.randn(3)), "y": (["b", "a"], np.random.randn(4, 3))}, coords={ "a": np.linspace(0, 1, 3), "b": np.linspace(0, 1, 4), "c": np.linspace(0, 1, 5), }, ) actual = original.expand_dims({"d": 4, "e": ["l", "m", "n"]}) expected = Dataset( { "x": xr.DataArray( original["x"].values * np.ones([4, 3, 3]), coords=dict(d=range(4), e=["l", "m", "n"], a=np.linspace(0, 1, 3)), dims=["d", "e", "a"], ).drop_vars("d"), "y": xr.DataArray( original["y"].values * np.ones([4, 3, 4, 3]), coords=dict( d=range(4), e=["l", "m", "n"], b=np.linspace(0, 1, 4), a=np.linspace(0, 1, 3), ), dims=["d", "e", "b", "a"], ).drop_vars("d"), }, coords={"c": np.linspace(0, 1, 5)}, ) assert_identical(actual, expected) def test_expand_dims_kwargs_python36plus(self) -> None: original = Dataset( {"x": ("a", np.random.randn(3)), "y": (["b", "a"], np.random.randn(4, 3))}, coords={ "a": np.linspace(0, 1, 3), "b": np.linspace(0, 1, 4), "c": np.linspace(0, 1, 5), }, attrs={"key": "entry"}, ) other_way = original.expand_dims(e=["l", "m", "n"]) other_way_expected = Dataset( { "x": xr.DataArray( original["x"].values * np.ones([3, 3]), coords=dict(e=["l", "m", "n"], a=np.linspace(0, 1, 3)), dims=["e", "a"], ), "y": xr.DataArray( original["y"].values * np.ones([3, 4, 3]), coords=dict( e=["l", "m", "n"], b=np.linspace(0, 1, 4), a=np.linspace(0, 1, 3), ), dims=["e", "b", "a"], ), }, coords={"c": np.linspace(0, 1, 5)}, attrs={"key": "entry"}, ) assert_identical(other_way_expected, other_way) @pytest.mark.parametrize("create_index_for_new_dim_flag", [True, False]) def test_expand_dims_create_index_data_variable( self, create_index_for_new_dim_flag ): # data variables should not gain an index ever ds = Dataset({"x": 0}) if create_index_for_new_dim_flag: with pytest.warns(UserWarning, match="No index created"): expanded = ds.expand_dims( "x", create_index_for_new_dim=create_index_for_new_dim_flag ) else: expanded = ds.expand_dims( "x", create_index_for_new_dim=create_index_for_new_dim_flag ) # TODO Can't just create the expected dataset directly using constructor because of GH issue 8959 expected = Dataset({"x": ("x", [0])}).drop_indexes("x").reset_coords("x") assert_identical(expanded, expected, check_default_indexes=False) assert expanded.indexes == {} def test_expand_dims_create_index_coordinate_variable(self): # coordinate variables should gain an index only if create_index_for_new_dim is True (the default) ds = Dataset(coords={"x": 0}) expanded = ds.expand_dims("x") expected = Dataset({"x": ("x", [0])}) assert_identical(expanded, expected) expanded_no_index = ds.expand_dims("x", create_index_for_new_dim=False) # TODO Can't just create the expected dataset directly using constructor because of GH issue 8959 expected = Dataset(coords={"x": ("x", [0])}).drop_indexes("x") assert_identical(expanded_no_index, expected, check_default_indexes=False) assert expanded_no_index.indexes == {} def test_expand_dims_create_index_from_iterable(self): ds = Dataset(coords={"x": 0}) expanded = ds.expand_dims(x=[0, 1]) expected = Dataset({"x": ("x", [0, 1])}) assert_identical(expanded, expected) expanded_no_index = ds.expand_dims(x=[0, 1], create_index_for_new_dim=False) # TODO Can't just create the expected dataset directly using constructor because of GH issue 8959 expected = Dataset(coords={"x": ("x", [0, 1])}).drop_indexes("x") assert_identical(expanded, expected, check_default_indexes=False) assert expanded_no_index.indexes == {} def test_expand_dims_non_nanosecond_conversion(self) -> None: # Regression test for 
https://github.com/pydata/xarray/issues/7493#issuecomment-1953091000 # todo: test still needed? ds = Dataset().expand_dims({"time": [np.datetime64("2018-01-01", "m")]}) assert ds.time.dtype == np.dtype("datetime64[s]") def test_set_index(self) -> None: expected = create_test_multiindex() mindex = expected["x"].to_index() indexes = [mindex.get_level_values(str(n)) for n in mindex.names] coords = {idx.name: ("x", idx) for idx in indexes} ds = Dataset({}, coords=coords) obj = ds.set_index(x=mindex.names) assert_identical(obj, expected) # ensure pre-existing indexes involved are removed # (level_2 should be a coordinate with no index) ds = create_test_multiindex() coords = {"x": coords["level_1"], "level_2": coords["level_2"]} expected = Dataset({}, coords=coords) obj = ds.set_index(x="level_1") assert_identical(obj, expected) # ensure set_index with no existing index and a single data var given # doesn't return multi-index ds = Dataset(data_vars={"x_var": ("x", [0, 1, 2])}) expected = Dataset(coords={"x": [0, 1, 2]}) assert_identical(ds.set_index(x="x_var"), expected) with pytest.raises(ValueError, match=r"bar variable\(s\) do not exist"): ds.set_index(foo="bar") with pytest.raises(ValueError, match=r"dimension mismatch.*"): ds.set_index(y="x_var") ds = Dataset(coords={"x": 1}) with pytest.raises( ValueError, match=r".*cannot set a PandasIndex.*scalar variable.*" ): ds.set_index(x="x") def test_set_index_deindexed_coords(self) -> None: # test de-indexed coordinates are converted to base variable # https://github.com/pydata/xarray/issues/6969 one = ["a", "a", "b", "b"] two = [1, 2, 1, 2] three = ["c", "c", "d", "d"] four = [3, 4, 3, 4] midx_12 = pd.MultiIndex.from_arrays([one, two], names=["one", "two"]) midx_34 = pd.MultiIndex.from_arrays([three, four], names=["three", "four"]) coords = Coordinates.from_pandas_multiindex(midx_12, "x") coords["three"] = ("x", three) coords["four"] = ("x", four) ds = xr.Dataset(coords=coords) actual = ds.set_index(x=["three", "four"]) coords_expected = Coordinates.from_pandas_multiindex(midx_34, "x") coords_expected["one"] = ("x", one) coords_expected["two"] = ("x", two) expected = xr.Dataset(coords=coords_expected) assert_identical(actual, expected) def test_reset_index(self) -> None: ds = create_test_multiindex() mindex = ds["x"].to_index() indexes = [mindex.get_level_values(str(n)) for n in mindex.names] coords = {idx.name: ("x", idx) for idx in indexes} expected = Dataset({}, coords=coords) obj = ds.reset_index("x") assert_identical(obj, expected, check_default_indexes=False) assert len(obj.xindexes) == 0 ds = Dataset(coords={"y": ("x", [1, 2, 3])}) with pytest.raises(ValueError, match=r".*not coordinates with an index"): ds.reset_index("y") def test_reset_index_keep_attrs(self) -> None: coord_1 = DataArray([1, 2], dims=["coord_1"], attrs={"attrs": True}) ds = Dataset({}, {"coord_1": coord_1}) obj = ds.reset_index("coord_1") assert ds.coord_1.attrs == obj.coord_1.attrs assert len(obj.xindexes) == 0 def test_reset_index_drop_dims(self) -> None: ds = Dataset(coords={"x": [1, 2]}) reset = ds.reset_index("x", drop=True) assert len(reset.dims) == 0 @pytest.mark.parametrize( ["arg", "drop", "dropped", "converted", "renamed"], [ ("foo", False, [], [], {"bar": "x"}), ("foo", True, ["foo"], [], {"bar": "x"}), ("x", False, ["x"], ["foo", "bar"], {}), ("x", True, ["x", "foo", "bar"], [], {}), (["foo", "bar"], False, ["x"], ["foo", "bar"], {}), (["foo", "bar"], True, ["x", "foo", "bar"], [], {}), (["x", "foo"], False, ["x"], ["foo", "bar"], {}), (["foo", "x"], True, 
["x", "foo", "bar"], [], {}), ], ) def test_reset_index_drop_convert( self, arg: str | list[str], drop: bool, dropped: list[str], converted: list[str], renamed: dict[str, str], ) -> None: # regressions https://github.com/pydata/xarray/issues/6946 and # https://github.com/pydata/xarray/issues/6989 # check that multi-index dimension or level coordinates are dropped, converted # from IndexVariable to Variable or renamed to dimension as expected midx = pd.MultiIndex.from_product([["a", "b"], [1, 2]], names=("foo", "bar")) midx_coords = Coordinates.from_pandas_multiindex(midx, "x") ds = xr.Dataset(coords=midx_coords) reset = ds.reset_index(arg, drop=drop) for name in dropped: assert name not in reset.variables for name in converted: assert_identical(reset[name].variable, ds[name].variable.to_base_variable()) for old_name, new_name in renamed.items(): assert_identical(ds[old_name].variable, reset[new_name].variable) def test_reorder_levels(self) -> None: ds = create_test_multiindex() mindex = ds["x"].to_index() assert isinstance(mindex, pd.MultiIndex) midx = mindex.reorder_levels(["level_2", "level_1"]) midx_coords = Coordinates.from_pandas_multiindex(midx, "x") expected = Dataset({}, coords=midx_coords) # check attrs propagated ds["level_1"].attrs["foo"] = "bar" expected["level_1"].attrs["foo"] = "bar" reindexed = ds.reorder_levels(x=["level_2", "level_1"]) assert_identical(reindexed, expected) ds = Dataset({}, coords={"x": [1, 2]}) with pytest.raises(ValueError, match=r"has no MultiIndex"): ds.reorder_levels(x=["level_1", "level_2"]) def test_set_xindex(self) -> None: ds = Dataset( coords={"foo": ("x", ["a", "a", "b", "b"]), "bar": ("x", [0, 1, 2, 3])} ) actual = ds.set_xindex("foo") expected = ds.set_index(x="foo").rename_vars(x="foo") assert_identical(actual, expected, check_default_indexes=False) actual_mindex = ds.set_xindex(["foo", "bar"]) expected_mindex = ds.set_index(x=["foo", "bar"]) assert_identical(actual_mindex, expected_mindex) class NotAnIndex: ... 
with pytest.raises(TypeError, match=r".*not a subclass of xarray.Index"): ds.set_xindex("foo", NotAnIndex) # type: ignore[arg-type] with pytest.raises(ValueError, match="those variables don't exist"): ds.set_xindex("not_a_coordinate", PandasIndex) ds["data_var"] = ("x", [1, 2, 3, 4]) with pytest.raises(ValueError, match="those variables are data variables"): ds.set_xindex("data_var", PandasIndex) ds = Dataset(coords={"x": ("x", [0, 1, 2, 3])}) # With drop_existing=True, it should succeed result = ds.set_xindex("x", PandasIndex) assert "x" in result.xindexes assert isinstance(result.xindexes["x"], PandasIndex) class CustomIndex(PandasIndex): pass result_custom = ds.set_xindex("x", CustomIndex) assert "x" in result_custom.xindexes assert isinstance(result_custom.xindexes["x"], CustomIndex) # Verify the result is equivalent to drop_indexes + set_xindex expected = ds.drop_indexes("x").set_xindex("x", CustomIndex) assert_identical(result_custom, expected) def test_set_xindex_options(self) -> None: ds = Dataset(coords={"foo": ("x", ["a", "a", "b", "b"])}) class IndexWithOptions(Index): def __init__(self, opt): self.opt = opt @classmethod def from_variables(cls, variables, options): return cls(options["opt"]) indexed = ds.set_xindex("foo", IndexWithOptions, opt=1) assert indexed.xindexes["foo"].opt == 1 # type: ignore[attr-defined] def test_stack(self) -> None: ds = Dataset( data_vars={"b": (("x", "y"), [[0, 1], [2, 3]])}, coords={"x": ("x", [0, 1]), "y": ["a", "b"]}, ) midx_expected = pd.MultiIndex.from_product( [[0, 1], ["a", "b"]], names=["x", "y"] ) midx_coords_expected = Coordinates.from_pandas_multiindex(midx_expected, "z") expected = Dataset( data_vars={"b": ("z", [0, 1, 2, 3])}, coords=midx_coords_expected ) # check attrs propagated ds["x"].attrs["foo"] = "bar" expected["x"].attrs["foo"] = "bar" actual = ds.stack(z=["x", "y"]) assert_identical(expected, actual) assert list(actual.xindexes) == ["z", "x", "y"] actual = ds.stack(z=[...]) assert_identical(expected, actual) # non list dims with ellipsis actual = ds.stack(z=(...,)) assert_identical(expected, actual) # ellipsis with given dim actual = ds.stack(z=[..., "y"]) assert_identical(expected, actual) midx_expected = pd.MultiIndex.from_product( [["a", "b"], [0, 1]], names=["y", "x"] ) midx_coords_expected = Coordinates.from_pandas_multiindex(midx_expected, "z") expected = Dataset( data_vars={"b": ("z", [0, 2, 1, 3])}, coords=midx_coords_expected ) expected["x"].attrs["foo"] = "bar" actual = ds.stack(z=["y", "x"]) assert_identical(expected, actual) assert list(actual.xindexes) == ["z", "y", "x"] @pytest.mark.parametrize( "create_index,expected_keys", [ (True, ["z", "x", "y"]), (False, []), (None, ["z", "x", "y"]), ], ) def test_stack_create_index(self, create_index, expected_keys) -> None: ds = Dataset( data_vars={"b": (("x", "y"), [[0, 1], [2, 3]])}, coords={"x": ("x", [0, 1]), "y": ["a", "b"]}, ) actual = ds.stack(z=["x", "y"], create_index=create_index) assert list(actual.xindexes) == expected_keys # TODO: benbovy (flexible indexes) - test error multiple indexes found # along dimension + create_index=True def test_stack_multi_index(self) -> None: # multi-index on a dimension to stack is discarded too midx = pd.MultiIndex.from_product([["a", "b"], [0, 1]], names=("lvl1", "lvl2")) coords = Coordinates.from_pandas_multiindex(midx, "x") coords["y"] = [0, 1] ds = xr.Dataset( data_vars={"b": (("x", "y"), [[0, 1], [2, 3], [4, 5], [6, 7]])}, coords=coords, ) expected = Dataset( data_vars={"b": ("z", [0, 1, 2, 3, 4, 5, 6, 7])}, coords={ "x": 
("z", np.repeat(midx.values, 2)), "lvl1": ("z", np.repeat(midx.get_level_values("lvl1"), 2)), "lvl2": ("z", np.repeat(midx.get_level_values("lvl2"), 2)), "y": ("z", [0, 1, 0, 1] * 2), }, ) actual = ds.stack(z=["x", "y"], create_index=False) assert_identical(expected, actual) assert len(actual.xindexes) == 0 with pytest.raises(ValueError, match=r"cannot create.*wraps a multi-index"): ds.stack(z=["x", "y"], create_index=True) def test_stack_non_dim_coords(self) -> None: ds = Dataset( data_vars={"b": (("x", "y"), [[0, 1], [2, 3]])}, coords={"x": ("x", [0, 1]), "y": ["a", "b"]}, ).rename_vars(x="xx") exp_index = pd.MultiIndex.from_product([[0, 1], ["a", "b"]], names=["xx", "y"]) exp_coords = Coordinates.from_pandas_multiindex(exp_index, "z") expected = Dataset(data_vars={"b": ("z", [0, 1, 2, 3])}, coords=exp_coords) actual = ds.stack(z=["x", "y"]) assert_identical(expected, actual) assert list(actual.xindexes) == ["z", "xx", "y"] def test_unstack(self) -> None: index = pd.MultiIndex.from_product([[0, 1], ["a", "b"]], names=["x", "y"]) coords = Coordinates.from_pandas_multiindex(index, "z") ds = Dataset(data_vars={"b": ("z", [0, 1, 2, 3])}, coords=coords) expected = Dataset( {"b": (("x", "y"), [[0, 1], [2, 3]]), "x": [0, 1], "y": ["a", "b"]} ) # check attrs propagated ds["x"].attrs["foo"] = "bar" expected["x"].attrs["foo"] = "bar" for dim in ["z", ["z"], None]: actual = ds.unstack(dim) assert_identical(actual, expected) def test_unstack_errors(self) -> None: ds = Dataset({"x": [1, 2, 3]}) with pytest.raises( ValueError, match=re.escape("Dimensions ('foo',) not found in data dimensions ('x',)"), ): ds.unstack("foo") with pytest.raises(ValueError, match=r".*do not have exactly one multi-index"): ds.unstack("x") ds = Dataset({"da": [1, 2]}, coords={"y": ("x", [1, 1]), "z": ("x", [0, 0])}) ds = ds.set_index(x=("y", "z")) with pytest.raises( ValueError, match="Cannot unstack MultiIndex containing duplicates" ): ds.unstack("x") def test_unstack_fill_value(self) -> None: ds = xr.Dataset( {"var": (("x",), np.arange(6)), "other_var": (("x",), np.arange(3, 9))}, coords={"x": [0, 1, 2] * 2, "y": (("x",), ["a"] * 3 + ["b"] * 3)}, ) # make ds incomplete ds = ds.isel(x=[0, 2, 3, 4]).set_index(index=["x", "y"]) # test fill_value actual1 = ds.unstack("index", fill_value=-1) expected1 = ds.unstack("index").fillna(-1).astype(int) assert actual1["var"].dtype == int assert_equal(actual1, expected1) actual2 = ds["var"].unstack("index", fill_value=-1) expected2 = ds["var"].unstack("index").fillna(-1).astype(int) assert_equal(actual2, expected2) actual3 = ds.unstack("index", fill_value={"var": -1, "other_var": 1}) expected3 = ds.unstack("index").fillna({"var": -1, "other_var": 1}).astype(int) assert_equal(actual3, expected3) actual4 = ds.unstack("index", fill_value={"var": -1}) expected4 = ds.unstack("index").fillna({"var": -1, "other_var": np.nan}) assert_equal(actual4, expected4) @requires_sparse def test_unstack_sparse(self) -> None: ds = xr.Dataset( {"var": (("x",), np.arange(6))}, coords={"x": [0, 1, 2] * 2, "y": (("x",), ["a"] * 3 + ["b"] * 3)}, ) # make ds incomplete ds = ds.isel(x=[0, 2, 3, 4]).set_index(index=["x", "y"]) # test fill_value actual1 = ds.unstack("index", sparse=True) expected1 = ds.unstack("index") assert isinstance(actual1["var"].data, sparse_array_type) assert actual1["var"].variable._to_dense().equals(expected1["var"].variable) assert actual1["var"].data.density < 1.0 actual2 = ds["var"].unstack("index", sparse=True) expected2 = ds["var"].unstack("index") assert isinstance(actual2.data, 
sparse_array_type) assert actual2.variable._to_dense().equals(expected2.variable) assert actual2.data.density < 1.0 midx = pd.MultiIndex.from_arrays([np.arange(3), np.arange(3)], names=["a", "b"]) coords = Coordinates.from_pandas_multiindex(midx, "z") coords["foo"] = np.arange(4) coords["bar"] = np.arange(5) ds_eye = Dataset( {"var": (("z", "foo", "bar"), np.ones((3, 4, 5)))}, coords=coords ) actual3 = ds_eye.unstack(sparse=True, fill_value=0) assert isinstance(actual3["var"].data, sparse_array_type) expected3 = xr.Dataset( { "var": ( ("foo", "bar", "a", "b"), np.broadcast_to(np.eye(3, 3), (4, 5, 3, 3)), ) }, coords={ "foo": np.arange(4), "bar": np.arange(5), "a": np.arange(3), "b": np.arange(3), }, ) actual3["var"].data = actual3["var"].data.todense() assert_equal(expected3, actual3) def test_stack_unstack_fast(self) -> None: ds = Dataset( { "a": ("x", [0, 1]), "b": (("x", "y"), [[0, 1], [2, 3]]), "x": [0, 1], "y": ["a", "b"], } ) actual = ds.stack(z=["x", "y"]).unstack("z") assert actual.broadcast_equals(ds) actual = ds[["b"]].stack(z=["x", "y"]).unstack("z") assert actual.identical(ds[["b"]]) def test_stack_unstack_slow(self) -> None: ds = Dataset( data_vars={ "a": ("x", [0, 1]), "b": (("x", "y"), [[0, 1], [2, 3]]), }, coords={"x": [0, 1], "y": ["a", "b"]}, ) stacked = ds.stack(z=["x", "y"]) actual = stacked.isel(z=slice(None, None, -1)).unstack("z") assert actual.broadcast_equals(ds) stacked = ds[["b"]].stack(z=["x", "y"]) actual = stacked.isel(z=slice(None, None, -1)).unstack("z") assert actual.identical(ds[["b"]]) def test_to_stacked_array_invalid_sample_dims(self) -> None: data = xr.Dataset( data_vars={"a": (("x", "y"), [[0, 1, 2], [3, 4, 5]]), "b": ("x", [6, 7])}, coords={"y": ["u", "v", "w"]}, ) with pytest.raises( ValueError, match=r"Variables in the dataset must contain all ``sample_dims`` \(\['y'\]\) but 'b' misses \['y'\]", ): data.to_stacked_array("features", sample_dims=["y"]) def test_to_stacked_array_name(self) -> None: name = "adf9d" # make a two dimensional dataset a, b = create_test_stacked_array() D = xr.Dataset({"a": a, "b": b}) sample_dims = ["x"] y = D.to_stacked_array("features", sample_dims, name=name) assert y.name == name def test_to_stacked_array_dtype_dims(self) -> None: # make a two dimensional dataset a, b = create_test_stacked_array() D = xr.Dataset({"a": a, "b": b}) sample_dims = ["x"] y = D.to_stacked_array("features", sample_dims) mindex = y.xindexes["features"].to_pandas_index() assert isinstance(mindex, pd.MultiIndex) assert mindex.levels[1].dtype == D.y.dtype assert y.dims == ("x", "features") def test_to_stacked_array_to_unstacked_dataset(self) -> None: # single dimension: regression test for GH4049 arr = xr.DataArray(np.arange(3), coords=[("x", [0, 1, 2])]) data = xr.Dataset({"a": arr, "b": arr}) stacked = data.to_stacked_array("y", sample_dims=["x"]) unstacked = stacked.to_unstacked_dataset("y") assert_identical(unstacked, data) # make a two dimensional dataset a, b = create_test_stacked_array() D = xr.Dataset({"a": a, "b": b}) sample_dims = ["x"] y = D.to_stacked_array("features", sample_dims).transpose("x", "features") x = y.to_unstacked_dataset("features") assert_identical(D, x) # test on just one sample x0 = y[0].to_unstacked_dataset("features") d0 = D.isel(x=0) assert_identical(d0, x0) def test_to_stacked_array_to_unstacked_dataset_different_dimension(self) -> None: # test when variables have different dimensionality a, b = create_test_stacked_array() sample_dims = ["x"] D = xr.Dataset({"a": a, "b": b.isel(y=0)}) y = 
D.to_stacked_array("features", sample_dims) x = y.to_unstacked_dataset("features") assert_identical(D, x) def test_to_stacked_array_preserves_dtype(self) -> None: # regression test for bug found in https://github.com/pydata/xarray/pull/8872#issuecomment-2081218616 ds = xr.Dataset( data_vars={ "a": (("x", "y"), [[0, 1, 2], [3, 4, 5]]), "b": ("x", [6, 7]), }, coords={"y": ["u", "v", "w"]}, ) stacked = ds.to_stacked_array("z", sample_dims=["x"]) # coordinate created from variables names should be of string dtype data = np.array(["a", "a", "a", "b"], dtype=" None: # test that to_stacked_array uses updated dim order after transposition ds = xr.Dataset( data_vars=dict( v1=(["d1", "d2"], np.arange(6).reshape((2, 3))), ), coords=dict( d1=(["d1"], np.arange(2)), d2=(["d2"], np.arange(3)), ), ) da = ds.to_stacked_array( new_dim="new_dim", sample_dims=[], variable_dim="variable", ) dsT = ds.transpose() daT = dsT.to_stacked_array( new_dim="new_dim", sample_dims=[], variable_dim="variable", ) v1 = np.arange(6) v1T = np.arange(6).reshape((2, 3)).T.flatten() np.testing.assert_equal(da.to_numpy(), v1) np.testing.assert_equal(daT.to_numpy(), v1T) def test_update(self) -> None: data = create_test_data(seed=0) expected = data.copy() var2 = Variable("dim1", np.arange(8)) actual = data actual.update({"var2": var2}) expected["var2"] = var2 assert_identical(expected, actual) actual = data.copy() actual.update(data) assert_identical(expected, actual) other = Dataset(attrs={"new": "attr"}) actual = data.copy() actual.update(other) assert_identical(expected, actual) def test_update_overwrite_coords(self) -> None: data = Dataset({"a": ("x", [1, 2])}, {"b": 3}) data.update(Dataset(coords={"b": 4})) expected = Dataset({"a": ("x", [1, 2])}, {"b": 4}) assert_identical(data, expected) data = Dataset({"a": ("x", [1, 2])}, {"b": 3}) data.update(Dataset({"c": 5}, coords={"b": 4})) expected = Dataset({"a": ("x", [1, 2]), "c": 5}, {"b": 4}) assert_identical(data, expected) data = Dataset({"a": ("x", [1, 2])}, {"b": 3}) data.update({"c": DataArray(5, coords={"b": 4})}) expected = Dataset({"a": ("x", [1, 2]), "c": 5}, {"b": 3}) assert_identical(data, expected) def test_update_multiindex_level(self) -> None: data = create_test_multiindex() with pytest.raises( ValueError, match=r"cannot set or update variable.*corrupt.*index " ): data.update({"level_1": range(4)}) def test_update_auto_align(self) -> None: ds = Dataset({"x": ("t", [3, 4])}, {"t": [0, 1]}) expected1 = Dataset( {"x": ("t", [3, 4]), "y": ("t", [np.nan, 5])}, {"t": [0, 1]} ) actual1 = ds.copy() other1 = {"y": ("t", [5]), "t": [1]} with pytest.raises(ValueError, match=r"conflicting sizes"): actual1.update(other1) actual1.update(Dataset(other1)) assert_identical(expected1, actual1) actual2 = ds.copy() other2 = Dataset({"y": ("t", [5]), "t": [100]}) actual2.update(other2) expected2 = Dataset( {"x": ("t", [3, 4]), "y": ("t", [np.nan] * 2)}, {"t": [0, 1]} ) assert_identical(expected2, actual2) def test_getitem(self) -> None: data = create_test_data() assert isinstance(data["var1"], DataArray) assert_equal(data["var1"].variable, data.variables["var1"]) with pytest.raises(KeyError): data["notfound"] with pytest.raises(KeyError): data[["var1", "notfound"]] with pytest.raises( KeyError, match=r"Hint: use a list to select multiple variables, for example `ds\[\['var1', 'var2'\]\]`", ): data["var1", "var2"] actual1 = data[["var1", "var2"]] expected1 = Dataset({"var1": data["var1"], "var2": data["var2"]}) assert_equal(expected1, actual1) actual2 = data["numbers"] expected2 = 
DataArray( data["numbers"].variable, {"dim3": data["dim3"], "numbers": data["numbers"]}, dims="dim3", name="numbers", ) assert_identical(expected2, actual2) actual3 = data[dict(dim1=0)] expected3 = data.isel(dim1=0) assert_identical(expected3, actual3) def test_getitem_hashable(self) -> None: data = create_test_data() data[(3, 4)] = data["var1"] + 1 expected = data["var1"] + 1 expected.name = (3, 4) assert_identical(expected, data[(3, 4)]) with pytest.raises(KeyError, match=r"('var1', 'var2')"): data[("var1", "var2")] def test_getitem_multiple_dtype(self) -> None: keys = ["foo", 1] dataset = Dataset({key: ("dim0", range(1)) for key in keys}) assert_identical(dataset, dataset[keys]) def test_getitem_extra_dim_index_coord(self) -> None: class AnyIndex(Index): def should_add_coord_to_array(self, name, var, dims): return True idx = AnyIndex() coords = Coordinates( coords={ "x": ("x", [1, 2]), "x_bounds": (("x", "x_bnds"), [(0.5, 1.5), (1.5, 2.5)]), }, indexes={"x": idx, "x_bounds": idx}, ) ds = Dataset({"foo": (("x"), [1.0, 2.0])}, coords=coords) actual = ds["foo"] assert_identical(actual.coords, coords, check_default_indexes=False) assert "x_bnds" not in actual.dims def test_virtual_variables_default_coords(self) -> None: dataset = Dataset({"foo": ("x", range(10))}) expected1 = DataArray(range(10), dims="x", name="x") actual1 = dataset["x"] assert_identical(expected1, actual1) assert isinstance(actual1.variable, IndexVariable) actual2 = dataset[["x", "foo"]] expected2 = dataset.assign_coords(x=range(10)) assert_identical(expected2, actual2) def test_virtual_variables_time(self) -> None: # access virtual variables data = create_test_data() index = data.variables["time"].to_index() assert isinstance(index, pd.DatetimeIndex) assert_array_equal(data["time.month"].values, index.month) assert_array_equal(data["time.season"].values, "DJF") # test virtual variable math assert_array_equal(data["time.dayofyear"] + 1, 2 + np.arange(20)) assert_array_equal(np.sin(data["time.dayofyear"]), np.sin(1 + np.arange(20))) # ensure they become coordinates expected = Dataset({}, {"dayofyear": data["time.dayofyear"]}) actual = data[["time.dayofyear"]] assert_equal(expected, actual) # non-coordinate variables ds = Dataset({"t": ("x", pd.date_range("2000-01-01", periods=3))}) assert (ds["t.year"] == 2000).all() def test_virtual_variable_same_name(self) -> None: # regression test for GH367 times = pd.date_range("2000-01-01", freq="h", periods=5) data = Dataset({"time": times}) actual = data["time.time"] expected = DataArray(times.time, [("time", times)], name="time") assert_identical(actual, expected) def test_time_season(self) -> None: time = xr.date_range("2000-01-01", periods=12, freq="ME", use_cftime=False) ds = Dataset({"t": time}) seas = ["DJF"] * 2 + ["MAM"] * 3 + ["JJA"] * 3 + ["SON"] * 3 + ["DJF"] assert_array_equal(seas, ds["t.season"]) def test_slice_virtual_variable(self) -> None: data = create_test_data() assert_equal( data["time.dayofyear"][:10].variable, Variable(["time"], 1 + np.arange(10)) ) assert_equal(data["time.dayofyear"][0].variable, Variable([], 1)) def test_setitem(self) -> None: # assign a variable var = Variable(["dim1"], np.random.randn(8)) data1 = create_test_data() data1["A"] = var data2 = data1.copy() data2["A"] = var assert_identical(data1, data2) # assign a dataset array dv = 2 * data2["A"] data1["B"] = dv.variable data2["B"] = dv assert_identical(data1, data2) # can't assign an ND array without dimensions with pytest.raises(ValueError, match=r"without explicit dimension names"): 
data2["C"] = var.values.reshape(2, 4) # but can assign a 1D array data1["C"] = var.values data2["C"] = ("C", var.values) assert_identical(data1, data2) # can assign a scalar data1["scalar"] = 0 data2["scalar"] = ([], 0) assert_identical(data1, data2) # can't use the same dimension name as a scalar var with pytest.raises(ValueError, match=r"already exists as a scalar"): data1["newvar"] = ("scalar", [3, 4, 5]) # can't resize a used dimension with pytest.raises(ValueError, match=r"conflicting dimension sizes"): data1["dim1"] = data1["dim1"][:5] # override an existing value data1["A"] = 3 * data2["A"] assert_equal(data1["A"], 3 * data2["A"]) # can't assign a dataset to a single key with pytest.raises(TypeError, match="Cannot assign a Dataset to a single key"): data1["D"] = xr.Dataset() # test assignment with positional and label-based indexing data3 = data1[["var1", "var2"]] data3["var3"] = data3.var1.isel(dim1=0) data4 = data3.copy() err_msg = ( "can only set locations defined by dictionaries from Dataset.loc. Got: a" ) with pytest.raises(TypeError, match=err_msg): data1.loc["a"] = 0 err_msg = r"Variables \['A', 'B', 'scalar'\] in new values not available in original dataset:" with pytest.raises(ValueError, match=err_msg): data4[{"dim2": 1}] = data1[{"dim2": 2}] err_msg = "Variable 'var3': indexer {'dim2': 0} not available" with pytest.raises(ValueError, match=err_msg): data1[{"dim2": 0}] = 0.0 err_msg = "Variable 'var1': indexer {'dim2': 10} not available" with pytest.raises(ValueError, match=err_msg): data4[{"dim2": 10}] = data3[{"dim2": 2}] err_msg = "Variable 'var1': dimension 'dim2' appears in new values" with pytest.raises(KeyError, match=err_msg): data4[{"dim2": 2}] = data3[{"dim2": [2]}] err_msg = ( "Variable 'var2': dimension order differs between original and new data" ) data3["var2"] = data3["var2"].T with pytest.raises(ValueError, match=err_msg): data4[{"dim2": [2, 3]}] = data3[{"dim2": [2, 3]}] data3["var2"] = data3["var2"].T err_msg = r"cannot align objects.*not equal along these coordinates.*" with pytest.raises(ValueError, match=err_msg): data4[{"dim2": [2, 3]}] = data3[{"dim2": [2, 3, 4]}] err_msg = "Dataset assignment only accepts DataArrays, Datasets, and scalars." 
with pytest.raises(TypeError, match=err_msg): data4[{"dim2": [2, 3]}] = data3["var1"][{"dim2": [3, 4]}].values data5 = data4.astype(str) data5["var4"] = data4["var1"] # convert to `np.str_('a')` once `numpy<2.0` has been dropped err_msg = "could not convert string to float: .*'a'.*" with pytest.raises(ValueError, match=err_msg): data5[{"dim2": 1}] = "a" data4[{"dim2": 0}] = 0.0 data4[{"dim2": 1}] = data3[{"dim2": 2}] data4.loc[{"dim2": 1.5}] = 1.0 data4.loc[{"dim2": 2.0}] = data3.loc[{"dim2": 2.5}] for v, dat3 in data3.items(): dat4 = data4[v] assert_array_equal(dat4[{"dim2": 0}], 0.0) assert_array_equal(dat4[{"dim2": 1}], dat3[{"dim2": 2}]) assert_array_equal(dat4.loc[{"dim2": 1.5}], 1.0) assert_array_equal(dat4.loc[{"dim2": 2.0}], dat3.loc[{"dim2": 2.5}]) unchanged = [1.0, 2.5, 3.0, 3.5, 4.0] assert_identical( dat4.loc[{"dim2": unchanged}], dat3.loc[{"dim2": unchanged}] ) def test_setitem_pandas(self) -> None: ds = self.make_example_math_dataset() ds["x"] = np.arange(3) ds_copy = ds.copy() ds_copy["bar"] = ds["bar"].to_pandas() assert_equal(ds, ds_copy) def test_setitem_auto_align(self) -> None: ds = Dataset() ds["x"] = ("y", range(3)) ds["y"] = 1 + np.arange(3) expected = Dataset({"x": ("y", range(3)), "y": 1 + np.arange(3)}) assert_identical(ds, expected) ds["y"] = DataArray(range(3), dims="y") expected = Dataset({"x": ("y", range(3))}, {"y": range(3)}) assert_identical(ds, expected) ds["x"] = DataArray([1, 2], coords=[("y", [0, 1])]) expected = Dataset({"x": ("y", [1, 2, np.nan])}, {"y": range(3)}) assert_identical(ds, expected) ds["x"] = 42 expected = Dataset({"x": 42, "y": range(3)}) assert_identical(ds, expected) ds["x"] = DataArray([4, 5, 6, 7], coords=[("y", [0, 1, 2, 3])]) expected = Dataset({"x": ("y", [4, 5, 6])}, {"y": range(3)}) assert_identical(ds, expected) def test_setitem_dimension_override(self) -> None: # regression test for GH-3377 ds = xr.Dataset({"x": [0, 1, 2]}) ds["x"] = ds["x"][:2] expected = Dataset({"x": [0, 1]}) assert_identical(ds, expected) ds = xr.Dataset({"x": [0, 1, 2]}) ds["x"] = np.array([0, 1]) assert_identical(ds, expected) ds = xr.Dataset({"x": [0, 1, 2]}) ds.coords["x"] = [0, 1] assert_identical(ds, expected) def test_setitem_with_coords(self) -> None: # Regression test for GH:2068 ds = create_test_data() other = DataArray( np.arange(10), dims="dim3", coords={"numbers": ("dim3", np.arange(10))} ) expected = ds.copy() expected["var3"] = other.drop_vars("numbers") actual = ds.copy() actual["var3"] = other assert_identical(expected, actual) assert "numbers" in other.coords # should not change other # with alignment other = ds["var3"].isel(dim3=slice(1, -1)) other["numbers"] = ("dim3", np.arange(8)) actual = ds.copy() actual["var3"] = other assert "numbers" in other.coords # should not change other expected = ds.copy() expected["var3"] = ds["var3"].isel(dim3=slice(1, -1)) assert_identical(expected, actual) # with non-duplicate coords other = ds["var3"].isel(dim3=slice(1, -1)) other["numbers"] = ("dim3", np.arange(8)) other["position"] = ("dim3", np.arange(8)) actual = ds.copy() actual["var3"] = other assert "position" in actual assert "position" in other.coords # assigning a coordinate-only dataarray actual = ds.copy() other = actual["numbers"] other[0] = 10 actual["numbers"] = other assert actual["numbers"][0] == 10 # GH: 2099 ds = Dataset( {"var": ("x", [1, 2, 3])}, coords={"x": [0, 1, 2], "z1": ("x", [1, 2, 3]), "z2": ("x", [1, 2, 3])}, ) ds["var"] = ds["var"] * 2 assert np.allclose(ds["var"], [2, 4, 6]) def test_setitem_align_new_indexes(self) -> 
None: ds = Dataset({"foo": ("x", [1, 2, 3])}, {"x": [0, 1, 2]}) ds["bar"] = DataArray([2, 3, 4], [("x", [1, 2, 3])]) expected = Dataset( {"foo": ("x", [1, 2, 3]), "bar": ("x", [np.nan, 2, 3])}, {"x": [0, 1, 2]} ) assert_identical(ds, expected) def test_setitem_vectorized(self) -> None: # Regression test for GH:7030 # Positional indexing da = xr.DataArray(np.r_[:120].reshape(2, 3, 4, 5), dims=["a", "b", "c", "d"]) ds = xr.Dataset({"da": da}) b = xr.DataArray([[0, 0], [1, 0]], dims=["u", "v"]) c = xr.DataArray([[0, 1], [2, 3]], dims=["u", "v"]) w = xr.DataArray([-1, -2], dims=["u"]) index = dict(b=b, c=c) ds[index] = xr.Dataset({"da": w}) assert (ds[index]["da"] == w).all() # Indexing with coordinates da = xr.DataArray(np.r_[:120].reshape(2, 3, 4, 5), dims=["a", "b", "c", "d"]) ds = xr.Dataset({"da": da}) ds.coords["b"] = [2, 4, 6] b = xr.DataArray([[2, 2], [4, 2]], dims=["u", "v"]) c = xr.DataArray([[0, 1], [2, 3]], dims=["u", "v"]) w = xr.DataArray([-1, -2], dims=["u"]) index = dict(b=b, c=c) ds.loc[index] = xr.Dataset({"da": w}, coords={"b": ds.coords["b"]}) assert (ds.loc[index]["da"] == w).all() @pytest.mark.parametrize("dtype", [str, bytes]) def test_setitem_str_dtype(self, dtype) -> None: ds = xr.Dataset(coords={"x": np.array(["x", "y"], dtype=dtype)}) # test Dataset update ds["foo"] = xr.DataArray(np.array([0, 0]), dims=["x"]) assert np.issubdtype(ds.x.dtype, dtype) def test_setitem_using_list(self) -> None: # assign a list of variables var1 = Variable(["dim1"], np.random.randn(8)) var2 = Variable(["dim1"], np.random.randn(8)) actual = create_test_data() expected = actual.copy() expected["A"] = var1 expected["B"] = var2 actual[["A", "B"]] = [var1, var2] assert_identical(actual, expected) # assign a list of dataset arrays dv = 2 * expected[["A", "B"]] actual[["C", "D"]] = [d.variable for d in dv.data_vars.values()] expected[["C", "D"]] = dv assert_identical(actual, expected) @pytest.mark.parametrize( "var_list, data, error_regex", [ ( ["A", "B"], [Variable(["dim1"], np.random.randn(8))], r"Different lengths", ), ([], [Variable(["dim1"], np.random.randn(8))], r"Empty list of variables"), (["A", "B"], xr.DataArray([1, 2]), r"assign single DataArray"), ], ) def test_setitem_using_list_errors(self, var_list, data, error_regex) -> None: actual = create_test_data() with pytest.raises(ValueError, match=error_regex): actual[var_list] = data def test_setitem_uses_base_variable_class_even_for_index_variables(self) -> None: ds = Dataset(coords={"x": [1, 2, 3]}) ds["y"] = ds["x"] # explicit check assert isinstance(ds["x"].variable, IndexVariable) assert not isinstance(ds["y"].variable, IndexVariable) # test internal invariant checks when comparing the datasets expected = Dataset(data_vars={"y": ("x", [1, 2, 3])}, coords={"x": [1, 2, 3]}) assert_identical(ds, expected) def test_assign(self) -> None: ds = Dataset() actual = ds.assign(x=[0, 1, 2], y=2) expected = Dataset({"x": [0, 1, 2], "y": 2}) assert_identical(actual, expected) assert list(actual.variables) == ["x", "y"] assert_identical(ds, Dataset()) actual = actual.assign(y=lambda ds: ds.x**2) expected = Dataset({"y": ("x", [0, 1, 4]), "x": [0, 1, 2]}) assert_identical(actual, expected) actual = actual.assign_coords(z=2) expected = Dataset({"y": ("x", [0, 1, 4])}, {"z": 2, "x": [0, 1, 2]}) assert_identical(actual, expected) def test_assign_coords(self) -> None: ds = Dataset() actual = ds.assign(x=[0, 1, 2], y=2) actual = actual.assign_coords(x=list("abc")) expected = Dataset({"x": list("abc"), "y": 2}) assert_identical(actual, expected) actual 
= ds.assign(x=[0, 1, 2], y=[2, 3]) actual = actual.assign_coords({"y": [2.0, 3.0]}) expected = ds.assign(x=[0, 1, 2], y=[2.0, 3.0]) assert_identical(actual, expected) def test_assign_attrs(self) -> None: expected = Dataset(attrs=dict(a=1, b=2)) new = Dataset() actual = new.assign_attrs(a=1, b=2) assert_identical(actual, expected) assert new.attrs == {} expected.attrs["c"] = 3 new_actual = actual.assign_attrs({"c": 3}) assert_identical(new_actual, expected) assert actual.attrs == dict(a=1, b=2) def test_drop_attrs(self) -> None: # Simple example ds = Dataset().assign_attrs(a=1, b=2) original = ds.copy() expected = Dataset() result = ds.drop_attrs() assert_identical(result, expected) # Doesn't change original assert_identical(ds, original) # Example with variables and coords with attrs, and a multiindex. (arguably # should have used a canonical dataset with all the features we're should # support...) var = Variable("x", [1, 2, 3], attrs=dict(x=1, y=2)) idx = IndexVariable("y", [1, 2, 3], attrs=dict(c=1, d=2)) mx = xr.Coordinates.from_pandas_multiindex( pd.MultiIndex.from_tuples([(1, 2), (3, 4)], names=["d", "e"]), "z" ) ds = Dataset(dict(var1=var), coords=dict(y=idx, z=mx)).assign_attrs(a=1, b=2) assert ds.attrs != {} assert ds["var1"].attrs != {} assert ds["y"].attrs != {} assert ds.coords["y"].attrs != {} original = ds.copy(deep=True) result = ds.drop_attrs() assert result.attrs == {} assert result["var1"].attrs == {} assert result["y"].attrs == {} assert list(result.data_vars) == list(ds.data_vars) assert list(result.coords) == list(ds.coords) # Doesn't change original assert_identical(ds, original) # Specifically test that the attrs on the coords are still there. (The index # can't currently contain `attrs`, so we can't test those.) assert ds.coords["y"].attrs != {} # Test for deep=False result_shallow = ds.drop_attrs(deep=False) assert result_shallow.attrs == {} assert result_shallow["var1"].attrs != {} assert result_shallow["y"].attrs != {} assert list(result.data_vars) == list(ds.data_vars) assert list(result.coords) == list(ds.coords) def test_drop_attrs_custom_index(self): class CustomIndex(Index): @classmethod def from_variables(cls, variables, *, options=None): return cls() ds = xr.Dataset(coords={"y": ("x", [1, 2])}).set_xindex("y", CustomIndex) # should not raise a TypeError ds.drop_attrs() # make sure the index didn't disappear assert "y" in ds.xindexes def test_assign_multiindex_level(self) -> None: data = create_test_multiindex() with pytest.raises(ValueError, match=r"cannot drop or update.*corrupt.*index "): data.assign(level_1=range(4)) data.assign_coords(level_1=range(4)) def test_assign_new_multiindex(self) -> None: midx = pd.MultiIndex.from_arrays([["a", "a", "b", "b"], [0, 1, 0, 1]]) midx_coords = Coordinates.from_pandas_multiindex(midx, "x") ds = Dataset(coords={"x": [1, 2]}) expected = Dataset(coords=midx_coords) with pytest.warns( FutureWarning, match=r".*`pandas.MultiIndex`.*no longer be implicitly promoted.*", ): actual = ds.assign(x=midx) assert_identical(actual, expected) @pytest.mark.parametrize("orig_coords", [{}, {"x": range(4)}]) def test_assign_coords_new_multiindex(self, orig_coords) -> None: ds = Dataset(coords=orig_coords) midx = pd.MultiIndex.from_arrays( [["a", "a", "b", "b"], [0, 1, 0, 1]], names=("one", "two") ) midx_coords = Coordinates.from_pandas_multiindex(midx, "x") expected = Dataset(coords=midx_coords) with pytest.warns( FutureWarning, match=r".*`pandas.MultiIndex`.*no longer be implicitly promoted.*", ): actual = ds.assign_coords({"x": midx}) 
assert_identical(actual, expected) actual = ds.assign_coords(midx_coords) assert_identical(actual, expected) def test_assign_coords_existing_multiindex(self) -> None: data = create_test_multiindex() with pytest.warns( FutureWarning, match=r"updating coordinate.*MultiIndex.*inconsistent" ): updated = data.assign_coords(x=range(4)) # https://github.com/pydata/xarray/issues/7097 (coord names updated) assert len(updated.coords) == 1 with pytest.warns( FutureWarning, match=r"updating coordinate.*MultiIndex.*inconsistent" ): updated = data.assign(x=range(4)) # https://github.com/pydata/xarray/issues/7097 (coord names updated) assert len(updated.coords) == 1 def test_assign_all_multiindex_coords(self) -> None: data = create_test_multiindex() actual = data.assign(x=range(4), level_1=range(4), level_2=range(4)) # no error but multi-index dropped in favor of single indexes for each level assert ( actual.xindexes["x"] is not actual.xindexes["level_1"] is not actual.xindexes["level_2"] ) def test_assign_coords_custom_index_side_effect(self) -> None: # test that assigning new coordinates do not reset other dimension coord indexes # to default (pandas) index (https://github.com/pydata/xarray/issues/7346) class CustomIndex(PandasIndex): pass ds = ( Dataset(coords={"x": [1, 2, 3]}) .drop_indexes("x") .set_xindex("x", CustomIndex) ) actual = ds.assign_coords(y=[4, 5, 6]) assert isinstance(actual.xindexes["x"], CustomIndex) def test_assign_coords_custom_index(self) -> None: class CustomIndex(Index): pass coords = Coordinates( coords={"x": ("x", [1, 2, 3])}, indexes={"x": CustomIndex()} ) ds = Dataset() actual = ds.assign_coords(coords) assert isinstance(actual.xindexes["x"], CustomIndex) def test_assign_coords_no_default_index(self) -> None: coords = Coordinates({"y": [1, 2, 3]}, indexes={}) ds = Dataset() actual = ds.assign_coords(coords) expected = coords.to_dataset() assert_identical(expected, actual, check_default_indexes=False) assert "y" not in actual.xindexes def test_merge_multiindex_level(self) -> None: data = create_test_multiindex() other = Dataset({"level_1": ("x", [0, 1])}) with pytest.raises(ValueError, match=r".*conflicting dimension sizes.*"): data.merge(other) other = Dataset({"level_1": ("x", range(4))}) with pytest.raises( ValueError, match=r"unable to determine.*coordinates or not.*" ): data.merge(other) # `other` Dataset coordinates are ignored (bug or feature?) 
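# (descriptive note, not in the original test: when the conflicting level
# values arrive as a *coordinate* rather than a data variable, the merge below
# silently returns `data` unchanged instead of raising)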
other = Dataset(coords={"level_1": ("x", range(4))}) assert_identical(data.merge(other), data) def test_setitem_original_non_unique_index(self) -> None: # regression test for GH943 original = Dataset({"data": ("x", np.arange(5))}, coords={"x": [0, 1, 2, 0, 1]}) expected = Dataset({"data": ("x", np.arange(5))}, {"x": range(5)}) actual = original.copy() actual["x"] = list(range(5)) assert_identical(actual, expected) actual = original.copy() actual["x"] = ("x", list(range(5))) assert_identical(actual, expected) actual = original.copy() actual.coords["x"] = list(range(5)) assert_identical(actual, expected) def test_setitem_both_non_unique_index(self) -> None: # regression test for GH956 names = ["joaquin", "manolo", "joaquin"] values = np.random.randint(0, 256, (3, 4, 4)) array = DataArray( values, dims=["name", "row", "column"], coords=[names, range(4), range(4)] ) expected = Dataset({"first": array, "second": array}) actual = array.rename("first").to_dataset() actual["second"] = array assert_identical(expected, actual) def test_setitem_multiindex_level(self) -> None: data = create_test_multiindex() with pytest.raises( ValueError, match=r"cannot set or update variable.*corrupt.*index " ): data["level_1"] = range(4) def test_delitem(self) -> None: data = create_test_data() all_items = set(data.variables) assert set(data.variables) == all_items del data["var1"] assert set(data.variables) == all_items - {"var1"} del data["numbers"] assert set(data.variables) == all_items - {"var1", "numbers"} assert "numbers" not in data.coords expected = Dataset() actual = Dataset({"y": ("x", [1, 2])}) del actual["y"] assert_identical(expected, actual) def test_delitem_multiindex_level(self) -> None: data = create_test_multiindex() with pytest.raises( ValueError, match=r"cannot remove coordinate.*corrupt.*index " ): del data["level_1"] def test_squeeze(self) -> None: data = Dataset({"foo": (["x", "y", "z"], [[[1], [2]]])}) test_args: list[list] = [[], [["x"]], [["x", "z"]]] for args in test_args: def get_args(args, v): return [set(args[0]) & set(v.dims)] if args else [] expected = Dataset( {k: v.squeeze(*get_args(args, v)) for k, v in data.variables.items()} ) expected = expected.set_coords(data.coords) assert_identical(expected, data.squeeze(*args)) # invalid squeeze with pytest.raises(ValueError, match=r"cannot select a dimension"): data.squeeze("y") def test_squeeze_drop(self) -> None: data = Dataset({"foo": ("x", [1])}, {"x": [0]}) expected = Dataset({"foo": 1}) selected = data.squeeze(drop=True) assert_identical(expected, selected) expected = Dataset({"foo": 1}, {"x": 0}) selected = data.squeeze(drop=False) assert_identical(expected, selected) data = Dataset({"foo": (("x", "y"), [[1]])}, {"x": [0], "y": [0]}) expected = Dataset({"foo": 1}) selected = data.squeeze(drop=True) assert_identical(expected, selected) expected = Dataset({"foo": ("x", [1])}, {"x": [0]}) selected = data.squeeze(dim="y", drop=True) assert_identical(expected, selected) data = Dataset({"foo": (("x",), [])}, {"x": []}) selected = data.squeeze(drop=True) assert_identical(data, selected) def test_to_dataarray(self) -> None: ds = Dataset( {"a": 1, "b": ("x", [1, 2, 3])}, coords={"c": 42}, attrs={"Conventions": "None"}, ) data = [[1, 1, 1], [1, 2, 3]] coords = {"c": 42, "variable": ["a", "b"]} dims = ("variable", "x") expected = DataArray(data, coords, dims, attrs=ds.attrs) actual = ds.to_dataarray() assert_identical(expected, actual) actual = ds.to_dataarray("abc", name="foo") expected = expected.rename({"variable": "abc"}).rename("foo") 
assert_identical(expected, actual) def test_to_and_from_dataframe(self) -> None: x = np.random.randn(10) y = np.random.randn(10) t = list("abcdefghij") cat = pd.Categorical(["a", "b"] * 5) ds = Dataset({"a": ("t", x), "b": ("t", y), "t": ("t", t), "cat": ("t", cat)}) expected = pd.DataFrame( np.array([x, y]).T, columns=["a", "b"], index=pd.Index(t, name="t") ) expected["cat"] = cat actual = ds.to_dataframe() # use the .equals method to check all DataFrame metadata assert expected.equals(actual), (expected, actual) # verify coords are included actual = ds.set_coords("b").to_dataframe() assert expected.equals(actual), (expected, actual) # check roundtrip assert_identical(ds, Dataset.from_dataframe(actual)) assert isinstance(ds["cat"].variable.data.dtype, pd.CategoricalDtype) # test a case with a MultiIndex w = np.random.randn(2, 3) cat = pd.Categorical(["a", "a", "c"]) ds = Dataset({"w": (("x", "y"), w), "cat": ("y", cat)}) ds["y"] = ("y", list("abc")) exp_index = pd.MultiIndex.from_arrays( [[0, 0, 0, 1, 1, 1], ["a", "b", "c", "a", "b", "c"]], names=["x", "y"] ) expected = pd.DataFrame( {"w": w.reshape(-1), "cat": pd.Categorical(["a", "a", "c", "a", "a", "c"])}, index=exp_index, ) actual = ds.to_dataframe() assert expected.equals(actual) # check roundtrip # from_dataframe attempts to broadcast across because it doesn't know better, so cat must be converted ds["cat"] = (("x", "y"), np.stack((ds["cat"].to_numpy(), ds["cat"].to_numpy()))) assert_identical(ds.assign_coords(x=[0, 1]), Dataset.from_dataframe(actual)) # Check multiindex reordering new_order = ["x", "y"] # revert broadcasting fix above for 1d arrays ds["cat"] = ("y", cat) actual = ds.to_dataframe(dim_order=new_order) assert expected.equals(actual) new_order = ["y", "x"] exp_index = pd.MultiIndex.from_arrays( [["a", "a", "b", "b", "c", "c"], [0, 1, 0, 1, 0, 1]], names=["y", "x"] ) expected = pd.DataFrame( { "w": w.transpose().reshape(-1), "cat": pd.Categorical(["a", "a", "a", "a", "c", "c"]), }, index=exp_index, ) actual = ds.to_dataframe(dim_order=new_order) assert expected.equals(actual) invalid_order = ["x"] with pytest.raises( ValueError, match="does not match the set of dimensions of this" ): ds.to_dataframe(dim_order=invalid_order) invalid_order = ["x", "z"] with pytest.raises( ValueError, match="does not match the set of dimensions of this" ): ds.to_dataframe(dim_order=invalid_order) # test a case with a MultiIndex along a single dimension data_dict = dict( x=[1, 2, 1, 2, 1], y=["a", "a", "b", "b", "b"], z=[5, 10, 15, 20, 25] ) data_dict_w_dims = {k: ("single_dim", v) for k, v in data_dict.items()} # Dataset multi-indexed along "single_dim" by "x" and "y" ds = Dataset(data_dict_w_dims).set_coords(["x", "y"]).set_xindex(["x", "y"]) expected = pd.DataFrame(data_dict).set_index(["x", "y"]) actual = ds.to_dataframe() assert expected.equals(actual) # should be possible to reset index, as there should be no duplication # between index and columns, and dataframes should still be equal assert expected.reset_index().equals(actual.reset_index()) # MultiIndex deduplication should not affect other coordinates. 
mindex_single = pd.MultiIndex.from_product( [list(range(6)), list("ab")], names=["A", "B"] ) ds = DataArray( range(12), [("MI", mindex_single)], dims="MI", name="test" )._to_dataset_whole() ds.coords["C"] = "a single value" ds.coords["D"] = ds.coords["A"] ** 2 expected = pd.DataFrame( dict( test=range(12), C="a single value", D=[0, 0, 1, 1, 4, 4, 9, 9, 16, 16, 25, 25], ) ).set_index(mindex_single) actual = ds.to_dataframe() assert expected.equals(actual) assert expected.reset_index().equals(actual.reset_index()) # check pathological cases df = pd.DataFrame([1]) actual_ds = Dataset.from_dataframe(df) expected_ds = Dataset({0: ("index", [1])}, {"index": [0]}) assert_identical(expected_ds, actual_ds) df = pd.DataFrame() actual_ds = Dataset.from_dataframe(df) expected_ds = Dataset(coords={"index": []}) assert_identical(expected_ds, actual_ds) # GH697 df = pd.DataFrame({"A": []}) actual_ds = Dataset.from_dataframe(df) expected_ds = Dataset({"A": DataArray([], dims=("index",))}, {"index": []}) assert_identical(expected_ds, actual_ds) # regression test for GH278 # use int64 to ensure consistent results for the pandas .equals method # on windows (which requires the same dtype) ds = Dataset({"x": pd.Index(["bar"]), "a": ("y", np.array([1], "int64"))}).isel( x=0 ) # use .loc to ensure consistent results on Python 3 actual = ds.to_dataframe().loc[:, ["a", "x"]] expected = pd.DataFrame( [[1, "bar"]], index=pd.Index([0], name="y"), columns=["a", "x"] ) assert expected.equals(actual), (expected, actual) ds = Dataset({"x": np.array([0], "int64"), "y": np.array([1], "int64")}) actual = ds.to_dataframe() idx = pd.MultiIndex.from_arrays([[0], [1]], names=["x", "y"]) expected = pd.DataFrame([[]], index=idx) assert expected.equals(actual), (expected, actual) def test_from_dataframe_categorical_dtype_index(self) -> None: cat = pd.CategoricalIndex(list("abcd")) df = pd.DataFrame({"f": [0, 1, 2, 3]}, index=cat) ds = df.to_xarray() restored = ds.to_dataframe() df.index.name = ( "index" # restored gets the name because it has the coord with the name ) pd.testing.assert_frame_equal(df, restored) def test_from_dataframe_categorical_index(self) -> None: cat = pd.CategoricalDtype( categories=["foo", "bar", "baz", "qux", "quux", "corge"] ) i1 = pd.Series(["foo", "bar", "foo"], dtype=cat) i2 = pd.Series(["bar", "bar", "baz"], dtype=cat) df = pd.DataFrame({"i1": i1, "i2": i2, "values": [1, 2, 3]}) ds = df.set_index("i1").to_xarray() assert len(ds["i1"]) == 3 ds = df.set_index(["i1", "i2"]).to_xarray() assert len(ds["i1"]) == 2 assert len(ds["i2"]) == 2 def test_from_dataframe_categorical_index_string_categories(self) -> None: cat = pd.CategoricalIndex( pd.Categorical.from_codes( np.array([1, 1, 0, 2], dtype=np.int64), # type: ignore[arg-type] categories=pd.Index(["foo", "bar", "baz"], dtype="string"), ) ) ser = pd.Series(1, index=cat) ds = ser.to_xarray() assert ds.coords.dtypes["index"] == ser.index.dtype @requires_sparse def test_from_dataframe_sparse(self) -> None: import sparse df_base = pd.DataFrame( {"x": range(10), "y": list("abcdefghij"), "z": np.arange(0, 100, 10)} ) ds_sparse = Dataset.from_dataframe(df_base.set_index("x"), sparse=True) ds_dense = Dataset.from_dataframe(df_base.set_index("x"), sparse=False) assert isinstance(ds_sparse["y"].data, sparse.COO) assert isinstance(ds_sparse["z"].data, sparse.COO) ds_sparse["y"].data = ds_sparse["y"].data.todense() ds_sparse["z"].data = ds_sparse["z"].data.todense() assert_identical(ds_dense, ds_sparse) ds_sparse = Dataset.from_dataframe(df_base.set_index(["x", "y"]), 
sparse=True) ds_dense = Dataset.from_dataframe(df_base.set_index(["x", "y"]), sparse=False) assert isinstance(ds_sparse["z"].data, sparse.COO) ds_sparse["z"].data = ds_sparse["z"].data.todense() assert_identical(ds_dense, ds_sparse) def test_to_and_from_empty_dataframe(self) -> None: # GH697 expected = pd.DataFrame({"foo": []}) ds = Dataset.from_dataframe(expected) assert len(ds["foo"]) == 0 actual = ds.to_dataframe() assert len(actual) == 0 assert expected.equals(actual) def test_from_dataframe_multiindex(self) -> None: index = pd.MultiIndex.from_product([["a", "b"], [1, 2, 3]], names=["x", "y"]) df = pd.DataFrame({"z": np.arange(6)}, index=index) expected = Dataset( {"z": (("x", "y"), [[0, 1, 2], [3, 4, 5]])}, coords={"x": ["a", "b"], "y": [1, 2, 3]}, ) actual = Dataset.from_dataframe(df) assert_identical(actual, expected) df2 = df.iloc[[3, 2, 1, 0, 4, 5], :] actual = Dataset.from_dataframe(df2) assert_identical(actual, expected) df3 = df.iloc[:4, :] expected3 = Dataset( {"z": (("x", "y"), [[0, 1, 2], [3, np.nan, np.nan]])}, coords={"x": ["a", "b"], "y": [1, 2, 3]}, ) actual = Dataset.from_dataframe(df3) assert_identical(actual, expected3) df_nonunique = df.iloc[[0, 0], :] with pytest.raises(ValueError, match=r"non-unique MultiIndex"): Dataset.from_dataframe(df_nonunique) def test_from_dataframe_unsorted_levels(self) -> None: # regression test for GH-4186 index = pd.MultiIndex( levels=[["b", "a"], ["foo"]], codes=[[0, 1], [0, 0]], names=["lev1", "lev2"] ) df = pd.DataFrame({"c1": [0, 2], "c2": [1, 3]}, index=index) expected = Dataset( { "c1": (("lev1", "lev2"), [[0], [2]]), "c2": (("lev1", "lev2"), [[1], [3]]), }, coords={"lev1": ["b", "a"], "lev2": ["foo"]}, ) actual = Dataset.from_dataframe(df) assert_identical(actual, expected) def test_from_dataframe_non_unique_columns(self) -> None: # regression test for GH449 df = pd.DataFrame(np.zeros((2, 2))) df.columns = ["foo", "foo"] # type: ignore[assignment,list-item,unused-ignore] with pytest.raises(ValueError, match=r"non-unique columns"): Dataset.from_dataframe(df) def test_convert_dataframe_with_many_types_and_multiindex(self) -> None: # regression test for GH737 df = pd.DataFrame( { "a": list("abc"), "b": list(range(1, 4)), "c": np.arange(3, 6).astype("u1"), "d": np.arange(4.0, 7.0, dtype="float64"), "e": [True, False, True], "f": pd.Categorical(list("abc")), "g": pd.date_range("20130101", periods=3), "h": pd.date_range("20130101", periods=3, tz="America/New_York"), } ) df.index = pd.MultiIndex.from_product([["a"], range(3)], names=["one", "two"]) roundtripped = Dataset.from_dataframe(df).to_dataframe() # we can't do perfectly, but we should be at least as faithful as # np.asarray expected = df.apply(np.asarray) assert roundtripped.equals(expected) @pytest.mark.parametrize("encoding", [True, False]) @pytest.mark.parametrize("data", [True, "list", "array"]) def test_to_and_from_dict( self, encoding: bool, data: bool | Literal["list", "array"] ) -> None: # # Dimensions: (t: 10) # Coordinates: # * t (t) U1" expected_no_data["coords"]["t"].update({"dtype": endiantype, "shape": (10,)}) expected_no_data["data_vars"]["a"].update({"dtype": "float64", "shape": (10,)}) expected_no_data["data_vars"]["b"].update({"dtype": "float64", "shape": (10,)}) actual_no_data = ds.to_dict(data=False, encoding=encoding) assert expected_no_data == actual_no_data # verify coords are included roundtrip expected_ds = ds.set_coords("b") actual2 = Dataset.from_dict(expected_ds.to_dict(data=data, encoding=encoding)) assert_identical(expected_ds, actual2) if encoding: 
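# when encoding=True was requested, the per-variable encoding dicts must also
# survive the to_dict/from_dict round trip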
assert set(expected_ds.variables) == set(actual2.variables) for vv in ds.variables: np.testing.assert_equal(expected_ds[vv].encoding, actual2[vv].encoding) # test some incomplete dicts: # this one has no attrs field, the dims are strings, and x, y are # np.arrays d = { "coords": {"t": {"dims": "t", "data": t}}, "dims": "t", "data_vars": {"a": {"dims": "t", "data": x}, "b": {"dims": "t", "data": y}}, } assert_identical(ds, Dataset.from_dict(d)) # this is kind of a flattened version with no coords, or data_vars d = { "a": {"dims": "t", "data": x}, "t": {"data": t, "dims": "t"}, "b": {"dims": "t", "data": y}, } assert_identical(ds, Dataset.from_dict(d)) # this one is missing some necessary information d = { "a": {"data": x}, "t": {"data": t, "dims": "t"}, "b": {"dims": "t", "data": y}, } with pytest.raises( ValueError, match=r"cannot convert dict without the key 'dims'" ): Dataset.from_dict(d) def test_to_and_from_dict_with_time_dim(self) -> None: x = np.random.randn(10, 3) y = np.random.randn(10, 3) t = pd.date_range("20130101", periods=10) lat = [77.7, 83.2, 76] ds = Dataset( { "a": (["t", "lat"], x), "b": (["t", "lat"], y), "t": ("t", t), "lat": ("lat", lat), } ) roundtripped = Dataset.from_dict(ds.to_dict()) assert_identical(ds, roundtripped) @pytest.mark.parametrize("data", [True, "list", "array"]) def test_to_and_from_dict_with_nan_nat( self, data: bool | Literal["list", "array"] ) -> None: x = np.random.randn(10, 3) y = np.random.randn(10, 3) y[2] = np.nan t = pd.Series(pd.date_range("20130101", periods=10)) # pandas-stubs doesn't allow np.nan for datetime Series, but it converts to NaT t[2] = np.nan # type: ignore[call-overload] lat = [77.7, 83.2, 76] ds = Dataset( { "a": (["t", "lat"], x), "b": (["t", "lat"], y), "t": ("t", t), "lat": ("lat", lat), } ) roundtripped = Dataset.from_dict(ds.to_dict(data=data)) if data == "array": # TODO: to_dict(data="array") converts datetime64[ns] to datetime64[us] # (numpy's default), causing index dtype mismatch on roundtrip. 
assert_identical(ds, roundtripped, check_indexes=False) else: assert_identical(ds, roundtripped) def test_to_dict_with_numpy_attrs(self) -> None: # this doesn't need to roundtrip x = np.random.randn(10) y = np.random.randn(10) t = list("abcdefghij") attrs = { "created": np.float64(1998), "coords": np.array([37, -110.1, 100]), "maintainer": "bar", } ds = Dataset({"a": ("t", x, attrs), "b": ("t", y, attrs), "t": ("t", t)}) expected_attrs = { "created": attrs["created"].item(), # type: ignore[attr-defined] "coords": attrs["coords"].tolist(), # type: ignore[attr-defined] "maintainer": "bar", } actual = ds.to_dict() # check that they are identical assert expected_attrs == actual["data_vars"]["a"]["attrs"] def test_pickle(self) -> None: data = create_test_data() roundtripped = pickle.loads(pickle.dumps(data)) assert_identical(data, roundtripped) # regression test for #167: assert data.sizes == roundtripped.sizes def test_lazy_load(self) -> None: store = InaccessibleVariableDataStore() create_test_data().dump_to_store(store) for decode_cf in [True, False]: ds = open_dataset(store, decode_cf=decode_cf) with pytest.raises(UnexpectedDataAccess): ds.load() with pytest.raises(UnexpectedDataAccess): _ = ds["var1"].values # these should not raise UnexpectedDataAccess: ds.isel(time=10) ds.isel(time=slice(10), dim1=[0]).isel(dim1=0, dim2=-1) def test_lazy_load_duck_array(self) -> None: store = AccessibleAsDuckArrayDataStore() create_test_data().dump_to_store(store) for decode_cf in [True, False]: ds = open_dataset(store, decode_cf=decode_cf) with pytest.raises(UnexpectedDataAccess): _ = ds["var1"].values # these should not raise UnexpectedDataAccess: _ = ds.var1.data ds.isel(time=10) ds.isel(time=slice(10), dim1=[0]).isel(dim1=0, dim2=-1) repr(ds) # preserve the duck array type and don't cast to array assert isinstance(ds["var1"].load().data, DuckArrayWrapper) assert isinstance( ds["var1"].isel(dim2=0, dim1=0).load().data, DuckArrayWrapper ) ds.close() def test_dropna(self) -> None: x = np.random.randn(4, 4) x[::2, 0] = np.nan y = np.random.randn(4) y[-1] = np.nan ds = Dataset({"foo": (("a", "b"), x), "bar": (("b", y))}) expected = ds.isel(a=slice(1, None, 2)) actual = ds.dropna("a") assert_identical(actual, expected) expected = ds.isel(b=slice(1, 3)) actual = ds.dropna("b") assert_identical(actual, expected) actual = ds.dropna("b", subset=["foo", "bar"]) assert_identical(actual, expected) expected = ds.isel(b=slice(1, None)) actual = ds.dropna("b", subset=["foo"]) assert_identical(actual, expected) expected = ds.isel(b=slice(3)) actual = ds.dropna("b", subset=["bar"]) assert_identical(actual, expected) actual = ds.dropna("a", subset=[]) assert_identical(actual, ds) actual = ds.dropna("a", subset=["bar"]) assert_identical(actual, ds) actual = ds.dropna("a", how="all") assert_identical(actual, ds) actual = ds.dropna("b", how="all", subset=["bar"]) expected = ds.isel(b=[0, 1, 2]) assert_identical(actual, expected) actual = ds.dropna("b", thresh=1, subset=["bar"]) assert_identical(actual, expected) actual = ds.dropna("b", thresh=2) assert_identical(actual, ds) actual = ds.dropna("b", thresh=4) expected = ds.isel(b=[1, 2, 3]) assert_identical(actual, expected) actual = ds.dropna("a", thresh=3) expected = ds.isel(a=[1, 3]) assert_identical(actual, ds) with pytest.raises( ValueError, match=r"'foo' not found in data dimensions \('a', 'b'\)", ): ds.dropna("foo") with pytest.raises(ValueError, match=r"invalid how"): ds.dropna("a", how="somehow") # type: ignore[arg-type] with pytest.raises(TypeError, match=r"must 
specify how or thresh"): ds.dropna("a", how=None) # type: ignore[arg-type] @pytest.mark.parametrize( "fill_value,extension_array", [ pytest.param("a", pd.Categorical([pd.NA, "a", "b"]), id="category"), ] + ( [ pytest.param( 0, pd.array([pd.NA, 1, 1], dtype="int64[pyarrow]"), id="int64[pyarrow]", ) ] if has_pyarrow else [] ), ) def test_fillna_extension_array(self, fill_value, extension_array) -> None: srs = pd.DataFrame({"data": extension_array}, index=np.array([1, 2, 3])) ds = srs.to_xarray() filled = ds.fillna(fill_value) assert filled["data"].dtype == extension_array.dtype assert ( filled["data"].values == np.array([fill_value, *srs["data"].values[1:]], dtype="object") ).all() @pytest.mark.parametrize( "extension_array", [ pytest.param(pd.Categorical([pd.NA, "a", "b"]), id="category"), ] + ( [ pytest.param( pd.array([pd.NA, 1, 1], dtype="int64[pyarrow]"), id="int64[pyarrow]" ) ] if has_pyarrow else [] ), ) def test_dropna_extension_array(self, extension_array) -> None: srs = pd.DataFrame({"data": extension_array}, index=np.array([1, 2, 3])) ds = srs.to_xarray() dropped = ds.dropna("index") assert dropped["data"].dtype == extension_array.dtype assert (dropped["data"].values == srs["data"].values[1:]).all() def test_fillna(self) -> None: ds = Dataset({"a": ("x", [np.nan, 1, np.nan, 3])}, {"x": [0, 1, 2, 3]}) # fill with -1 actual1 = ds.fillna(-1) expected = Dataset({"a": ("x", [-1, 1, -1, 3])}, {"x": [0, 1, 2, 3]}) assert_identical(expected, actual1) actual2 = ds.fillna({"a": -1}) assert_identical(expected, actual2) other = Dataset({"a": -1}) actual3 = ds.fillna(other) assert_identical(expected, actual3) actual4 = ds.fillna({"a": other.a}) assert_identical(expected, actual4) # fill with range(4) b = DataArray(range(4), coords=[("x", range(4))]) actual5 = ds.fillna(b) expected = b.rename("a").to_dataset() assert_identical(expected, actual5) actual6 = ds.fillna(expected) assert_identical(expected, actual6) actual7 = ds.fillna(np.arange(4)) assert_identical(expected, actual7) actual8 = ds.fillna(b[:3]) assert_identical(expected, actual8) # okay to only include some data variables ds["b"] = np.nan actual9 = ds.fillna({"a": -1}) expected = Dataset( {"a": ("x", [-1, 1, -1, 3]), "b": np.nan}, {"x": [0, 1, 2, 3]} ) assert_identical(expected, actual9) # but new data variables is not okay with pytest.raises(ValueError, match=r"must be contained"): ds.fillna({"x": 0}) # empty argument should be OK result1 = ds.fillna({}) assert_identical(ds, result1) result2 = ds.fillna(Dataset(coords={"c": 42})) expected = ds.assign_coords(c=42) assert_identical(expected, result2) da = DataArray(range(5), name="a", attrs={"attr": "da"}) actual10 = da.fillna(1) assert actual10.name == "a" assert actual10.attrs == da.attrs ds = Dataset({"a": da}, attrs={"attr": "ds"}) actual11 = ds.fillna({"a": 1}) assert actual11.attrs == ds.attrs assert actual11.a.name == "a" assert actual11.a.attrs == ds.a.attrs @pytest.mark.parametrize( "func", [lambda x: x.clip(0, 1), lambda x: np.float64(1.0) * x, np.abs, abs] ) def test_propagate_attrs(self, func) -> None: da = DataArray(range(5), name="a", attrs={"attr": "da"}) ds = Dataset({"a": da}, attrs={"attr": "ds"}) # test defaults assert func(ds).attrs == ds.attrs with set_options(keep_attrs=False): assert func(ds).attrs != ds.attrs assert func(ds).a.attrs != ds.a.attrs with set_options(keep_attrs=False): assert func(ds).attrs != ds.attrs assert func(ds).a.attrs != ds.a.attrs with set_options(keep_attrs=True): assert func(ds).attrs == ds.attrs assert func(ds).a.attrs == ds.a.attrs def 
test_where(self) -> None: ds = Dataset({"a": ("x", range(5))}) expected1 = Dataset({"a": ("x", [np.nan, np.nan, 2, 3, 4])}) actual1 = ds.where(ds > 1) assert_identical(expected1, actual1) actual2 = ds.where(ds.a > 1) assert_identical(expected1, actual2) actual3 = ds.where(ds.a.values > 1) assert_identical(expected1, actual3) actual4 = ds.where(True) assert_identical(ds, actual4) expected5 = ds.copy(deep=True) expected5["a"].values = np.array([np.nan] * 5) actual5 = ds.where(False) assert_identical(expected5, actual5) # 2d ds = Dataset({"a": (("x", "y"), [[0, 1], [2, 3]])}) expected6 = Dataset({"a": (("x", "y"), [[np.nan, 1], [2, 3]])}) actual6 = ds.where(ds > 0) assert_identical(expected6, actual6) # attrs da = DataArray(range(5), name="a", attrs={"attr": "da"}) actual7 = da.where(da.values > 1) assert actual7.name == "a" assert actual7.attrs == da.attrs ds = Dataset({"a": da}, attrs={"attr": "ds"}) actual8 = ds.where(ds > 0) assert actual8.attrs == ds.attrs assert actual8.a.name == "a" assert actual8.a.attrs == ds.a.attrs # lambda ds = Dataset({"a": ("x", range(5))}) expected9 = Dataset({"a": ("x", [np.nan, np.nan, 2, 3, 4])}) actual9 = ds.where(lambda x: x > 1) assert_identical(expected9, actual9) def test_where_other(self) -> None: ds = Dataset({"a": ("x", range(5))}, {"x": range(5)}) expected = Dataset({"a": ("x", [-1, -1, 2, 3, 4])}, {"x": range(5)}) actual = ds.where(ds > 1, -1) assert_equal(expected, actual) assert actual.a.dtype == int actual = ds.where(lambda x: x > 1, -1) assert_equal(expected, actual) actual = ds.where(ds > 1, other=-1, drop=True) expected_nodrop = ds.where(ds > 1, -1) _, expected = xr.align(actual, expected_nodrop, join="left") assert_equal(actual, expected) assert actual.a.dtype == int with pytest.raises(ValueError, match=r"cannot align .* are not equal"): ds.where(ds > 1, ds.isel(x=slice(3))) with pytest.raises(ValueError, match=r"exact match required"): ds.where(ds > 1, ds.assign(b=2)) def test_where_drop(self) -> None: # if drop=True # 1d # data array case array = DataArray(range(5), coords=[range(5)], dims=["x"]) expected1 = DataArray(range(5)[2:], coords=[range(5)[2:]], dims=["x"]) actual1 = array.where(array > 1, drop=True) assert_identical(expected1, actual1) # dataset case ds = Dataset({"a": array}) expected2 = Dataset({"a": expected1}) actual2 = ds.where(ds > 1, drop=True) assert_identical(expected2, actual2) actual3 = ds.where(ds.a > 1, drop=True) assert_identical(expected2, actual3) with pytest.raises(TypeError, match=r"must be a"): ds.where(np.arange(5) > 1, drop=True) # 1d with odd coordinates array = DataArray( np.array([2, 7, 1, 8, 3]), coords=[np.array([3, 1, 4, 5, 9])], dims=["x"] ) expected4 = DataArray( np.array([7, 8, 3]), coords=[np.array([1, 5, 9])], dims=["x"] ) actual4 = array.where(array > 2, drop=True) assert_identical(expected4, actual4) # 1d multiple variables ds = Dataset({"a": (("x"), [0, 1, 2, 3]), "b": (("x"), [4, 5, 6, 7])}) expected5 = Dataset( {"a": (("x"), [np.nan, 1, 2, 3]), "b": (("x"), [4, 5, 6, np.nan])} ) actual5 = ds.where((ds > 0) & (ds < 7), drop=True) assert_identical(expected5, actual5) # 2d ds = Dataset({"a": (("x", "y"), [[0, 1], [2, 3]])}) expected6 = Dataset({"a": (("x", "y"), [[np.nan, 1], [2, 3]])}) actual6 = ds.where(ds > 0, drop=True) assert_identical(expected6, actual6) # 2d with odd coordinates ds = Dataset( {"a": (("x", "y"), [[0, 1], [2, 3]])}, coords={ "x": [4, 3], "y": [1, 2], "z": (["x", "y"], [[np.exp(1), np.pi], [np.pi * np.exp(1), np.pi * 3]]), }, ) expected7 = Dataset( {"a": (("x", "y"), [[3]])}, 
coords={"x": [3], "y": [2], "z": (["x", "y"], [[np.pi * 3]])}, ) actual7 = ds.where(ds > 2, drop=True) assert_identical(expected7, actual7) # 2d multiple variables ds = Dataset( {"a": (("x", "y"), [[0, 1], [2, 3]]), "b": (("x", "y"), [[4, 5], [6, 7]])} ) expected8 = Dataset( { "a": (("x", "y"), [[np.nan, 1], [2, 3]]), "b": (("x", "y"), [[4, 5], [6, 7]]), } ) actual8 = ds.where(ds > 0, drop=True) assert_identical(expected8, actual8) # mixed dimensions: PR#6690, Issue#6227 ds = xr.Dataset( { "a": ("x", [1, 2, 3]), "b": ("y", [2, 3, 4]), "c": (("x", "y"), np.arange(9).reshape((3, 3))), } ) expected9 = xr.Dataset( { "a": ("x", [np.nan, 3]), "b": ("y", [np.nan, 3, 4]), "c": (("x", "y"), np.arange(3.0, 9.0).reshape((2, 3))), } ) actual9 = ds.where(ds > 2, drop=True) assert actual9.sizes["x"] == 2 assert_identical(expected9, actual9) def test_where_drop_empty(self) -> None: # regression test for GH1341 array = DataArray(np.random.rand(100, 10), dims=["nCells", "nVertLevels"]) mask = DataArray(np.zeros((100,), dtype="bool"), dims="nCells") actual = array.where(mask, drop=True) expected = DataArray(np.zeros((0, 10)), dims=["nCells", "nVertLevels"]) assert_identical(expected, actual) def test_where_drop_no_indexes(self) -> None: ds = Dataset({"foo": ("x", [0.0, 1.0])}) expected = Dataset({"foo": ("x", [1.0])}) actual = ds.where(ds == 1, drop=True) assert_identical(expected, actual) def test_reduce(self) -> None: data = create_test_data() assert len(data.mean().coords) == 0 actual = data.max() expected = Dataset({k: v.max() for k, v in data.data_vars.items()}) assert_equal(expected, actual) assert_equal(data.min(dim=["dim1"]), data.min(dim="dim1")) for reduct, expected_dims in [ ("dim2", ["dim3", "time", "dim1"]), (["dim2", "time"], ["dim3", "dim1"]), (("dim2", "time"), ["dim3", "dim1"]), ((), ["dim2", "dim3", "time", "dim1"]), ]: actual_dims = list(data.min(dim=reduct).dims) assert actual_dims == expected_dims assert_equal(data.mean(dim=[]), data) with pytest.raises(ValueError): data.mean(axis=0) def test_reduce_coords(self) -> None: # regression test for GH1470 data = xr.Dataset({"a": ("x", [1, 2, 3])}, coords={"b": 4}) expected = xr.Dataset({"a": 2}, coords={"b": 4}) actual = data.mean("x") assert_identical(actual, expected) # should be consistent actual = data["a"].mean("x").to_dataset() assert_identical(actual, expected) def test_mean_uint_dtype(self) -> None: data = xr.Dataset( { "a": (("x", "y"), np.arange(6).reshape(3, 2).astype("uint")), "b": (("x",), np.array([0.1, 0.2, np.nan])), } ) actual = data.mean("x", skipna=True) expected = xr.Dataset( {"a": data["a"].mean("x"), "b": data["b"].mean("x", skipna=True)} ) assert_identical(actual, expected) def test_reduce_bad_dim(self) -> None: data = create_test_data() with pytest.raises( ValueError, match=re.escape("Dimension(s) 'bad_dim' do not exist"), ): data.mean(dim="bad_dim") def test_reduce_cumsum(self) -> None: data = xr.Dataset( {"a": 1, "b": ("x", [1, 2]), "c": (("x", "y"), [[np.nan, 3], [0, 4]])} ) assert_identical(data.fillna(0), data.cumsum("y")) expected = xr.Dataset( {"a": 1, "b": ("x", [1, 3]), "c": (("x", "y"), [[0, 3], [0, 7]])} ) assert_identical(expected, data.cumsum()) @pytest.mark.parametrize( "reduct, expected", [ ("dim1", ["dim2", "dim3", "time", "dim1"]), ("dim2", ["dim3", "time", "dim1", "dim2"]), ("dim3", ["dim2", "time", "dim1", "dim3"]), ("time", ["dim2", "dim3", "dim1"]), ], ) @pytest.mark.parametrize("func", ["cumsum", "cumprod"]) def test_reduce_cumsum_test_dims(self, reduct, expected, func) -> None: data = 
create_test_data() with pytest.raises( ValueError, match=re.escape("Dimension(s) 'bad_dim' do not exist"), ): getattr(data, func)(dim="bad_dim") # ensure dimensions are correct actual = getattr(data, func)(dim=reduct).dims assert list(actual) == expected def test_reduce_non_numeric(self) -> None: data1 = create_test_data(seed=44, use_extension_array=True) data2 = create_test_data(seed=44) add_vars = {"var6": ["dim1", "dim2"], "var7": ["dim1"]} for v, dims in sorted(add_vars.items()): size = tuple(data1.sizes[d] for d in dims) data = np.random.randint(0, 100, size=size).astype(np.str_) data1[v] = (dims, data, {"foo": "variable"}) # var4 and var5 are extension arrays and should be dropped assert ( "var4" not in data1.mean() and "var5" not in data1.mean() and "var6" not in data1.mean() and "var7" not in data1.mean() ) assert_equal(data1.mean(), data2.mean()) assert_equal(data1.mean(dim="dim1"), data2.mean(dim="dim1")) assert "var6" not in data1.mean(dim="dim2") and "var7" in data1.mean(dim="dim2") @pytest.mark.filterwarnings( "ignore:Once the behaviour of DataArray:DeprecationWarning" ) def test_reduce_strings(self) -> None: expected = Dataset({"x": "a"}) ds = Dataset({"x": ("y", ["a", "b"])}) ds.coords["y"] = [-10, 10] actual = ds.min() assert_identical(expected, actual) expected = Dataset({"x": "b"}) actual = ds.max() assert_identical(expected, actual) expected = Dataset({"x": 0}) actual = ds.argmin() assert_identical(expected, actual) expected = Dataset({"x": 1}) actual = ds.argmax() assert_identical(expected, actual) expected = Dataset({"x": -10}) actual = ds.idxmin() assert_identical(expected, actual) expected = Dataset({"x": 10}) actual = ds.idxmax() assert_identical(expected, actual) expected = Dataset({"x": b"a"}) ds = Dataset({"x": ("y", np.array(["a", "b"], "S1"))}) actual = ds.min() assert_identical(expected, actual) expected = Dataset({"x": "a"}) ds = Dataset({"x": ("y", np.array(["a", "b"], "U1"))}) actual = ds.min() assert_identical(expected, actual) def test_reduce_dtypes(self) -> None: # regression test for GH342 expected = Dataset({"x": 1}) actual = Dataset({"x": True}).sum() assert_identical(expected, actual) # regression test for GH505 expected = Dataset({"x": 3}) actual = Dataset({"x": ("y", np.array([1, 2], "uint16"))}).sum() assert_identical(expected, actual) expected = Dataset({"x": 1 + 1j}) actual = Dataset({"x": ("y", [1, 1j])}).sum() assert_identical(expected, actual) def test_reduce_keep_attrs(self) -> None: data = create_test_data() _attrs = {"attr1": "value1", "attr2": 2929} attrs = dict(_attrs) data.attrs = attrs # Test default behavior (keeps attrs for reduction operations) ds = data.mean() assert ds.attrs == attrs for k, v in ds.data_vars.items(): assert v.attrs == data[k].attrs # Test explicitly keeping attrs ds = data.mean(keep_attrs=True) assert ds.attrs == attrs for k, v in ds.data_vars.items(): assert v.attrs == data[k].attrs # Test explicitly dropping attrs ds = data.mean(keep_attrs=False) assert ds.attrs == {} for v in ds.data_vars.values(): assert v.attrs == {} @pytest.mark.filterwarnings( "ignore:Once the behaviour of DataArray:DeprecationWarning" ) def test_reduce_argmin(self) -> None: # regression test for #205 ds = Dataset({"a": ("x", [0, 1])}) expected = Dataset({"a": ([], 0)}) actual = ds.argmin() assert_identical(expected, actual) actual = ds.argmin("x") assert_identical(expected, actual) def test_reduce_scalars(self) -> None: ds = Dataset({"x": ("a", [2, 2]), "y": 2, "z": ("b", [2])}) expected = Dataset({"x": 0, "y": 0, "z": 0}) actual = 
ds.var() assert_identical(expected, actual) expected = Dataset({"x": 0, "y": 0, "z": ("b", [0])}) actual = ds.var("a") assert_identical(expected, actual) def test_reduce_only_one_axis(self) -> None: def mean_only_one_axis(x, axis): if not isinstance(axis, integer_types): raise TypeError("non-integer axis") return x.mean(axis) ds = Dataset({"a": (["x", "y"], [[0, 1, 2, 3, 4]])}) expected = Dataset({"a": ("x", [2])}) actual = ds.reduce(mean_only_one_axis, "y") assert_identical(expected, actual) with pytest.raises( TypeError, match=r"missing 1 required positional argument: 'axis'" ): ds.reduce(mean_only_one_axis) def test_reduce_no_axis(self) -> None: def total_sum(x): return np.sum(x.flatten()) ds = Dataset({"a": (["x", "y"], [[0, 1, 2, 3, 4]])}) expected = Dataset({"a": ((), 10)}) actual = ds.reduce(total_sum) assert_identical(expected, actual) with pytest.raises(TypeError, match=r"unexpected keyword argument 'axis'"): ds.reduce(total_sum, dim="x") def test_reduce_keepdims(self) -> None: ds = Dataset( {"a": (["x", "y"], [[0, 1, 2, 3, 4]])}, coords={ "y": [0, 1, 2, 3, 4], "x": [0], "lat": (["x", "y"], [[0, 1, 2, 3, 4]]), "c": -999.0, }, ) # Shape should match behaviour of numpy reductions with keepdims=True # Coordinates involved in the reduction should be removed actual = ds.mean(keepdims=True) expected = Dataset( {"a": (["x", "y"], np.mean(ds.a, keepdims=True).data)}, coords={"c": ds.c} ) assert_identical(expected, actual) actual = ds.mean("x", keepdims=True) expected = Dataset( {"a": (["x", "y"], np.mean(ds.a, axis=0, keepdims=True).data)}, coords={"y": ds.y, "c": ds.c}, ) assert_identical(expected, actual) @pytest.mark.parametrize("compute_backend", ["numbagg", None], indirect=True) @pytest.mark.parametrize("skipna", [True, False, None]) @pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]]) def test_quantile(self, q, skipna, compute_backend) -> None: ds = create_test_data(seed=123) ds.var1.data[0, 0] = np.nan for dim in [None, "dim1", ["dim1"]]: ds_quantile = ds.quantile(q, dim=dim, skipna=skipna) if is_scalar(q): assert "quantile" not in ds_quantile.dims else: assert "quantile" in ds_quantile.dims for var, dar in ds.data_vars.items(): assert var in ds_quantile assert_identical( ds_quantile[var], dar.quantile(q, dim=dim, skipna=skipna) ) dim = ["dim1", "dim2"] ds_quantile = ds.quantile(q, dim=dim, skipna=skipna) assert "dim3" in ds_quantile.dims assert all(d not in ds_quantile.dims for d in dim) @pytest.mark.parametrize("compute_backend", ["numbagg", None], indirect=True) @pytest.mark.parametrize("skipna", [True, False]) def test_quantile_skipna(self, skipna, compute_backend) -> None: q = 0.1 dim = "time" ds = Dataset({"a": ([dim], np.arange(0, 11))}) ds = ds.where(ds >= 1) result = ds.quantile(q=q, dim=dim, skipna=skipna) value = 1.9 if skipna else np.nan expected = Dataset({"a": value}, coords={"quantile": q}) assert_identical(result, expected) @pytest.mark.parametrize("method", ["midpoint", "lower"]) def test_quantile_method(self, method) -> None: ds = create_test_data(seed=123) q = [0.25, 0.5, 0.75] result = ds.quantile(q, method=method) assert_identical(result.var1, ds.var1.quantile(q, method=method)) assert_identical(result.var2, ds.var2.quantile(q, method=method)) assert_identical(result.var3, ds.var3.quantile(q, method=method)) @pytest.mark.filterwarnings( "default:The `interpolation` argument to quantile was renamed to `method`:FutureWarning" ) @pytest.mark.parametrize("method", ["midpoint", "lower"]) def test_quantile_interpolation_deprecated(self, method) -> None: ds = 
create_test_data(seed=123) q = [0.25, 0.5, 0.75] with pytest.warns( FutureWarning, match="`interpolation` argument to quantile was renamed to `method`", ): ds.quantile(q, interpolation=method) with warnings.catch_warnings(record=True): with pytest.raises(TypeError, match="interpolation and method keywords"): ds.quantile(q, method=method, interpolation=method) @requires_bottleneck def test_rank(self) -> None: ds = create_test_data(seed=1234) # only ds.var3 depends on dim3 z = ds.rank("dim3") assert ["var3"] == list(z.data_vars) # same as dataarray version x = z.var3 y = ds.var3.rank("dim3") assert_equal(x, y) # coordinates stick assert list(z.coords) == list(ds.coords) assert list(x.coords) == list(y.coords) # invalid dim with pytest.raises( ValueError, match=re.escape( "Dimension 'invalid_dim' not found in data dimensions ('dim3', 'dim1')" ), ): x.rank("invalid_dim") def test_rank_use_bottleneck(self) -> None: ds = Dataset({"a": ("x", [0, np.nan, 2]), "b": ("y", [4, 6, 3, 4])}) with xr.set_options(use_bottleneck=False): with pytest.raises(RuntimeError): ds.rank("x") def test_count(self) -> None: ds = Dataset({"x": ("a", [np.nan, 1]), "y": 0, "z": np.nan}) expected = Dataset({"x": 1, "y": 1, "z": 0}) actual = ds.count() assert_identical(expected, actual) def test_map(self) -> None: data = create_test_data() data.attrs["foo"] = "bar" # data.map keeps all attrs by default assert_identical(data.map(np.mean), data.mean()) expected = data.mean(keep_attrs=True) actual = data.map(lambda x: x.mean(keep_attrs=True), keep_attrs=True) assert_identical(expected, actual) assert_identical(data.map(lambda x: x, keep_attrs=True), data.drop_vars("time")) def scale(x, multiple=1): return multiple * x actual = data.map(scale, multiple=2) assert_equal(actual["var1"], 2 * data["var1"]) assert_identical(actual["numbers"], data["numbers"]) actual = data.map(np.asarray) expected = data.drop_vars("time") # time is not used on a data var assert_equal(expected, actual) def test_map_coords_attrs(self) -> None: ds = xr.Dataset( { "a": ( ["x", "y", "z"], np.arange(24).reshape(3, 4, 2), {"attr1": "value1"}, ), "b": ("y", np.arange(4), {"attr2": "value2"}), }, coords={ "x": ("x", np.array([-1, 0, 1]), {"attr3": "value3"}), "z": ("z", list("ab"), {"attr4": "value4"}), }, ) def func(arr): if "y" not in arr.dims: return arr # drop attrs from coords return arr.mean(dim="y").drop_attrs() expected = ds.mean(dim="y", keep_attrs=True) actual = ds.map(func, keep_attrs=True) assert_identical(actual, expected) assert actual["x"].attrs ds["x"].attrs["y"] = "x" assert ds["x"].attrs != actual["x"].attrs def test_map_non_dataarray_outputs(self) -> None: # Test that map handles non-DataArray outputs by converting them # Regression test for GH10835 ds = xr.Dataset({"foo": ("x", [1, 2, 3]), "bar": ("y", [4, 5])}) # Scalar output result = ds.map(lambda x: 1) expected = xr.Dataset({"foo": 1, "bar": 1}) assert_identical(result, expected) # Numpy array output with same shape result = ds.map(lambda x: x.values) expected = ds.copy() assert_identical(result, expected) # Mixed: some return scalars, some return arrays def mixed_func(x): if "x" in x.dims: return 42 return x result = ds.map(mixed_func) expected = xr.Dataset({"foo": 42, "bar": ("y", [4, 5])}) assert_identical(result, expected) def test_apply_pending_deprecated_map(self) -> None: data = create_test_data() data.attrs["foo"] = "bar" with pytest.warns(PendingDeprecationWarning): # data.apply keeps all attrs by default assert_identical(data.apply(np.mean), data.mean()) def 
make_example_math_dataset(self): variables = { "bar": ("x", np.arange(100, 400, 100)), "foo": (("x", "y"), 1.0 * np.arange(12).reshape(3, 4)), } coords = {"abc": ("x", ["a", "b", "c"]), "y": 10 * np.arange(4)} ds = Dataset(variables, coords) ds["foo"][0, 0] = np.nan return ds def test_dataset_number_math(self) -> None: ds = self.make_example_math_dataset() assert_identical(ds, +ds) assert_identical(ds, ds + 0) assert_identical(ds, 0 + ds) assert_identical(ds, ds + np.array(0)) assert_identical(ds, np.array(0) + ds) actual = ds.copy(deep=True) actual += 0 assert_identical(ds, actual) # casting nan warns @pytest.mark.filterwarnings("ignore:invalid value encountered in cast") def test_unary_ops(self) -> None: ds = self.make_example_math_dataset() assert_identical(ds.map(abs), abs(ds)) assert_identical(ds.map(lambda x: x + 4), ds + 4) for func in [ lambda x: x.isnull(), lambda x: x.round(), lambda x: x.astype(int), ]: assert_identical(ds.map(func), func(ds)) assert_identical(ds.isnull(), ~ds.notnull()) # don't actually patch these methods in with pytest.raises(AttributeError): _ = ds.item with pytest.raises(AttributeError): _ = ds.searchsorted def test_dataset_array_math(self) -> None: ds = self.make_example_math_dataset() expected = ds.map(lambda x: x - ds["foo"]) assert_identical(expected, ds - ds["foo"]) assert_identical(expected, -ds["foo"] + ds) assert_identical(expected, ds - ds["foo"].variable) assert_identical(expected, -ds["foo"].variable + ds) actual = ds.copy(deep=True) actual -= ds["foo"] assert_identical(expected, actual) expected = ds.map(lambda x: x + ds["bar"]) assert_identical(expected, ds + ds["bar"]) actual = ds.copy(deep=True) actual += ds["bar"] assert_identical(expected, actual) expected = Dataset({"bar": ds["bar"] + np.arange(3)}) assert_identical(expected, ds[["bar"]] + np.arange(3)) assert_identical(expected, np.arange(3) + ds[["bar"]]) def test_dataset_dataset_math(self) -> None: ds = self.make_example_math_dataset() assert_identical(ds, ds + 0 * ds) assert_identical(ds, ds + {"foo": 0, "bar": 0}) expected = ds.map(lambda x: 2 * x) assert_identical(expected, 2 * ds) assert_identical(expected, ds + ds) assert_identical(expected, ds + ds.data_vars) assert_identical(expected, ds + dict(ds.data_vars)) actual = ds.copy(deep=True) expected_id = id(actual) actual += ds assert_identical(expected, actual) assert expected_id == id(actual) assert_identical(ds == ds, ds.notnull()) subsampled = ds.isel(y=slice(2)) expected = 2 * subsampled assert_identical(expected, subsampled + ds) assert_identical(expected, ds + subsampled) def test_dataset_math_auto_align(self) -> None: ds = self.make_example_math_dataset() subset = ds.isel(y=[1, 3]) expected = 2 * subset actual = ds + subset assert_identical(expected, actual) actual = ds.isel(y=slice(1)) + ds.isel(y=slice(1, None)) expected = 2 * ds.drop_sel(y=ds.y) assert_equal(actual, expected) actual = ds + ds[["bar"]] expected = (2 * ds[["bar"]]).merge(ds.coords, compat="override") assert_identical(expected, actual) assert_identical(ds + Dataset(), ds.coords.to_dataset()) assert_identical(Dataset() + Dataset(), Dataset()) ds2 = Dataset(coords={"bar": 42}) assert_identical(ds + ds2, ds.coords.merge(ds2)) # maybe unary arithmetic with empty datasets should raise instead? 
assert_identical(Dataset() + 1, Dataset()) actual = ds.copy(deep=True) other = ds.isel(y=slice(2)) actual += other expected = ds + other.reindex_like(ds) assert_identical(expected, actual) def test_dataset_math_errors(self) -> None: ds = self.make_example_math_dataset() with pytest.raises(TypeError): ds["foo"] += ds with pytest.raises(TypeError): ds["foo"].variable += ds with pytest.raises(ValueError, match=r"must have the same"): ds += ds[["bar"]] # verify we can rollback in-place operations if something goes wrong # nb. inplace datetime64 math actually will work with an integer array # but not floats thanks to numpy's inconsistent handling other = DataArray(np.datetime64("2000-01-01"), coords={"c": 2}) actual = ds.copy(deep=True) with pytest.raises(TypeError): actual += other assert_identical(actual, ds) def test_dataset_transpose(self) -> None: ds = Dataset( { "a": (("x", "y"), np.random.randn(3, 4)), "b": (("y", "x"), np.random.randn(4, 3)), }, coords={ "x": range(3), "y": range(4), "xy": (("x", "y"), np.random.randn(3, 4)), }, ) actual = ds.transpose() expected = Dataset( {"a": (("y", "x"), ds.a.values.T), "b": (("x", "y"), ds.b.values.T)}, coords={ "x": ds.x.values, "y": ds.y.values, "xy": (("y", "x"), ds.xy.values.T), }, ) assert_identical(expected, actual) actual = ds.transpose(...) expected = ds assert_identical(expected, actual) actual = ds.transpose("x", "y") expected = ds.map(lambda x: x.transpose("x", "y", transpose_coords=True)) assert_identical(expected, actual) ds = create_test_data() actual = ds.transpose() for k in ds.variables: assert actual[k].dims[::-1] == ds[k].dims new_order = ("dim2", "dim3", "dim1", "time") actual = ds.transpose(*new_order) for k in ds.variables: expected_dims = tuple(d for d in new_order if d in ds[k].dims) assert actual[k].dims == expected_dims # same as above but with ellipsis new_order = ("dim2", "dim3", "dim1", "time") actual = ds.transpose("dim2", "dim3", ...) for k in ds.variables: expected_dims = tuple(d for d in new_order if d in ds[k].dims) assert actual[k].dims == expected_dims # test missing dimension, raise error with pytest.raises(ValueError): ds.transpose(..., "not_a_dim") # test missing dimension, ignore error actual = ds.transpose(..., "not_a_dim", missing_dims="ignore") expected_ell = ds.transpose(...) 
assert_identical(expected_ell, actual) # test missing dimension, raise warning with pytest.warns(UserWarning): actual = ds.transpose(..., "not_a_dim", missing_dims="warn") assert_identical(expected_ell, actual) assert "T" not in dir(ds) def test_dataset_ellipsis_transpose_different_ordered_vars(self) -> None: # https://github.com/pydata/xarray/issues/1081#issuecomment-544350457 ds = Dataset( dict( a=(("w", "x", "y", "z"), np.ones((2, 3, 4, 5))), b=(("x", "w", "y", "z"), np.zeros((3, 2, 4, 5))), ) ) result = ds.transpose(..., "z", "y") assert list(result["a"].dims) == list("wxzy") assert list(result["b"].dims) == list("xwzy") def test_dataset_retains_period_index_on_transpose(self) -> None: ds = create_test_data() ds["time"] = pd.period_range("2000-01-01", periods=20) transposed = ds.transpose() assert isinstance(transposed.time.to_index(), pd.PeriodIndex) def test_dataset_diff_n1_simple(self) -> None: ds = Dataset({"foo": ("x", [5, 5, 6, 6])}) actual = ds.diff("x") expected = Dataset({"foo": ("x", [0, 1, 0])}) assert_equal(expected, actual) def test_dataset_diff_n1_label(self) -> None: ds = Dataset({"foo": ("x", [5, 5, 6, 6])}, {"x": [0, 1, 2, 3]}) actual = ds.diff("x", label="lower") expected = Dataset({"foo": ("x", [0, 1, 0])}, {"x": [0, 1, 2]}) assert_equal(expected, actual) actual = ds.diff("x", label="upper") expected = Dataset({"foo": ("x", [0, 1, 0])}, {"x": [1, 2, 3]}) assert_equal(expected, actual) def test_dataset_diff_n1(self) -> None: ds = create_test_data(seed=1) actual = ds.diff("dim2") expected_dict = {} expected_dict["var1"] = DataArray( np.diff(ds["var1"].values, axis=1), {"dim2": ds["dim2"].values[1:]}, ["dim1", "dim2"], ) expected_dict["var2"] = DataArray( np.diff(ds["var2"].values, axis=1), {"dim2": ds["dim2"].values[1:]}, ["dim1", "dim2"], ) expected_dict["var3"] = ds["var3"] expected = Dataset(expected_dict, coords={"time": ds["time"].values}) expected.coords["numbers"] = ("dim3", ds["numbers"].values) assert_equal(expected, actual) def test_dataset_diff_n2(self) -> None: ds = create_test_data(seed=1) actual = ds.diff("dim2", n=2) expected_dict = {} expected_dict["var1"] = DataArray( np.diff(ds["var1"].values, axis=1, n=2), {"dim2": ds["dim2"].values[2:]}, ["dim1", "dim2"], ) expected_dict["var2"] = DataArray( np.diff(ds["var2"].values, axis=1, n=2), {"dim2": ds["dim2"].values[2:]}, ["dim1", "dim2"], ) expected_dict["var3"] = ds["var3"] expected = Dataset(expected_dict, coords={"time": ds["time"].values}) expected.coords["numbers"] = ("dim3", ds["numbers"].values) assert_equal(expected, actual) def test_dataset_diff_exception_n_neg(self) -> None: ds = create_test_data(seed=1) with pytest.raises(ValueError, match=r"must be non-negative"): ds.diff("dim2", n=-1) def test_dataset_diff_exception_label_str(self) -> None: ds = create_test_data(seed=1) with pytest.raises(ValueError, match=r"'label' argument has to"): ds.diff("dim2", label="raise_me") # type: ignore[arg-type] @pytest.mark.parametrize("fill_value", [dtypes.NA, 2, 2.0, {"foo": -10}]) def test_shift(self, fill_value) -> None: coords = {"bar": ("x", list("abc")), "x": [-4, 3, 2]} attrs = {"meta": "data"} ds = Dataset({"foo": ("x", [1, 2, 3])}, coords, attrs) actual = ds.shift(x=1, fill_value=fill_value) if fill_value == dtypes.NA: # if we supply the default, we expect the missing value for a # float array fill_value = np.nan elif isinstance(fill_value, dict): fill_value = fill_value.get("foo", np.nan) expected = Dataset({"foo": ("x", [fill_value, 1, 2])}, coords, attrs) assert_identical(expected, actual) with 
pytest.raises(ValueError, match=r"dimensions"): ds.shift(foo=123) def test_roll_coords(self) -> None: coords = {"bar": ("x", list("abc")), "x": [-4, 3, 2]} attrs = {"meta": "data"} ds = Dataset({"foo": ("x", [1, 2, 3])}, coords, attrs) actual = ds.roll(x=1, roll_coords=True) ex_coords = {"bar": ("x", list("cab")), "x": [2, -4, 3]} expected = Dataset({"foo": ("x", [3, 1, 2])}, ex_coords, attrs) assert_identical(expected, actual) with pytest.raises(ValueError, match=r"dimensions"): ds.roll(foo=123, roll_coords=True) def test_roll_no_coords(self) -> None: coords = {"bar": ("x", list("abc")), "x": [-4, 3, 2]} attrs = {"meta": "data"} ds = Dataset({"foo": ("x", [1, 2, 3])}, coords, attrs) actual = ds.roll(x=1) expected = Dataset({"foo": ("x", [3, 1, 2])}, coords, attrs) assert_identical(expected, actual) with pytest.raises(ValueError, match=r"dimensions"): ds.roll(abc=321) def test_roll_multidim(self) -> None: # regression test for 2445 arr = xr.DataArray( [[1, 2, 3], [4, 5, 6]], coords={"x": range(3), "y": range(2)}, dims=("y", "x"), ) actual = arr.roll(x=1, roll_coords=True) expected = xr.DataArray( [[3, 1, 2], [6, 4, 5]], coords=[("y", [0, 1]), ("x", [2, 0, 1])] ) assert_identical(expected, actual) def test_real_and_imag(self) -> None: attrs = {"foo": "bar"} ds = Dataset({"x": ((), 1 + 2j, attrs)}, attrs=attrs) expected_re = Dataset({"x": ((), 1, attrs)}, attrs=attrs) assert_identical(ds.real, expected_re) expected_im = Dataset({"x": ((), 2, attrs)}, attrs=attrs) assert_identical(ds.imag, expected_im) def test_setattr_raises(self) -> None: ds = Dataset({}, coords={"scalar": 1}, attrs={"foo": "bar"}) with pytest.raises(AttributeError, match=r"cannot set attr"): ds.scalar = 2 with pytest.raises(AttributeError, match=r"cannot set attr"): ds.foo = 2 with pytest.raises(AttributeError, match=r"cannot set attr"): ds.other = 2 def test_filter_by_attrs(self) -> None: precip = dict(standard_name="convective_precipitation_flux") temp0 = dict(standard_name="air_potential_temperature", height="0 m") temp10 = dict(standard_name="air_potential_temperature", height="10 m") ds = Dataset( { "temperature_0": (["t"], [0], temp0), "temperature_10": (["t"], [0], temp10), "precipitation": (["t"], [0], precip), }, coords={"time": (["t"], [0], dict(axis="T", long_name="time_in_seconds"))}, ) # Test return empty Dataset. ds.filter_by_attrs(standard_name="invalid_standard_name") new_ds = ds.filter_by_attrs(standard_name="invalid_standard_name") assert not bool(new_ds.data_vars) # Test return one DataArray. new_ds = ds.filter_by_attrs(standard_name="convective_precipitation_flux") assert new_ds["precipitation"].standard_name == "convective_precipitation_flux" assert_equal(new_ds["precipitation"], ds["precipitation"]) # Test filter coordinates new_ds = ds.filter_by_attrs(long_name="time_in_seconds") assert new_ds["time"].long_name == "time_in_seconds" assert not bool(new_ds.data_vars) # Test return more than one DataArray. new_ds = ds.filter_by_attrs(standard_name="air_potential_temperature") assert len(new_ds.data_vars) == 2 for var in new_ds.data_vars: assert new_ds[var].standard_name == "air_potential_temperature" # Test callable. 
new_ds = ds.filter_by_attrs(height=lambda v: v is not None) assert len(new_ds.data_vars) == 2 for var in new_ds.data_vars: assert new_ds[var].standard_name == "air_potential_temperature" new_ds = ds.filter_by_attrs(height="10 m") assert len(new_ds.data_vars) == 1 for var in new_ds.data_vars: assert new_ds[var].height == "10 m" # Test return empty Dataset due to conflicting filters new_ds = ds.filter_by_attrs( standard_name="convective_precipitation_flux", height="0 m" ) assert not bool(new_ds.data_vars) # Test return one DataArray with two filter conditions new_ds = ds.filter_by_attrs( standard_name="air_potential_temperature", height="0 m" ) for var in new_ds.data_vars: assert new_ds[var].standard_name == "air_potential_temperature" assert new_ds[var].height == "0 m" assert new_ds[var].height != "10 m" # Test return empty Dataset due to conflicting callables new_ds = ds.filter_by_attrs( standard_name=lambda v: False, height=lambda v: True ) assert not bool(new_ds.data_vars) def test_binary_op_propagate_indexes(self) -> None: ds = Dataset( {"d1": DataArray([1, 2, 3], dims=["x"], coords={"x": [10, 20, 30]})} ) expected = ds.xindexes["x"] actual = (ds * 2).xindexes["x"] assert expected is actual def test_binary_op_join_setting(self) -> None: # arithmetic_join applies to data array coordinates missing_2 = xr.Dataset({"x": [0, 1]}) missing_0 = xr.Dataset({"x": [1, 2]}) with xr.set_options(arithmetic_join="outer"): actual = missing_2 + missing_0 expected = xr.Dataset({"x": [0, 1, 2]}) assert_equal(actual, expected) # arithmetic join also applies to data_vars ds1 = xr.Dataset({"foo": 1, "bar": 2}) ds2 = xr.Dataset({"bar": 2, "baz": 3}) expected = xr.Dataset({"bar": 4}) # default is inner joining actual = ds1 + ds2 assert_equal(actual, expected) with xr.set_options(arithmetic_join="outer"): expected = xr.Dataset({"foo": np.nan, "bar": 4, "baz": np.nan}) actual = ds1 + ds2 assert_equal(actual, expected) with xr.set_options(arithmetic_join="left"): expected = xr.Dataset({"foo": np.nan, "bar": 4}) actual = ds1 + ds2 assert_equal(actual, expected) with xr.set_options(arithmetic_join="right"): expected = xr.Dataset({"bar": 4, "baz": np.nan}) actual = ds1 + ds2 assert_equal(actual, expected) def test_binary_op_compat_setting(self) -> None: # Setting up a clash of non-index coordinate 'foo': a = xr.Dataset( data_vars={"var": (["x"], [0, 0, 0])}, coords={ "x": [1, 2, 3], "foo": (["x"], [1.0, 2.0, np.nan]), }, ) b = xr.Dataset( data_vars={"var": (["x"], [0, 0, 0])}, coords={ "x": [1, 2, 3], "foo": (["x"], [np.nan, 2.0, 3.0]), }, ) with xr.set_options(arithmetic_compat="minimal"): assert_equal(a + b, a.drop_vars("foo")) with xr.set_options(arithmetic_compat="override"): assert_equal(a + b, a) assert_equal(b + a, b) with xr.set_options(arithmetic_compat="no_conflicts"): expected = a.assign_coords(foo=(["x"], [1.0, 2.0, 3.0])) assert_equal(a + b, expected) assert_equal(b + a, expected) with xr.set_options(arithmetic_compat="equals"): with pytest.raises(MergeError): a + b with pytest.raises(MergeError): b + a @pytest.mark.parametrize( ["keep_attrs", "expected"], ( pytest.param(False, {}, id="False"), pytest.param( True, {"foo": "a", "bar": "b", "baz": "c"}, id="True" ), # drop_conflicts combines non-conflicting attrs ), ) def test_binary_ops_keep_attrs(self, keep_attrs, expected) -> None: ds1 = xr.Dataset({"a": 1}, attrs={"foo": "a", "bar": "b"}) ds2 = xr.Dataset({"a": 1}, attrs={"foo": "a", "baz": "c"}) with xr.set_options(keep_attrs=keep_attrs): ds_result = ds1 + ds2 assert ds_result.attrs == expected def 
test_binary_ops_attrs_drop_conflicts(self) -> None: # Test that binary operations combine attrs with drop_conflicts behavior attrs1 = {"units": "meters", "long_name": "distance", "source": "sensor_a"} attrs2 = {"units": "feet", "resolution": "high", "source": "sensor_b"} ds1 = xr.Dataset({"a": 1}, attrs=attrs1) ds2 = xr.Dataset({"a": 2}, attrs=attrs2) # With keep_attrs=True (default), should combine attrs dropping conflicts result = ds1 + ds2 # "units" and "source" conflict, so they're dropped # "long_name" only in ds1, "resolution" only in ds2, so they're kept assert result.attrs == {"long_name": "distance", "resolution": "high"} # Test with identical values for some attrs attrs3 = {"units": "meters", "type": "data", "source": "sensor_c"} ds3 = xr.Dataset({"a": 3}, attrs=attrs3) result2 = ds1 + ds3 # "units" has same value, so kept; "source" conflicts, so dropped # "long_name" from ds1, "type" from ds3 assert result2.attrs == { "units": "meters", "long_name": "distance", "type": "data", } # With keep_attrs=False, attrs should be empty with xr.set_options(keep_attrs=False): result3 = ds1 + ds2 assert result3.attrs == {} def test_full_like(self) -> None: # For more thorough tests, see test_variable.py # Note: testing data_vars with mismatched dtypes ds = Dataset( { "d1": DataArray([1, 2, 3], dims=["x"], coords={"x": [10, 20, 30]}), "d2": DataArray([1.1, 2.2, 3.3], dims=["y"]), }, attrs={"foo": "bar"}, ) actual = full_like(ds, 2) expected = ds.copy(deep=True) # https://github.com/python/mypy/issues/3004 expected["d1"].values = [2, 2, 2] # type: ignore[assignment,unused-ignore] expected["d2"].values = [2.0, 2.0, 2.0] # type: ignore[assignment,unused-ignore] assert expected["d1"].dtype == int assert expected["d2"].dtype == float assert_identical(expected, actual) # override dtype actual = full_like(ds, fill_value=True, dtype=bool) expected = ds.copy(deep=True) expected["d1"].values = [True, True, True] # type: ignore[assignment,unused-ignore] expected["d2"].values = [True, True, True] # type: ignore[assignment,unused-ignore] assert expected["d1"].dtype == bool assert expected["d2"].dtype == bool assert_identical(expected, actual) # with multiple fill values actual = full_like(ds, {"d1": 1, "d2": 2.3}) expected = ds.assign(d1=("x", [1, 1, 1]), d2=("y", [2.3, 2.3, 2.3])) assert expected["d1"].dtype == int assert expected["d2"].dtype == float assert_identical(expected, actual) # override multiple dtypes actual = full_like(ds, fill_value={"d1": 1, "d2": 2.3}, dtype={"d1": bool}) expected = ds.assign(d1=("x", [True, True, True]), d2=("y", [2.3, 2.3, 2.3])) assert expected["d1"].dtype == bool assert expected["d2"].dtype == float assert_identical(expected, actual) def test_combine_first(self) -> None: dsx0 = DataArray([0, 0], [("x", ["a", "b"])]).to_dataset(name="dsx0") dsx1 = DataArray([1, 1], [("x", ["b", "c"])]).to_dataset(name="dsx1") actual = dsx0.combine_first(dsx1) expected = Dataset( {"dsx0": ("x", [0, 0, np.nan]), "dsx1": ("x", [np.nan, 1, 1])}, coords={"x": ["a", "b", "c"]}, ) assert_equal(actual, expected) assert_equal(actual, xr.merge([dsx0, dsx1], join="outer")) # works just like xr.merge([self, other]) dsy2 = DataArray([2, 2, 2], [("x", ["b", "c", "d"])]).to_dataset(name="dsy2") actual = dsx0.combine_first(dsy2) expected = xr.merge([dsy2, dsx0], join="outer") assert_equal(actual, expected) def test_sortby(self) -> None: ds = Dataset( { "A": DataArray( [[1, 2], [3, 4], [5, 6]], [("x", ["c", "b", "a"]), ("y", [1, 0])] ), "B": DataArray([[5, 6], [7, 8], [9, 10]], dims=["x", "y"]), } ) 
sorted1d = Dataset( { "A": DataArray( [[5, 6], [3, 4], [1, 2]], [("x", ["a", "b", "c"]), ("y", [1, 0])] ), "B": DataArray([[9, 10], [7, 8], [5, 6]], dims=["x", "y"]), } ) sorted2d = Dataset( { "A": DataArray( [[6, 5], [4, 3], [2, 1]], [("x", ["a", "b", "c"]), ("y", [0, 1])] ), "B": DataArray([[10, 9], [8, 7], [6, 5]], dims=["x", "y"]), } ) expected = sorted1d dax = DataArray([100, 99, 98], [("x", ["c", "b", "a"])]) actual = ds.sortby(dax) assert_equal(actual, expected) # test descending order sort actual = ds.sortby(dax, ascending=False) assert_equal(actual, ds) # test alignment (fills in nan for 'c') dax_short = DataArray([98, 97], [("x", ["b", "a"])]) actual = ds.sortby(dax_short) assert_equal(actual, expected) # test 1-D lexsort # dax0 is sorted first to give indices of [1, 2, 0] # and then dax1 would be used to move index 2 ahead of 1 dax0 = DataArray([100, 95, 95], [("x", ["c", "b", "a"])]) dax1 = DataArray([0, 1, 0], [("x", ["c", "b", "a"])]) actual = ds.sortby([dax0, dax1]) # lexsort underneath gives [2, 1, 0] assert_equal(actual, expected) expected = sorted2d # test multi-dim sort by 1D dataarray values day = DataArray([90, 80], [("y", [1, 0])]) actual = ds.sortby([day, dax]) assert_equal(actual, expected) # test exception-raising with pytest.raises(KeyError): actual = ds.sortby("z") with pytest.raises(ValueError) as excinfo: actual = ds.sortby(ds["A"]) assert "DataArray is not 1-D" in str(excinfo.value) expected = sorted1d actual = ds.sortby("x") assert_equal(actual, expected) # test pandas.MultiIndex indices = (("b", 1), ("b", 0), ("a", 1), ("a", 0)) midx = pd.MultiIndex.from_tuples(indices, names=["one", "two"]) ds_midx = Dataset( { "A": DataArray( [[1, 2], [3, 4], [5, 6], [7, 8]], [("x", midx), ("y", [1, 0])] ), "B": DataArray([[5, 6], [7, 8], [9, 10], [11, 12]], dims=["x", "y"]), } ) actual = ds_midx.sortby("x") midx_reversed = pd.MultiIndex.from_tuples( tuple(reversed(indices)), names=["one", "two"] ) expected = Dataset( { "A": DataArray( [[7, 8], [5, 6], [3, 4], [1, 2]], [("x", midx_reversed), ("y", [1, 0])], ), "B": DataArray([[11, 12], [9, 10], [7, 8], [5, 6]], dims=["x", "y"]), } ) assert_equal(actual, expected) # multi-dim sort by coordinate objects expected = sorted2d actual = ds.sortby(["x", "y"]) assert_equal(actual, expected) # test descending order sort actual = ds.sortby(["x", "y"], ascending=False) assert_equal(actual, ds) def test_attribute_access(self) -> None: ds = create_test_data(seed=1) for key in ["var1", "var2", "var3", "time", "dim1", "dim2", "dim3", "numbers"]: assert_equal(ds[key], getattr(ds, key)) assert key in dir(ds) for key in ["dim3", "dim1", "numbers"]: assert_equal(ds["var3"][key], getattr(ds.var3, key)) assert key in dir(ds["var3"]) # attrs assert ds["var3"].attrs["foo"] == ds.var3.foo assert "foo" in dir(ds["var3"]) def test_ipython_key_completion(self) -> None: ds = create_test_data(seed=1) actual = ds._ipython_key_completions_() expected = ["var1", "var2", "var3", "time", "dim1", "dim2", "dim3", "numbers"] for item in actual: ds[item] # should not raise assert sorted(actual) == sorted(expected) # for dataarray actual = ds["var3"]._ipython_key_completions_() expected = ["dim3", "dim1", "numbers"] for item in actual: ds["var3"][item] # should not raise assert sorted(actual) == sorted(expected) # MultiIndex ds_midx = ds.stack(dim12=["dim2", "dim3"]) actual = ds_midx._ipython_key_completions_() expected = [ "var1", "var2", "var3", "time", "dim1", "dim2", "dim3", "numbers", "dim12", ] for item in actual: ds_midx[item] # should not raise assert 
sorted(actual) == sorted(expected) # coords actual = ds.coords._ipython_key_completions_() expected = ["time", "dim1", "dim2", "dim3", "numbers"] for item in actual: ds.coords[item] # should not raise assert sorted(actual) == sorted(expected) actual = ds["var3"].coords._ipython_key_completions_() expected = ["dim1", "dim3", "numbers"] for item in actual: ds["var3"].coords[item] # should not raise assert sorted(actual) == sorted(expected) coords = Coordinates(ds.coords) actual = coords._ipython_key_completions_() expected = ["time", "dim2", "dim3", "numbers"] for item in actual: coords[item] # should not raise assert sorted(actual) == sorted(expected) # data_vars actual = ds.data_vars._ipython_key_completions_() expected = ["var1", "var2", "var3", "dim1"] for item in actual: ds.data_vars[item] # should not raise assert sorted(actual) == sorted(expected) def test_polyfit_output(self) -> None: ds = create_test_data(seed=1) out = ds.polyfit("dim2", 2, full=False) assert "var1_polyfit_coefficients" in out out = ds.polyfit("dim1", 2, full=True) assert "var1_polyfit_coefficients" in out assert "dim1_matrix_rank" in out out = ds.polyfit("time", 2) assert len(out.data_vars) == 0 def test_polyfit_weighted(self) -> None: ds = create_test_data(seed=1) ds = ds.broadcast_like(ds) # test more than 2 dimensions (issue #9972) ds_copy = ds.copy(deep=True) expected = ds.polyfit("dim2", 2) actual = ds.polyfit("dim2", 2, w=np.ones(ds.sizes["dim2"])) xr.testing.assert_identical(expected, actual) # Make sure weighted polyfit does not change the original object (issue #5644) xr.testing.assert_identical(ds, ds_copy) def test_polyfit_coord(self) -> None: # Make sure polyfit works when given a non-dimension coordinate. ds = create_test_data(seed=1) out = ds.polyfit("numbers", 2, full=False) assert "var3_polyfit_coefficients" in out assert "dim1" in out.dims assert "dim2" not in out assert "dim3" not in out def test_polyfit_coord_output(self) -> None: da = xr.DataArray( [1, 3, 2], dims=["x"], coords=dict(x=["a", "b", "c"], y=("x", [0, 1, 2])) ) out = da.polyfit("y", deg=1)["polyfit_coefficients"] assert out.sel(degree=0).item() == pytest.approx(1.5) assert out.sel(degree=1).item() == pytest.approx(0.5) def test_polyfit_warnings(self) -> None: ds = create_test_data(seed=1) with warnings.catch_warnings(record=True) as ws: ds.var1.polyfit("dim2", 10, full=False) assert len(ws) == 1 assert ws[0].category == RankWarning ds.var1.polyfit("dim2", 10, full=True) assert len(ws) == 1 def test_polyfit_polyval(self) -> None: da = xr.DataArray( np.arange(1, 10).astype(np.float64), dims=["x"], coords=dict(x=np.arange(9)) ) out = da.polyfit("x", 3, full=False) da_fitval = xr.polyval(da.x, out.polyfit_coefficients) # polyval introduces very small errors (1e-16 here) xr.testing.assert_allclose(da_fitval, da) da = da.assign_coords(x=xr.date_range("2001-01-01", periods=9, freq="YS")) out = da.polyfit("x", 3, full=False) da_fitval = xr.polyval(da.x, out.polyfit_coefficients) xr.testing.assert_allclose(da_fitval, da, rtol=1e-3) @requires_cftime def test_polyfit_polyval_cftime(self) -> None: da = xr.DataArray( np.arange(1, 10).astype(np.float64), dims=["x"], coords=dict( x=xr.date_range("2001-01-01", periods=9, freq="YS", calendar="noleap") ), ) out = da.polyfit("x", 3, full=False) da_fitval = xr.polyval(da.x, out.polyfit_coefficients) np.testing.assert_allclose(da_fitval, da) @staticmethod def _test_data_var_interior( original_data_var, padded_data_var, padded_dim_name, expected_pad_values ): np.testing.assert_equal( 
np.unique(padded_data_var.isel({padded_dim_name: [0, -1]})), expected_pad_values, ) np.testing.assert_array_equal( padded_data_var.isel({padded_dim_name: slice(1, -1)}), original_data_var ) @pytest.mark.parametrize("padded_dim_name", ["dim1", "dim2", "dim3", "time"]) @pytest.mark.parametrize( ["constant_values"], [ pytest.param(None, id="default"), pytest.param(42, id="scalar"), pytest.param((42, 43), id="tuple"), pytest.param({"dim1": 42, "dim2": 43}, id="per dim scalar"), pytest.param({"dim1": (42, 43), "dim2": (43, 44)}, id="per dim tuple"), pytest.param({"var1": 42, "var2": (42, 43)}, id="per var"), pytest.param({"var1": 42, "dim1": (42, 43)}, id="mixed"), ], ) def test_pad(self, padded_dim_name, constant_values) -> None: ds = create_test_data(seed=1) padded = ds.pad({padded_dim_name: (1, 1)}, constant_values=constant_values) # test padded dim values and size for ds_dim_name, ds_dim in ds.sizes.items(): if ds_dim_name == padded_dim_name: np.testing.assert_equal(padded.sizes[ds_dim_name], ds_dim + 2) if ds_dim_name in padded.coords: assert padded[ds_dim_name][[0, -1]].isnull().all() else: np.testing.assert_equal(padded.sizes[ds_dim_name], ds_dim) # check if coord "numbers" with dimension dim3 is padded correctly if padded_dim_name == "dim3": assert padded["numbers"][[0, -1]].isnull().all() # warning: passes but dtype changes from int to float np.testing.assert_array_equal(padded["numbers"][1:-1], ds["numbers"]) # test if data_vars are padded with correct values for data_var_name, data_var in padded.data_vars.items(): if padded_dim_name in data_var.dims: if utils.is_dict_like(constant_values): if ( expected := constant_values.get(data_var_name, None) ) is not None or ( expected := constant_values.get(padded_dim_name, None) ) is not None: self._test_data_var_interior( ds[data_var_name], data_var, padded_dim_name, expected ) else: self._test_data_var_interior( ds[data_var_name], data_var, padded_dim_name, 0 ) elif constant_values: self._test_data_var_interior( ds[data_var_name], data_var, padded_dim_name, constant_values ) else: self._test_data_var_interior( ds[data_var_name], data_var, padded_dim_name, np.nan ) else: assert_array_equal(data_var, ds[data_var_name]) @pytest.mark.parametrize( ["keep_attrs", "attrs", "expected"], [ pytest.param(None, {"a": 1, "b": 2}, {"a": 1, "b": 2}, id="default"), pytest.param(False, {"a": 1, "b": 2}, {}, id="False"), pytest.param(True, {"a": 1, "b": 2}, {"a": 1, "b": 2}, id="True"), ], ) def test_pad_keep_attrs(self, keep_attrs, attrs, expected) -> None: ds = xr.Dataset( {"a": ("x", [1, 2], attrs), "b": ("y", [1, 2], attrs)}, coords={"c": ("x", [-1, 1], attrs), "d": ("y", [-1, 1], attrs)}, attrs=attrs, ) expected = xr.Dataset( {"a": ("x", [0, 1, 2, 0], expected), "b": ("y", [1, 2], attrs)}, coords={ "c": ("x", [np.nan, -1, 1, np.nan], expected), "d": ("y", [-1, 1], attrs), }, attrs=expected, ) keep_attrs_ = "default" if keep_attrs is None else keep_attrs with set_options(keep_attrs=keep_attrs_): actual = ds.pad({"x": (1, 1)}, mode="constant", constant_values=0) xr.testing.assert_identical(actual, expected) actual = ds.pad( {"x": (1, 1)}, mode="constant", constant_values=0, keep_attrs=keep_attrs ) xr.testing.assert_identical(actual, expected) def test_astype_attrs(self) -> None: data = create_test_data(seed=123) data.attrs["foo"] = "bar" assert data.attrs == data.astype(float).attrs assert data.var1.attrs == data.astype(float).var1.attrs assert not data.astype(float, keep_attrs=False).attrs assert not data.astype(float, keep_attrs=False).var1.attrs 
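    # ----------------------------------------------------------------------
    # Illustrative sketch (editor's addition, not part of the upstream test
    # suite): a minimal, hedged example of the ``keep_attrs`` behaviour that
    # ``test_astype_attrs`` above exercises. It assumes only this module's
    # existing imports (``xr``); the underscore-prefixed name is hypothetical
    # and keeps pytest from collecting it as a test.
    def _example_astype_keep_attrs_sketch(self) -> None:
        ds = xr.Dataset(
            {"a": ("x", [1, 2, 3], {"units": "m"})}, attrs={"title": "demo"}
        )
        # By default, dataset- and variable-level attrs survive a dtype cast.
        assert ds.astype(float).attrs == {"title": "demo"}
        assert ds.astype(float)["a"].attrs == {"units": "m"}
        # Passing keep_attrs=False drops both.
        assert ds.astype(float, keep_attrs=False).attrs == {}
        assert ds.astype(float, keep_attrs=False)["a"].attrs == {}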
@pytest.mark.parametrize("parser", ["pandas", "python"]) @pytest.mark.parametrize( "engine", ["python", None, pytest.param("numexpr", marks=[requires_numexpr])] ) @pytest.mark.parametrize( "backend", ["numpy", pytest.param("dask", marks=[requires_dask])] ) def test_query(self, backend, engine, parser) -> None: """Test querying a dataset.""" # setup test data np.random.seed(42) a = np.arange(0, 10, 1) b = np.random.randint(0, 100, size=10) c = np.linspace(0, 1, 20) d = np.random.choice(["foo", "bar", "baz"], size=30, replace=True).astype( object ) e = np.arange(0, 10 * 20).reshape(10, 20) f = np.random.normal(0, 1, size=(10, 20, 30)) if backend == "numpy": ds = Dataset( { "a": ("x", a), "b": ("x", b), "c": ("y", c), "d": ("z", d), "e": (("x", "y"), e), "f": (("x", "y", "z"), f), }, coords={ "a2": ("x", a), "b2": ("x", b), "c2": ("y", c), "d2": ("z", d), "e2": (("x", "y"), e), "f2": (("x", "y", "z"), f), }, ) elif backend == "dask": ds = Dataset( { "a": ("x", da.from_array(a, chunks=3)), "b": ("x", da.from_array(b, chunks=3)), "c": ("y", da.from_array(c, chunks=7)), "d": ("z", da.from_array(d, chunks=12)), "e": (("x", "y"), da.from_array(e, chunks=(3, 7))), "f": (("x", "y", "z"), da.from_array(f, chunks=(3, 7, 12))), }, coords={ "a2": ("x", a), "b2": ("x", b), "c2": ("y", c), "d2": ("z", d), "e2": (("x", "y"), e), "f2": (("x", "y", "z"), f), }, ) # query single dim, single variable with raise_if_dask_computes(): actual = ds.query(x="a2 > 5", engine=engine, parser=parser) expect = ds.isel(x=(a > 5)) assert_identical(expect, actual) # query single dim, single variable, via dict with raise_if_dask_computes(): actual = ds.query(dict(x="a2 > 5"), engine=engine, parser=parser) expect = ds.isel(dict(x=(a > 5))) assert_identical(expect, actual) # query single dim, single variable with raise_if_dask_computes(): actual = ds.query(x="b2 > 50", engine=engine, parser=parser) expect = ds.isel(x=(b > 50)) assert_identical(expect, actual) # query single dim, single variable with raise_if_dask_computes(): actual = ds.query(y="c2 < .5", engine=engine, parser=parser) expect = ds.isel(y=(c < 0.5)) assert_identical(expect, actual) # query single dim, single string variable if parser == "pandas": # N.B., this query currently only works with the pandas parser # xref https://github.com/pandas-dev/pandas/issues/40436 with raise_if_dask_computes(): actual = ds.query(z='d2 == "bar"', engine=engine, parser=parser) expect = ds.isel(z=(d == "bar")) assert_identical(expect, actual) # query single dim, multiple variables with raise_if_dask_computes(): actual = ds.query(x="(a2 > 5) & (b2 > 50)", engine=engine, parser=parser) expect = ds.isel(x=((a > 5) & (b > 50))) assert_identical(expect, actual) # query single dim, multiple variables with computation with raise_if_dask_computes(): actual = ds.query(x="(a2 * b2) > 250", engine=engine, parser=parser) expect = ds.isel(x=(a * b) > 250) assert_identical(expect, actual) # check pandas query syntax is supported if parser == "pandas": with raise_if_dask_computes(): actual = ds.query( x="(a2 > 5) and (b2 > 50)", engine=engine, parser=parser ) expect = ds.isel(x=((a > 5) & (b > 50))) assert_identical(expect, actual) # query multiple dims via kwargs with raise_if_dask_computes(): actual = ds.query(x="a2 > 5", y="c2 < .5", engine=engine, parser=parser) expect = ds.isel(x=(a > 5), y=(c < 0.5)) assert_identical(expect, actual) # query multiple dims via kwargs if parser == "pandas": with raise_if_dask_computes(): actual = ds.query( x="a2 > 5", y="c2 < .5", z="d2 == 'bar'", 
engine=engine, parser=parser, ) expect = ds.isel(x=(a > 5), y=(c < 0.5), z=(d == "bar")) assert_identical(expect, actual) # query multiple dims via dict with raise_if_dask_computes(): actual = ds.query( dict(x="a2 > 5", y="c2 < .5"), engine=engine, parser=parser ) expect = ds.isel(dict(x=(a > 5), y=(c < 0.5))) assert_identical(expect, actual) # query multiple dims via dict if parser == "pandas": with raise_if_dask_computes(): actual = ds.query( dict(x="a2 > 5", y="c2 < .5", z="d2 == 'bar'"), engine=engine, parser=parser, ) expect = ds.isel(dict(x=(a > 5), y=(c < 0.5), z=(d == "bar"))) assert_identical(expect, actual) # test error handling with pytest.raises(ValueError): ds.query("a > 5") # type: ignore[arg-type] # must be dict or kwargs with pytest.raises(ValueError): ds.query(x=(a > 5)) with pytest.raises(IndexError): ds.query(y="a > 5") # wrong length dimension with pytest.raises(IndexError): ds.query(x="c < .5") # wrong length dimension with pytest.raises(IndexError): ds.query(x="e > 100") # wrong number of dimensions with pytest.raises(UndefinedVariableError): ds.query(x="spam > 50") # name not present # pytest tests — new tests should go here, rather than in the class. @pytest.mark.parametrize("test_elements", ([1, 2], np.array([1, 2]), DataArray([1, 2]))) def test_isin(test_elements, backend) -> None: expected = Dataset( data_vars={ "var1": (("dim1",), [0, 1]), "var2": (("dim1",), [1, 1]), "var3": (("dim1",), [0, 1]), } ).astype("bool") if backend == "dask": expected = expected.chunk() result = Dataset( data_vars={ "var1": (("dim1",), [0, 1]), "var2": (("dim1",), [1, 2]), "var3": (("dim1",), [0, 1]), } ).isin(test_elements) assert_equal(result, expected) def test_isin_dataset() -> None: ds = Dataset({"x": [1, 2]}) with pytest.raises(TypeError): ds.isin(ds) @pytest.mark.parametrize( "unaligned_coords", ( {"x": [2, 1, 0]}, {"x": (["x"], np.asarray([2, 1, 0]))}, {"x": (["x"], np.asarray([1, 2, 0]))}, {"x": pd.Index([2, 1, 0])}, {"x": Variable(dims="x", data=[0, 2, 1])}, {"x": IndexVariable(dims="x", data=[0, 1, 2])}, {"y": 42}, {"y": ("x", [2, 1, 0])}, {"y": ("x", np.asarray([2, 1, 0]))}, {"y": (["x"], np.asarray([2, 1, 0]))}, ), ) @pytest.mark.parametrize("coords", ({"x": ("x", [0, 1, 2])}, {"x": [0, 1, 2]})) def test_dataset_constructor_aligns_to_explicit_coords( unaligned_coords, coords ) -> None: a = xr.DataArray([1, 2, 3], dims=["x"], coords=unaligned_coords) expected = xr.Dataset(coords=coords) expected["a"] = a result = xr.Dataset({"a": a}, coords=coords) assert_equal(expected, result) def test_error_message_on_set_supplied() -> None: with pytest.raises(TypeError, match="has invalid type "): xr.Dataset(dict(date=[1, 2, 3], sec={4})) @pytest.mark.parametrize("unaligned_coords", ({"y": ("b", np.asarray([2, 1, 0]))},)) def test_constructor_raises_with_invalid_coords(unaligned_coords) -> None: with pytest.raises(ValueError, match="not a subset of the DataArray dimensions"): xr.DataArray([1, 2, 3], dims=["x"], coords=unaligned_coords) @pytest.mark.parametrize("ds", [3], indirect=True) def test_dir_expected_attrs(ds) -> None: some_expected_attrs = {"pipe", "mean", "isnull", "var1", "dim2", "numbers"} result = dir(ds) assert set(result) >= some_expected_attrs def test_dir_non_string(ds) -> None: # add a numbered key to ensure this doesn't break dir ds[5] = "foo" result = dir(ds) assert 5 not in result # GH2172 sample_data = np.random.uniform(size=[2, 2000, 10000]) x = xr.Dataset({"sample_data": (sample_data.shape, sample_data)}) x2 = x["sample_data"] dir(x2) def test_dir_unicode(ds) -> 
None: ds["unicode"] = "uni" result = dir(ds) assert "unicode" in result def test_raise_no_warning_for_nan_in_binary_ops() -> None: with assert_no_warnings(): _ = Dataset(data_vars={"x": ("y", [1, 2, np.nan])}) > 0 @pytest.mark.filterwarnings("error") @pytest.mark.parametrize("ds", (2,), indirect=True) def test_raise_no_warning_assert_close(ds) -> None: assert_allclose(ds, ds) @pytest.mark.parametrize("dask", [True, False]) @pytest.mark.parametrize("edge_order", [1, 2]) def test_differentiate(dask, edge_order) -> None: rs = np.random.default_rng(42) coord = [0.2, 0.35, 0.4, 0.6, 0.7, 0.75, 0.76, 0.8] da = xr.DataArray( rs.random((8, 6)), dims=["x", "y"], coords={"x": coord, "z": 3, "x2d": (("x", "y"), rs.random((8, 6)))}, ) if dask and has_dask: da = da.chunk({"x": 4}) ds = xr.Dataset({"var": da}) # along x actual = da.differentiate("x", edge_order) expected_x = xr.DataArray( np.gradient(da, da["x"], axis=0, edge_order=edge_order), dims=da.dims, coords=da.coords, ) assert_equal(expected_x, actual) assert_equal( ds["var"].differentiate("x", edge_order=edge_order), ds.differentiate("x", edge_order=edge_order)["var"], ) # coordinate should not change assert_equal(da["x"], actual["x"]) # along y actual = da.differentiate("y", edge_order) expected_y = xr.DataArray( np.gradient(da, da["y"], axis=1, edge_order=edge_order), dims=da.dims, coords=da.coords, ) assert_equal(expected_y, actual) assert_equal(actual, ds.differentiate("y", edge_order=edge_order)["var"]) assert_equal( ds["var"].differentiate("y", edge_order=edge_order), ds.differentiate("y", edge_order=edge_order)["var"], ) with pytest.raises(ValueError): da.differentiate("x2d") @pytest.mark.parametrize("dask", [True, False]) def test_differentiate_datetime(dask) -> None: rs = np.random.default_rng(42) coord = np.array( [ "2004-07-13", "2006-01-13", "2010-08-13", "2010-09-13", "2010-10-11", "2010-12-13", "2011-02-13", "2012-08-13", ], dtype="datetime64", ) da = xr.DataArray( rs.random((8, 6)), dims=["x", "y"], coords={"x": coord, "z": 3, "x2d": (("x", "y"), rs.random((8, 6)))}, ) if dask and has_dask: da = da.chunk({"x": 4}) # along x actual = da.differentiate("x", edge_order=1, datetime_unit="D") expected_x = xr.DataArray( np.gradient( da, da["x"].variable._to_numeric(datetime_unit="D"), axis=0, edge_order=1 ), dims=da.dims, coords=da.coords, ) assert_equal(expected_x, actual) actual2 = da.differentiate("x", edge_order=1, datetime_unit="h") assert np.allclose(actual, actual2 * 24) # for datetime variable actual = da["x"].differentiate("x", edge_order=1, datetime_unit="D") assert np.allclose(actual, 1.0) # with different date unit da = xr.DataArray(coord.astype("datetime64[ms]"), dims=["x"], coords={"x": coord}) actual = da.differentiate("x", edge_order=1) assert np.allclose(actual, 1.0) @requires_cftime @pytest.mark.parametrize("dask", [True, False]) def test_differentiate_cftime(dask) -> None: rs = np.random.default_rng(42) coord = xr.date_range("2000", periods=8, freq="2ME", use_cftime=True) da = xr.DataArray( rs.random((8, 6)), coords={"time": coord, "z": 3, "t2d": (("time", "y"), rs.random((8, 6)))}, dims=["time", "y"], ) if dask and has_dask: da = da.chunk({"time": 4}) actual = da.differentiate("time", edge_order=1, datetime_unit="D") expected_data = np.gradient( da, da["time"].variable._to_numeric(datetime_unit="D"), axis=0, edge_order=1 ) expected = xr.DataArray(expected_data, coords=da.coords, dims=da.dims) assert_equal(expected, actual) actual2 = da.differentiate("time", edge_order=1, datetime_unit="h") assert_allclose(actual, 
actual2 * 24) # Test the differentiation of datetimes themselves actual = da["time"].differentiate("time", edge_order=1, datetime_unit="D") assert_allclose(actual, xr.ones_like(da["time"]).astype(float)) @pytest.mark.parametrize("dask", [True, False]) def test_integrate(dask) -> None: rs = np.random.default_rng(42) coord = [0.2, 0.35, 0.4, 0.6, 0.7, 0.75, 0.76, 0.8] da = xr.DataArray( rs.random((8, 6)), dims=["x", "y"], coords={ "x": coord, "x2": (("x",), rs.random(8)), "z": 3, "x2d": (("x", "y"), rs.random((8, 6))), }, ) if dask and has_dask: da = da.chunk({"x": 4}) ds = xr.Dataset({"var": da}) # along x actual = da.integrate("x") # coordinate that contains x should be dropped. expected_x = xr.DataArray( trapezoid(da.compute(), da["x"], axis=0), dims=["y"], coords={k: v for k, v in da.coords.items() if "x" not in v.dims}, ) assert_allclose(expected_x, actual.compute()) assert_equal(ds["var"].integrate("x"), ds.integrate("x")["var"]) # make sure result is also a dask array (if the source is dask array) assert isinstance(actual.data, type(da.data)) # along y actual = da.integrate("y") expected_y = xr.DataArray( trapezoid(da, da["y"], axis=1), dims=["x"], coords={k: v for k, v in da.coords.items() if "y" not in v.dims}, ) assert_allclose(expected_y, actual.compute()) assert_equal(actual, ds.integrate("y")["var"]) assert_equal(ds["var"].integrate("y"), ds.integrate("y")["var"]) # along x and y actual = da.integrate(("y", "x")) assert actual.ndim == 0 with pytest.raises(ValueError): da.integrate("x2d") @requires_scipy @pytest.mark.parametrize("dask", [True, False]) def test_cumulative_integrate(dask) -> None: rs = np.random.default_rng(43) coord = [0.2, 0.35, 0.4, 0.6, 0.7, 0.75, 0.76, 0.8] da = xr.DataArray( rs.random((8, 6)), dims=["x", "y"], coords={ "x": coord, "x2": (("x",), rs.random(8)), "z": 3, "x2d": (("x", "y"), rs.random((8, 6))), }, ) if dask and has_dask: da = da.chunk({"x": 4}) ds = xr.Dataset({"var": da}) # along x actual = da.cumulative_integrate("x") from scipy.integrate import cumulative_trapezoid expected_x = xr.DataArray( cumulative_trapezoid(da.compute(), da["x"], axis=0, initial=0.0), # type: ignore[call-overload,unused-ignore] dims=["x", "y"], coords=da.coords, ) assert_allclose(expected_x, actual.compute()) assert_equal( ds["var"].cumulative_integrate("x"), ds.cumulative_integrate("x")["var"], ) # make sure result is also a dask array (if the source is dask array) assert isinstance(actual.data, type(da.data)) # along y actual = da.cumulative_integrate("y") expected_y = xr.DataArray( cumulative_trapezoid(da, da["y"], axis=1, initial=0.0), # type: ignore[call-overload,unused-ignore] dims=["x", "y"], coords=da.coords, ) assert_allclose(expected_y, actual.compute()) assert_equal(actual, ds.cumulative_integrate("y")["var"]) assert_equal( ds["var"].cumulative_integrate("y"), ds.cumulative_integrate("y")["var"], ) # along x and y actual = da.cumulative_integrate(("y", "x")) assert actual.ndim == 2 with pytest.raises(ValueError): da.cumulative_integrate("x2d") @pytest.mark.parametrize("dask", [True, False]) @pytest.mark.parametrize("which_datetime", ["np", "cftime"]) def test_trapezoid_datetime(dask, which_datetime) -> None: rs = np.random.default_rng(42) coord: ArrayLike if which_datetime == "np": coord = np.array( [ "2004-07-13", "2006-01-13", "2010-08-13", "2010-09-13", "2010-10-11", "2010-12-13", "2011-02-13", "2012-08-13", ], dtype="datetime64", ) else: if not has_cftime: pytest.skip("Test requires cftime.") coord = xr.date_range("2000", periods=8, freq="2D", 
use_cftime=True) da = xr.DataArray( rs.random((8, 6)), coords={"time": coord, "z": 3, "t2d": (("time", "y"), rs.random((8, 6)))}, dims=["time", "y"], ) if dask and has_dask: da = da.chunk({"time": 4}) actual = da.integrate("time", datetime_unit="D") expected_data = trapezoid( da.compute().data, duck_array_ops.datetime_to_numeric(da["time"].data, datetime_unit="D"), axis=0, ) expected = xr.DataArray( expected_data, dims=["y"], coords={k: v for k, v in da.coords.items() if "time" not in v.dims}, ) assert_allclose(expected, actual.compute()) # make sure result is also a dask array (if the source is dask array) assert isinstance(actual.data, type(da.data)) actual2 = da.integrate("time", datetime_unit="h") assert_allclose(actual, actual2 / 24.0) def test_no_dict() -> None: d = Dataset() with pytest.raises(AttributeError): _ = d.__dict__ def test_subclass_slots() -> None: """Test that Dataset subclasses must explicitly define ``__slots__``. .. note:: As of 0.13.0, this is actually mitigated into a FutureWarning for any class defined outside of the xarray package. """ with pytest.raises(AttributeError) as e: class MyDS(Dataset): pass assert str(e.value) == "MyDS must explicitly define __slots__" def test_weakref() -> None: """Classes with __slots__ are incompatible with the weakref module unless they explicitly state __weakref__ among their slots """ from weakref import ref ds = Dataset() r = ref(ds) assert r() is ds def test_deepcopy_obj_array() -> None: x0 = Dataset(dict(foo=DataArray(np.array([object()])))) x1 = deepcopy(x0) assert x0["foo"].values[0] is not x1["foo"].values[0] def test_deepcopy_recursive() -> None: # GH:issue:7111 # direct recursion ds = xr.Dataset({"a": (["x"], [1, 2])}) ds.attrs["other"] = ds # TODO: cannot use assert_identical on recursive Vars yet... # lets just ensure that deep copy works without RecursionError ds.copy(deep=True) # indirect recursion ds2 = xr.Dataset({"b": (["y"], [3, 4])}) ds.attrs["other"] = ds2 ds2.attrs["other"] = ds # TODO: cannot use assert_identical on recursive Vars yet... # lets just ensure that deep copy works without RecursionError ds.copy(deep=True) ds2.copy(deep=True) def test_clip(ds) -> None: result = ds.clip(min=0.5) assert all((result.min(...) >= 0.5).values()) result = ds.clip(max=0.5) assert all((result.max(...) <= 0.5).values()) result = ds.clip(min=0.25, max=0.75) assert all((result.min(...) >= 0.25).values()) assert all((result.max(...) 
<= 0.75).values()) result = ds.clip(min=ds.mean("y"), max=ds.mean("y")) assert result.sizes == ds.sizes class TestDropDuplicates: @pytest.mark.parametrize("keep", ["first", "last", False]) def test_drop_duplicates_1d(self, keep) -> None: ds = xr.Dataset( {"a": ("time", [0, 5, 6, 7]), "b": ("time", [9, 3, 8, 2])}, coords={"time": [0, 0, 1, 2]}, ) if keep == "first": a = [0, 6, 7] b = [9, 8, 2] time = [0, 1, 2] elif keep == "last": a = [5, 6, 7] b = [3, 8, 2] time = [0, 1, 2] else: a = [6, 7] b = [8, 2] time = [1, 2] expected = xr.Dataset( {"a": ("time", a), "b": ("time", b)}, coords={"time": time} ) result = ds.drop_duplicates("time", keep=keep) assert_equal(expected, result) with pytest.raises( ValueError, match=re.escape( "Dimensions ('space',) not found in data dimensions ('time',)" ), ): ds.drop_duplicates("space", keep=keep) class TestNumpyCoercion: def test_from_numpy(self) -> None: ds = xr.Dataset({"a": ("x", [1, 2, 3])}, coords={"lat": ("x", [4, 5, 6])}) assert_identical(ds.as_numpy(), ds) @requires_dask def test_from_dask(self) -> None: ds = xr.Dataset({"a": ("x", [1, 2, 3])}, coords={"lat": ("x", [4, 5, 6])}) ds_chunked = ds.chunk(1) assert_identical(ds_chunked.as_numpy(), ds.compute()) @requires_pint def test_from_pint(self) -> None: from pint import Quantity arr = np.array([1, 2, 3]) ds = xr.Dataset( {"a": ("x", Quantity(arr, units="Pa"))}, coords={"lat": ("x", Quantity(arr + 3, units="m"))}, ) expected = xr.Dataset({"a": ("x", [1, 2, 3])}, coords={"lat": ("x", arr + 3)}) assert_identical(ds.as_numpy(), expected) @requires_sparse def test_from_sparse(self) -> None: import sparse arr = np.diagflat([1, 2, 3]) sparr = sparse.COO.from_numpy(arr) ds = xr.Dataset( {"a": (["x", "y"], sparr)}, coords={"elev": (("x", "y"), sparr + 3)} ) expected = xr.Dataset( {"a": (["x", "y"], arr)}, coords={"elev": (("x", "y"), arr + 3)} ) assert_identical(ds.as_numpy(), expected) @requires_cupy def test_from_cupy(self) -> None: import cupy as cp arr = np.array([1, 2, 3]) ds = xr.Dataset( {"a": ("x", cp.array(arr))}, coords={"lat": ("x", cp.array(arr + 3))} ) expected = xr.Dataset({"a": ("x", [1, 2, 3])}, coords={"lat": ("x", arr + 3)}) assert_identical(ds.as_numpy(), expected) @requires_dask @requires_pint def test_from_pint_wrapping_dask(self) -> None: import dask from pint import Quantity arr = np.array([1, 2, 3]) d = dask.array.from_array(arr) ds = xr.Dataset( {"a": ("x", Quantity(d, units="Pa"))}, coords={"lat": ("x", Quantity(d, units="m") * 2)}, ) result = ds.as_numpy() expected = xr.Dataset({"a": ("x", arr)}, coords={"lat": ("x", arr * 2)}) assert_identical(result, expected) def test_string_keys_typing() -> None: """Tests that string keys to `variables` are permitted by mypy""" da = xr.DataArray(np.arange(10), dims=["x"]) ds = xr.Dataset(dict(x=da)) mapping = {"y": da} ds.assign(variables=mapping) def test_transpose_error() -> None: # Transpose dataset with list as argument # Should raise error ds = xr.Dataset({"foo": (("x", "y"), [[21]]), "bar": (("x", "y"), [[12]])}) with pytest.raises( TypeError, match=re.escape( "transpose requires dim to be passed as multiple arguments. Expected `'y', 'x'`. 
Received `['y', 'x']` instead" ), ): ds.transpose(["y", "x"]) # type: ignore[arg-type] python-xarray-2026.01.0/xarray/tests/test_formatting.py0000664000175000017500000012070415136607163023302 0ustar alastairalastairfrom __future__ import annotations import sys from textwrap import dedent import numpy as np import pandas as pd import pytest import xarray as xr from xarray.core import formatting from xarray.core.indexes import Index from xarray.tests import has_pandas_3, requires_cftime, requires_dask, requires_netCDF4 class CustomIndex(Index): names: tuple[str, ...] def __init__(self, names: tuple[str, ...]): self.names = names def __repr__(self): return f"CustomIndex(coords={self.names})" class TestFormatting: def test_get_indexer_at_least_n_items(self) -> None: cases = [ ((20,), (slice(10),), (slice(-10, None),)), ((3, 20), (0, slice(10)), (-1, slice(-10, None))), ((2, 10), (0, slice(10)), (-1, slice(-10, None))), ((2, 5), (slice(2), slice(None)), (slice(-2, None), slice(None))), ((1, 2, 5), (0, slice(2), slice(None)), (-1, slice(-2, None), slice(None))), ((2, 3, 5), (0, slice(2), slice(None)), (-1, slice(-2, None), slice(None))), ( (1, 10, 1), (0, slice(10), slice(None)), (-1, slice(-10, None), slice(None)), ), ( (2, 5, 1), (slice(2), slice(None), slice(None)), (slice(-2, None), slice(None), slice(None)), ), ((2, 5, 3), (0, slice(4), slice(None)), (-1, slice(-4, None), slice(None))), ( (2, 3, 3), (slice(2), slice(None), slice(None)), (slice(-2, None), slice(None), slice(None)), ), ] for shape, start_expected, end_expected in cases: actual = formatting._get_indexer_at_least_n_items(shape, 10, from_end=False) assert start_expected == actual actual = formatting._get_indexer_at_least_n_items(shape, 10, from_end=True) assert end_expected == actual def test_first_n_items(self) -> None: array = np.arange(100).reshape(10, 5, 2) for n in [3, 10, 13, 100, 200]: actual = formatting.first_n_items(array, n) expected = array.flat[:n] assert (expected == actual).all() with pytest.raises(ValueError, match=r"at least one item"): formatting.first_n_items(array, 0) def test_last_n_items(self) -> None: array = np.arange(100).reshape(10, 5, 2) for n in [3, 10, 13, 100, 200]: actual = formatting.last_n_items(array, n) expected = array.flat[-n:] assert (expected == actual).all() with pytest.raises(ValueError, match=r"at least one item"): formatting.first_n_items(array, 0) def test_last_item(self) -> None: array = np.arange(100) reshape = ((10, 10), (1, 100), (2, 2, 5, 5)) expected = np.array([99]) for r in reshape: result = formatting.last_item(array.reshape(r)) assert result == expected def test_format_item(self) -> None: cases = [ (pd.Timestamp("2000-01-01T12"), "2000-01-01T12:00:00"), (pd.Timestamp("2000-01-01"), "2000-01-01"), (pd.Timestamp("NaT"), "NaT"), (pd.Timedelta("10 days 1 hour"), "10 days 01:00:00"), (pd.Timedelta("-3 days"), "-3 days +00:00:00"), (pd.Timedelta("3 hours"), "0 days 03:00:00"), (pd.Timedelta("NaT"), "NaT"), ("foo", "'foo'"), (b"foo", "b'foo'"), (1, "1"), (1.0, "1.0"), (np.float16(1.1234), "1.123"), (np.float32(1.0111111), "1.011"), (np.float64(22.222222), "22.22"), (np.zeros((1, 1)), "[[0.]]"), (np.zeros(2), "[0. 0.]"), (np.zeros((2, 2)), "[[0. 0.]\n [0. 
0.]]"), ] for item, expected in cases: actual = formatting.format_item(item) assert expected == actual def test_format_items(self) -> None: cases = [ (np.arange(4) * np.timedelta64(1, "D"), "0 days 1 days 2 days 3 days"), ( np.arange(4) * np.timedelta64(3, "h"), "00:00:00 03:00:00 06:00:00 09:00:00", ), ( np.arange(4) * np.timedelta64(500, "ms"), "00:00:00 00:00:00.500000 00:00:01 00:00:01.500000", ), (pd.to_timedelta(["NaT", "0s", "1s", "NaT"]), "NaT 00:00:00 00:00:01 NaT"), # type: ignore[arg-type, unused-ignore] ( pd.to_timedelta(["1 day 1 hour", "1 day", "0 hours"]), # type: ignore[arg-type, unused-ignore] "1 days 01:00:00 1 days 00:00:00 0 days 00:00:00", ), ([1, 2, 3], "1 2 3"), ] for item, expected in cases: actual = " ".join(formatting.format_items(item)) assert expected == actual def test_format_array_flat(self) -> None: actual = formatting.format_array_flat(np.arange(100), 2) expected = "..." assert expected == actual actual = formatting.format_array_flat(np.arange(100), 9) expected = "0 ... 99" assert expected == actual actual = formatting.format_array_flat(np.arange(100), 10) expected = "0 1 ... 99" assert expected == actual actual = formatting.format_array_flat(np.arange(100), 13) expected = "0 1 ... 98 99" assert expected == actual actual = formatting.format_array_flat(np.arange(100), 15) expected = "0 1 2 ... 98 99" assert expected == actual # NB: Probably not ideal; an alternative would be cutting after the # first ellipsis actual = formatting.format_array_flat(np.arange(100.0), 11) expected = "0.0 ... ..." assert expected == actual actual = formatting.format_array_flat(np.arange(100.0), 12) expected = "0.0 ... 99.0" assert expected == actual actual = formatting.format_array_flat(np.arange(3), 5) expected = "0 1 2" assert expected == actual actual = formatting.format_array_flat(np.arange(4.0), 11) expected = "0.0 ... 3.0" assert expected == actual actual = formatting.format_array_flat(np.arange(0), 0) expected = "" assert expected == actual actual = formatting.format_array_flat(np.arange(1), 1) expected = "0" assert expected == actual actual = formatting.format_array_flat(np.arange(2), 3) expected = "0 1" assert expected == actual actual = formatting.format_array_flat(np.arange(4), 7) expected = "0 1 2 3" assert expected == actual actual = formatting.format_array_flat(np.arange(5), 7) expected = "0 ... 4" assert expected == actual long_str = [" ".join(["hello world" for _ in range(100)])] actual = formatting.format_array_flat(np.asarray([long_str]), 21) expected = "'hello world hello..." assert expected == actual def test_pretty_print(self) -> None: assert formatting.pretty_print("abcdefghij", 8) == "abcde..." 
assert formatting.pretty_print("ß", 1) == "ß" def test_maybe_truncate(self) -> None: assert formatting.maybe_truncate("ß", 10) == "ß" def test_format_timestamp_invalid_pandas_format(self) -> None: expected = "2021-12-06 17:00:00 00" with pytest.raises(ValueError): formatting.format_timestamp(expected) def test_format_timestamp_out_of_bounds(self) -> None: from datetime import datetime date = datetime(1300, 12, 1) expected = "1300-12-01" result = formatting.format_timestamp(date) assert result == expected date = datetime(2300, 12, 1) expected = "2300-12-01" result = formatting.format_timestamp(date) assert result == expected def test_attribute_repr(self) -> None: short = formatting.summarize_attr("key", "Short string") long = formatting.summarize_attr("key", 100 * "Very long string ") newlines = formatting.summarize_attr("key", "\n\n\n") tabs = formatting.summarize_attr("key", "\t\t\t") assert short == " key: Short string" assert len(long) <= 80 assert long.endswith("...") assert "\n" not in newlines assert "\t" not in tabs def test_index_repr(self) -> None: coord_names = ("x", "y") index = CustomIndex(coord_names) names = ("x",) normal = formatting.summarize_index(names, index, col_width=20) assert names[0] in normal assert len(normal.splitlines()) == len(names) assert "CustomIndex" in normal class IndexWithInlineRepr(CustomIndex): def _repr_inline_(self, max_width: int): return f"CustomIndex[{', '.join(self.names)}]" index = IndexWithInlineRepr(coord_names) inline = formatting.summarize_index(names, index, col_width=20) assert names[0] in inline assert index._repr_inline_(max_width=40) in inline @pytest.mark.parametrize( "names", ( ("x",), ("x", "y"), ("x", "y", "z"), ("x", "y", "z", "a"), ), ) def test_index_repr_grouping(self, names) -> None: index = CustomIndex(names) normal = formatting.summarize_index(names, index, col_width=20) assert all(name in normal for name in names) assert len(normal.splitlines()) == len(names) assert "CustomIndex" in normal hint_chars = [line[2] for line in normal.splitlines()] if len(names) <= 1: assert hint_chars == [" "] else: assert hint_chars[0] == "┌" and hint_chars[-1] == "└" assert len(names) == 2 or hint_chars[1:-1] == ["│"] * (len(names) - 2) def test_diff_array_repr(self) -> None: da_a = xr.DataArray( np.array([[1, 2, 3], [4, 5, 6]], dtype="int64"), dims=("x", "y"), coords={ "x": np.array(["a", "b"], dtype="U1"), "y": np.array([1, 2, 3], dtype="int64"), }, attrs={"units": "m", "description": "desc"}, ) da_b = xr.DataArray( np.array([1, 2], dtype="int64"), dims="x", coords={ "x": np.array(["a", "c"], dtype="U1"), "label": ("x", np.array([1, 2], dtype="int64")), }, attrs={"units": "kg"}, ) byteorder = "<" if sys.byteorder == "little" else ">" str_dtype = "str" if has_pandas_3 else "object" expected = dedent( f"""\ Left and right DataArray objects are not identical Differing dimensions: (x: 2, y: 3) != (x: 2) Differing values: L array([[1, 2, 3], [4, 5, 6]], dtype=int64) R array([1, 2], dtype=int64) Differing coordinates: L * x (x) {byteorder}U1 8B 'a' 'b' R * x (x) {byteorder}U1 8B 'a' 'c' Coordinates only on the left object: * y (y) int64 24B 1 2 3 Coordinates only on the right object: label (x) int64 16B 1 2 Indexes only on the left object: ['y'] Differing indexes: L x Index(['a', 'b'], dtype='{str_dtype}', name='x') R x Index(['a', 'c'], dtype='{str_dtype}', name='x') Differing attributes: L units: m R units: kg Attributes only on the left object: description: desc""" ) actual = formatting.diff_array_repr(da_a, da_b, "identical") try: assert 
actual == expected except AssertionError: # depending on platform, dtype may not be shown in numpy array repr assert actual == expected.replace(", dtype=int64", "") da_a = xr.DataArray( np.array([[1, 2, 3], [4, 5, 6]], dtype="int8"), dims=("x", "y"), coords=xr.Coordinates( { "x": np.array([True, False], dtype="bool"), "y": np.array([1, 2, 3], dtype="int16"), }, indexes={"y": CustomIndex(("y",))}, ), ) da_b = xr.DataArray( np.array([1, 2], dtype="int8"), dims="x", coords=xr.Coordinates( { "x": np.array([True, False], dtype="bool"), "label": ("x", np.array([1, 2], dtype="int16")), }, indexes={"label": CustomIndex(("label",))}, ), ) expected = dedent( """\ Left and right DataArray objects are not equal Differing dimensions: (x: 2, y: 3) != (x: 2) Differing values: L array([[1, 2, 3], [4, 5, 6]], dtype=int8) R array([1, 2], dtype=int8) Coordinates only on the left object: * y (y) int16 6B 1 2 3 Coordinates only on the right object: * label (x) int16 4B 1 2 """.rstrip() ) actual = formatting.diff_array_repr(da_a, da_b, "equals") assert actual == expected va = xr.Variable( "x", np.array([1, 2, 3], dtype="int64"), {"title": "test Variable"} ) vb = xr.Variable(("x", "y"), np.array([[1, 2, 3], [4, 5, 6]], dtype="int64")) expected = dedent( """\ Left and right Variable objects are not equal Differing dimensions: (x: 3) != (x: 2, y: 3) Differing values: L array([1, 2, 3], dtype=int64) R array([[1, 2, 3], [4, 5, 6]], dtype=int64)""" ) actual = formatting.diff_array_repr(va, vb, "equals") try: assert actual == expected except AssertionError: assert actual == expected.replace(", dtype=int64", "") @pytest.mark.filterwarnings("error") def test_diff_attrs_repr_with_array(self) -> None: attrs_a = {"attr": np.array([0, 1])} attrs_b = {"attr": 1} expected = dedent( """\ Differing attributes: L attr: [0 1] R attr: 1 """ ).strip() actual = formatting.diff_attrs_repr(attrs_a, attrs_b, "equals") assert expected == actual attrs_c = {"attr": np.array([-3, 5])} expected = dedent( """\ Differing attributes: L attr: [0 1] R attr: [-3 5] """ ).strip() actual = formatting.diff_attrs_repr(attrs_a, attrs_c, "equals") assert expected == actual # should not raise a warning attrs_c = {"attr": np.array([0, 1, 2])} expected = dedent( """\ Differing attributes: L attr: [0 1] R attr: [0 1 2] """ ).strip() actual = formatting.diff_attrs_repr(attrs_a, attrs_c, "equals") assert expected == actual def test__diff_mapping_repr_array_attrs_on_variables(self) -> None: a = { "a": xr.DataArray( dims="x", data=np.array([1], dtype="int16"), attrs={"b": np.array([1, 2], dtype="int8")}, ) } b = { "a": xr.DataArray( dims="x", data=np.array([1], dtype="int16"), attrs={"b": np.array([2, 3], dtype="int8")}, ) } actual = formatting.diff_data_vars_repr(a, b, compat="identical", col_width=8) expected = dedent( """\ Differing data variables: L a (x) int16 2B 1 Differing variable attributes: b: [1 2] R a (x) int16 2B 1 Differing variable attributes: b: [2 3] """.rstrip() ) assert actual == expected def test_diff_dataset_repr(self) -> None: ds_a = xr.Dataset( data_vars={ "var1": (("x", "y"), np.array([[1, 2, 3], [4, 5, 6]], dtype="int64")), "var2": ("x", np.array([3, 4], dtype="int64")), }, coords={ "x": ( "x", np.array(["a", "b"], dtype="U1"), {"foo": "bar", "same": "same"}, ), "y": np.array([1, 2, 3], dtype="int64"), }, attrs={"title": "mytitle", "description": "desc"}, ) ds_b = xr.Dataset( data_vars={"var1": ("x", np.array([1, 2], dtype="int64"))}, coords={ "x": ( "x", np.array(["a", "c"], dtype="U1"), {"source": 0, "foo": "baz", "same": "same"}, ), 
"label": ("x", np.array([1, 2], dtype="int64")), }, attrs={"title": "newtitle"}, ) byteorder = "<" if sys.byteorder == "little" else ">" str_dtype = "str" if has_pandas_3 else "object" expected = dedent( f"""\ Left and right Dataset objects are not identical Differing dimensions: (x: 2, y: 3) != (x: 2) Differing coordinates: L * x (x) {byteorder}U1 8B 'a' 'b' Differing variable attributes: foo: bar R * x (x) {byteorder}U1 8B 'a' 'c' Differing variable attributes: source: 0 foo: baz Coordinates only on the left object: * y (y) int64 24B 1 2 3 Coordinates only on the right object: label (x) int64 16B 1 2 Differing data variables: L var1 (x, y) int64 48B 1 2 3 4 5 6 R var1 (x) int64 16B 1 2 Data variables only on the left object: var2 (x) int64 16B 3 4 Indexes only on the left object: ['y'] Differing indexes: L x Index(['a', 'b'], dtype='{str_dtype}', name='x') R x Index(['a', 'c'], dtype='{str_dtype}', name='x') Differing attributes: L title: mytitle R title: newtitle Attributes only on the left object: description: desc""" ) actual = formatting.diff_dataset_repr(ds_a, ds_b, "identical") assert actual == expected def test_array_repr(self) -> None: ds = xr.Dataset( coords={ "foo": np.array([1, 2, 3], dtype=np.uint64), "bar": np.array([1, 2, 3], dtype=np.uint64), } ) ds[(1, 2)] = xr.DataArray(np.array([0], dtype=np.uint64), dims="test") ds_12 = ds[(1, 2)] # Test repr function behaves correctly: actual = formatting.array_repr(ds_12) expected = dedent( """\ Size: 8B array([0], dtype=uint64) Dimensions without coordinates: test""" ) assert actual == expected # Test repr, str prints returns correctly as well: assert repr(ds_12) == expected assert str(ds_12) == expected # f-strings (aka format(...)) by default should use the repr: actual = f"{ds_12}" assert actual == expected with xr.set_options(display_expand_data=False): actual = formatting.array_repr(ds[(1, 2)]) expected = dedent( """\ Size: 8B 0 Dimensions without coordinates: test""" ) assert actual == expected def test_array_repr_variable(self) -> None: var = xr.Variable("x", [0, 1]) formatting.array_repr(var) with xr.set_options(display_expand_data=False): formatting.array_repr(var) def test_array_repr_recursive(self) -> None: # GH:issue:7111 # direct recursion var = xr.Variable("x", [0, 1]) var.attrs["x"] = var formatting.array_repr(var) da = xr.DataArray([0, 1], dims=["x"]) da.attrs["x"] = da formatting.array_repr(da) # indirect recursion var.attrs["x"] = da da.attrs["x"] = var formatting.array_repr(var) formatting.array_repr(da) @requires_dask def test_array_scalar_format(self) -> None: # Test numpy scalars: var = xr.DataArray(np.array(0)) assert format(var, "") == repr(var) assert format(var, "d") == "0" assert format(var, ".2f") == "0.00" # Test dask scalars, not supported however: import dask.array as da var = xr.DataArray(da.array(0)) assert format(var, "") == repr(var) with pytest.raises(TypeError) as excinfo: format(var, ".2f") assert "unsupported format string passed to" in str(excinfo.value) # Test numpy arrays raises: var = xr.DataArray([0.1, 0.2]) with pytest.raises(NotImplementedError) as excinfo: # type: ignore[assignment] format(var, ".2f") assert "Using format_spec is only supported" in str(excinfo.value) def test_datatree_print_empty_node(self): dt: xr.DataTree = xr.DataTree(name="root") printout = str(dt) assert printout == "\nGroup: /" def test_datatree_print_empty_node_with_attrs(self): dat = xr.Dataset(attrs={"note": "has attrs"}) dt: xr.DataTree = xr.DataTree(name="root", dataset=dat) printout = str(dt) assert printout 
== dedent( """\ Group: / Attributes: note: has attrs""" ) def test_datatree_print_node_with_data(self): dat = xr.Dataset({"a": [0, 2]}) dt: xr.DataTree = xr.DataTree(name="root", dataset=dat) printout = str(dt) expected = [ "", "Group: /", "Dimensions", "Coordinates", "a", ] for expected_line, printed_line in zip( expected, printout.splitlines(), strict=True ): assert expected_line in printed_line def test_datatree_printout_nested_node(self): dat = xr.Dataset({"a": [0, 2]}) root = xr.DataTree.from_dict( { "/results": dat, } ) printout = str(root) assert printout.splitlines()[3].startswith(" ") def test_datatree_repr_of_node_with_data(self): dat = xr.Dataset({"a": [0, 2]}) dt: xr.DataTree = xr.DataTree(name="root", dataset=dat) assert "Coordinates" in repr(dt) def test_diff_datatree_repr_different_groups(self): dt_1: xr.DataTree = xr.DataTree.from_dict({"a": None}) dt_2: xr.DataTree = xr.DataTree.from_dict({"b": None}) expected = dedent( """\ Left and right DataTree objects are not identical Children at root node do not match: ['a'] vs ['b']""" ) actual = formatting.diff_datatree_repr(dt_1, dt_2, "identical") assert actual == expected def test_diff_datatree_repr_different_subgroups(self): dt_1: xr.DataTree = xr.DataTree.from_dict({"a": None, "a/b": None, "a/c": None}) dt_2: xr.DataTree = xr.DataTree.from_dict({"a": None, "a/b": None}) expected = dedent( """\ Left and right DataTree objects are not isomorphic Children at node 'a' do not match: ['b', 'c'] vs ['b']""" ) actual = formatting.diff_datatree_repr(dt_1, dt_2, "isomorphic") assert actual == expected def test_diff_datatree_repr_node_data(self): # casting to int64 explicitly ensures that int64s are created on all architectures ds1 = xr.Dataset({"u": np.int64(0), "v": np.int64(1)}) ds3 = xr.Dataset({"w": np.int64(5)}) dt_1: xr.DataTree = xr.DataTree.from_dict({"a": ds1, "a/b": ds3}) ds2 = xr.Dataset({"u": np.int64(0)}) ds4 = xr.Dataset({"w": np.int64(6)}) dt_2: xr.DataTree = xr.DataTree.from_dict({"a": ds2, "a/b": ds4}, name="foo") expected = dedent( """\ Left and right DataTree objects are not identical Differing names: None != 'foo' Data at node 'a' does not match: Data variables only on the left object: v int64 8B 1 Data at node 'a/b' does not match: Differing data variables: L w int64 8B 5 R w int64 8B 6""" ) actual = formatting.diff_datatree_repr(dt_1, dt_2, "identical") assert actual == expected def test_diff_datatree_repr_equals(self) -> None: ds1 = xr.Dataset(data_vars={"data": ("y", [5, 2])}) ds2 = xr.Dataset(data_vars={"data": (("x", "y"), [[5, 2]])}) dt1 = xr.DataTree.from_dict({"node": ds1}) dt2 = xr.DataTree.from_dict({"node": ds2}) expected = dedent( """\ Left and right DataTree objects are not equal Data at node 'node' does not match: Differing dimensions: (y: 2) != (x: 1, y: 2) Differing data variables: L data (y) int64 16B 5 2 R data (x, y) int64 16B 5 2""" ) actual = formatting.diff_datatree_repr(dt1, dt2, "equals") assert actual == expected def test_inline_variable_array_repr_custom_repr() -> None: class CustomArray: def __init__(self, value, attr): self.value = value self.attr = attr def _repr_inline_(self, width): formatted = f"({self.attr}) {self.value}" if len(formatted) > width: formatted = f"({self.attr}) ..." 
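            # Descriptive note (editorial, hedged): this class exercises the optional
            # ``_repr_inline_(max_width)`` hook that xarray's formatting checks for on
            # wrapped duck arrays, as the assertion at the end of this test shows. The
            # hook is expected to return a one-line summary no wider than ``max_width``,
            # falling back to the abbreviated "(attr) ..." form when the full string
            # would not fit.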
return formatted def __array_namespace__(self, *args, **kwargs): return NotImplemented @property def shape(self) -> tuple[int, ...]: return self.value.shape @property def dtype(self): return self.value.dtype @property def ndim(self): return self.value.ndim value = CustomArray(np.array([20, 40]), "m") variable = xr.Variable("x", value) max_width = 10 actual = formatting.inline_variable_array_repr(variable, max_width=10) assert actual == value._repr_inline_(max_width) def test_set_numpy_options() -> None: original_options = np.get_printoptions() with formatting.set_numpy_options(threshold=10): assert len(repr(np.arange(500))) < 200 # original options are restored assert np.get_printoptions() == original_options def test_short_array_repr() -> None: cases = [ np.random.randn(500), np.random.randn(20, 20), np.random.randn(5, 10, 15), np.random.randn(5, 10, 15, 3), np.random.randn(100, 5, 1), ] # number of lines: # for default numpy repr: 167, 140, 254, 248, 599 # for short_array_repr: 1, 7, 24, 19, 25 for array in cases: num_lines = formatting.short_array_repr(array).count("\n") + 1 assert num_lines < 30 # threshold option (default: 200) array2 = np.arange(100) assert "..." not in formatting.short_array_repr(array2) with xr.set_options(display_values_threshold=10): assert "..." in formatting.short_array_repr(array2) def test_large_array_repr_length() -> None: da = xr.DataArray(np.random.randn(100, 5, 1)) result = repr(da).splitlines() assert len(result) < 50 @requires_netCDF4 def test_repr_file_collapsed(tmp_path) -> None: arr_to_store = xr.DataArray(np.arange(300, dtype=np.int64), dims="test") arr_to_store.to_netcdf(tmp_path / "test.nc", engine="netcdf4") with ( xr.open_dataarray(tmp_path / "test.nc") as arr, xr.set_options(display_expand_data=False), ): actual = repr(arr) expected = dedent( """\ Size: 2kB [300 values with dtype=int64] Dimensions without coordinates: test""" ) assert actual == expected arr_loaded = arr.compute() actual = arr_loaded.__repr__() expected = dedent( """\ Size: 2kB 0 1 2 3 4 5 6 7 8 9 10 11 12 ... 
288 289 290 291 292 293 294 295 296 297 298 299 Dimensions without coordinates: test""" ) assert actual == expected @pytest.mark.parametrize( "display_max_rows, n_vars, n_attr", [(50, 40, 30), (35, 40, 30), (11, 40, 30), (1, 40, 30)], ) def test__mapping_repr(display_max_rows, n_vars, n_attr) -> None: long_name = "long_name" a = np.char.add(long_name, np.arange(0, n_vars).astype(str)) b = np.char.add("attr_", np.arange(0, n_attr).astype(str)) c = np.char.add("coord", np.arange(0, n_vars).astype(str)) attrs = dict.fromkeys(b, 2) coords = {_c: np.array([0, 1], dtype=np.uint64) for _c in c} data_vars = dict() for v, _c in zip(a, coords.items(), strict=True): data_vars[v] = xr.DataArray( name=v, data=np.array([3, 4], dtype=np.uint64), dims=[_c[0]], coords=dict([_c]), ) ds = xr.Dataset(data_vars) ds.attrs = attrs with xr.set_options(display_max_rows=display_max_rows): # Parse the data_vars print and show only data_vars rows: summary = formatting.dataset_repr(ds).split("\n") summary = [v for v in summary if long_name in v] # The length should be less than or equal to display_max_rows: len_summary = len(summary) data_vars_print_size = min(display_max_rows, len_summary) assert len_summary == data_vars_print_size summary = formatting.data_vars_repr(ds.data_vars).split("\n") summary = [v for v in summary if long_name in v] # The length should be equal to the number of data variables len_summary = len(summary) assert len_summary == n_vars summary = formatting.coords_repr(ds.coords).split("\n") summary = [v for v in summary if "coord" in v] # The length should be equal to the number of data variables len_summary = len(summary) assert len_summary == n_vars with xr.set_options( display_max_rows=display_max_rows, display_expand_coords=False, display_expand_data_vars=False, display_expand_attrs=False, ): actual = formatting.dataset_repr(ds) col_width = formatting._calculate_col_width(ds.variables) dims_start = formatting.pretty_print("Dimensions:", col_width) dims_values = formatting.dim_summary_limited( ds.sizes, col_width=col_width + 1, max_rows=display_max_rows ) expected_size = "1kB" expected = f"""\ Size: {expected_size} {dims_start}({dims_values}) Coordinates: ({n_vars}) Data variables: ({n_vars}) Attributes: ({n_attr})""" expected = dedent(expected) assert actual == expected def test__mapping_repr_recursive() -> None: # GH:issue:7111 # direct recursion ds = xr.Dataset({"a": ("x", [1, 2, 3])}) ds.attrs["ds"] = ds formatting.dataset_repr(ds) # indirect recursion ds2 = xr.Dataset({"b": ("y", [1, 2, 3])}) ds.attrs["ds"] = ds2 ds2.attrs["ds"] = ds formatting.dataset_repr(ds2) def test__element_formatter(n_elements: int = 100) -> None: expected = """\ Dimensions without coordinates: dim_0: 3, dim_1: 3, dim_2: 3, dim_3: 3, dim_4: 3, dim_5: 3, dim_6: 3, dim_7: 3, dim_8: 3, dim_9: 3, dim_10: 3, dim_11: 3, dim_12: 3, dim_13: 3, dim_14: 3, dim_15: 3, dim_16: 3, dim_17: 3, dim_18: 3, dim_19: 3, dim_20: 3, dim_21: 3, dim_22: 3, dim_23: 3, ... 
dim_76: 3, dim_77: 3, dim_78: 3, dim_79: 3, dim_80: 3, dim_81: 3, dim_82: 3, dim_83: 3, dim_84: 3, dim_85: 3, dim_86: 3, dim_87: 3, dim_88: 3, dim_89: 3, dim_90: 3, dim_91: 3, dim_92: 3, dim_93: 3, dim_94: 3, dim_95: 3, dim_96: 3, dim_97: 3, dim_98: 3, dim_99: 3""" expected = dedent(expected) intro = "Dimensions without coordinates: " elements = [ f"{k}: {v}" for k, v in {f"dim_{k}": 3 for k in np.arange(n_elements)}.items() ] values = xr.core.formatting._element_formatter( elements, col_width=len(intro), max_rows=12 ) actual = intro + values assert expected == actual def test_lazy_array_wont_compute() -> None: from xarray.core.indexing import LazilyIndexedArray class LazilyIndexedArrayNotComputable(LazilyIndexedArray): def __array__( self, dtype: np.typing.DTypeLike | None = None, /, *, copy: bool | None = None, ) -> np.ndarray: raise NotImplementedError("Computing this array is not possible.") arr = LazilyIndexedArrayNotComputable(np.array([1, 2])) var = xr.DataArray(arr) # These will crash if var.data are converted to numpy arrays: var.__repr__() var._repr_html_() @pytest.mark.parametrize("as_dataset", (False, True)) def test_format_xindexes_none(as_dataset: bool) -> None: # ensure repr for empty xindexes can be displayed #8367 expected = """\ Indexes: *empty*""" expected = dedent(expected) obj: xr.DataArray | xr.Dataset = xr.DataArray() obj = obj._to_temp_dataset() if as_dataset else obj actual = repr(obj.xindexes) assert actual == expected @pytest.mark.parametrize("as_dataset", (False, True)) def test_format_xindexes(as_dataset: bool) -> None: expected = """\ Indexes: x PandasIndex""" expected = dedent(expected) obj: xr.DataArray | xr.Dataset = xr.DataArray([1], coords={"x": [1]}) obj = obj._to_temp_dataset() if as_dataset else obj actual = repr(obj.xindexes) assert actual == expected @requires_cftime def test_empty_cftimeindex_repr() -> None: index = xr.coding.cftimeindex.CFTimeIndex([]) expected = """\ Indexes: time CFTimeIndex([], dtype='object', length=0, calendar=None, freq=None)""" expected = dedent(expected) da = xr.DataArray([], coords={"time": index}) actual = repr(da.indexes) assert actual == expected def test_display_nbytes() -> None: xds = xr.Dataset( { "foo": np.arange(1200, dtype=np.int16), "bar": np.arange(111, dtype=np.int16), } ) # Note: int16 is used to ensure that dtype is shown in the # numpy array representation for all OSes included Windows actual = repr(xds) expected = """ Size: 3kB Dimensions: (foo: 1200, bar: 111) Coordinates: * foo (foo) int16 2kB 0 1 2 3 4 5 6 ... 1194 1195 1196 1197 1198 1199 * bar (bar) int16 222B 0 1 2 3 4 5 6 7 ... 104 105 106 107 108 109 110 Data variables: *empty* """.strip() assert actual == expected actual = repr(xds["foo"]) array_repr = repr(xds.foo.data).replace("\n ", "") expected = f""" Size: 2kB {array_repr} Coordinates: * foo (foo) int16 2kB 0 1 2 3 4 5 6 ... 1194 1195 1196 1197 1198 1199 """.strip() assert actual == expected def test_array_repr_dtypes(): # These dtypes are expected to be represented similarly # on Ubuntu, macOS and Windows environments of the CI. 
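    # (Editorial aside, based on the expected strings below: the "Size: ..." header in
    # these reprs is simply the array's nbytes, i.e. element count times
    # dtype.itemsize, so a single int8/uint8 shows 1B, int16/float16 shows 2B,
    # uint32/float32 shows 4B, and int64/float64 shows 8B.)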
# Unsigned integer could be used as easy replacements # for tests where the data-type does not matter, # but the repr does, including the size # (size of an int == size of a uint) # Signed integer dtypes ds = xr.DataArray(np.array([0], dtype="int8"), dims="x") actual = repr(ds) expected = """ Size: 1B array([0], dtype=int8) Dimensions without coordinates: x """.strip() assert actual == expected ds = xr.DataArray(np.array([0], dtype="int16"), dims="x") actual = repr(ds) expected = """ Size: 2B array([0], dtype=int16) Dimensions without coordinates: x """.strip() assert actual == expected # Unsigned integer dtypes ds = xr.DataArray(np.array([0], dtype="uint8"), dims="x") actual = repr(ds) expected = """ Size: 1B array([0], dtype=uint8) Dimensions without coordinates: x """.strip() assert actual == expected ds = xr.DataArray(np.array([0], dtype="uint16"), dims="x") actual = repr(ds) expected = """ Size: 2B array([0], dtype=uint16) Dimensions without coordinates: x """.strip() assert actual == expected ds = xr.DataArray(np.array([0], dtype="uint32"), dims="x") actual = repr(ds) expected = """ Size: 4B array([0], dtype=uint32) Dimensions without coordinates: x """.strip() assert actual == expected ds = xr.DataArray(np.array([0], dtype="uint64"), dims="x") actual = repr(ds) expected = """ Size: 8B array([0], dtype=uint64) Dimensions without coordinates: x """.strip() assert actual == expected # Float dtypes ds = xr.DataArray(np.array([0.0]), dims="x") actual = repr(ds) expected = """ Size: 8B array([0.]) Dimensions without coordinates: x """.strip() assert actual == expected ds = xr.DataArray(np.array([0], dtype="float16"), dims="x") actual = repr(ds) expected = """ Size: 2B array([0.], dtype=float16) Dimensions without coordinates: x """.strip() assert actual == expected ds = xr.DataArray(np.array([0], dtype="float32"), dims="x") actual = repr(ds) expected = """ Size: 4B array([0.], dtype=float32) Dimensions without coordinates: x """.strip() assert actual == expected ds = xr.DataArray(np.array([0], dtype="float64"), dims="x") actual = repr(ds) expected = """ Size: 8B array([0.]) Dimensions without coordinates: x """.strip() assert actual == expected # Signed integer dtypes array = np.array([0]) ds = xr.DataArray(array, dims="x") actual = repr(ds) expected = f""" Size: {array.dtype.itemsize}B {array!r} Dimensions without coordinates: x """.strip() assert actual == expected array = np.array([0], dtype="int32") ds = xr.DataArray(array, dims="x") actual = repr(ds) expected = f""" Size: 4B {array!r} Dimensions without coordinates: x """.strip() assert actual == expected array = np.array([0], dtype="int64") ds = xr.DataArray(array, dims="x") actual = repr(ds) expected = f""" Size: 8B {array!r} Dimensions without coordinates: x """.strip() assert actual == expected def test_repr_pandas_range_index() -> None: # lazy data repr but values shown in inline repr xidx = xr.indexes.PandasIndex(pd.RangeIndex(10), "x") ds = xr.Dataset(coords=xr.Coordinates.from_xindex(xidx)) actual = repr(ds.x) expected = """ Size: 80B [10 values with dtype=int64] Coordinates: * x (x) int64 80B 0 1 2 3 4 5 6 7 8 9 """.strip() assert actual == expected def test_repr_pandas_multi_index() -> None: # lazy data repr but values shown in inline repr midx = pd.MultiIndex.from_product([["a", "b"], [1, 2]], names=["foo", "bar"]) coords = xr.Coordinates.from_pandas_multiindex(midx, "x") ds = xr.Dataset(coords=coords) actual = repr(ds.x) expected = """ Size: 32B [4 values with dtype=object] Coordinates: * x (x) object 32B MultiIndex * foo 
(x) object 32B 'a' 'a' 'b' 'b' * bar (x) int64 32B 1 2 1 2 """.strip() assert actual == expected actual = repr(ds.foo) expected = """ Size: 32B [4 values with dtype=object] Coordinates: * x (x) object 32B MultiIndex * foo (x) object 32B 'a' 'a' 'b' 'b' * bar (x) int64 32B 1 2 1 2 """.strip() assert actual == expected python-xarray-2026.01.0/xarray/tests/test_coding_times.py0000664000175000017500000023760615136607163023606 0ustar alastairalastairfrom __future__ import annotations import warnings from datetime import datetime, timedelta from itertools import product, starmap from typing import Literal import numpy as np import pandas as pd import pytest from pandas.errors import OutOfBoundsDatetime, OutOfBoundsTimedelta from xarray import ( DataArray, Dataset, Variable, conventions, date_range, decode_cf, ) from xarray.coders import CFDatetimeCoder, CFTimedeltaCoder from xarray.coding.times import ( _encode_datetime_with_cftime, _netcdf_to_numpy_timeunit, _numpy_to_netcdf_timeunit, _should_cftime_be_used, cftime_to_nptime, decode_cf_datetime, decode_cf_timedelta, encode_cf_datetime, encode_cf_timedelta, format_cftime_datetime, infer_datetime_units, infer_timedelta_units, ) from xarray.coding.variables import SerializationWarning from xarray.conventions import _update_bounds_attributes, cf_encoder from xarray.core.common import contains_cftime_datetimes from xarray.core.types import PDDatetimeUnitOptions from xarray.core.utils import is_duck_dask_array from xarray.testing import assert_equal, assert_identical from xarray.tests import ( _ALL_CALENDARS, _NON_STANDARD_CALENDARS, _STANDARD_CALENDAR_NAMES, _STANDARD_CALENDARS, DuckArrayWrapper, FirstElementAccessibleArray, _all_cftime_date_types, arm_xfail, assert_array_equal, assert_duckarray_allclose, assert_duckarray_equal, assert_no_warnings, has_cftime, requires_cftime, requires_dask, ) _CF_DATETIME_NUM_DATES_UNITS = [ (np.arange(10), "days since 2000-01-01", "s"), (np.arange(10).astype("float64"), "days since 2000-01-01", "s"), (np.arange(10).astype("float32"), "days since 2000-01-01", "s"), (np.arange(10).reshape(2, 5), "days since 2000-01-01", "s"), (12300 + np.arange(5), "hours since 1680-01-01 00:00:00", "s"), # here we add a couple minor formatting errors to test # the robustness of the parsing algorithm. 
(12300 + np.arange(5), "hour since 1680-01-01 00:00:00", "s"), (12300 + np.arange(5), "Hour since 1680-01-01 00:00:00", "s"), (12300 + np.arange(5), " Hour since 1680-01-01 00:00:00 ", "s"), (10, "days since 2000-01-01", "s"), ([10], "daYs since 2000-01-01", "s"), ([[10]], "days since 2000-01-01", "s"), ([10, 10], "days since 2000-01-01", "s"), (np.array(10), "days since 2000-01-01", "s"), (0, "days since 1000-01-01", "s"), ([0], "days since 1000-01-01", "s"), ([[0]], "days since 1000-01-01", "s"), (np.arange(2), "days since 1000-01-01", "s"), (np.arange(0, 100000, 20000), "days since 1900-01-01", "s"), (np.arange(0, 100000, 20000), "days since 1-01-01", "s"), (17093352.0, "hours since 1-1-1 00:00:0.0", "s"), ([0.5, 1.5], "hours since 1900-01-01T00:00:00", "s"), (0, "milliseconds since 2000-01-01T00:00:00", "s"), (0, "microseconds since 2000-01-01T00:00:00", "s"), (np.int32(788961600), "seconds since 1981-01-01", "s"), # GH2002 (12300 + np.arange(5), "hour since 1680-01-01 00:00:00.500000", "us"), (164375, "days since 1850-01-01 00:00:00", "s"), (164374.5, "days since 1850-01-01 00:00:00", "s"), ([164374.5, 168360.5], "days since 1850-01-01 00:00:00", "s"), ] _CF_DATETIME_TESTS = [ num_dates_units + (calendar,) for num_dates_units, calendar in product( _CF_DATETIME_NUM_DATES_UNITS, _STANDARD_CALENDAR_NAMES ) ] @requires_cftime @pytest.mark.filterwarnings("ignore:Ambiguous reference date string") @pytest.mark.filterwarnings("ignore:Times can't be serialized faithfully") @pytest.mark.parametrize( ["num_dates", "units", "minimum_resolution", "calendar"], _CF_DATETIME_TESTS ) def test_cf_datetime( num_dates, units: str, minimum_resolution: PDDatetimeUnitOptions, calendar: str, time_unit: PDDatetimeUnitOptions, ) -> None: import cftime expected = cftime.num2date( num_dates, units, calendar, only_use_cftime_datetimes=True ) with warnings.catch_warnings(): warnings.filterwarnings("ignore", "Unable to decode time axis") actual = decode_cf_datetime(num_dates, units, calendar, time_unit=time_unit) if actual.dtype.kind != "O": if np.timedelta64(1, time_unit) > np.timedelta64(1, minimum_resolution): expected_unit = minimum_resolution else: expected_unit = time_unit expected = cftime_to_nptime(expected, time_unit=expected_unit) assert_array_equal(actual, expected) encoded1, _, _ = encode_cf_datetime(actual, units, calendar) assert_array_equal(num_dates, encoded1) if hasattr(num_dates, "ndim") and num_dates.ndim == 1 and "1000" not in units: # verify that wrapping with a pandas.Index works # note that it *does not* currently work to put # non-datetime64 compatible dates into a pandas.Index encoded2, _, _ = encode_cf_datetime(pd.Index(actual), units, calendar) assert_array_equal(num_dates, encoded2) @requires_cftime def test_decode_cf_datetime_overflow(time_unit: PDDatetimeUnitOptions) -> None: # checks for # https://github.com/pydata/pandas/issues/14068 # https://github.com/pydata/xarray/issues/975 from cftime import DatetimeGregorian datetime = DatetimeGregorian units = "days since 2000-01-01 00:00:00" # date after 2262 and before 1678 days = (-117710, 95795) expected = (datetime(1677, 9, 20), datetime(2262, 4, 12)) for i, day in enumerate(days): with warnings.catch_warnings(): warnings.filterwarnings("ignore", "Unable to decode time axis") result = decode_cf_datetime( day, units, calendar="standard", time_unit=time_unit ) assert result == expected[i] # additional check to see if type/dtypes are correct if time_unit == "ns": assert isinstance(result.item(), datetime) else: assert result.dtype == 
np.dtype(f"=M8[{time_unit}]") def test_decode_cf_datetime_non_standard_units() -> None: expected = pd.date_range(periods=100, start="1970-01-01", freq="h") # netCDFs from madis.noaa.gov use this format for their time units # they cannot be parsed by cftime, but pd.Timestamp works units = "hours since 1-1-1970" actual = decode_cf_datetime(np.arange(100), units) assert_array_equal(actual, expected) @requires_cftime def test_decode_cf_datetime_non_iso_strings() -> None: # datetime strings that are _almost_ ISO compliant but not quite, # but which cftime.num2date can still parse correctly expected = pd.date_range(periods=100, start="2000-01-01", freq="h") cases = [ (np.arange(100), "hours since 2000-01-01 0"), (np.arange(100), "hours since 2000-1-1 0"), (np.arange(100), "hours since 2000-01-01 0:00"), ] for num_dates, units in cases: actual = decode_cf_datetime(num_dates, units) assert_array_equal(actual, expected) @requires_cftime @pytest.mark.parametrize("calendar", _STANDARD_CALENDARS) def test_decode_standard_calendar_inside_timestamp_range( calendar, time_unit: PDDatetimeUnitOptions ) -> None: import cftime units = "hours since 0001-01-01" times = pd.date_range( "2001-04-01-00", end="2001-04-30-23", unit=time_unit, freq="h" ) # to_pydatetime() will return microsecond time = cftime.date2num(times.to_pydatetime(), units, calendar=calendar) expected = times.values # for cftime we get "us" resolution # ns resolution is handled by cftime due to the reference date # being out of bounds, but the times themselves are # representable with nanosecond resolution. actual = decode_cf_datetime(time, units, calendar=calendar, time_unit=time_unit) assert actual.dtype == np.dtype(f"=M8[{time_unit}]") assert_array_equal(actual, expected) @requires_cftime @pytest.mark.parametrize("calendar", _NON_STANDARD_CALENDARS) def test_decode_non_standard_calendar_inside_timestamp_range(calendar) -> None: import cftime units = "days since 0001-01-01" times = pd.date_range("2001-04-01-00", end="2001-04-30-23", freq="h") non_standard_time = cftime.date2num(times.to_pydatetime(), units, calendar=calendar) expected = cftime.num2date( non_standard_time, units, calendar=calendar, only_use_cftime_datetimes=True ) expected_dtype = np.dtype("O") actual = decode_cf_datetime(non_standard_time, units, calendar=calendar) assert actual.dtype == expected_dtype assert_array_equal(actual, expected) @requires_cftime @pytest.mark.parametrize("calendar", _ALL_CALENDARS) def test_decode_dates_outside_timestamp_range( calendar, time_unit: PDDatetimeUnitOptions ) -> None: import cftime units = "days since 0001-01-01" times = [datetime(1, 4, 1, h) for h in range(1, 5)] time = cftime.date2num(times, units, calendar=calendar) expected = cftime.num2date( time, units, calendar=calendar, only_use_cftime_datetimes=True ) if calendar == "proleptic_gregorian" and time_unit != "ns": expected = cftime_to_nptime(expected, time_unit=time_unit) expected_date_type = type(expected[0]) with warnings.catch_warnings(): warnings.filterwarnings("ignore", "Unable to decode time axis") actual = decode_cf_datetime(time, units, calendar=calendar, time_unit=time_unit) assert all(isinstance(value, expected_date_type) for value in actual) assert_array_equal(actual, expected) @requires_cftime @pytest.mark.parametrize("calendar", _STANDARD_CALENDARS) @pytest.mark.parametrize("num_time", [735368, [735368], [[735368]]]) def test_decode_standard_calendar_single_element_inside_timestamp_range( calendar, time_unit: PDDatetimeUnitOptions, num_time, ) -> None: units = "days 
since 0001-01-01" with warnings.catch_warnings(): warnings.filterwarnings("ignore", "Unable to decode time axis") actual = decode_cf_datetime( num_time, units, calendar=calendar, time_unit=time_unit ) assert actual.dtype == np.dtype(f"=M8[{time_unit}]") @requires_cftime @pytest.mark.parametrize("calendar", _NON_STANDARD_CALENDARS) def test_decode_non_standard_calendar_single_element_inside_timestamp_range( calendar, ) -> None: units = "days since 0001-01-01" for num_time in [735368, [735368], [[735368]]]: with warnings.catch_warnings(): warnings.filterwarnings("ignore", "Unable to decode time axis") actual = decode_cf_datetime(num_time, units, calendar=calendar) assert actual.dtype == np.dtype("O") @requires_cftime @pytest.mark.parametrize("calendar", _NON_STANDARD_CALENDARS) def test_decode_single_element_outside_timestamp_range(calendar) -> None: import cftime units = "days since 0001-01-01" for days in [1, 1470376]: for num_time in [days, [days], [[days]]]: with warnings.catch_warnings(): warnings.filterwarnings("ignore", "Unable to decode time axis") actual = decode_cf_datetime(num_time, units, calendar=calendar) expected = cftime.num2date( days, units, calendar, only_use_cftime_datetimes=True ) assert isinstance(actual.item(), type(expected)) @requires_cftime @pytest.mark.parametrize("calendar", _STANDARD_CALENDARS) def test_decode_standard_calendar_multidim_time_inside_timestamp_range( calendar, time_unit: PDDatetimeUnitOptions, ) -> None: import cftime units = "days since 0001-01-01" times1 = pd.date_range("2001-04-01", end="2001-04-05", freq="D") times2 = pd.date_range("2001-05-01", end="2001-05-05", freq="D") time1 = cftime.date2num(times1.to_pydatetime(), units, calendar=calendar) time2 = cftime.date2num(times2.to_pydatetime(), units, calendar=calendar) mdim_time = np.empty((len(time1), 2)) mdim_time[:, 0] = time1 mdim_time[:, 1] = time2 expected1 = times1.values expected2 = times2.values actual = decode_cf_datetime( mdim_time, units, calendar=calendar, time_unit=time_unit ) assert actual.dtype == np.dtype(f"=M8[{time_unit}]") assert_array_equal(actual[:, 0], expected1) assert_array_equal(actual[:, 1], expected2) @requires_cftime @pytest.mark.parametrize("calendar", _NON_STANDARD_CALENDARS) def test_decode_nonstandard_calendar_multidim_time_inside_timestamp_range( calendar, ) -> None: import cftime units = "days since 0001-01-01" times1 = pd.date_range("2001-04-01", end="2001-04-05", freq="D") times2 = pd.date_range("2001-05-01", end="2001-05-05", freq="D") time1 = cftime.date2num(times1.to_pydatetime(), units, calendar=calendar) time2 = cftime.date2num(times2.to_pydatetime(), units, calendar=calendar) mdim_time = np.empty((len(time1), 2)) mdim_time[:, 0] = time1 mdim_time[:, 1] = time2 if cftime.__name__ == "cftime": expected1 = cftime.num2date( time1, units, calendar, only_use_cftime_datetimes=True ) expected2 = cftime.num2date( time2, units, calendar, only_use_cftime_datetimes=True ) else: expected1 = cftime.num2date(time1, units, calendar) expected2 = cftime.num2date(time2, units, calendar) expected_dtype = np.dtype("O") actual = decode_cf_datetime(mdim_time, units, calendar=calendar) assert actual.dtype == expected_dtype assert_array_equal(actual[:, 0], expected1) assert_array_equal(actual[:, 1], expected2) @requires_cftime @pytest.mark.parametrize("calendar", _ALL_CALENDARS) def test_decode_multidim_time_outside_timestamp_range( calendar, time_unit: PDDatetimeUnitOptions ) -> None: import cftime units = "days since 0001-01-01" times1 = [datetime(1, 4, day) for day in 
range(1, 6)] times2 = [datetime(1, 5, day) for day in range(1, 6)] time1 = cftime.date2num(times1, units, calendar=calendar) time2 = cftime.date2num(times2, units, calendar=calendar) mdim_time = np.empty((len(time1), 2)) mdim_time[:, 0] = time1 mdim_time[:, 1] = time2 expected1 = cftime.num2date(time1, units, calendar, only_use_cftime_datetimes=True) expected2 = cftime.num2date(time2, units, calendar, only_use_cftime_datetimes=True) if calendar == "proleptic_gregorian" and time_unit != "ns": expected1 = cftime_to_nptime(expected1, time_unit=time_unit) expected2 = cftime_to_nptime(expected2, time_unit=time_unit) with warnings.catch_warnings(): warnings.filterwarnings("ignore", "Unable to decode time axis") actual = decode_cf_datetime( mdim_time, units, calendar=calendar, time_unit=time_unit ) dtype: np.dtype dtype = np.dtype("O") if calendar == "proleptic_gregorian" and time_unit != "ns": dtype = np.dtype(f"=M8[{time_unit}]") assert actual.dtype == dtype assert_array_equal(actual[:, 0], expected1) assert_array_equal(actual[:, 1], expected2) @requires_cftime @pytest.mark.parametrize( ("calendar", "num_time"), [("360_day", 720058.0), ("all_leap", 732059.0), ("366_day", 732059.0)], ) def test_decode_non_standard_calendar_single_element(calendar, num_time) -> None: import cftime units = "days since 0001-01-01" actual = decode_cf_datetime(num_time, units, calendar=calendar) expected = np.asarray( cftime.num2date(num_time, units, calendar, only_use_cftime_datetimes=True) ) assert actual.dtype == np.dtype("O") assert expected == actual @requires_cftime def test_decode_360_day_calendar() -> None: import cftime calendar = "360_day" # ensure leap year doesn't matter for year in [2010, 2011, 2012, 2013, 2014]: units = f"days since {year}-01-01" num_times = np.arange(100) expected = cftime.num2date( num_times, units, calendar, only_use_cftime_datetimes=True ) with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") actual = decode_cf_datetime(num_times, units, calendar=calendar) assert len(w) == 0 assert actual.dtype == np.dtype("O") assert_array_equal(actual, expected) @requires_cftime def test_decode_abbreviation() -> None: """Test making sure we properly fall back to cftime on abbreviated units.""" import cftime val = np.array([1586628000000.0]) units = "msecs since 1970-01-01T00:00:00Z" actual = decode_cf_datetime(val, units) expected = cftime_to_nptime(cftime.num2date(val, units)) assert_array_equal(actual, expected) @arm_xfail @requires_cftime @pytest.mark.parametrize( ["num_dates", "units", "expected_list"], [ ([np.nan], "days since 2000-01-01", ["NaT"]), ([np.nan, 0], "days since 2000-01-01", ["NaT", "2000-01-01T00:00:00Z"]), ( [np.nan, 0, 1], "days since 2000-01-01", ["NaT", "2000-01-01T00:00:00Z", "2000-01-02T00:00:00Z"], ), ], ) def test_cf_datetime_nan(num_dates, units, expected_list) -> None: with warnings.catch_warnings(): warnings.filterwarnings("ignore", "All-NaN") actual = decode_cf_datetime(num_dates, units) # use pandas because numpy will deprecate timezone-aware conversions expected = pd.to_datetime(expected_list).to_numpy(dtype="datetime64[ns]") assert_array_equal(expected, actual) @requires_cftime def test_decoded_cf_datetime_array_2d(time_unit: PDDatetimeUnitOptions) -> None: # regression test for GH1229 variable = Variable( ("x", "y"), np.array([[0, 1], [2, 3]]), {"units": "days since 2000-01-01"} ) result = CFDatetimeCoder(time_unit=time_unit).decode(variable) assert result.dtype == f"datetime64[{time_unit}]" expected = pd.date_range("2000-01-01", 
periods=4).values.reshape(2, 2) assert_array_equal(np.asarray(result), expected) @pytest.mark.parametrize("decode_times", [True, False]) @pytest.mark.parametrize("mask_and_scale", [True, False]) def test_decode_datetime_mask_and_scale( decode_times: bool, mask_and_scale: bool ) -> None: attrs = { "units": "nanoseconds since 1970-01-01", "calendar": "proleptic_gregorian", "_FillValue": np.int16(-1), "add_offset": 100000.0, } encoded = Variable(["time"], np.array([0, -1, 1], "int16"), attrs=attrs) decoded = conventions.decode_cf_variable( "foo", encoded, mask_and_scale=mask_and_scale, decode_times=decode_times ) result = conventions.encode_cf_variable(decoded, name="foo") assert_identical(encoded, result) assert encoded.dtype == result.dtype FREQUENCIES_TO_ENCODING_UNITS = { "ns": "nanoseconds", "us": "microseconds", "ms": "milliseconds", "s": "seconds", "min": "minutes", "h": "hours", "D": "days", } @pytest.mark.parametrize(("freq", "units"), FREQUENCIES_TO_ENCODING_UNITS.items()) def test_infer_datetime_units(freq, units) -> None: dates = pd.date_range("2000", periods=2, freq=freq) expected = f"{units} since 2000-01-01 00:00:00" assert expected == infer_datetime_units(dates) @pytest.mark.parametrize( ["dates", "expected"], [ ( pd.to_datetime(["1900-01-01", "1900-01-02", "NaT"], unit="ns"), "days since 1900-01-01 00:00:00", ), ( pd.to_datetime(["NaT", "1900-01-01"], unit="ns"), "days since 1900-01-01 00:00:00", ), (pd.to_datetime(["NaT"], unit="ns"), "days since 1970-01-01 00:00:00"), ], ) def test_infer_datetime_units_with_NaT(dates, expected) -> None: assert expected == infer_datetime_units(dates) _CFTIME_DATETIME_UNITS_TESTS = [ ([(1900, 1, 1), (1900, 1, 1)], "days since 1900-01-01 00:00:00.000000"), ( [(1900, 1, 1), (1900, 1, 2), (1900, 1, 2, 0, 0, 1)], "seconds since 1900-01-01 00:00:00.000000", ), ( [(1900, 1, 1), (1900, 1, 8), (1900, 1, 16)], "days since 1900-01-01 00:00:00.000000", ), ] @requires_cftime @pytest.mark.parametrize( "calendar", _NON_STANDARD_CALENDARS + ["gregorian", "proleptic_gregorian"] ) @pytest.mark.parametrize(("date_args", "expected"), _CFTIME_DATETIME_UNITS_TESTS) def test_infer_cftime_datetime_units(calendar, date_args, expected) -> None: date_type = _all_cftime_date_types()[calendar] dates = list(starmap(date_type, date_args)) assert expected == infer_datetime_units(dates) @pytest.mark.filterwarnings("ignore:Timedeltas can't be serialized faithfully") @pytest.mark.parametrize( ["timedeltas", "units", "numbers"], [ ("1D", "days", np.int64(1)), (["1D", "2D", "3D"], "days", np.array([1, 2, 3], "int64")), ("1h", "hours", np.int64(1)), ("1ms", "milliseconds", np.int64(1)), ("1us", "microseconds", np.int64(1)), ("1ns", "nanoseconds", np.int64(1)), (["NaT", "0s", "1s"], None, [np.iinfo(np.int64).min, 0, 1]), (["30m", "60m"], "hours", [0.5, 1.0]), ("NaT", "days", np.iinfo(np.int64).min), (["NaT", "NaT"], "days", [np.iinfo(np.int64).min, np.iinfo(np.int64).min]), ], ) def test_cf_timedelta(timedeltas, units, numbers) -> None: if timedeltas == "NaT": timedeltas = np.timedelta64("NaT", "ns") else: timedeltas = pd.to_timedelta(timedeltas).as_unit("ns").to_numpy() numbers = np.array(numbers) expected = numbers actual, _ = encode_cf_timedelta(timedeltas, units) assert_array_equal(expected, actual) assert expected.dtype == actual.dtype if units is not None: expected = timedeltas actual = decode_cf_timedelta(numbers, units) assert_array_equal(expected, actual) assert expected.dtype == actual.dtype expected = np.timedelta64("NaT", "ns") actual = 
decode_cf_timedelta(np.array(np.nan), "days") assert_array_equal(expected, actual) assert expected.dtype == actual.dtype def test_cf_timedelta_2d() -> None: units = "days" numbers = np.atleast_2d([1, 2, 3]) timedeltas = pd.to_timedelta(["1D", "2D", "3D"]).as_unit("ns") timedeltas_2d = np.atleast_2d(timedeltas.to_numpy()) expected = timedeltas_2d actual = decode_cf_timedelta(numbers, units) assert_array_equal(expected, actual) assert expected.dtype == actual.dtype @pytest.mark.parametrize("encoding_unit", FREQUENCIES_TO_ENCODING_UNITS.values()) def test_decode_cf_timedelta_time_unit( time_unit: PDDatetimeUnitOptions, encoding_unit ) -> None: encoded = 1 encoding_unit_as_numpy = _netcdf_to_numpy_timeunit(encoding_unit) if np.timedelta64(1, time_unit) > np.timedelta64(1, encoding_unit_as_numpy): expected = np.timedelta64(encoded, encoding_unit_as_numpy) else: expected = np.timedelta64(encoded, encoding_unit_as_numpy).astype( f"timedelta64[{time_unit}]" ) result = decode_cf_timedelta(encoded, encoding_unit, time_unit) assert result == expected assert result.dtype == expected.dtype def test_decode_cf_timedelta_time_unit_out_of_bounds( time_unit: PDDatetimeUnitOptions, ) -> None: # Define a scale factor that will guarantee overflow with the given # time_unit. scale_factor = np.timedelta64(1, time_unit) // np.timedelta64(1, "ns") encoded = scale_factor * 300 * 365 with pytest.raises(OutOfBoundsTimedelta): decode_cf_timedelta(encoded, "days", time_unit) def test_cf_timedelta_roundtrip_large_value(time_unit: PDDatetimeUnitOptions) -> None: value = np.timedelta64(np.iinfo(np.int64).max, time_unit) encoded, units = encode_cf_timedelta(value) decoded = decode_cf_timedelta(encoded, units, time_unit=time_unit) assert value == decoded assert value.dtype == decoded.dtype @pytest.mark.parametrize( ["deltas", "expected"], [ (pd.to_timedelta(["1 day", "2 days"]), "days"), (pd.to_timedelta(["1h", "1 day 1 hour"]), "hours"), (pd.to_timedelta(["1m", "2m", np.nan]), "minutes"), (pd.to_timedelta(["1m3s", "1m4s"]), "seconds"), ], ) def test_infer_timedelta_units(deltas, expected) -> None: assert expected == infer_timedelta_units(deltas) @requires_cftime @pytest.mark.parametrize( ["date_args", "expected"], [ ((1, 2, 3, 4, 5, 6), "0001-02-03 04:05:06.000000"), ((10, 2, 3, 4, 5, 6), "0010-02-03 04:05:06.000000"), ((100, 2, 3, 4, 5, 6), "0100-02-03 04:05:06.000000"), ((1000, 2, 3, 4, 5, 6), "1000-02-03 04:05:06.000000"), ], ) def test_format_cftime_datetime(date_args, expected) -> None: date_types = _all_cftime_date_types() for date_type in date_types.values(): result = format_cftime_datetime(date_type(*date_args)) assert result == expected @pytest.mark.parametrize("calendar", _ALL_CALENDARS) def test_decode_cf(calendar, time_unit: PDDatetimeUnitOptions) -> None: days = [1.0, 2.0, 3.0] # TODO: GH5690 — do we want to allow this type for `coords`? 
da = DataArray(days, coords=[days], dims=["time"], name="test") ds = da.to_dataset() for v in ["test", "time"]: ds[v].attrs["units"] = "days since 2001-01-01" ds[v].attrs["calendar"] = calendar if not has_cftime and calendar not in _STANDARD_CALENDAR_NAMES: with pytest.raises(ValueError): ds = decode_cf(ds) else: ds = decode_cf(ds, decode_times=CFDatetimeCoder(time_unit=time_unit)) if calendar not in _STANDARD_CALENDAR_NAMES: assert ds.test.dtype == np.dtype("O") else: assert ds.test.dtype == np.dtype(f"=M8[{time_unit}]") def test_decode_cf_time_bounds(time_unit: PDDatetimeUnitOptions) -> None: da = DataArray( np.arange(6, dtype="int64").reshape((3, 2)), coords={"time": [1, 2, 3]}, dims=("time", "nbnd"), name="time_bnds", ) attrs = { "units": "days since 2001-01", "calendar": "standard", "bounds": "time_bnds", } ds = da.to_dataset() ds["time"].attrs.update(attrs) _update_bounds_attributes(ds.variables) assert ds.variables["time_bnds"].attrs == { "units": "days since 2001-01", "calendar": "standard", } dsc = decode_cf(ds, decode_times=CFDatetimeCoder(time_unit=time_unit)) assert dsc.time_bnds.dtype == np.dtype(f"=M8[{time_unit}]") dsc = decode_cf(ds, decode_times=False) assert dsc.time_bnds.dtype == np.dtype("int64") # Do not overwrite existing attrs ds = da.to_dataset() ds["time"].attrs.update(attrs) bnd_attr = {"units": "hours since 2001-01", "calendar": "noleap"} ds["time_bnds"].attrs.update(bnd_attr) _update_bounds_attributes(ds.variables) assert ds.variables["time_bnds"].attrs == bnd_attr # If bounds variable not available do not complain ds = da.to_dataset() ds["time"].attrs.update(attrs) ds["time"].attrs["bounds"] = "fake_var" _update_bounds_attributes(ds.variables) @requires_cftime def test_encode_time_bounds() -> None: time = pd.date_range("2000-01-16", periods=1) time_bounds = pd.date_range("2000-01-01", periods=2, freq="MS") ds = Dataset(dict(time=time, time_bounds=time_bounds)) ds.time.attrs = {"bounds": "time_bounds"} ds.time.encoding = {"calendar": "noleap", "units": "days since 2000-01-01"} expected = {} # expected['time'] = Variable(data=np.array([15]), dims=['time']) expected["time_bounds"] = Variable(data=np.array([0, 31]), dims=["time_bounds"]) encoded, _ = cf_encoder(ds.variables, ds.attrs) assert_equal(encoded["time_bounds"], expected["time_bounds"]) assert "calendar" not in encoded["time_bounds"].attrs assert "units" not in encoded["time_bounds"].attrs # if time_bounds attrs are same as time attrs, it doesn't matter ds.time_bounds.encoding = {"calendar": "noleap", "units": "days since 2000-01-01"} encoded, _ = cf_encoder(dict(ds.variables.items()), ds.attrs) assert_equal(encoded["time_bounds"], expected["time_bounds"]) assert "calendar" not in encoded["time_bounds"].attrs assert "units" not in encoded["time_bounds"].attrs # for CF-noncompliant case of time_bounds attrs being different from # time attrs; preserve them for faithful roundtrip ds.time_bounds.encoding = {"calendar": "noleap", "units": "days since 1849-01-01"} encoded, _ = cf_encoder(dict(ds.variables.items()), ds.attrs) with pytest.raises(AssertionError): assert_equal(encoded["time_bounds"], expected["time_bounds"]) assert "calendar" not in encoded["time_bounds"].attrs assert encoded["time_bounds"].attrs["units"] == ds.time_bounds.encoding["units"] ds.time.encoding = {} with pytest.warns(UserWarning): cf_encoder(ds.variables, ds.attrs) @pytest.fixture(params=_ALL_CALENDARS) def calendar(request): return request.param @pytest.fixture def times(calendar): import cftime return cftime.num2date( np.arange(4), 
units="hours since 2000-01-01", calendar=calendar, only_use_cftime_datetimes=True, ) @pytest.fixture def data(times): data = np.random.rand(2, 2, 4) lons = np.linspace(0, 11, 2) lats = np.linspace(0, 20, 2) return DataArray( data, coords=[lons, lats, times], dims=["lon", "lat", "time"], name="data" ) @pytest.fixture def times_3d(times): lons = np.linspace(0, 11, 2) lats = np.linspace(0, 20, 2) times_arr = np.random.choice(times, size=(2, 2, 4)) return DataArray( times_arr, coords=[lons, lats, times], dims=["lon", "lat", "time"], name="data" ) @requires_cftime def test_contains_cftime_datetimes_1d(data) -> None: assert contains_cftime_datetimes(data.time.variable) @requires_cftime @requires_dask def test_contains_cftime_datetimes_dask_1d(data) -> None: assert contains_cftime_datetimes(data.time.variable.chunk()) @requires_cftime def test_contains_cftime_datetimes_3d(times_3d) -> None: assert contains_cftime_datetimes(times_3d.variable) @requires_cftime @requires_dask def test_contains_cftime_datetimes_dask_3d(times_3d) -> None: assert contains_cftime_datetimes(times_3d.variable.chunk()) @pytest.mark.parametrize("non_cftime_data", [DataArray([]), DataArray([1, 2])]) def test_contains_cftime_datetimes_non_cftimes(non_cftime_data) -> None: assert not contains_cftime_datetimes(non_cftime_data.variable) @requires_dask @pytest.mark.parametrize("non_cftime_data", [DataArray([]), DataArray([1, 2])]) def test_contains_cftime_datetimes_non_cftimes_dask(non_cftime_data) -> None: assert not contains_cftime_datetimes(non_cftime_data.variable.chunk()) @requires_cftime @pytest.mark.parametrize("shape", [(24,), (8, 3), (2, 4, 3)]) def test_encode_cf_datetime_overflow(shape) -> None: # Test for fix to GH 2272 dates = pd.date_range("2100", periods=24).values.reshape(shape) units = "days since 1800-01-01" calendar = "standard" num, _, _ = encode_cf_datetime(dates, units, calendar) roundtrip = decode_cf_datetime(num, units, calendar) np.testing.assert_array_equal(dates, roundtrip) def test_encode_expected_failures() -> None: dates = pd.date_range("2000", periods=3) with pytest.raises(ValueError, match="invalid time units"): encode_cf_datetime(dates, units="days after 2000-01-01") with pytest.raises(ValueError, match="invalid reference date"): encode_cf_datetime(dates, units="days since NO_YEAR") def test_encode_cf_datetime_pandas_min() -> None: # GH 2623 dates = pd.date_range("2000", periods=3) num, units, calendar = encode_cf_datetime(dates) expected_num = np.array([0.0, 1.0, 2.0]) expected_units = "days since 2000-01-01 00:00:00" expected_calendar = "proleptic_gregorian" np.testing.assert_array_equal(num, expected_num) assert units == expected_units assert calendar == expected_calendar @requires_cftime def test_encode_cf_datetime_invalid_pandas_valid_cftime() -> None: num, units, calendar = encode_cf_datetime( pd.date_range("2000", periods=3), # Pandas fails to parse this unit, but cftime is quite happy with it "days since 1970-01-01 00:00:00 00", "standard", ) expected_num = [10957, 10958, 10959] expected_units = "days since 1970-01-01 00:00:00 00" expected_calendar = "standard" assert_array_equal(num, expected_num) assert units == expected_units assert calendar == expected_calendar @requires_cftime def test_time_units_with_timezone_roundtrip(calendar) -> None: # Regression test for GH 2649 expected_units = "days since 2000-01-01T00:00:00-05:00" expected_num_dates = np.array([1, 2, 3]) dates = decode_cf_datetime(expected_num_dates, expected_units, calendar) # Check that dates were decoded to UTC; here the 
hours should all # equal 5. result_hours = DataArray(dates).dt.hour expected_hours = DataArray([5, 5, 5]) assert_equal(result_hours, expected_hours) # Check that the encoded values are accurately roundtripped. result_num_dates, result_units, result_calendar = encode_cf_datetime( dates, expected_units, calendar ) if calendar in _STANDARD_CALENDARS: assert_duckarray_equal(result_num_dates, expected_num_dates) else: # cftime datetime arithmetic is not quite exact. assert_duckarray_allclose(result_num_dates, expected_num_dates) assert result_units == expected_units assert result_calendar == calendar @pytest.mark.parametrize("calendar", _STANDARD_CALENDARS) def test_use_cftime_default_standard_calendar_in_range(calendar) -> None: numerical_dates = [0, 1] units = "days since 2000-01-01" expected = pd.date_range("2000", periods=2) with assert_no_warnings(): result = decode_cf_datetime(numerical_dates, units, calendar) np.testing.assert_array_equal(result, expected) @requires_cftime @pytest.mark.parametrize("calendar", ["standard", "gregorian"]) @pytest.mark.parametrize("units_year", [1500, 1580]) def test_use_cftime_default_standard_calendar_out_of_range( calendar, units_year ) -> None: from cftime import num2date numerical_dates = [0, 1] units = f"days since {units_year}-01-01" expected = num2date( numerical_dates, units, calendar, only_use_cftime_datetimes=True ) with pytest.warns(SerializationWarning): result = decode_cf_datetime(numerical_dates, units, calendar) np.testing.assert_array_equal(result, expected) @requires_cftime @pytest.mark.parametrize("calendar", _NON_STANDARD_CALENDARS) @pytest.mark.parametrize("units_year", [1500, 2000, 2500]) def test_use_cftime_default_non_standard_calendar( calendar, units_year, time_unit: PDDatetimeUnitOptions ) -> None: from cftime import num2date numerical_dates = [0, 1] units = f"days since {units_year}-01-01" expected = num2date( numerical_dates, units, calendar, only_use_cftime_datetimes=True ) if time_unit == "ns" and units_year == 2500: with pytest.warns(SerializationWarning, match="Unable to decode time axis"): result = decode_cf_datetime( numerical_dates, units, calendar, time_unit=time_unit ) else: with assert_no_warnings(): result = decode_cf_datetime( numerical_dates, units, calendar, time_unit=time_unit ) np.testing.assert_array_equal(result, expected) @requires_cftime @pytest.mark.parametrize("calendar", _ALL_CALENDARS) @pytest.mark.parametrize("units_year", [1500, 2000, 2500]) def test_use_cftime_true(calendar, units_year) -> None: from cftime import num2date numerical_dates = [0, 1] units = f"days since {units_year}-01-01" expected = num2date( numerical_dates, units, calendar, only_use_cftime_datetimes=True ) with assert_no_warnings(): result = decode_cf_datetime(numerical_dates, units, calendar, use_cftime=True) np.testing.assert_array_equal(result, expected) @pytest.mark.parametrize("calendar", _STANDARD_CALENDARS) def test_use_cftime_false_standard_calendar_in_range(calendar) -> None: numerical_dates = [0, 1] units = "days since 2000-01-01" expected = pd.date_range("2000", periods=2) with assert_no_warnings(): result = decode_cf_datetime(numerical_dates, units, calendar, use_cftime=False) np.testing.assert_array_equal(result, expected) @pytest.mark.parametrize("calendar", ["standard", "gregorian"]) @pytest.mark.parametrize("units_year", [1500, 1582]) def test_use_cftime_false_standard_calendar_out_of_range(calendar, units_year) -> None: numerical_dates = [0, 1] units = f"days since {units_year}-01-01" with 
pytest.raises(OutOfBoundsDatetime): decode_cf_datetime(numerical_dates, units, calendar, use_cftime=False) @pytest.mark.parametrize("calendar", _NON_STANDARD_CALENDARS) @pytest.mark.parametrize("units_year", [1500, 2000, 2500]) def test_use_cftime_false_non_standard_calendar(calendar, units_year) -> None: numerical_dates = [0, 1] units = f"days since {units_year}-01-01" with pytest.raises(OutOfBoundsDatetime): decode_cf_datetime(numerical_dates, units, calendar, use_cftime=False) @requires_cftime @pytest.mark.parametrize("calendar", _ALL_CALENDARS) def test_decode_ambiguous_time_warns(calendar) -> None: # GH 4422, 4506 from cftime import num2date # we don't decode non-standard calendars with # pandas, so we expect no warning to be emitted is_standard_calendar = calendar in _STANDARD_CALENDAR_NAMES dates = [1, 2, 3] units = "days since 1-1-1" expected = num2date(dates, units, calendar=calendar, only_use_cftime_datetimes=True) if is_standard_calendar: with pytest.warns(SerializationWarning) as record: result = decode_cf_datetime(dates, units, calendar=calendar) relevant_warnings = [ r for r in record.list if str(r.message).startswith("Ambiguous reference date string: 1-1-1") ] assert len(relevant_warnings) == 1 else: with assert_no_warnings(): result = decode_cf_datetime(dates, units, calendar=calendar) np.testing.assert_array_equal(result, expected) @pytest.mark.filterwarnings("ignore:Times can't be serialized faithfully") @pytest.mark.parametrize("encoding_units", FREQUENCIES_TO_ENCODING_UNITS.values()) @pytest.mark.parametrize("freq", FREQUENCIES_TO_ENCODING_UNITS.keys()) @pytest.mark.parametrize("use_cftime", [True, False]) def test_encode_cf_datetime_defaults_to_correct_dtype( encoding_units, freq, use_cftime ) -> None: if not has_cftime and use_cftime: pytest.skip("Test requires cftime") if (freq == "ns" or encoding_units == "nanoseconds") and use_cftime: pytest.skip("Nanosecond frequency is not valid for cftime dates.") times = date_range("2000", periods=3, freq=freq, use_cftime=use_cftime) units = f"{encoding_units} since 2000-01-01" encoded, _units, _ = encode_cf_datetime(times, units) numpy_timeunit = _netcdf_to_numpy_timeunit(encoding_units) encoding_units_as_timedelta = np.timedelta64(1, numpy_timeunit) if pd.to_timedelta(1, freq) >= encoding_units_as_timedelta: assert encoded.dtype == np.int64 else: assert encoded.dtype == np.float64 @pytest.mark.parametrize("freq", FREQUENCIES_TO_ENCODING_UNITS.keys()) def test_encode_decode_roundtrip_datetime64( freq, time_unit: PDDatetimeUnitOptions ) -> None: # See GH 4045. Prior to GH 4684 this test would fail for frequencies of # "s", "ms", "us", and "ns".
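# Illustrative note (not part of the upstream assertions): the roundtrip below is
# encode_cf_variable -> numeric values plus "units"/"calendar" attrs, followed by
# decode_cf_variable back to datetime64. The 1678 start date sits just above the
# datetime64[ns] lower bound (1677-09-21), which is what made sub-daily frequencies
# fragile before GH 4684.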
initial_time = pd.date_range("1678-01-01", periods=1) times = initial_time.append(pd.date_range("1968", periods=2, freq=freq)) variable = Variable(["time"], times) encoded = conventions.encode_cf_variable(variable) decoded = conventions.decode_cf_variable( "time", encoded, decode_times=CFDatetimeCoder(time_unit=time_unit) ) assert_equal(variable, decoded) @requires_cftime @pytest.mark.parametrize("freq", ["us", "ms", "s", "min", "h", "D"]) def test_encode_decode_roundtrip_cftime(freq) -> None: initial_time = date_range("0001", periods=1, use_cftime=True) times = initial_time.append( date_range("0001", periods=2, freq=freq, use_cftime=True) + timedelta(days=291000 * 365) ) variable = Variable(["time"], times) encoded = conventions.encode_cf_variable(variable) decoder = CFDatetimeCoder(use_cftime=True) decoded = conventions.decode_cf_variable("time", encoded, decode_times=decoder) assert_equal(variable, decoded) @requires_cftime def test__encode_datetime_with_cftime() -> None: # See GH 4870. cftime versions > 1.4.0 required us to adapt the # way _encode_datetime_with_cftime was written. import cftime calendar = "gregorian" times = cftime.num2date([0, 1], "hours since 2000-01-01", calendar) encoding_units = "days since 2000-01-01" # Since netCDF files do not support storing float128 values, we ensure that # float64 values are used by setting longdouble=False in num2date. This try # except logic can be removed when xarray's minimum version of cftime is at # least 1.6.2. try: expected = cftime.date2num(times, encoding_units, calendar, longdouble=False) except TypeError: expected = cftime.date2num(times, encoding_units, calendar) result = _encode_datetime_with_cftime(times, encoding_units, calendar) np.testing.assert_equal(result, expected) @requires_cftime def test_round_trip_standard_calendar_cftime_datetimes_pre_reform() -> None: from cftime import DatetimeGregorian dates = np.array([DatetimeGregorian(1, 1, 1), DatetimeGregorian(2000, 1, 1)]) encoded = encode_cf_datetime(dates, "seconds since 2000-01-01", "standard") with pytest.warns(SerializationWarning, match="Unable to decode time axis"): decoded = decode_cf_datetime(*encoded) np.testing.assert_equal(decoded, dates) @pytest.mark.parametrize("calendar", ["standard", "gregorian"]) def test_encode_cf_datetime_gregorian_proleptic_gregorian_mismatch_error( calendar: str, time_unit: PDDatetimeUnitOptions, ) -> None: if time_unit == "ns": pytest.skip("datetime64[ns] values can only be defined post reform") dates = np.array(["0001-01-01", "2001-01-01"], dtype=f"datetime64[{time_unit}]") with pytest.raises(ValueError, match="proleptic_gregorian"): encode_cf_datetime(dates, "seconds since 2000-01-01", calendar) @pytest.mark.parametrize("calendar", ["gregorian", "Gregorian", "GREGORIAN"]) def test_decode_encode_roundtrip_with_non_lowercase_letters( calendar, time_unit: PDDatetimeUnitOptions ) -> None: # See GH 5093. times = [0, 1] units = "days since 2000-01-01" attrs = {"calendar": calendar, "units": units} variable = Variable(["time"], times, attrs) decoded = conventions.decode_cf_variable( "time", variable, decode_times=CFDatetimeCoder(time_unit=time_unit) ) encoded = conventions.encode_cf_variable(decoded) # Previously this would erroneously be an array of cftime.datetime # objects. We check here that it is decoded properly to np.datetime64. 
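# For example (illustrative): with calendar="GREGORIAN" and the [0, 1] values above,
# decoding should give np.datetime64("2000-01-01") and np.datetime64("2000-01-02")
# rather than cftime.DatetimeGregorian objects, since the calendar name is matched
# case-insensitively.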
assert np.issubdtype(decoded.dtype, np.datetime64) # Use assert_identical to ensure that the calendar attribute maintained its # original form throughout the roundtripping process, uppercase letters and # all. assert_identical(variable, encoded) @requires_cftime def test_should_cftime_be_used_source_outside_range(): src = date_range( "1000-01-01", periods=100, freq="MS", calendar="noleap", use_cftime=True ) with pytest.raises( ValueError, match=r"Source time range is not valid for numpy datetimes." ): _should_cftime_be_used(src, "standard", False) @requires_cftime def test_should_cftime_be_used_target_not_npable(): src = date_range( "2000-01-01", periods=100, freq="MS", calendar="noleap", use_cftime=True ) with pytest.raises( ValueError, match=r"Calendar 'noleap' is only valid with cftime." ): _should_cftime_be_used(src, "noleap", False) @pytest.mark.parametrize( "dtype", [np.int8, np.int16, np.int32, np.int64, np.uint8, np.uint16, np.uint32, np.uint64], ) def test_decode_cf_datetime_varied_integer_dtypes(dtype): units = "seconds since 2018-08-22T03:23:03Z" num_dates = dtype(50) # Set use_cftime=False to ensure we cannot mask a failure by falling back # to cftime. result = decode_cf_datetime(num_dates, units, use_cftime=False) expected = np.asarray(np.datetime64("2018-08-22T03:23:53", "ns")) np.testing.assert_equal(result, expected) @requires_cftime def test_decode_cf_datetime_uint64_with_cftime(): units = "days since 1700-01-01" num_dates = np.uint64(182621) result = decode_cf_datetime(num_dates, units) expected = np.asarray(np.datetime64("2200-01-01", "ns")) np.testing.assert_equal(result, expected) def test_decode_cf_datetime_uint64_with_pandas_overflow_error(): units = "nanoseconds since 1970-01-01" calendar = "standard" num_dates = np.uint64(1_000_000 * 86_400 * 360 * 500_000) with pytest.raises(OutOfBoundsTimedelta): decode_cf_datetime(num_dates, units, calendar, use_cftime=False) @requires_cftime def test_decode_cf_datetime_uint64_with_cftime_overflow_error(): units = "microseconds since 1700-01-01" calendar = "360_day" num_dates = np.uint64(1_000_000 * 86_400 * 360 * 500_000) with pytest.raises(OverflowError): decode_cf_datetime(num_dates, units, calendar) @pytest.mark.parametrize("use_cftime", [True, False]) def test_decode_0size_datetime(use_cftime): # GH1329 if use_cftime and not has_cftime: pytest.skip() dtype = object if use_cftime else "=M8[ns]" expected = np.array([], dtype=dtype) actual = decode_cf_datetime( np.zeros(shape=0, dtype=np.int64), units="days since 1970-01-01 00:00:00", calendar="proleptic_gregorian", use_cftime=use_cftime, ) np.testing.assert_equal(expected, actual) def test_decode_float_datetime(): num_dates = np.array([1867128, 1867134, 1867140], dtype="float32") units = "hours since 1800-01-01" calendar = "standard" expected = np.array( ["2013-01-01T00:00:00", "2013-01-01T06:00:00", "2013-01-01T12:00:00"], dtype="datetime64[ns]", ) actual = decode_cf_datetime( num_dates, units=units, calendar=calendar, use_cftime=False ) np.testing.assert_equal(actual, expected) @pytest.mark.parametrize("time_unit", ["ms", "us", "ns"]) def test_decode_float_datetime_with_decimals( time_unit: PDDatetimeUnitOptions, ) -> None: # test resolution enhancement for floats values = np.array([0, 0.125, 0.25, 0.375, 0.75, 1.0], dtype="float32") expected = np.array( [ "2000-01-01T00:00:00.000", "2000-01-01T00:00:00.125", "2000-01-01T00:00:00.250", "2000-01-01T00:00:00.375", "2000-01-01T00:00:00.750", "2000-01-01T00:00:01.000", ], dtype=f"=M8[{time_unit}]", ) units = "seconds since 
2000-01-01" calendar = "standard" actual = decode_cf_datetime(values, units, calendar, time_unit=time_unit) assert actual.dtype == expected.dtype np.testing.assert_equal(actual, expected) @pytest.mark.parametrize( "time_unit, num", [("s", 0.123), ("ms", 0.1234), ("us", 0.1234567)] ) def test_coding_float_datetime_warning( time_unit: PDDatetimeUnitOptions, num: float ) -> None: units = "seconds since 2000-01-01" calendar = "standard" values = np.array([num], dtype="float32") with pytest.warns( SerializationWarning, match=f"Can't decode floating point datetimes to {time_unit!r}", ): decode_cf_datetime(values, units, calendar, time_unit=time_unit) @requires_cftime def test_scalar_unit() -> None: # test that a scalar units (often NaN when using to_netcdf) does not raise an error variable = Variable(("x", "y"), np.array([[0, 1], [2, 3]]), {"units": np.nan}) result = CFDatetimeCoder().decode(variable) assert np.isnan(result.attrs["units"]) @requires_cftime def test_contains_cftime_lazy() -> None: import cftime from xarray.core.common import _contains_cftime_datetimes times = np.array( [cftime.DatetimeGregorian(1, 1, 2, 0), cftime.DatetimeGregorian(1, 1, 2, 0)], dtype=object, ) array = FirstElementAccessibleArray(times) assert _contains_cftime_datetimes(array) @pytest.mark.parametrize( "timestr, format, dtype, fill_value, use_encoding", [ ("1677-09-21T00:12:43.145224193", "ns", np.int64, 20, True), ("1970-09-21T00:12:44.145224808", "ns", np.float64, 1e30, True), ( "1677-09-21T00:12:43.145225216", "ns", np.float64, -9.223372036854776e18, True, ), ("1677-09-21T00:12:43.145224193", "ns", np.int64, None, False), ("1677-09-21T00:12:43.145225", "us", np.int64, None, False), ("1970-01-01T00:00:01.000001", "us", np.int64, None, False), ("1677-09-21T00:21:52.901038080", "ns", np.float32, 20.0, True), ], ) def test_roundtrip_datetime64_nanosecond_precision( timestr: str, format: Literal["ns", "us"], dtype: np.typing.DTypeLike | None, fill_value: int | float | None, use_encoding: bool, time_unit: PDDatetimeUnitOptions, ) -> None: # test for GH7817 time = np.datetime64(timestr, format) times = [np.datetime64("1970-01-01T00:00:00", format), np.datetime64("NaT"), time] if use_encoding: encoding = dict(dtype=dtype, _FillValue=fill_value) else: encoding = {} var = Variable(["time"], times, encoding=encoding) assert var.dtype == np.dtype(f"=M8[{format}]") encoded_var = conventions.encode_cf_variable(var) assert ( encoded_var.attrs["units"] == f"{_numpy_to_netcdf_timeunit(format)} since 1970-01-01 00:00:00" ) assert encoded_var.attrs["calendar"] == "proleptic_gregorian" assert encoded_var.data.dtype == dtype decoded_var = conventions.decode_cf_variable( "foo", encoded_var, decode_times=CFDatetimeCoder(time_unit=time_unit) ) result_unit = ( format if np.timedelta64(1, format) <= np.timedelta64(1, time_unit) else time_unit ) assert decoded_var.dtype == np.dtype(f"=M8[{result_unit}]") assert ( decoded_var.encoding["units"] == f"{_numpy_to_netcdf_timeunit(format)} since 1970-01-01 00:00:00" ) assert decoded_var.encoding["dtype"] == dtype assert decoded_var.encoding["calendar"] == "proleptic_gregorian" assert_identical(var, decoded_var) def test_roundtrip_datetime64_nanosecond_precision_warning( time_unit: PDDatetimeUnitOptions, ) -> None: # test warning if times can't be serialized faithfully times = [ np.datetime64("1970-01-01T00:01:00", time_unit), np.datetime64("NaT", time_unit), np.datetime64("1970-01-02T00:01:00", time_unit), ] units = "days since 1970-01-10T01:01:00" needed_units = "hours" new_units = 
f"{needed_units} since 1970-01-10T01:01:00" encoding = dict(dtype=None, _FillValue=20, units=units) var = Variable(["time"], times, encoding=encoding) with pytest.warns(UserWarning, match=f"Resolution of {needed_units!r} needed."): encoded_var = conventions.encode_cf_variable(var) assert encoded_var.dtype == np.float64 assert encoded_var.attrs["units"] == units assert encoded_var.attrs["_FillValue"] == 20.0 decoded_var = conventions.decode_cf_variable("foo", encoded_var) assert_identical(var, decoded_var) encoding = dict(dtype="int64", _FillValue=20, units=units) var = Variable(["time"], times, encoding=encoding) with pytest.warns( UserWarning, match=f"Serializing with units {new_units!r} instead." ): encoded_var = conventions.encode_cf_variable(var) assert encoded_var.dtype == np.int64 assert encoded_var.attrs["units"] == new_units assert encoded_var.attrs["_FillValue"] == 20 decoded_var = conventions.decode_cf_variable( "foo", encoded_var, decode_times=CFDatetimeCoder(time_unit=time_unit) ) assert_identical(var, decoded_var) encoding = dict(dtype="float64", _FillValue=20, units=units) var = Variable(["time"], times, encoding=encoding) with warnings.catch_warnings(): warnings.simplefilter("error") encoded_var = conventions.encode_cf_variable(var) assert encoded_var.dtype == np.float64 assert encoded_var.attrs["units"] == units assert encoded_var.attrs["_FillValue"] == 20.0 decoded_var = conventions.decode_cf_variable( "foo", encoded_var, decode_times=CFDatetimeCoder(time_unit=time_unit) ) assert_identical(var, decoded_var) encoding = dict(dtype="int64", _FillValue=20, units=new_units) var = Variable(["time"], times, encoding=encoding) with warnings.catch_warnings(): warnings.simplefilter("error") encoded_var = conventions.encode_cf_variable(var) assert encoded_var.dtype == np.int64 assert encoded_var.attrs["units"] == new_units assert encoded_var.attrs["_FillValue"] == 20 decoded_var = conventions.decode_cf_variable( "foo", encoded_var, decode_times=CFDatetimeCoder(time_unit=time_unit) ) assert_identical(var, decoded_var) @pytest.mark.parametrize( "dtype, fill_value", [(np.int64, 20), (np.int64, np.iinfo(np.int64).min), (np.float64, 1e30)], ) def test_roundtrip_timedelta64_nanosecond_precision( dtype: np.typing.DTypeLike | None, fill_value: int | float, time_unit: PDDatetimeUnitOptions, ) -> None: # test for GH7942 one_day = np.timedelta64(1, "ns") nat = np.timedelta64("nat", "ns") timedelta_values = (np.arange(5) * one_day).astype("timedelta64[ns]") timedelta_values[2] = nat timedelta_values[4] = nat encoding = dict(dtype=dtype, _FillValue=fill_value, units="nanoseconds") var = Variable(["time"], timedelta_values, encoding=encoding) encoded_var = conventions.encode_cf_variable(var) decoded_var = conventions.decode_cf_variable( "foo", encoded_var, decode_times=CFDatetimeCoder(time_unit=time_unit), decode_timedelta=CFTimedeltaCoder(time_unit=time_unit), ) assert_identical(var, decoded_var) def test_roundtrip_timedelta64_nanosecond_precision_warning() -> None: # test warning if timedeltas can't be serialized faithfully one_day = np.timedelta64(1, "D") nat = np.timedelta64("nat", "ns") timedelta_values = (np.arange(5) * one_day).astype("timedelta64[ns]") timedelta_values[2] = nat timedelta_values[4] = np.timedelta64(12, "h").astype("timedelta64[ns]") units = "days" needed_units = "hours" wmsg = ( f"Timedeltas can't be serialized faithfully with requested units {units!r}. " f"Serializing with units {needed_units!r} instead." 
) encoding = dict(dtype=np.int64, _FillValue=20, units=units) var = Variable(["time"], timedelta_values, encoding=encoding) with pytest.warns(UserWarning, match=wmsg): encoded_var = conventions.encode_cf_variable(var) assert encoded_var.dtype == np.int64 assert encoded_var.attrs["units"] == needed_units assert encoded_var.attrs["_FillValue"] == 20 decoded_var = conventions.decode_cf_variable( "foo", encoded_var, decode_timedelta=CFTimedeltaCoder(time_unit="ns") ) assert_identical(var, decoded_var) assert decoded_var.encoding["dtype"] == np.int64 _TEST_ROUNDTRIP_FLOAT_TIMES_TESTS = { "GH-8271": ( 20.0, np.array( ["1970-01-01 00:00:00", "1970-01-01 06:00:00", "NaT"], dtype="datetime64[ns]", ), "days since 1960-01-01", np.array([3653, 3653.25, 20.0]), ), "GH-9488-datetime64[ns]": ( 1.0e20, np.array(["2010-01-01 12:00:00", "NaT"], dtype="datetime64[ns]"), "seconds since 2010-01-01", np.array([43200, 1.0e20]), ), "GH-9488-timedelta64[ns]": ( 1.0e20, np.array([1_000_000_000, "NaT"], dtype="timedelta64[ns]"), "seconds", np.array([1.0, 1.0e20]), ), } @pytest.mark.parametrize( ("fill_value", "times", "units", "encoded_values"), _TEST_ROUNDTRIP_FLOAT_TIMES_TESTS.values(), ids=_TEST_ROUNDTRIP_FLOAT_TIMES_TESTS.keys(), ) def test_roundtrip_float_times(fill_value, times, units, encoded_values) -> None: # Regression test for GitHub issues #8271 and #9488 var = Variable( ["time"], times, encoding=dict(dtype=np.float64, _FillValue=fill_value, units=units), ) encoded_var = conventions.encode_cf_variable(var) np.testing.assert_array_equal(encoded_var, encoded_values) assert encoded_var.attrs["units"] == units assert encoded_var.attrs["_FillValue"] == fill_value decoded_var = conventions.decode_cf_variable( "foo", encoded_var, decode_timedelta=CFTimedeltaCoder(time_unit="ns") ) assert_identical(var, decoded_var) assert decoded_var.encoding["units"] == units assert decoded_var.encoding["_FillValue"] == fill_value _ENCODE_DATETIME64_VIA_DASK_TESTS = { "pandas-encoding-with-prescribed-units-and-dtype": ( "D", "days since 1700-01-01", np.dtype("int32"), ), "mixed-cftime-pandas-encoding-with-prescribed-units-and-dtype": pytest.param( "250YS", "days since 1700-01-01", np.dtype("int32"), marks=requires_cftime ), "pandas-encoding-with-default-units-and-dtype": ("250YS", None, None), } @requires_dask @pytest.mark.parametrize( ("freq", "units", "dtype"), _ENCODE_DATETIME64_VIA_DASK_TESTS.values(), ids=_ENCODE_DATETIME64_VIA_DASK_TESTS.keys(), ) def test_encode_cf_datetime_datetime64_via_dask( freq, units, dtype, time_unit: PDDatetimeUnitOptions ) -> None: import dask.array times_pd = pd.date_range(start="1700", freq=freq, periods=3, unit=time_unit) times = dask.array.from_array(times_pd, chunks=1) encoded_times, encoding_units, encoding_calendar = encode_cf_datetime( times, units, None, dtype ) assert is_duck_dask_array(encoded_times) assert encoded_times.chunks == times.chunks if units is not None and dtype is not None: assert encoding_units == units assert encoded_times.dtype == dtype else: expected_netcdf_time_unit = _numpy_to_netcdf_timeunit(time_unit) assert encoding_units == f"{expected_netcdf_time_unit} since 1970-01-01" assert encoded_times.dtype == np.dtype("int64") assert encoding_calendar == "proleptic_gregorian" decoded_times = decode_cf_datetime( encoded_times, encoding_units, encoding_calendar, time_unit=time_unit ) np.testing.assert_equal(decoded_times, times) assert decoded_times.dtype == times.dtype @requires_dask @pytest.mark.parametrize( ("range_function", "start", "units", "dtype"), [ 
(pd.date_range, "2000", None, np.dtype("int32")), (pd.date_range, "2000", "days since 2000-01-01", None), (pd.timedelta_range, "0D", None, np.dtype("int32")), (pd.timedelta_range, "0D", "days", None), ], ) def test_encode_via_dask_cannot_infer_error( range_function, start, units, dtype ) -> None: values = range_function(start=start, freq="D", periods=3) encoding = dict(units=units, dtype=dtype) variable = Variable(["time"], values, encoding=encoding).chunk({"time": 1}) with pytest.raises(ValueError, match="When encoding chunked arrays"): conventions.encode_cf_variable(variable) @requires_cftime @requires_dask @pytest.mark.parametrize( ("units", "dtype"), [("days since 1700-01-01", np.dtype("int32")), (None, None)] ) def test_encode_cf_datetime_cftime_datetime_via_dask(units, dtype) -> None: import dask.array calendar = "standard" times_idx = date_range( start="1700", freq="D", periods=3, calendar=calendar, use_cftime=True ) times = dask.array.from_array(times_idx, chunks=1) encoded_times, encoding_units, encoding_calendar = encode_cf_datetime( times, units, None, dtype ) assert is_duck_dask_array(encoded_times) assert encoded_times.chunks == times.chunks if units is not None and dtype is not None: assert encoding_units == units assert encoded_times.dtype == dtype else: assert encoding_units == "microseconds since 1970-01-01" assert encoded_times.dtype == np.int64 assert encoding_calendar == calendar decoded_times = decode_cf_datetime( encoded_times, encoding_units, encoding_calendar, use_cftime=True ) np.testing.assert_equal(decoded_times, times) @pytest.mark.parametrize( "use_cftime", [False, pytest.param(True, marks=requires_cftime)] ) @pytest.mark.parametrize("use_dask", [False, pytest.param(True, marks=requires_dask)]) def test_encode_cf_datetime_units_change(use_cftime, use_dask) -> None: times = date_range(start="2000", freq="12h", periods=3, use_cftime=use_cftime) encoding = dict(units="days since 2000-01-01", dtype=np.dtype("int64")) variable = Variable(["time"], times, encoding=encoding) if use_dask: variable = variable.chunk({"time": 1}) with pytest.raises(ValueError, match="Times can't be serialized"): conventions.encode_cf_variable(variable).compute() else: with pytest.warns(UserWarning, match="Times can't be serialized"): encoded = conventions.encode_cf_variable(variable) if use_cftime: expected_units = "hours since 2000-01-01 00:00:00.000000" else: expected_units = "hours since 2000-01-01" assert encoded.attrs["units"] == expected_units decoded = conventions.decode_cf_variable( "name", encoded, decode_times=CFDatetimeCoder(use_cftime=use_cftime) ) assert_equal(variable, decoded) @pytest.mark.parametrize("use_dask", [False, pytest.param(True, marks=requires_dask)]) def test_encode_cf_datetime_precision_loss_regression_test(use_dask) -> None: # Regression test for # https://github.com/pydata/xarray/issues/9134#issuecomment-2191446463 times = date_range("2000", periods=5, freq="ns") encoding = dict(units="seconds since 1970-01-01", dtype=np.dtype("int64")) variable = Variable(["time"], times, encoding=encoding) if use_dask: variable = variable.chunk({"time": 1}) with pytest.raises(ValueError, match="Times can't be serialized"): conventions.encode_cf_variable(variable).compute() else: with pytest.warns(UserWarning, match="Times can't be serialized"): encoded = conventions.encode_cf_variable(variable) decoded = conventions.decode_cf_variable("name", encoded) assert_equal(variable, decoded) @requires_dask @pytest.mark.parametrize( ("units", "dtype"), [("days", np.dtype("int32")), 
(None, None)] ) def test_encode_cf_timedelta_via_dask( units: str | None, dtype: np.dtype | None, time_unit: PDDatetimeUnitOptions ) -> None: import dask.array times_pd = pd.timedelta_range(start="0D", freq="D", periods=3, unit=time_unit) # type: ignore[call-arg,unused-ignore] times = dask.array.from_array(times_pd, chunks=1) encoded_times, encoding_units = encode_cf_timedelta(times, units, dtype) assert is_duck_dask_array(encoded_times) assert encoded_times.chunks == times.chunks if units is not None and dtype is not None: assert encoding_units == units assert encoded_times.dtype == dtype else: assert encoding_units == _numpy_to_netcdf_timeunit(time_unit) assert encoded_times.dtype == np.dtype("int64") decoded_times = decode_cf_timedelta( encoded_times, encoding_units, time_unit=time_unit ) np.testing.assert_equal(decoded_times, times) assert decoded_times.dtype == times.dtype @pytest.mark.parametrize("use_dask", [False, pytest.param(True, marks=requires_dask)]) def test_encode_cf_timedelta_units_change(use_dask) -> None: timedeltas = pd.timedelta_range(start="0h", freq="12h", periods=3) encoding = dict(units="days", dtype=np.dtype("int64")) variable = Variable(["time"], timedeltas, encoding=encoding) if use_dask: variable = variable.chunk({"time": 1}) with pytest.raises(ValueError, match="Timedeltas can't be serialized"): conventions.encode_cf_variable(variable).compute() else: # In this case we automatically modify the encoding units to continue # encoding with integer values. with pytest.warns(UserWarning, match="Timedeltas can't be serialized"): encoded = conventions.encode_cf_variable(variable) assert encoded.attrs["units"] == "hours" decoded = conventions.decode_cf_variable( "name", encoded, decode_timedelta=CFTimedeltaCoder(time_unit="ns") ) assert_equal(variable, decoded) @pytest.mark.parametrize("use_dask", [False, pytest.param(True, marks=requires_dask)]) def test_encode_cf_timedelta_small_dtype_missing_value(use_dask) -> None: # Regression test for GitHub issue #9134 timedeltas = np.array([1, 2, "NaT", 4], dtype="timedelta64[D]").astype( "timedelta64[ns]" ) encoding = dict(units="days", dtype=np.dtype("int16"), _FillValue=np.int16(-1)) variable = Variable(["time"], timedeltas, encoding=encoding) if use_dask: variable = variable.chunk({"time": 1}) encoded = conventions.encode_cf_variable(variable) decoded = conventions.decode_cf_variable("name", encoded, decode_timedelta=True) assert_equal(variable, decoded) _DECODE_TIMEDELTA_VIA_UNITS_TESTS = { "default": (True, None, np.dtype("timedelta64[ns]"), True), "decode_timedelta=True": (True, True, np.dtype("timedelta64[ns]"), False), "decode_timedelta=False": (True, False, np.dtype("int64"), False), "inherit-time_unit-from-decode_times": ( CFDatetimeCoder(time_unit="s"), None, np.dtype("timedelta64[s]"), True, ), "set-time_unit-via-CFTimedeltaCoder-decode_times=True": ( True, CFTimedeltaCoder(time_unit="s"), np.dtype("timedelta64[s]"), False, ), "set-time_unit-via-CFTimedeltaCoder-decode_times=False": ( False, CFTimedeltaCoder(time_unit="s"), np.dtype("timedelta64[s]"), False, ), "override-time_unit-from-decode_times": ( CFDatetimeCoder(time_unit="ns"), CFTimedeltaCoder(time_unit="s"), np.dtype("timedelta64[s]"), False, ), } @pytest.mark.parametrize( ("decode_times", "decode_timedelta", "expected_dtype", "warns"), list(_DECODE_TIMEDELTA_VIA_UNITS_TESTS.values()), ids=list(_DECODE_TIMEDELTA_VIA_UNITS_TESTS.keys()), ) def test_decode_timedelta_via_units( decode_times, decode_timedelta, expected_dtype, warns ) -> None: timedeltas = 
pd.timedelta_range(0, freq="D", periods=3) attrs = {"units": "days"} var = Variable(["time"], timedeltas, encoding=attrs) encoded = Variable(["time"], np.array([0, 1, 2]), attrs=attrs) if warns: with pytest.warns( FutureWarning, match="xarray will not decode the variable 'foo' into a timedelta64 dtype", ): decoded = conventions.decode_cf_variable( "foo", encoded, decode_times=decode_times, decode_timedelta=decode_timedelta, ) else: decoded = conventions.decode_cf_variable( "foo", encoded, decode_times=decode_times, decode_timedelta=decode_timedelta ) if decode_timedelta is False: assert_equal(encoded, decoded) else: assert_equal(var, decoded) assert decoded.dtype == expected_dtype _DECODE_TIMEDELTA_VIA_DTYPE_TESTS = { "default": (True, None, "ns", np.dtype("timedelta64[ns]")), "decode_timedelta=False": (True, False, "ns", np.dtype("int64")), "decode_timedelta=True": (True, True, "ns", np.dtype("timedelta64[ns]")), "use-original-units": (True, True, "s", np.dtype("timedelta64[s]")), "inherit-time_unit-from-decode_times": ( CFDatetimeCoder(time_unit="s"), None, "ns", np.dtype("timedelta64[s]"), ), "set-time_unit-via-CFTimedeltaCoder-decode_times=True": ( True, CFTimedeltaCoder(time_unit="s"), "ns", np.dtype("timedelta64[s]"), ), "set-time_unit-via-CFTimedeltaCoder-decode_times=False": ( False, CFTimedeltaCoder(time_unit="s"), "ns", np.dtype("timedelta64[s]"), ), "override-time_unit-from-decode_times": ( CFDatetimeCoder(time_unit="ns"), CFTimedeltaCoder(time_unit="s"), "ns", np.dtype("timedelta64[s]"), ), "decode-different-units": ( True, CFTimedeltaCoder(time_unit="us"), "s", np.dtype("timedelta64[us]"), ), } @pytest.mark.parametrize( ("decode_times", "decode_timedelta", "original_unit", "expected_dtype"), list(_DECODE_TIMEDELTA_VIA_DTYPE_TESTS.values()), ids=list(_DECODE_TIMEDELTA_VIA_DTYPE_TESTS.keys()), ) def test_decode_timedelta_via_dtype( decode_times, decode_timedelta, original_unit, expected_dtype ) -> None: timedeltas = pd.timedelta_range(0, freq="D", periods=3, unit=original_unit) # type: ignore[call-arg,unused-ignore] encoding = {"units": "days"} var = Variable(["time"], timedeltas, encoding=encoding) encoded = conventions.encode_cf_variable(var) assert encoded.attrs["dtype"] == f"timedelta64[{original_unit}]" assert encoded.attrs["units"] == encoding["units"] decoded = conventions.decode_cf_variable( "foo", encoded, decode_times=decode_times, decode_timedelta=decode_timedelta ) if decode_timedelta is False: assert_equal(encoded, decoded) else: assert_equal(var, decoded) assert decoded.dtype == expected_dtype @pytest.mark.parametrize("dtype", [np.uint64, np.int64, np.float64]) def test_decode_timedelta_dtypes(dtype) -> None: encoded = Variable(["time"], np.arange(10), {"units": "seconds"}) coder = CFTimedeltaCoder(time_unit="s") decoded = coder.decode(encoded) assert decoded.dtype.kind == "m" assert_equal(coder.encode(decoded), encoded) def test_lazy_decode_timedelta_unexpected_dtype() -> None: attrs = {"units": "seconds"} encoded = Variable(["time"], [0, 0.5, 1], attrs=attrs) decoded = conventions.decode_cf_variable( "foo", encoded, decode_timedelta=CFTimedeltaCoder(time_unit="s") ) expected_dtype_upon_lazy_decoding = np.dtype("timedelta64[s]") assert decoded.dtype == expected_dtype_upon_lazy_decoding expected_dtype_upon_loading = np.dtype("timedelta64[ms]") with pytest.warns(SerializationWarning, match="Can't decode floating"): assert decoded.load().dtype == expected_dtype_upon_loading def test_lazy_decode_timedelta_error() -> None: attrs = {"units": "seconds"} encoded = 
Variable(["time"], [0, np.iinfo(np.int64).max, 1], attrs=attrs) decoded = conventions.decode_cf_variable( "foo", encoded, decode_timedelta=CFTimedeltaCoder(time_unit="ms") ) with pytest.raises(OutOfBoundsTimedelta, match="overflow"): decoded.load() @pytest.mark.parametrize( "calendar", [ "standard", pytest.param( "360_day", marks=pytest.mark.skipif(not has_cftime, reason="no cftime") ), ], ) def test_duck_array_decode_times(calendar) -> None: from xarray.core.indexing import LazilyIndexedArray days = LazilyIndexedArray(DuckArrayWrapper(np.array([1.0, 2.0, 3.0]))) var = Variable( ["time"], days, {"units": "days since 2001-01-01", "calendar": calendar} ) decoded = conventions.decode_cf_variable( "foo", var, decode_times=CFDatetimeCoder(use_cftime=None) ) if calendar not in _STANDARD_CALENDAR_NAMES: assert decoded.dtype == np.dtype("O") else: assert decoded.dtype == np.dtype("=M8[ns]") @pytest.mark.parametrize("decode_timedelta", [True, False]) @pytest.mark.parametrize("mask_and_scale", [True, False]) def test_decode_timedelta_mask_and_scale( decode_timedelta: bool, mask_and_scale: bool ) -> None: attrs = { "dtype": "timedelta64[ns]", "units": "nanoseconds", "_FillValue": np.int16(-1), "add_offset": 100000.0, } encoded = Variable(["time"], np.array([0, -1, 1], "int16"), attrs=attrs) decoded = conventions.decode_cf_variable( "foo", encoded, mask_and_scale=mask_and_scale, decode_timedelta=decode_timedelta ) result = conventions.encode_cf_variable(decoded, name="foo") assert_identical(encoded, result) assert encoded.dtype == result.dtype def test_decode_floating_point_timedelta_no_serialization_warning() -> None: attrs = {"units": "seconds"} encoded = Variable(["time"], [0, 0.1, 0.2], attrs=attrs) decoded = conventions.decode_cf_variable("foo", encoded, decode_timedelta=True) with assert_no_warnings(): decoded.load() def test_timedelta64_coding_via_dtype(time_unit: PDDatetimeUnitOptions) -> None: timedeltas = np.array([0, 1, "NaT"], dtype=f"timedelta64[{time_unit}]") variable = Variable(["time"], timedeltas) expected_units = _numpy_to_netcdf_timeunit(time_unit) encoded = conventions.encode_cf_variable(variable) assert encoded.attrs["dtype"] == f"timedelta64[{time_unit}]" assert encoded.attrs["units"] == expected_units decoded = conventions.decode_cf_variable("timedeltas", encoded) assert decoded.encoding["dtype"] == np.dtype("int64") assert decoded.encoding["units"] == expected_units assert_identical(decoded, variable) assert decoded.dtype == variable.dtype reencoded = conventions.encode_cf_variable(decoded) assert_identical(reencoded, encoded) assert reencoded.dtype == encoded.dtype def test_timedelta_coding_via_dtype_non_pandas_coarse_resolution_warning() -> None: attrs = {"dtype": "timedelta64[D]", "units": "days"} encoded = Variable(["time"], [0, 1, 2], attrs=attrs) with pytest.warns(UserWarning, match="xarray only supports"): decoded = conventions.decode_cf_variable("timedeltas", encoded) expected_array = np.array([0, 1, 2], dtype="timedelta64[D]") expected_array = expected_array.astype("timedelta64[s]") expected = Variable(["time"], expected_array) assert_identical(decoded, expected) assert decoded.dtype == np.dtype("timedelta64[s]") @pytest.mark.xfail(reason="xarray does not recognize picoseconds as time-like") def test_timedelta_coding_via_dtype_non_pandas_fine_resolution_warning() -> None: attrs = {"dtype": "timedelta64[ps]", "units": "picoseconds"} encoded = Variable(["time"], [0, 1000, 2000], attrs=attrs) with pytest.warns(UserWarning, match="xarray only supports"): decoded = 
conventions.decode_cf_variable("timedeltas", encoded) expected_array = np.array([0, 1000, 2000], dtype="timedelta64[ps]") expected_array = expected_array.astype("timedelta64[ns]") expected = Variable(["time"], expected_array) assert_identical(decoded, expected) assert decoded.dtype == np.dtype("timedelta64[ns]") def test_timedelta_decode_via_dtype_invalid_encoding() -> None: attrs = {"dtype": "timedelta64[s]", "units": "seconds"} encoding = {"units": "foo"} encoded = Variable(["time"], [0, 1, 2], attrs=attrs, encoding=encoding) with pytest.raises(ValueError, match=r"Key .* already exists"): conventions.decode_cf_variable("timedeltas", encoded) @pytest.mark.parametrize("attribute", ["dtype", "units"]) def test_timedelta_encode_via_dtype_invalid_attribute(attribute) -> None: timedeltas = pd.timedelta_range(0, freq="D", periods=3) attrs = {attribute: "foo"} variable = Variable(["time"], timedeltas, attrs=attrs) with pytest.raises(ValueError, match=r"Key .* already exists"): conventions.encode_cf_variable(variable) @pytest.mark.parametrize( ("decode_via_units", "decode_via_dtype", "attrs", "expect_timedelta64"), [ (True, True, {"units": "seconds"}, True), (True, False, {"units": "seconds"}, True), (False, True, {"units": "seconds"}, False), (False, False, {"units": "seconds"}, False), (True, True, {"dtype": "timedelta64[s]", "units": "seconds"}, True), (True, False, {"dtype": "timedelta64[s]", "units": "seconds"}, True), (False, True, {"dtype": "timedelta64[s]", "units": "seconds"}, True), (False, False, {"dtype": "timedelta64[s]", "units": "seconds"}, False), ], ids=lambda x: f"{x!r}", ) def test_timedelta_decoding_options( decode_via_units, decode_via_dtype, attrs, expect_timedelta64 ) -> None: array = np.array([0, 1, 2], dtype=np.dtype("int64")) encoded = Variable(["time"], array, attrs=attrs) # Confirm we decode to the expected dtype. decode_timedelta = CFTimedeltaCoder( time_unit="s", decode_via_units=decode_via_units, decode_via_dtype=decode_via_dtype, ) decoded = conventions.decode_cf_variable( "foo", encoded, decode_timedelta=decode_timedelta ) if expect_timedelta64: assert decoded.dtype == np.dtype("timedelta64[s]") else: assert decoded.dtype == np.dtype("int64") # Confirm we exactly roundtrip. 
reencoded = conventions.encode_cf_variable(decoded) expected = encoded.copy() if "dtype" not in attrs and decode_via_units: expected.attrs["dtype"] = "timedelta64[s]" assert_identical(reencoded, expected) def test_timedelta_encoding_explicit_non_timedelta64_dtype() -> None: encoding = {"dtype": np.dtype("int32")} timedeltas = pd.timedelta_range(0, freq="D", periods=3) variable = Variable(["time"], timedeltas, encoding=encoding) encoded = conventions.encode_cf_variable(variable) assert encoded.attrs["units"] == "days" assert encoded.attrs["dtype"] == "timedelta64[ns]" assert encoded.dtype == np.dtype("int32") decoded = conventions.decode_cf_variable("foo", encoded) assert_identical(decoded, variable) reencoded = conventions.encode_cf_variable(decoded) assert_identical(reencoded, encoded) assert encoded.attrs["units"] == "days" assert encoded.attrs["dtype"] == "timedelta64[ns]" assert encoded.dtype == np.dtype("int32") @pytest.mark.parametrize("mask_attribute", ["_FillValue", "missing_value"]) def test_timedelta64_coding_via_dtype_with_mask( time_unit: PDDatetimeUnitOptions, mask_attribute: str ) -> None: timedeltas = np.array([0, 1, "NaT"], dtype=f"timedelta64[{time_unit}]") mask = 10 variable = Variable(["time"], timedeltas, encoding={mask_attribute: mask}) expected_dtype = f"timedelta64[{time_unit}]" expected_units = _numpy_to_netcdf_timeunit(time_unit) encoded = conventions.encode_cf_variable(variable) assert encoded.attrs["dtype"] == expected_dtype assert encoded.attrs["units"] == expected_units assert encoded.attrs[mask_attribute] == mask assert encoded[-1] == mask decoded = conventions.decode_cf_variable("timedeltas", encoded) assert decoded.encoding["dtype"] == np.dtype("int64") assert decoded.encoding["units"] == expected_units assert decoded.encoding[mask_attribute] == mask assert np.isnat(decoded[-1]) assert_identical(decoded, variable) assert decoded.dtype == variable.dtype reencoded = conventions.encode_cf_variable(decoded) assert_identical(reencoded, encoded) assert reencoded.dtype == encoded.dtype def test_roundtrip_0size_timedelta(time_unit: PDDatetimeUnitOptions) -> None: # regression test for GitHub issue #10310 encoding = {"units": "days", "dtype": np.dtype("int64")} data = np.array([], dtype=f"=m8[{time_unit}]") decoded = Variable(["time"], data, encoding=encoding) encoded = conventions.encode_cf_variable(decoded, name="foo") assert encoded.dtype == encoding["dtype"] assert encoded.attrs["units"] == encoding["units"] decoded = conventions.decode_cf_variable("foo", encoded, decode_timedelta=True) assert decoded.dtype == np.dtype(f"=m8[{time_unit}]") with assert_no_warnings(): decoded.load() assert decoded.dtype == np.dtype("=m8[s]") assert decoded.encoding == encoding def test_roundtrip_empty_datetime64_array(time_unit: PDDatetimeUnitOptions) -> None: # Regression test for GitHub issue #10722. 
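# A zero-length datetime64 array should encode to float64 per the explicit
# encoding and decode back identically, preserving the requested time unit.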
encoding = { "units": "days since 1990-1-1", "dtype": np.dtype("float64"), "calendar": "standard", } times = date_range("2000", periods=0, unit=time_unit) variable = Variable(["time"], times, encoding=encoding) encoded = conventions.encode_cf_variable(variable, name="foo") assert encoded.dtype == np.dtype("float64") decode_times = CFDatetimeCoder(time_unit=time_unit) roundtripped = conventions.decode_cf_variable( "foo", encoded, decode_times=decode_times ) assert_identical(variable, roundtripped) assert roundtripped.dtype == variable.dtype python-xarray-2026.01.0/xarray/tests/test_nd_point_index.py0000664000175000017500000001475415136607163024140 0ustar alastairalastairimport numpy as np import pytest import xarray as xr from xarray.indexes import NDPointIndex from xarray.indexes.nd_point_index import ScipyKDTreeAdapter from xarray.tests import assert_identical, has_scipy, requires_scipy @pytest.mark.skipif(has_scipy, reason="requires scipy to be missing") def test_scipy_kdtree_adapter_missing_scipy(): points = np.random.rand(4, 2) with pytest.raises(ImportError, match=r"scipy"): ScipyKDTreeAdapter(points, options={}) @requires_scipy def test_tree_index_init() -> None: from xarray.indexes.nd_point_index import ScipyKDTreeAdapter xx, yy = np.meshgrid([1.0, 2.0], [3.0, 4.0]) ds = xr.Dataset(coords={"xx": (("y", "x"), xx), "yy": (("y", "x"), yy)}) ds_indexed1 = ds.set_xindex(("xx", "yy"), NDPointIndex) assert "xx" in ds_indexed1.xindexes assert "yy" in ds_indexed1.xindexes assert isinstance(ds_indexed1.xindexes["xx"], NDPointIndex) assert ds_indexed1.xindexes["xx"] is ds_indexed1.xindexes["yy"] ds_indexed2 = ds.set_xindex( ("xx", "yy"), NDPointIndex, tree_adapter_cls=ScipyKDTreeAdapter ) assert ds_indexed1.xindexes["xx"].equals(ds_indexed2.xindexes["yy"]) @requires_scipy def test_tree_index_init_errors() -> None: xx, yy = np.meshgrid([1.0, 2.0], [3.0, 4.0]) ds = xr.Dataset(coords={"xx": (("y", "x"), xx), "yy": (("y", "x"), yy)}) with pytest.raises(ValueError, match="number of variables"): ds.set_xindex("xx", NDPointIndex) ds2 = ds.assign_coords(yy=(("u", "v"), [[3.0, 3.0], [4.0, 4.0]])) with pytest.raises(ValueError, match="same dimensions"): ds2.set_xindex(("xx", "yy"), NDPointIndex) @requires_scipy def test_tree_index_sel() -> None: xx, yy = np.meshgrid([1.0, 2.0], [3.0, 4.0]) ds = xr.Dataset(coords={"xx": (("y", "x"), xx), "yy": (("y", "x"), yy)}).set_xindex( ("xx", "yy"), NDPointIndex ) # 1-dimensional labels actual = ds.sel( xx=xr.Variable("u", [1.1, 1.1, 1.1]), yy=xr.Variable("u", [3.1, 3.1, 3.1]), method="nearest", ) expected = xr.Dataset( coords={"xx": ("u", [1.0, 1.0, 1.0]), "yy": ("u", [3.0, 3.0, 3.0])} ) assert_identical(actual, expected) # 2-dimensional labels actual = ds.sel( xx=xr.Variable(("u", "v"), [[1.1, 1.1, 1.1], [1.9, 1.9, 1.9]]), yy=xr.Variable(("u", "v"), [[3.1, 3.1, 3.1], [3.9, 3.9, 3.9]]), method="nearest", ) expected = xr.Dataset( coords={ "xx": (("u", "v"), [[1.0, 1.0, 1.0], [2.0, 2.0, 2.0]]), "yy": (("u", "v"), [[3.0, 3.0, 3.0], [4.0, 4.0, 4.0]]), }, ) assert_identical(actual, expected) # all scalar labels actual = ds.sel(xx=1.1, yy=3.1, method="nearest") expected = xr.Dataset(coords={"xx": 1.0, "yy": 3.0}) assert_identical(actual, expected) # broadcast scalar to label shape and dimensions actual = ds.sel(xx=1.1, yy=xr.Variable("u", [3.1, 3.1, 3.1]), method="nearest") expected = ds.sel( xx=xr.Variable("u", [1.1, 1.1, 1.1]), yy=xr.Variable("u", [3.1, 3.1, 3.1]), method="nearest", ) assert_identical(actual, expected) # broadcast orthogonal 1-dimensional labels actual 
= ds.sel( xx=xr.Variable("u", [1.1, 1.1]), yy=xr.Variable("v", [3.1, 3.1]), method="nearest", ) expected = xr.Dataset( coords={ "xx": (("u", "v"), [[1.0, 1.0], [1.0, 1.0]]), "yy": (("u", "v"), [[3.0, 3.0], [3.0, 3.0]]), }, ) assert_identical(actual, expected) # implicit dimension array-like labels actual = ds.sel( xx=[[1.1, 1.1, 1.1], [1.9, 1.9, 1.9]], yy=[[3.1, 3.1, 3.1], [3.9, 3.9, 3.9]], method="nearest", ) expected = ds.sel( xx=xr.Variable(ds.xx.dims, [[1.1, 1.1, 1.1], [1.9, 1.9, 1.9]]), yy=xr.Variable(ds.yy.dims, [[3.1, 3.1, 3.1], [3.9, 3.9, 3.9]]), method="nearest", ) assert_identical(actual, expected) @requires_scipy def test_tree_index_sel_errors() -> None: xx, yy = np.meshgrid([1.0, 2.0], [3.0, 4.0]) ds = xr.Dataset(coords={"xx": (("y", "x"), xx), "yy": (("y", "x"), yy)}).set_xindex( ("xx", "yy"), NDPointIndex ) with pytest.raises(ValueError, match="method='nearest'"): ds.sel(xx=1.1, yy=3.1) with pytest.raises(ValueError, match="missing labels"): ds.sel(xx=1.1, method="nearest") with pytest.raises(ValueError, match="invalid label value"): # invalid array-like dimensions ds.sel(xx=[1.1, 1.9], yy=[3.1, 3.9], method="nearest") # error while trying to broadcast labels with pytest.raises(xr.AlignmentError, match=r".*conflicting dimension sizes"): ds.sel( xx=xr.Variable("u", [1.1, 1.1, 1.1]), yy=xr.Variable("u", [3.1, 3.1]), method="nearest", ) @requires_scipy def test_tree_index_equals() -> None: xx1, yy1 = np.meshgrid([1.0, 2.0], [3.0, 4.0]) ds1 = xr.Dataset( coords={"xx": (("y", "x"), xx1), "yy": (("y", "x"), yy1)} ).set_xindex(("xx", "yy"), NDPointIndex) xx2, yy2 = np.meshgrid([1.0, 2.0], [3.0, 4.0]) ds2 = xr.Dataset( coords={"xx": (("y", "x"), xx2), "yy": (("y", "x"), yy2)} ).set_xindex(("xx", "yy"), NDPointIndex) xx3, yy3 = np.meshgrid([10.0, 20.0], [30.0, 40.0]) ds3 = xr.Dataset( coords={"xx": (("y", "x"), xx3), "yy": (("y", "x"), yy3)} ).set_xindex(("xx", "yy"), NDPointIndex) assert ds1.xindexes["xx"].equals(ds2.xindexes["xx"]) assert not ds1.xindexes["xx"].equals(ds3.xindexes["xx"]) def test_tree_index_rename() -> None: xx, yy = np.meshgrid([1.0, 2.0], [3.0, 4.0]) ds = xr.Dataset(coords={"xx": (("y", "x"), xx), "yy": (("y", "x"), yy)}).set_xindex( ("xx", "yy"), NDPointIndex ) ds_renamed = ds.rename_dims(y="u").rename_vars(yy="uu") assert "uu" in ds_renamed.xindexes assert isinstance(ds_renamed.xindexes["uu"], NDPointIndex) assert ds_renamed.xindexes["xx"] is ds_renamed.xindexes["uu"] # test via sel() with implicit dimension array-like labels, which relies on # NDPointIndex._coord_names and NDPointIndex._dims internal attrs actual = ds_renamed.sel( xx=[[1.1, 1.1, 1.1], [1.9, 1.9, 1.9]], uu=[[3.1, 3.1, 3.1], [3.9, 3.9, 3.9]], method="nearest", ) expected = ds_renamed.sel( xx=xr.Variable(ds_renamed.xx.dims, [[1.1, 1.1, 1.1], [1.9, 1.9, 1.9]]), uu=xr.Variable(ds_renamed.uu.dims, [[3.1, 3.1, 3.1], [3.9, 3.9, 3.9]]), method="nearest", ) assert_identical(actual, expected) python-xarray-2026.01.0/xarray/tests/test_dask.py0000664000175000017500000020213715136607163022053 0ustar alastairalastairfrom __future__ import annotations import operator import pickle import sys from contextlib import suppress from textwrap import dedent from typing import Any import numpy as np import pandas as pd import pytest import xarray as xr import xarray.ufuncs as xu from xarray import DataArray, Dataset, Variable from xarray.core import duck_array_ops from xarray.core.duck_array_ops import lazy_array_equiv from xarray.core.indexes import PandasIndex from xarray.testing import assert_chunks_equal from 
xarray.tests import ( assert_allclose, assert_array_equal, assert_equal, assert_frame_equal, assert_identical, mock, raise_if_dask_computes, requires_pint, requires_scipy_or_netCDF4, ) from xarray.tests.test_backends import create_tmp_file dask = pytest.importorskip("dask") da = pytest.importorskip("dask.array") dd = pytest.importorskip("dask.dataframe") ON_WINDOWS = sys.platform == "win32" def test_raise_if_dask_computes(): data = da.from_array(np.random.default_rng(0).random((4, 6)), chunks=(2, 2)) with pytest.raises(RuntimeError, match=r"Too many computes"): with raise_if_dask_computes(): data.compute() class DaskTestCase: def assertLazyAnd(self, expected, actual, test): with dask.config.set(scheduler="synchronous"): test(actual, expected) if isinstance(actual, Dataset): for k, v in actual.variables.items(): if k in actual.xindexes: assert isinstance(v.data, np.ndarray) else: assert isinstance(v.data, da.Array) elif isinstance(actual, DataArray): assert isinstance(actual.data, da.Array) for k, v in actual.coords.items(): if k in actual.xindexes: assert isinstance(v.data, np.ndarray) else: assert isinstance(v.data, da.Array) elif isinstance(actual, Variable): assert isinstance(actual.data, da.Array) else: raise AssertionError() class TestVariable(DaskTestCase): def assertLazyAndIdentical(self, expected, actual): self.assertLazyAnd(expected, actual, assert_identical) def assertLazyAndAllClose(self, expected, actual): self.assertLazyAnd(expected, actual, assert_allclose) @pytest.fixture(autouse=True) def setUp(self): self.values = np.random.default_rng(0).random((4, 6)) self.data = da.from_array(self.values, chunks=(2, 2)) self.eager_var = Variable(("x", "y"), self.values) self.lazy_var = Variable(("x", "y"), self.data) def test_basics(self): v = self.lazy_var assert self.data is v.data assert self.data.chunks == v.chunks assert_array_equal(self.values, v) def test_copy(self): self.assertLazyAndIdentical(self.eager_var, self.lazy_var.copy()) self.assertLazyAndIdentical(self.eager_var, self.lazy_var.copy(deep=True)) def test_chunk(self): test_cases: list[tuple[int | dict[str, Any], tuple[tuple[int, ...], ...]]] = [ ({}, ((2, 2), (2, 2, 2))), (3, ((3, 1), (3, 3))), ({"x": 3, "y": 3}, ((3, 1), (3, 3))), ({"x": 3}, ((3, 1), (2, 2, 2))), ({"x": (3, 1)}, ((3, 1), (2, 2, 2))), ] for chunks, expected in test_cases: rechunked = self.lazy_var.chunk(chunks) assert rechunked.chunks == expected self.assertLazyAndIdentical(self.eager_var, rechunked) expected_chunksizes = dict(zip(self.lazy_var.dims, expected, strict=True)) assert rechunked.chunksizes == expected_chunksizes def test_indexing(self): u = self.eager_var v = self.lazy_var self.assertLazyAndIdentical(u[0], v[0]) self.assertLazyAndIdentical(u[:1], v[:1]) self.assertLazyAndIdentical(u[[0, 1], [0, 1, 2]], v[[0, 1], [0, 1, 2]]) @pytest.mark.parametrize( "expected_data, index", [ (da.array([99, 2, 3, 4]), 0), (da.array([99, 99, 99, 4]), slice(2, None, -1)), (da.array([99, 99, 3, 99]), [0, -1, 1]), (da.array([99, 99, 99, 4]), np.arange(3)), (da.array([1, 99, 99, 99]), [False, True, True, True]), (da.array([1, 99, 99, 99]), np.array([False, True, True, True])), (da.array([99, 99, 99, 99]), Variable(("x"), np.array([True] * 4))), ], ) def test_setitem_dask_array(self, expected_data, index): arr = Variable(("x"), da.array([1, 2, 3, 4])) expected = Variable(("x"), expected_data) with raise_if_dask_computes(): arr[index] = 99 assert_identical(arr, expected) def test_squeeze(self): u = self.eager_var v = self.lazy_var 
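# squeeze() on a chunked variable should stay lazy and match the eager result.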
self.assertLazyAndIdentical(u[0].squeeze(), v[0].squeeze()) def test_equals(self): v = self.lazy_var assert v.equals(v) assert isinstance(v.data, da.Array) assert v.identical(v) assert isinstance(v.data, da.Array) def test_transpose(self): u = self.eager_var v = self.lazy_var self.assertLazyAndIdentical(u.T, v.T) def test_shift(self): u = self.eager_var v = self.lazy_var self.assertLazyAndIdentical(u.shift(x=2), v.shift(x=2)) self.assertLazyAndIdentical(u.shift(x=-2), v.shift(x=-2)) assert v.data.chunks == v.shift(x=1).data.chunks def test_roll(self): u = self.eager_var v = self.lazy_var self.assertLazyAndIdentical(u.roll(x=2), v.roll(x=2)) assert v.data.chunks == v.roll(x=1).data.chunks def test_unary_op(self): u = self.eager_var v = self.lazy_var self.assertLazyAndIdentical(-u, -v) self.assertLazyAndIdentical(abs(u), abs(v)) self.assertLazyAndIdentical(u.round(), v.round()) def test_binary_op(self): u = self.eager_var v = self.lazy_var self.assertLazyAndIdentical(2 * u, 2 * v) self.assertLazyAndIdentical(u + u, v + v) self.assertLazyAndIdentical(u[0] + u, v[0] + v) def test_binary_op_bitshift(self) -> None: # bit shifts only work on ints so we need to generate # new eager and lazy vars rng = np.random.default_rng(0) values = rng.integers(low=-10000, high=10000, size=(4, 6)) data = da.from_array(values, chunks=(2, 2)) u = Variable(("x", "y"), values) v = Variable(("x", "y"), data) self.assertLazyAndIdentical(u << 2, v << 2) self.assertLazyAndIdentical(u << 5, v << 5) self.assertLazyAndIdentical(u >> 2, v >> 2) self.assertLazyAndIdentical(u >> 5, v >> 5) def test_repr(self): expected = dedent( f"""\ Size: 192B {self.lazy_var.data!r}""" ) assert expected == repr(self.lazy_var) def test_pickle(self): # Test that pickling/unpickling does not convert the dask # backend to numpy a1 = Variable(["x"], build_dask_array("x")) a1.compute() assert not a1._in_memory assert kernel_call_count == 1 a2 = pickle.loads(pickle.dumps(a1)) assert kernel_call_count == 1 assert_identical(a1, a2) assert not a1._in_memory assert not a2._in_memory def test_reduce(self): u = self.eager_var v = self.lazy_var self.assertLazyAndAllClose(u.mean(), v.mean()) self.assertLazyAndAllClose(u.std(), v.std()) with raise_if_dask_computes(): actual = v.argmax(dim="x") self.assertLazyAndAllClose(u.argmax(dim="x"), actual) with raise_if_dask_computes(): actual = v.argmin(dim="x") self.assertLazyAndAllClose(u.argmin(dim="x"), actual) self.assertLazyAndAllClose((u > 1).any(), (v > 1).any()) self.assertLazyAndAllClose((u < 1).all("x"), (v < 1).all("x")) with pytest.raises(NotImplementedError, match=r"only works along an axis"): v.median() with pytest.raises(NotImplementedError, match=r"only works along an axis"): v.median(v.dims) with raise_if_dask_computes(): v.reduce(duck_array_ops.mean) def test_missing_values(self): values = np.array([0, 1, np.nan, 3]) data = da.from_array(values, chunks=(2,)) eager_var = Variable("x", values) lazy_var = Variable("x", data) self.assertLazyAndIdentical(eager_var, lazy_var.fillna(lazy_var)) self.assertLazyAndIdentical(Variable("x", range(4)), lazy_var.fillna(2)) self.assertLazyAndIdentical(eager_var.count(), lazy_var.count()) def test_concat(self): u = self.eager_var v = self.lazy_var self.assertLazyAndIdentical(u, Variable.concat([v[:2], v[2:]], "x")) self.assertLazyAndIdentical(u[:2], Variable.concat([v[0], v[1]], "x")) self.assertLazyAndIdentical(u[:2], Variable.concat([u[0], v[1]], "x")) self.assertLazyAndIdentical(u[:2], Variable.concat([v[0], u[1]], "x")) self.assertLazyAndIdentical( u[:3], 
Variable.concat([v[[0, 2]], v[[1]]], "x", positions=[[0, 2], [1]]) ) def test_missing_methods(self): v = self.lazy_var with pytest.raises(NotImplementedError, match="dask"): v.argsort() with pytest.raises(NotImplementedError, match="dask"): v[0].item() # type: ignore[attr-defined] def test_univariate_ufunc(self): u = self.eager_var v = self.lazy_var self.assertLazyAndAllClose(np.sin(u), np.sin(v)) def test_bivariate_ufunc(self): u = self.eager_var v = self.lazy_var self.assertLazyAndAllClose(np.maximum(u, 0), np.maximum(v, 0)) self.assertLazyAndAllClose(np.maximum(u, 0), np.maximum(0, v)) def test_univariate_xufunc(self): u = self.eager_var v = self.lazy_var self.assertLazyAndAllClose(np.sin(u), xu.sin(v)) def test_bivariate_xufunc(self): u = self.eager_var v = self.lazy_var self.assertLazyAndAllClose(np.maximum(u, 0), xu.maximum(v, 0)) self.assertLazyAndAllClose(np.maximum(u, 0), xu.maximum(0, v)) def test_compute(self): u = self.eager_var v = self.lazy_var assert dask.is_dask_collection(v) (v2,) = dask.compute(v + 1) assert not dask.is_dask_collection(v2) assert ((u + 1).data == v2.data).all() def test_persist(self): u = self.eager_var v = self.lazy_var + 1 (v2,) = dask.persist(v) assert v is not v2 assert len(v2.__dask_graph__()) < len(v.__dask_graph__()) # type: ignore[arg-type] assert v2.__dask_keys__() == v.__dask_keys__() assert dask.is_dask_collection(v) assert dask.is_dask_collection(v2) self.assertLazyAndAllClose(u + 1, v) self.assertLazyAndAllClose(u + 1, v2) @requires_pint def test_tokenize_duck_dask_array(self): import pint unit_registry: pint.UnitRegistry = pint.UnitRegistry() q = unit_registry.Quantity(self.data, "meter") variable = xr.Variable(("x", "y"), q) token = dask.base.tokenize(variable) post_op = variable + 5 * unit_registry.meter assert dask.base.tokenize(variable) != dask.base.tokenize(post_op) # Immutability check assert dask.base.tokenize(variable) == token class TestDataArrayAndDataset(DaskTestCase): def assertLazyAndIdentical(self, expected, actual): self.assertLazyAnd(expected, actual, assert_identical) def assertLazyAndAllClose(self, expected, actual): self.assertLazyAnd(expected, actual, assert_allclose) def assertLazyAndEqual(self, expected, actual): self.assertLazyAnd(expected, actual, assert_equal) @pytest.fixture(autouse=True) def setUp(self): self.values = np.random.randn(4, 6) self.data = da.from_array(self.values, chunks=(2, 2)) self.eager_array = DataArray( self.values, coords={"x": range(4)}, dims=("x", "y"), name="foo" ) self.lazy_array = DataArray( self.data, coords={"x": range(4)}, dims=("x", "y"), name="foo" ) def test_chunk(self) -> None: test_cases: list[ tuple[int | str | dict[str, Any], tuple[tuple[int, ...], ...]] ] = [ ({}, ((2, 2), (2, 2, 2))), (3, ((3, 1), (3, 3))), ({"x": 3, "y": 3}, ((3, 1), (3, 3))), ({"x": 3}, ((3, 1), (2, 2, 2))), ({"x": (3, 1)}, ((3, 1), (2, 2, 2))), ({"x": "16B"}, ((1, 1, 1, 1), (2, 2, 2))), ("16B", ((1, 1, 1, 1), (1,) * 6)), ("16MB", ((4,), (6,))), ] for chunks, expected in test_cases: # Test DataArray rechunked = self.lazy_array.chunk(chunks) assert rechunked.chunks == expected self.assertLazyAndIdentical(self.eager_array, rechunked) expected_chunksizes = dict(zip(self.lazy_array.dims, expected, strict=True)) assert rechunked.chunksizes == expected_chunksizes # Test Dataset lazy_dataset = self.lazy_array.to_dataset() eager_dataset = self.eager_array.to_dataset() expected_chunksizes = dict(zip(lazy_dataset.dims, expected, strict=True)) rechunked = lazy_dataset.chunk(chunks) # type: ignore[assignment] # 
Dataset.chunks has a different return type to DataArray.chunks - see issue #5843 assert rechunked.chunks == expected_chunksizes self.assertLazyAndIdentical(eager_dataset, rechunked) assert rechunked.chunksizes == expected_chunksizes def test_rechunk(self): chunked = self.eager_array.chunk({"x": 2}).chunk({"y": 2}) assert chunked.chunks == ((2,) * 2, (2,) * 3) self.assertLazyAndIdentical(self.lazy_array, chunked) def test_new_chunk(self): chunked = self.eager_array.chunk() assert chunked.data.name.startswith("xarray-") def test_lazy_dataset(self): lazy_ds = Dataset({"foo": (("x", "y"), self.data)}) assert isinstance(lazy_ds.foo.variable.data, da.Array) def test_lazy_array(self): u = self.eager_array v = self.lazy_array self.assertLazyAndAllClose(u, v) self.assertLazyAndAllClose(-u, -v) self.assertLazyAndAllClose(u.T, v.T) self.assertLazyAndAllClose(u.mean(), v.mean()) self.assertLazyAndAllClose(1 + u, 1 + v) actual = xr.concat([v[:2], v[2:]], "x") self.assertLazyAndAllClose(u, actual) def test_compute(self): u = self.eager_array v = self.lazy_array assert dask.is_dask_collection(v) (v2,) = dask.compute(v + 1) assert not dask.is_dask_collection(v2) assert ((u + 1).data == v2.data).all() def test_persist(self): u = self.eager_array v = self.lazy_array + 1 (v2,) = dask.persist(v) assert v is not v2 assert len(v2.__dask_graph__()) < len(v.__dask_graph__()) assert v2.__dask_keys__() == v.__dask_keys__() assert dask.is_dask_collection(v) assert dask.is_dask_collection(v2) self.assertLazyAndAllClose(u + 1, v) self.assertLazyAndAllClose(u + 1, v2) def test_concat_loads_variables(self): # Test that concat() computes not-in-memory variables at most once # and loads them in the output, while leaving the input unaltered. d1 = build_dask_array("d1") c1 = build_dask_array("c1") d2 = build_dask_array("d2") c2 = build_dask_array("c2") d3 = build_dask_array("d3") c3 = build_dask_array("c3") # Note: c is a non-index coord. # Index coords are loaded by IndexVariable.__init__. 
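# build_dask_array() resets the module-level kernel_call_count, so the
# assertions below count exactly how many chunk computations concat() triggers.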
ds1 = Dataset(data_vars={"d": ("x", d1)}, coords={"c": ("x", c1)}) ds2 = Dataset(data_vars={"d": ("x", d2)}, coords={"c": ("x", c2)}) ds3 = Dataset(data_vars={"d": ("x", d3)}, coords={"c": ("x", c3)}) assert kernel_call_count == 0 out = xr.concat( [ds1, ds2, ds3], dim="n", data_vars="different", coords="different", compat="equals", ) # each kernel is computed exactly once assert kernel_call_count == 6 # variables are loaded in the output assert isinstance(out["d"].data, np.ndarray) assert isinstance(out["c"].data, np.ndarray) out = xr.concat([ds1, ds2, ds3], dim="n", data_vars="all", coords="all") # no extra kernel calls assert kernel_call_count == 6 assert isinstance(out["d"].data, dask.array.Array) assert isinstance(out["c"].data, dask.array.Array) out = xr.concat([ds1, ds2, ds3], dim="n", data_vars=["d"], coords=["c"]) # no extra kernel calls assert kernel_call_count == 6 assert isinstance(out["d"].data, dask.array.Array) assert isinstance(out["c"].data, dask.array.Array) with xr.set_options(use_new_combine_kwarg_defaults=True): out = xr.concat([ds1, ds2, ds3], dim="n", data_vars=[], coords=[]) # no extra kernel calls assert kernel_call_count == 6 assert isinstance(out["d"].data, dask.array.Array) assert isinstance(out["c"].data, dask.array.Array) out = xr.concat( [ds1, ds2, ds3], dim="n", data_vars=[], coords=[], compat="equals" ) # variables are loaded once as we are validating that they're identical assert kernel_call_count == 12 assert isinstance(out["d"].data, np.ndarray) assert isinstance(out["c"].data, np.ndarray) out = xr.concat( [ds1, ds2, ds3], dim="n", data_vars="different", coords="different", compat="identical", ) # compat=identical doesn't do any more kernel calls than compat=equals assert kernel_call_count == 18 assert isinstance(out["d"].data, np.ndarray) assert isinstance(out["c"].data, np.ndarray) # When the test for different turns true halfway through, # stop computing variables as it would not have any benefit ds4 = Dataset(data_vars={"d": ("x", [2.0])}, coords={"c": ("x", [2.0])}) out = xr.concat( [ds1, ds2, ds4, ds3], dim="n", data_vars="different", coords="different", compat="equals", ) # the variables of ds1 and ds2 were computed, but those of ds3 didn't assert kernel_call_count == 22 assert isinstance(out["d"].data, dask.array.Array) assert isinstance(out["c"].data, dask.array.Array) # the data of ds1 and ds2 was loaded into numpy and then # concatenated to the data of ds3. Thus, only ds3 is computed now. 
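# Computing the concatenated result therefore only evaluates ds3's two
# kernels (d3 and c3), taking the count from 22 to 24.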
out.compute() assert kernel_call_count == 24 # Finally, test that originals are unaltered assert ds1["d"].data is d1 assert ds1["c"].data is c1 assert ds2["d"].data is d2 assert ds2["c"].data is c2 assert ds3["d"].data is d3 assert ds3["c"].data is c3 # now check that concat() is correctly using dask name equality to skip loads out = xr.concat( [ds1, ds1, ds1], dim="n", data_vars="different", coords="different", compat="equals", ) assert kernel_call_count == 24 # variables are not loaded in the output assert isinstance(out["d"].data, dask.array.Array) assert isinstance(out["c"].data, dask.array.Array) out = xr.concat( [ds1, ds1, ds1], dim="n", data_vars=[], coords=[], compat="identical" ) assert kernel_call_count == 24 # variables are not loaded in the output assert isinstance(out["d"].data, dask.array.Array) assert isinstance(out["c"].data, dask.array.Array) out = xr.concat( [ds1, ds2.compute(), ds3], dim="n", data_vars="all", coords="different", compat="identical", ) # c1,c3 must be computed for comparison since c2 is numpy; # d2 is computed too assert kernel_call_count == 28 out = xr.concat( [ds1, ds2.compute(), ds3], dim="n", data_vars="all", coords="all", compat="identical", ) # no extra computes assert kernel_call_count == 30 # Finally, test that originals are unaltered assert ds1["d"].data is d1 assert ds1["c"].data is c1 assert ds2["d"].data is d2 assert ds2["c"].data is c2 assert ds3["d"].data is d3 assert ds3["c"].data is c3 def test_groupby(self): u = self.eager_array v = self.lazy_array expected = u.groupby("x").mean(...) with raise_if_dask_computes(): actual = v.groupby("x").mean(...) self.assertLazyAndAllClose(expected, actual) def test_rolling(self): u = self.eager_array v = self.lazy_array expected = u.rolling(x=2).mean() with raise_if_dask_computes(): actual = v.rolling(x=2).mean() self.assertLazyAndAllClose(expected, actual) @pytest.mark.parametrize("func", ["first", "last"]) def test_groupby_first_last(self, func): method = operator.methodcaller(func) u = self.eager_array v = self.lazy_array for coords in [u.coords, v.coords]: coords["ab"] = ("x", ["a", "a", "b", "b"]) expected = method(u.groupby("ab")) with raise_if_dask_computes(): actual = method(v.groupby("ab")) self.assertLazyAndAllClose(expected, actual) with raise_if_dask_computes(): actual = method(v.groupby("ab")) self.assertLazyAndAllClose(expected, actual) def test_reindex(self): u = self.eager_array.assign_coords(y=range(6)) v = self.lazy_array.assign_coords(y=range(6)) kwargs_list: list[dict[str, Any]] = [ {"x": [2, 3, 4]}, {"x": [1, 100, 2, 101, 3]}, {"x": [2.5, 3, 3.5], "y": [2, 2.5, 3]}, ] for kwargs in kwargs_list: expected = u.reindex(**kwargs) actual = v.reindex(**kwargs) self.assertLazyAndAllClose(expected, actual) def test_to_dataset_roundtrip(self): u = self.eager_array v = self.lazy_array expected = u.assign_coords(x=u["x"]) self.assertLazyAndEqual(expected, v.to_dataset("x").to_dataarray("x")) def test_merge(self): def duplicate_and_merge(array): return xr.merge([array, array.rename("bar")]).to_dataarray() expected = duplicate_and_merge(self.eager_array) actual = duplicate_and_merge(self.lazy_array) self.assertLazyAndEqual(expected, actual) def test_ufuncs(self): u = self.eager_array v = self.lazy_array self.assertLazyAndAllClose(np.sin(u), np.sin(v)) def test_where_dispatching(self): a = np.arange(10) b = a > 3 x = da.from_array(a, 5) y = da.from_array(b, 5) expected = DataArray(a).where(b) self.assertLazyAndEqual(expected, DataArray(a).where(y)) self.assertLazyAndEqual(expected, 
DataArray(x).where(b)) self.assertLazyAndEqual(expected, DataArray(x).where(y)) def test_simultaneous_compute(self): ds = Dataset({"foo": ("x", range(5)), "bar": ("x", range(5))}).chunk() count = [0] def counting_get(*args, **kwargs): count[0] += 1 return dask.get(*args, **kwargs) ds.load(scheduler=counting_get) assert count[0] == 1 def test_duplicate_dims(self): data = np.random.normal(size=(4, 4)) with pytest.warns(UserWarning, match="Duplicate dimension"): arr = DataArray(data, dims=("x", "x")) with pytest.warns(UserWarning, match="Duplicate dimension"): chunked_array = arr.chunk({"x": 2}) assert chunked_array.chunks == ((2, 2), (2, 2)) assert chunked_array.chunksizes == {"x": (2, 2)} def test_stack(self): data = da.random.normal(size=(2, 3, 4), chunks=(1, 3, 4)) arr = DataArray(data, dims=("w", "x", "y")) stacked = arr.stack(z=("x", "y")) z = pd.MultiIndex.from_product( [list(range(3)), list(range(4))], names=["x", "y"] ) expected = DataArray(data.reshape(2, -1), {"z": z}, dims=["w", "z"]) assert stacked.data.chunks == expected.data.chunks self.assertLazyAndEqual(expected, stacked) def test_dot(self): eager = self.eager_array.dot(self.eager_array[0]) lazy = self.lazy_array.dot(self.lazy_array[0]) self.assertLazyAndAllClose(eager, lazy) def test_dataarray_repr(self): data = build_dask_array("data") nonindex_coord = build_dask_array("coord") a = DataArray(data, dims=["x"], coords={"y": ("x", nonindex_coord)}) expected = dedent( f"""\ Size: 8B {data!r} Coordinates: y (x) int64 8B dask.array Dimensions without coordinates: x""" ) assert expected == repr(a) assert kernel_call_count == 0 # should not evaluate dask array def test_dataset_repr(self): data = build_dask_array("data") nonindex_coord = build_dask_array("coord") ds = Dataset(data_vars={"a": ("x", data)}, coords={"y": ("x", nonindex_coord)}) expected = dedent( """\ Size: 16B Dimensions: (x: 1) Coordinates: y (x) int64 8B dask.array Dimensions without coordinates: x Data variables: a (x) int64 8B dask.array""" ) assert expected == repr(ds) assert kernel_call_count == 0 # should not evaluate dask array def test_dataarray_pickle(self): # Test that pickling/unpickling converts the dask backend # to numpy in neither the data variable nor the non-index coords data = build_dask_array("data") nonindex_coord = build_dask_array("coord") a1 = DataArray(data, dims=["x"], coords={"y": ("x", nonindex_coord)}) a1.compute() assert not a1._in_memory assert not a1.coords["y"]._in_memory assert kernel_call_count == 2 a2 = pickle.loads(pickle.dumps(a1)) assert kernel_call_count == 2 assert_identical(a1, a2) assert not a1._in_memory assert not a2._in_memory assert not a1.coords["y"]._in_memory assert not a2.coords["y"]._in_memory def test_dataset_pickle(self): # Test that pickling/unpickling converts the dask backend # to numpy in neither the data variables nor the non-index coords data = build_dask_array("data") nonindex_coord = build_dask_array("coord") ds1 = Dataset(data_vars={"a": ("x", data)}, coords={"y": ("x", nonindex_coord)}) ds1.compute() assert not ds1["a"]._in_memory assert not ds1["y"]._in_memory assert kernel_call_count == 2 ds2 = pickle.loads(pickle.dumps(ds1)) assert kernel_call_count == 2 assert_identical(ds1, ds2) assert not ds1["a"]._in_memory assert not ds2["a"]._in_memory assert not ds1["y"]._in_memory assert not ds2["y"]._in_memory def test_dataarray_getattr(self): # ipython/jupyter does a long list of getattr() calls to when trying to # represent an object. # Make sure we're not accidentally computing dask variables. 
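# Looking up a missing attribute should raise AttributeError without
# evaluating either the data or the non-index coordinate.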
data = build_dask_array("data") nonindex_coord = build_dask_array("coord") a = DataArray(data, dims=["x"], coords={"y": ("x", nonindex_coord)}) with suppress(AttributeError): _ = a.NOTEXIST assert kernel_call_count == 0 def test_dataset_getattr(self): # Test that pickling/unpickling converts the dask backend # to numpy in neither the data variables nor the non-index coords data = build_dask_array("data") nonindex_coord = build_dask_array("coord") ds = Dataset(data_vars={"a": ("x", data)}, coords={"y": ("x", nonindex_coord)}) with suppress(AttributeError): _ = ds.NOTEXIST assert kernel_call_count == 0 def test_values(self): # Test that invoking the values property does not convert the dask # backend to numpy a = DataArray([1, 2]).chunk() assert not a._in_memory assert a.values.tolist() == [1, 2] assert not a._in_memory def test_from_dask_variable(self): # Test array creation from Variable with dask backend. # This is used e.g. in broadcast() a = DataArray(self.lazy_array.variable, coords={"x": range(4)}, name="foo") self.assertLazyAndIdentical(self.lazy_array, a) @requires_pint def test_tokenize_duck_dask_array(self): import pint unit_registry: pint.UnitRegistry = pint.UnitRegistry() q = unit_registry.Quantity(self.data, unit_registry.meter) data_array = xr.DataArray( data=q, coords={"x": range(4)}, dims=("x", "y"), name="foo" ) token = dask.base.tokenize(data_array) post_op = data_array + 5 * unit_registry.meter assert dask.base.tokenize(data_array) != dask.base.tokenize(post_op) # Immutability check assert dask.base.tokenize(data_array) == token class TestToDaskDataFrame: @pytest.mark.xfail(reason="https://github.com/dask/dask/issues/11584") def test_to_dask_dataframe(self): # Test conversion of Datasets to dask DataFrames x = np.random.randn(10) y = np.arange(10, dtype="uint8") t = list("abcdefghij") ds = Dataset( {"a": ("t", da.from_array(x, chunks=4)), "b": ("t", y), "t": ("t", t)} ) expected_pd = pd.DataFrame({"a": x, "b": y}, index=pd.Index(t, name="t")) # test if 1-D index is correctly set up expected = dd.from_pandas(expected_pd, chunksize=4) actual = ds.to_dask_dataframe(set_index=True) # test if we have dask dataframes assert isinstance(actual, dd.DataFrame) # use the .equals from pandas to check dataframes are equivalent assert_frame_equal(actual.compute(), expected.compute()) # test if no index is given expected = dd.from_pandas(expected_pd.reset_index(drop=False), chunksize=4) actual = ds.to_dask_dataframe(set_index=False) assert isinstance(actual, dd.DataFrame) assert_frame_equal(actual.compute(), expected.compute()) @pytest.mark.xfail( reason="Currently pandas with pyarrow installed will return a `string[pyarrow]` type, " "which causes the `y` column to have a different type depending on whether pyarrow is installed" ) def test_to_dask_dataframe_2D(self): # Test if 2-D dataset is supplied w = np.random.randn(2, 3) ds = Dataset({"w": (("x", "y"), da.from_array(w, chunks=(1, 2)))}) ds["x"] = ("x", np.array([0, 1], np.int64)) ds["y"] = ("y", list("abc")) # dask dataframes do not (yet) support multiindex, # but when it does, this would be the expected index: exp_index = pd.MultiIndex.from_arrays( [[0, 0, 0, 1, 1, 1], ["a", "b", "c", "a", "b", "c"]], names=["x", "y"] ) expected = pd.DataFrame({"w": w.reshape(-1)}, index=exp_index) # so for now, reset the index expected = expected.reset_index(drop=False) actual = ds.to_dask_dataframe(set_index=False) assert isinstance(actual, dd.DataFrame) assert_frame_equal(actual.compute(), expected) 
@pytest.mark.xfail(raises=NotImplementedError) def test_to_dask_dataframe_2D_set_index(self): # This will fail until dask implements MultiIndex support w = da.from_array(np.random.randn(2, 3), chunks=(1, 2)) ds = Dataset({"w": (("x", "y"), w)}) ds["x"] = ("x", np.array([0, 1], np.int64)) ds["y"] = ("y", list("abc")) expected = ds.compute().to_dataframe() actual = ds.to_dask_dataframe(set_index=True) assert isinstance(actual, dd.DataFrame) assert_frame_equal(expected, actual.compute()) def test_to_dask_dataframe_coordinates(self): # Test if coordinate is also a dask array x = np.random.randn(10) t = np.arange(10) * 2 ds = Dataset( { "a": ("t", da.from_array(x, chunks=4)), "t": ("t", da.from_array(t, chunks=4)), } ) expected_pd = pd.DataFrame({"a": x}, index=pd.Index(t, name="t")) expected = dd.from_pandas(expected_pd, chunksize=4) actual = ds.to_dask_dataframe(set_index=True) assert isinstance(actual, dd.DataFrame) assert_frame_equal(expected.compute(), actual.compute()) @pytest.mark.xfail( reason="Currently pandas with pyarrow installed will return a `string[pyarrow]` type, " "which causes the index to have a different type depending on whether pyarrow is installed" ) def test_to_dask_dataframe_not_daskarray(self): # Test if DataArray is not a dask array x = np.random.randn(10) y = np.arange(10, dtype="uint8") t = list("abcdefghij") ds = Dataset({"a": ("t", x), "b": ("t", y), "t": ("t", t)}) expected = pd.DataFrame({"a": x, "b": y}, index=pd.Index(t, name="t")) actual = ds.to_dask_dataframe(set_index=True) assert isinstance(actual, dd.DataFrame) assert_frame_equal(expected, actual.compute()) def test_to_dask_dataframe_no_coordinate(self): x = da.from_array(np.random.randn(10), chunks=4) ds = Dataset({"x": ("dim_0", x)}) expected = ds.compute().to_dataframe().reset_index() actual = ds.to_dask_dataframe() assert isinstance(actual, dd.DataFrame) assert_frame_equal(expected, actual.compute()) expected = ds.compute().to_dataframe() actual = ds.to_dask_dataframe(set_index=True) assert isinstance(actual, dd.DataFrame) assert_frame_equal(expected, actual.compute()) def test_to_dask_dataframe_dim_order(self): values = np.array([[1, 2], [3, 4]], dtype=np.int64) ds = Dataset({"w": (("x", "y"), values)}).chunk(1) expected = ds["w"].to_series().reset_index() actual = ds.to_dask_dataframe(dim_order=["x", "y"]) assert isinstance(actual, dd.DataFrame) assert_frame_equal(expected, actual.compute()) expected = ds["w"].T.to_series().reset_index() actual = ds.to_dask_dataframe(dim_order=["y", "x"]) assert isinstance(actual, dd.DataFrame) assert_frame_equal(expected, actual.compute()) with pytest.raises(ValueError, match=r"does not match the set of dimensions"): ds.to_dask_dataframe(dim_order=["x"]) @pytest.mark.parametrize("method", ["load", "compute"]) def test_dask_kwargs_variable(method): chunked_array = da.from_array(np.arange(3), chunks=(2,)) x = Variable("y", chunked_array) # args should be passed on to dask.compute() (via DaskManager.compute()) with mock.patch.object(da, "compute", return_value=(np.arange(3),)) as mock_compute: getattr(x, method)(foo="bar") mock_compute.assert_called_with(chunked_array, foo="bar") @pytest.mark.parametrize("method", ["load", "compute", "persist"]) def test_dask_kwargs_dataarray(method): data = da.from_array(np.arange(3), chunks=(2,)) x = DataArray(data) if method in ["load", "compute"]: dask_func = "dask.array.compute" else: dask_func = "dask.persist" # args should be passed on to "dask_func" with mock.patch(dask_func) as mock_func: getattr(x, method)(foo="bar") 
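# the patched dask entry point should have received the underlying dask
# array along with the forwarded keyword argument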
mock_func.assert_called_with(data, foo="bar") @pytest.mark.parametrize("method", ["load", "compute", "persist"]) def test_dask_kwargs_dataset(method): data = da.from_array(np.arange(3), chunks=(2,)) x = Dataset({"x": (("y"), data)}) if method in ["load", "compute"]: dask_func = "dask.array.compute" else: dask_func = "dask.persist" # args should be passed on to "dask_func" with mock.patch(dask_func) as mock_func: getattr(x, method)(foo="bar") mock_func.assert_called_with(data, foo="bar") kernel_call_count = 0 def kernel(name): """Dask kernel to test pickling/unpickling and __repr__. Must be global to make it pickleable. """ global kernel_call_count kernel_call_count += 1 return np.ones(1, dtype=np.int64) def build_dask_array(name): global kernel_call_count kernel_call_count = 0 return dask.array.Array( dask={(name, 0): (kernel, name)}, name=name, chunks=((1,),), dtype=np.int64 ) @pytest.mark.parametrize( "persist", [lambda x: x.persist(), lambda x: dask.persist(x)[0]] ) def test_persist_Dataset(persist): ds = Dataset({"foo": ("x", range(5)), "bar": ("x", range(5))}).chunk() ds = ds + 1 n = len(ds.foo.data.dask) ds2 = persist(ds) assert len(ds2.foo.data.dask) == 1 assert len(ds.foo.data.dask) == n # doesn't mutate in place @pytest.mark.parametrize( "persist", [lambda x: x.persist(), lambda x: dask.persist(x)[0]] ) def test_persist_DataArray(persist): x = da.arange(10, chunks=(5,)) y = DataArray(x) z = y + 1 n = len(z.data.dask) zz = persist(z) assert len(z.data.dask) == n assert len(zz.data.dask) == zz.data.npartitions def test_dataarray_with_dask_coords(): import toolz x = xr.Variable("x", da.arange(8, chunks=(4,))) y = xr.Variable("y", da.arange(8, chunks=(4,)) * 2) data = da.random.random((8, 8), chunks=(4, 4)) + 1 array = xr.DataArray(data, dims=["x", "y"]) array.coords["xx"] = x array.coords["yy"] = y assert dict(array.__dask_graph__()) == toolz.merge( data.__dask_graph__(), x.__dask_graph__(), y.__dask_graph__() ) (array2,) = dask.compute(array) assert not dask.is_dask_collection(array2) assert all(isinstance(v._variable.data, np.ndarray) for v in array2.coords.values()) def test_basic_compute(): ds = Dataset({"foo": ("x", range(5)), "bar": ("x", range(5))}).chunk({"x": 2}) for get in [dask.threaded.get, dask.multiprocessing.get, dask.local.get_sync, None]: with dask.config.set(scheduler=get): ds.compute() ds.foo.compute() ds.foo.variable.compute() def test_dataset_as_delayed(): ds = Dataset({"foo": ("x", range(5)), "bar": ("x", range(5))}).chunk() assert dask.delayed(ds).compute() == ds.compute() def make_da(): da = xr.DataArray( np.ones((10, 20)), dims=["x", "y"], coords={"x": np.arange(10), "y": np.arange(100, 120)}, name="a", ).chunk({"x": 4, "y": 5}) da.x.attrs["long_name"] = "x" da.attrs["test"] = "test" da.coords["c2"] = 0.5 da.coords["ndcoord"] = da.x * 2 da.coords["cxy"] = (da.x * da.y).chunk({"x": 4, "y": 5}) return da def make_ds(): map_ds = xr.Dataset() map_ds["a"] = make_da() map_ds["b"] = map_ds.a + 50 map_ds["c"] = map_ds.x + 20 map_ds = map_ds.chunk({"x": 4, "y": 5}) map_ds["d"] = ("z", [1, 1, 1, 1]) map_ds["z"] = [0, 1, 2, 3] map_ds["e"] = map_ds.x + map_ds.y map_ds.coords["c1"] = 0.5 map_ds.coords["cx"] = ("x", np.arange(len(map_ds.x))) map_ds.coords["cx"].attrs["test2"] = "test2" map_ds.attrs["test"] = "test" map_ds.coords["xx"] = map_ds["a"] * map_ds.y map_ds.x.attrs["long_name"] = "x" map_ds.y.attrs["long_name"] = "y" return map_ds # fixtures cannot be used in parametrize statements # instead use this workaround # 
https://docs.pytest.org/en/latest/deprecations.html#calling-fixtures-directly @pytest.fixture def map_da(): return make_da() @pytest.fixture def map_ds(): return make_ds() def test_unify_chunks(map_ds): ds_copy = map_ds.copy() ds_copy["cxy"] = ds_copy.cxy.chunk({"y": 10}) with pytest.raises(ValueError, match=r"inconsistent chunks"): _ = ds_copy.chunks expected_chunks = {"x": (4, 4, 2), "y": (5, 5, 5, 5)} with raise_if_dask_computes(): actual_chunks = ds_copy.unify_chunks().chunks assert actual_chunks == expected_chunks assert_identical(map_ds, ds_copy.unify_chunks()) out_a, out_b = xr.unify_chunks(ds_copy.cxy, ds_copy.drop_vars("cxy")) assert out_a.chunks == ((4, 4, 2), (5, 5, 5, 5)) assert out_b.chunks == expected_chunks # Test unordered dims da = ds_copy["cxy"] out_a, out_b = xr.unify_chunks(da.chunk({"x": -1}), da.T.chunk({"y": -1})) assert out_a.chunks == ((4, 4, 2), (5, 5, 5, 5)) assert out_b.chunks == ((5, 5, 5, 5), (4, 4, 2)) # Test mismatch with pytest.raises(ValueError, match=r"Dimension 'x' size mismatch: 10 != 2"): xr.unify_chunks(da, da.isel(x=slice(2))) @pytest.mark.parametrize("obj", [make_ds(), make_da()]) @pytest.mark.parametrize( "transform", [lambda x: x.compute(), lambda x: x.unify_chunks()] ) def test_unify_chunks_shallow_copy(obj, transform): obj = transform(obj) unified = obj.unify_chunks() assert_identical(obj, unified) # assert obj is not unified @pytest.mark.parametrize("obj", [make_da()]) def test_auto_chunk_da(obj): actual = obj.chunk("auto").data expected = obj.data.rechunk("auto") np.testing.assert_array_equal(actual, expected) assert actual.chunks == expected.chunks def test_auto_chunk_da_cftime(): yrs = np.arange(2000, 2120) cftime_dates = xr.date_range( start=f"{yrs[0]}-01-01", end=f"{yrs[-1]}-12-31", freq="1YE", use_cftime=True ) yr_array = np.tile(cftime_dates.values, (10, 1)) da = xr.DataArray( yr_array, dims=["x", "t"], coords={"x": np.arange(10), "t": cftime_dates} ).chunk({"x": 4, "t": 5}) actual = da.chunk("auto").data expected = da.data.rechunk({0: 10, 1: 120}) np.testing.assert_array_equal(actual, expected) assert actual.chunks == expected.chunks def test_map_blocks_error(map_da, map_ds): def bad_func(darray): return (darray * darray.x + 5 * darray.y)[:1, :1] with pytest.raises(ValueError, match=r"Received dimension 'x' of length 1"): xr.map_blocks(bad_func, map_da).compute() def returns_numpy(darray): return (darray * darray.x + 5 * darray.y).values with pytest.raises(TypeError, match=r"Function must return an xarray DataArray"): xr.map_blocks(returns_numpy, map_da) with pytest.raises(TypeError, match=r"args must be"): xr.map_blocks(operator.add, map_da, args=10) # type: ignore[arg-type] with pytest.raises(TypeError, match=r"kwargs must be"): xr.map_blocks(operator.add, map_da, args=[10], kwargs=[20]) # type: ignore[arg-type] def really_bad_func(darray): raise ValueError("couldn't do anything.") with pytest.raises(Exception, match=r"Cannot infer"): xr.map_blocks(really_bad_func, map_da) ds_copy = map_ds.copy() ds_copy["cxy"] = ds_copy.cxy.chunk({"y": 10}) with pytest.raises(ValueError, match=r"inconsistent chunks"): xr.map_blocks(bad_func, ds_copy) with pytest.raises(TypeError, match=r"Cannot pass dask collections"): xr.map_blocks(bad_func, map_da, kwargs=dict(a=map_da.chunk())) @pytest.mark.parametrize("obj", [make_da(), make_ds()]) def test_map_blocks(obj): def func(obj): result = obj + obj.x + 5 * obj.y return result with raise_if_dask_computes(): actual = xr.map_blocks(func, obj) expected = func(obj) assert_chunks_equal(expected.chunk(), 
actual) assert_identical(actual, expected) @pytest.mark.parametrize("obj", [make_da(), make_ds()]) def test_map_blocks_mixed_type_inputs(obj): def func(obj1, non_xarray_input, obj2): result = obj1 + obj1.x + 5 * obj1.y return result with raise_if_dask_computes(): actual = xr.map_blocks(func, obj, args=["non_xarray_input", obj]) expected = func(obj, "non_xarray_input", obj) assert_chunks_equal(expected.chunk(), actual) assert_identical(actual, expected) @pytest.mark.parametrize("obj", [make_da(), make_ds()]) def test_map_blocks_convert_args_to_list(obj): expected = obj + 10 with raise_if_dask_computes(): actual = xr.map_blocks(operator.add, obj, [10]) assert_chunks_equal(expected.chunk(), actual) assert_identical(actual, expected) def test_map_blocks_dask_args(): da1 = xr.DataArray( np.ones((10, 20)), dims=["x", "y"], coords={"x": np.arange(10), "y": np.arange(20)}, ).chunk({"x": 5, "y": 4}) # check that block shapes are the same def sumda(da1, da2): assert da1.shape == da2.shape return da1 + da2 da2 = da1 + 1 with raise_if_dask_computes(): mapped = xr.map_blocks(sumda, da1, args=[da2]) xr.testing.assert_equal(da1 + da2, mapped) # one dimension in common da2 = (da1 + 1).isel(x=1, drop=True) with raise_if_dask_computes(): mapped = xr.map_blocks(operator.add, da1, args=[da2]) xr.testing.assert_equal(da1 + da2, mapped) # test that everything works when dimension names are different da2 = (da1 + 1).isel(x=1, drop=True).rename({"y": "k"}) with raise_if_dask_computes(): mapped = xr.map_blocks(operator.add, da1, args=[da2]) xr.testing.assert_equal(da1 + da2, mapped) with pytest.raises(ValueError, match=r"Chunk sizes along dimension 'x'"): xr.map_blocks(operator.add, da1, args=[da1.chunk({"x": 1})]) with pytest.raises(ValueError, match=r"cannot align.*index.*are not equal"): xr.map_blocks(operator.add, da1, args=[da1.reindex(x=np.arange(20))]) # reduction da1 = da1.chunk({"x": -1}) da2 = da1 + 1 with raise_if_dask_computes(): mapped = xr.map_blocks(lambda a, b: (a + b).sum("x"), da1, args=[da2]) xr.testing.assert_equal((da1 + da2).sum("x"), mapped) # reduction with template da1 = da1.chunk({"x": -1}) da2 = da1 + 1 with raise_if_dask_computes(): mapped = xr.map_blocks( lambda a, b: (a + b).sum("x"), da1, args=[da2], template=da1.sum("x") ) xr.testing.assert_equal((da1 + da2).sum("x"), mapped) # bad template: not chunked with pytest.raises(ValueError, match="Provided template has no dask arrays"): xr.map_blocks( lambda a, b: (a + b).sum("x"), da1, args=[da2], template=da1.sum("x").compute(), ) @pytest.mark.parametrize("obj", [make_da(), make_ds()]) def test_map_blocks_add_attrs(obj): def add_attrs(obj): obj = obj.copy(deep=True) obj.attrs["new"] = "new" obj.cxy.attrs["new2"] = "new2" return obj expected = add_attrs(obj) with raise_if_dask_computes(): actual = xr.map_blocks(add_attrs, obj) assert_identical(actual, expected) # when template is specified, attrs are copied from template, not set by function with raise_if_dask_computes(): actual = xr.map_blocks(add_attrs, obj, template=obj) assert_identical(actual, obj) def test_map_blocks_change_name(map_da): def change_name(obj): obj = obj.copy(deep=True) obj.name = "new" return obj expected = change_name(map_da) with raise_if_dask_computes(): actual = xr.map_blocks(change_name, map_da) assert_identical(actual, expected) @pytest.mark.parametrize("obj", [make_da(), make_ds()]) def test_map_blocks_kwargs(obj): expected = xr.full_like(obj, fill_value=np.nan) with raise_if_dask_computes(): actual = xr.map_blocks(xr.full_like, obj, 
kwargs=dict(fill_value=np.nan)) assert_chunks_equal(expected.chunk(), actual) assert_identical(actual, expected) def test_map_blocks_to_dataarray(map_ds): with raise_if_dask_computes(): actual = xr.map_blocks(lambda x: x.to_dataarray(), map_ds) # to_dataarray does not preserve name, so cannot use assert_identical assert_equal(actual, map_ds.to_dataarray()) @pytest.mark.parametrize( "func", [ lambda x: x, lambda x: x.to_dataset(), lambda x: x.drop_vars("x"), lambda x: x.expand_dims(k=[1, 2, 3]), lambda x: x.expand_dims(k=3), lambda x: x.assign_coords(new_coord=("y", x.y.data * 2)), lambda x: x.astype(np.int32), lambda x: x.x, ], ) def test_map_blocks_da_transformations(func, map_da): with raise_if_dask_computes(): actual = xr.map_blocks(func, map_da) assert_identical(actual, func(map_da)) @pytest.mark.parametrize( "func", [ lambda x: x, lambda x: x.drop_vars("cxy"), lambda x: x.drop_vars("a"), lambda x: x.drop_vars("x"), lambda x: x.expand_dims(k=[1, 2, 3]), lambda x: x.expand_dims(k=3), lambda x: x.rename({"a": "new1", "b": "new2"}), lambda x: x.x, ], ) def test_map_blocks_ds_transformations(func, map_ds): with raise_if_dask_computes(): actual = xr.map_blocks(func, map_ds) assert_identical(actual, func(map_ds)) @pytest.mark.parametrize("obj", [make_da(), make_ds()]) def test_map_blocks_da_ds_with_template(obj): func = lambda x: x.isel(x=[1]) # a simple .isel(x=[1, 5, 9]) puts all those in a single chunk. template = xr.concat([obj.isel(x=[i]) for i in [1, 5, 9]], data_vars=None, dim="x") with raise_if_dask_computes(): actual = xr.map_blocks(func, obj, template=template) assert_identical(actual, template) # Check that indexes are written into the graph directly dsk = dict(actual.__dask_graph__()) assert {k for k in dsk if "x-coordinate" in k} assert all( isinstance(v, PandasIndex) for k, v in dsk.items() if "x-coordinate" in k ) with raise_if_dask_computes(): actual = obj.map_blocks(func, template=template) assert_identical(actual, template) def test_map_blocks_roundtrip_string_index(): ds = xr.Dataset( {"data": (["label"], [1, 2, 3])}, coords={"label": ["foo", "bar", "baz"]} ).chunk(label=1) assert ds.label.dtype == np.dtype("=U3") mapped = ds.map_blocks(lambda x: x, template=ds) assert mapped.label.dtype == ds.label.dtype mapped = ds.map_blocks(lambda x: x, template=None) assert mapped.label.dtype == ds.label.dtype mapped = ds.data.map_blocks(lambda x: x, template=ds.data) assert mapped.label.dtype == ds.label.dtype mapped = ds.data.map_blocks(lambda x: x, template=None) assert mapped.label.dtype == ds.label.dtype def test_map_blocks_template_convert_object(): da = make_da() ds = da.to_dataset() func = lambda x: x.to_dataset().isel(x=[1]) template = xr.concat([da.to_dataset().isel(x=[i]) for i in [1, 5, 9]], dim="x") with raise_if_dask_computes(): actual = xr.map_blocks(func, da, template=template) assert_identical(actual, template) func = lambda x: x.to_dataarray().isel(x=[1]) template = xr.concat([ds.to_dataarray().isel(x=[i]) for i in [1, 5, 9]], dim="x") with raise_if_dask_computes(): actual = xr.map_blocks(func, ds, template=template) assert_identical(actual, template) @pytest.mark.parametrize("obj", [make_da(), make_ds()]) def test_map_blocks_errors_bad_template(obj): with pytest.raises(ValueError, match=r"unexpected coordinate variables"): xr.map_blocks(lambda x: x.assign_coords(a=10), obj, template=obj).compute() with pytest.raises(ValueError, match=r"does not contain coordinate variables"): xr.map_blocks(lambda x: x.drop_vars("cxy"), obj, template=obj).compute() with 
pytest.raises(ValueError, match=r"Dimensions {'x'} missing"): xr.map_blocks(lambda x: x.isel(x=1), obj, template=obj).compute() with pytest.raises(ValueError, match=r"Received dimension 'x' of length 1"): xr.map_blocks(lambda x: x.isel(x=[1]), obj, template=obj).compute() with pytest.raises(TypeError, match=r"must be a DataArray"): xr.map_blocks(lambda x: x.isel(x=[1]), obj, template=(obj,)).compute() # type: ignore[arg-type] with pytest.raises(ValueError, match=r"map_blocks requires that one block"): xr.map_blocks( lambda x: x.isel(x=[1]).assign_coords(x=10), obj, template=obj.isel(x=[1]) ).compute() with pytest.raises(ValueError, match=r"Expected index 'x' to be"): xr.map_blocks( lambda a: a.isel(x=[1]).assign_coords(x=[120]), # assign bad index values obj, template=xr.concat( [obj.isel(x=[i]) for i in [1, 5, 9]], data_vars=None, dim="x" ), ).compute() def test_map_blocks_errors_bad_template_2(map_ds): with pytest.raises(ValueError, match=r"unexpected data variables {'xyz'}"): xr.map_blocks(lambda x: x.assign(xyz=1), map_ds, template=map_ds).compute() @pytest.mark.parametrize("obj", [make_da(), make_ds()]) def test_map_blocks_object_method(obj): def func(obj): result = obj + obj.x + 5 * obj.y return result with raise_if_dask_computes(): expected = xr.map_blocks(func, obj) actual = obj.map_blocks(func) assert_identical(expected, actual) def test_map_blocks_hlg_layers(): # regression test for #3599 ds = xr.Dataset( { "x": (("a",), dask.array.ones(10, chunks=(5,))), "z": (("b",), dask.array.ones(10, chunks=(5,))), } ) mapped = ds.map_blocks(lambda x: x) xr.testing.assert_equal(mapped, ds) def test_make_meta(map_ds): from xarray.core.parallel import make_meta meta = make_meta(map_ds) for variable in map_ds._coord_names: assert variable in meta._coord_names assert meta.coords[variable].shape == (0,) * meta.coords[variable].ndim for variable in map_ds.data_vars: assert variable in meta.data_vars assert meta.data_vars[variable].shape == (0,) * meta.data_vars[variable].ndim def test_identical_coords_no_computes(): lons2 = xr.DataArray(da.zeros((10, 10), chunks=2), dims=("y", "x")) a = xr.DataArray( da.zeros((10, 10), chunks=2), dims=("y", "x"), coords={"lons": lons2} ) b = xr.DataArray( da.zeros((10, 10), chunks=2), dims=("y", "x"), coords={"lons": lons2} ) with raise_if_dask_computes(): c = a + b assert_identical(c, a) @pytest.mark.parametrize( "obj", [make_da(), make_da().compute(), make_ds(), make_ds().compute()] ) @pytest.mark.parametrize( "transform", [ lambda x: x.reset_coords(), lambda x: x.reset_coords(drop=True), lambda x: x.isel(x=1), lambda x: x.attrs.update(new_attrs=1), lambda x: x.assign_coords(cxy=1), lambda x: x.rename({"x": "xnew"}), lambda x: x.rename({"cxy": "cxynew"}), ], ) def test_token_changes_on_transform(obj, transform): with raise_if_dask_computes(): assert dask.base.tokenize(obj) != dask.base.tokenize(transform(obj)) @pytest.mark.parametrize( "obj", [make_da(), make_da().compute(), make_ds(), make_ds().compute()] ) def test_token_changes_when_data_changes(obj): with raise_if_dask_computes(): t1 = dask.base.tokenize(obj) # Change data_var if isinstance(obj, DataArray): obj *= 2 else: obj["a"] *= 2 with raise_if_dask_computes(): t2 = dask.base.tokenize(obj) assert t2 != t1 # Change non-index coord obj.coords["ndcoord"] *= 2 with raise_if_dask_computes(): t3 = dask.base.tokenize(obj) assert t3 != t2 # Change IndexVariable obj = obj.assign_coords(x=obj.x * 2) with raise_if_dask_computes(): t4 = dask.base.tokenize(obj) assert t4 != t3 @pytest.mark.parametrize("obj", 
[make_da().compute(), make_ds().compute()]) def test_token_changes_when_buffer_changes(obj): with raise_if_dask_computes(): t1 = dask.base.tokenize(obj) if isinstance(obj, DataArray): obj[0, 0] = 123 else: obj["a"][0, 0] = 123 with raise_if_dask_computes(): t2 = dask.base.tokenize(obj) assert t2 != t1 obj.coords["ndcoord"][0] = 123 with raise_if_dask_computes(): t3 = dask.base.tokenize(obj) assert t3 != t2 @pytest.mark.parametrize( "transform", [lambda x: x, lambda x: x.copy(deep=False), lambda x: x.copy(deep=True)], ) @pytest.mark.parametrize("obj", [make_da(), make_ds(), make_ds().variables["a"]]) def test_token_identical(obj, transform): with raise_if_dask_computes(): assert dask.base.tokenize(obj) == dask.base.tokenize(transform(obj)) assert dask.base.tokenize(obj.compute()) == dask.base.tokenize( transform(obj.compute()) ) @pytest.mark.parametrize( "obj", [ make_ds(), # Dataset make_ds().variables["c2"], # Variable make_ds().variables["x"], # IndexVariable ], ) def test_tokenize_empty_attrs(obj): """Issues #6970 and #8788""" obj.attrs = {} assert obj._attrs is None a = dask.base.tokenize(obj) assert obj.attrs == {} assert obj._attrs == {} # attrs getter changed None to dict b = dask.base.tokenize(obj) assert a == b obj2 = obj.copy() c = dask.base.tokenize(obj2) assert a == c def test_recursive_token(): """Test that tokenization is invoked recursively, and doesn't just rely on the output of str() """ a = np.ones(10000) b = np.ones(10000) b[5000] = 2 assert str(a) == str(b) assert dask.base.tokenize(a) != dask.base.tokenize(b) # Test DataArray and Variable da_a = DataArray(a) da_b = DataArray(b) assert dask.base.tokenize(da_a) != dask.base.tokenize(da_b) # Test Dataset ds_a = da_a.to_dataset(name="x") ds_b = da_b.to_dataset(name="x") assert dask.base.tokenize(ds_a) != dask.base.tokenize(ds_b) # Test IndexVariable da_a = DataArray(a, dims=["x"], coords={"x": a}) da_b = DataArray(a, dims=["x"], coords={"x": b}) assert dask.base.tokenize(da_a) != dask.base.tokenize(da_b) @requires_scipy_or_netCDF4 def test_normalize_token_with_backend(map_ds): with create_tmp_file(allow_cleanup_failure=ON_WINDOWS) as tmp_file: map_ds.to_netcdf(tmp_file) read = xr.open_dataset(tmp_file) assert dask.base.tokenize(map_ds) != dask.base.tokenize(read) read.close() @pytest.mark.parametrize( "compat", ["broadcast_equals", "equals", "identical", "no_conflicts"] ) def test_lazy_array_equiv_variables(compat): var1 = xr.Variable(("y", "x"), da.zeros((10, 10), chunks=2)) var2 = xr.Variable(("y", "x"), da.zeros((10, 10), chunks=2)) var3 = xr.Variable(("y", "x"), da.zeros((20, 10), chunks=2)) with raise_if_dask_computes(): assert getattr(var1, compat)(var2, equiv=lazy_array_equiv) # values are actually equal, but we don't know that till we compute, return None with raise_if_dask_computes(): assert getattr(var1, compat)(var2 / 2, equiv=lazy_array_equiv) is None # shapes are not equal, return False without computes with raise_if_dask_computes(): assert getattr(var1, compat)(var3, equiv=lazy_array_equiv) is False # if one or both arrays are numpy, return None assert getattr(var1, compat)(var2.compute(), equiv=lazy_array_equiv) is None assert ( getattr(var1.compute(), compat)(var2.compute(), equiv=lazy_array_equiv) is None ) with raise_if_dask_computes(): assert getattr(var1, compat)(var2.transpose("y", "x")) @pytest.mark.parametrize( "compat", ["broadcast_equals", "equals", "identical", "no_conflicts"] ) def test_lazy_array_equiv_merge(compat): da1 = xr.DataArray(da.zeros((10, 10), chunks=2), dims=("y", "x")) da2 = 
xr.DataArray(da.zeros((10, 10), chunks=2), dims=("y", "x")) da3 = xr.DataArray(da.ones((20, 10), chunks=2), dims=("y", "x")) with raise_if_dask_computes(): xr.merge([da1, da2], compat=compat) # shapes are not equal; no computes necessary with raise_if_dask_computes(max_computes=0): with pytest.raises(ValueError): xr.merge([da1, da3], compat=compat) with raise_if_dask_computes(max_computes=2): xr.merge([da1, da2 / 2], compat=compat) @pytest.mark.filterwarnings("ignore::FutureWarning") # transpose_coords @pytest.mark.parametrize("obj", [make_da(), make_ds()]) @pytest.mark.parametrize( "transform", [ lambda a: a.assign_attrs(new_attr="anew"), lambda a: a.assign_coords(cxy=a.cxy), lambda a: a.copy(), lambda a: a.isel(x=slice(None)), lambda a: a.loc[dict(x=slice(None))], lambda a: a.transpose(...), lambda a: a.squeeze(), # no dimensions to squeeze lambda a: a.reindex(x=a.x), lambda a: a.reindex_like(a), lambda a: a.rename({"cxy": "cnew"}).rename({"cnew": "cxy"}), lambda a: a.pipe(lambda x: x), lambda a: xr.align(a, xr.zeros_like(a))[0], # assign # swap_dims # set_index / reset_index ], ) def test_transforms_pass_lazy_array_equiv(obj, transform): with raise_if_dask_computes(): assert_equal(obj, transform(obj)) def test_more_transforms_pass_lazy_array_equiv(map_da, map_ds): with raise_if_dask_computes(): assert_equal(map_ds.cxy.broadcast_like(map_ds.cxy), map_ds.cxy) assert_equal(xr.broadcast(map_ds.cxy, map_ds.cxy)[0], map_ds.cxy) assert_equal(map_ds.map(lambda x: x), map_ds) assert_equal(map_ds.set_coords("a").reset_coords("a"), map_ds) assert_equal(map_ds.assign({"a": map_ds.a}), map_ds) # fails because of index error # assert_equal( # map_ds.rename_dims({"x": "xnew"}).rename_dims({"xnew": "x"}), map_ds # ) assert_equal( map_ds.rename_vars({"cxy": "cnew"}).rename_vars({"cnew": "cxy"}), map_ds ) assert_equal(map_da._from_temp_dataset(map_da._to_temp_dataset()), map_da) assert_equal(map_da.astype(map_da.dtype), map_da) assert_equal(map_da.transpose("y", "x", transpose_coords=False).cxy, map_da.cxy) def test_optimize(): # https://github.com/pydata/xarray/issues/3698 a = dask.array.ones((10, 4), chunks=(5, 2)) arr = xr.DataArray(a).chunk(5) (arr2,) = dask.optimize(arr) arr2.compute() def test_graph_manipulation(): """dask.graph_manipulation passes an optional parameter, "rename", to the rebuilder function returned by __dask_postperist__; also, the dsk passed to the rebuilder is a HighLevelGraph whereas with dask.persist() and dask.optimize() it's a plain dict. 
""" import dask.graph_manipulation as gm v = Variable(["x"], [1, 2]).chunk(-1).chunk(1) * 2 da = DataArray(v) ds = Dataset({"d1": v[0], "d2": v[1], "d3": ("x", [3, 4])}) v2, da2, ds2 = gm.clone(v, da, ds) assert_equal(v2, v) assert_equal(da2, da) assert_equal(ds2, ds) for a, b in ((v, v2), (da, da2), (ds, ds2)): assert a.__dask_layers__() != b.__dask_layers__() assert len(a.__dask_layers__()) == len(b.__dask_layers__()) assert a.__dask_graph__().keys() != b.__dask_graph__().keys() # type: ignore[union-attr] assert len(a.__dask_graph__()) == len(b.__dask_graph__()) # type: ignore[arg-type] assert a.__dask_graph__().layers.keys() != b.__dask_graph__().layers.keys() # type: ignore[union-attr] assert len(a.__dask_graph__().layers) == len(b.__dask_graph__().layers) # type: ignore[union-attr] # Above we performed a slice operation; adding the two slices back together creates # a diamond-shaped dependency graph, which in turn will trigger a collision in layer # names if we were to use HighLevelGraph.cull() instead of # HighLevelGraph.cull_layers() in Dataset.__dask_postpersist__(). assert_equal(ds2.d1 + ds2.d2, ds.d1 + ds.d2) def test_new_index_var_computes_once(): # regression test for GH1533 data = dask.array.from_array(np.array([100, 200])) with raise_if_dask_computes(max_computes=1): Dataset(coords={"z": ("z", data)}) def test_minimize_graph_size(): # regression test for https://github.com/pydata/xarray/issues/8409 ds = Dataset( { "foo": ( ("x", "y", "z"), dask.array.ones((120, 120, 120), chunks=(20, 20, 1)), ) }, coords={"x": np.arange(120), "y": np.arange(120), "z": np.arange(120)}, ) mapped = ds.map_blocks(lambda x: x) graph = dict(mapped.__dask_graph__()) numchunks = {k: len(v) for k, v in ds.chunksizes.items()} for var in "xyz": actual = len([key for key in graph if var in key[0]]) # assert that we only include each chunk of an index variable # is only included once, not the product of number of chunks of # all the other dimensions. # e.g. 
previously for 'x', actual == numchunks['y'] * numchunks['z'] assert actual == numchunks[var], (actual, numchunks[var]) def test_idxmin_chunking(): # GH9425 x, y, t = 100, 100, 10 rang = np.arange(t * x * y) da = xr.DataArray( rang.reshape(t, x, y), coords={"time": range(t), "x": range(x), "y": range(y)} ) da = da.chunk(dict(time=-1, x=25, y=25)) actual = da.idxmin("time") assert actual.chunksizes == {k: da.chunksizes[k] for k in ["x", "y"]} assert_identical(actual, da.compute().idxmin("time")) def test_conjugate(): # Test for https://github.com/pydata/xarray/issues/10302 z = 1j * da.arange(100) data = xr.DataArray(z, coords={"x": np.arange(100)}) conj_data = data.conjugate() assert dask.is_dask_collection(conj_data) assert_equal(conj_data, data.conj()) python-xarray-2026.01.0/xarray/tests/test_indexes.py0000664000175000017500000006764515136607163022605 0ustar alastairalastairfrom __future__ import annotations import copy from datetime import datetime from typing import Any import numpy as np import pandas as pd import pytest import xarray as xr from xarray.coding.cftimeindex import CFTimeIndex from xarray.core.indexes import ( Hashable, Index, Indexes, PandasIndex, PandasMultiIndex, _asarray_tuplesafe, safe_cast_to_index, ) from xarray.core.variable import IndexVariable, Variable from xarray.tests import assert_array_equal, assert_identical, requires_cftime from xarray.tests.test_coding_times import _all_cftime_date_types def test_asarray_tuplesafe() -> None: res = _asarray_tuplesafe(("a", 1)) assert isinstance(res, np.ndarray) assert res.ndim == 0 assert res.item() == ("a", 1) res = _asarray_tuplesafe([(0,), (1,)]) assert res.shape == (2,) assert res[0] == (0,) assert res[1] == (1,) class CustomIndex(Index): def __init__(self, dims) -> None: self.dims = dims class TestIndex: @pytest.fixture def index(self) -> CustomIndex: return CustomIndex({"x": 2}) def test_from_variables(self) -> None: with pytest.raises(NotImplementedError): Index.from_variables({}, options={}) def test_concat(self) -> None: with pytest.raises(NotImplementedError): Index.concat([], "x") def test_stack(self) -> None: with pytest.raises(NotImplementedError): Index.stack({}, "x") def test_unstack(self, index) -> None: with pytest.raises(NotImplementedError): index.unstack() def test_create_variables(self, index) -> None: assert index.create_variables() == {} assert index.create_variables({"x": "var"}) == {"x": "var"} def test_to_pandas_index(self, index) -> None: with pytest.raises(TypeError): index.to_pandas_index() def test_isel(self, index) -> None: assert index.isel({}) is None def test_sel(self, index) -> None: with pytest.raises(NotImplementedError): index.sel({}) def test_join(self, index) -> None: with pytest.raises(NotImplementedError): index.join(CustomIndex({"y": 2})) def test_reindex_like(self, index) -> None: with pytest.raises(NotImplementedError): index.reindex_like(CustomIndex({"y": 2})) def test_equals(self, index) -> None: with pytest.raises(NotImplementedError): index.equals(CustomIndex({"y": 2})) def test_roll(self, index) -> None: assert index.roll({}) is None def test_rename(self, index) -> None: assert index.rename({}, {}) is index @pytest.mark.parametrize("deep", [True, False]) def test_copy(self, index, deep) -> None: copied = index.copy(deep=deep) assert isinstance(copied, CustomIndex) assert copied is not index copied.dims["x"] = 3 if deep: assert copied.dims != index.dims assert copied.dims != copy.deepcopy(index).dims else: assert copied.dims is index.dims assert copied.dims is 
copy.copy(index).dims def test_getitem(self, index) -> None: with pytest.raises(NotImplementedError): index[:] class TestPandasIndex: def test_constructor(self) -> None: pd_idx = pd.Index([1, 2, 3]) index = PandasIndex(pd_idx, "x") assert index.index.equals(pd_idx) # makes a shallow copy assert index.index is not pd_idx assert index.dim == "x" # test no name set for pd.Index pd_idx.name = None index = PandasIndex(pd_idx, "x") assert index.index.name == "x" def test_from_variables(self) -> None: # pandas has only Float64Index but variable dtype should be preserved data = np.array([1.1, 2.2, 3.3], dtype=np.float32) var = xr.Variable( "x", data, attrs={"unit": "m"}, encoding={"dtype": np.float64} ) index = PandasIndex.from_variables({"x": var}, options={}) assert index.dim == "x" assert index.index.equals(pd.Index(data)) assert index.coord_dtype == data.dtype var2 = xr.Variable(("x", "y"), [[1, 2, 3], [4, 5, 6]]) with pytest.raises(ValueError, match=r".*only accepts one variable.*"): PandasIndex.from_variables({"x": var, "foo": var2}, options={}) with pytest.raises( ValueError, match=r".*cannot set a PandasIndex.*scalar variable.*" ): PandasIndex.from_variables({"foo": xr.Variable((), 1)}, options={}) with pytest.raises( ValueError, match=r".*only accepts a 1-dimensional variable.*" ): PandasIndex.from_variables({"foo": var2}, options={}) def test_from_variables_index_adapter(self) -> None: # test index type is preserved when variable wraps a pd.Index data = pd.Series(["foo", "bar"], dtype="category") pd_idx = pd.Index(data) var = xr.Variable("x", pd_idx) index = PandasIndex.from_variables({"x": var}, options={}) assert isinstance(index.index, pd.CategoricalIndex) def test_concat_periods(self): periods = pd.period_range("2000-01-01", periods=10) indexes = [PandasIndex(periods[:5], "t"), PandasIndex(periods[5:], "t")] expected = PandasIndex(periods, "t") actual = PandasIndex.concat(indexes, dim="t") assert actual.equals(expected) assert isinstance(actual.index, pd.PeriodIndex) positions = [list(range(5)), list(range(5, 10))] actual = PandasIndex.concat(indexes, dim="t", positions=positions) assert actual.equals(expected) assert isinstance(actual.index, pd.PeriodIndex) @pytest.mark.parametrize("dtype", [str, bytes]) def test_concat_str_dtype(self, dtype) -> None: a = PandasIndex(np.array(["a"], dtype=dtype), "x", coord_dtype=dtype) b = PandasIndex(np.array(["b"], dtype=dtype), "x", coord_dtype=dtype) expected = PandasIndex( np.array(["a", "b"], dtype=dtype), "x", coord_dtype=dtype ) actual = PandasIndex.concat([a, b], "x") assert actual.equals(expected) assert np.issubdtype(actual.coord_dtype, dtype) def test_concat_empty(self) -> None: idx = PandasIndex.concat([], "x") assert idx.coord_dtype is np.dtype("O") def test_concat_dim_error(self) -> None: indexes = [PandasIndex([0, 1], "x"), PandasIndex([2, 3], "y")] with pytest.raises(ValueError, match=r"Cannot concatenate.*dimensions.*"): PandasIndex.concat(indexes, "x") def test_create_variables(self) -> None: # pandas has only Float64Index but variable dtype should be preserved data = np.array([1.1, 2.2, 3.3], dtype=np.float32) pd_idx = pd.Index(data, name="foo") index = PandasIndex(pd_idx, "x", coord_dtype=data.dtype) index_vars = { "foo": IndexVariable( "x", data, attrs={"unit": "m"}, encoding={"fill_value": 0.0} ) } actual = index.create_variables(index_vars) assert_identical(actual["foo"], index_vars["foo"]) assert actual["foo"].dtype == index_vars["foo"].dtype assert actual["foo"].dtype == index.coord_dtype def test_to_pandas_index(self) -> 
None: pd_idx = pd.Index([1, 2, 3], name="foo") index = PandasIndex(pd_idx, "x") assert index.to_pandas_index() is index.index def test_sel(self) -> None: # TODO: add tests that aren't just for edge cases index = PandasIndex(pd.Index([1, 2, 3]), "x") with pytest.raises(KeyError, match=r"not all values found"): index.sel({"x": [0]}) with pytest.raises(KeyError): index.sel({"x": 0}) with pytest.raises(ValueError, match=r"does not have a MultiIndex"): index.sel({"x": {"one": 0}}) def test_sel_boolean(self) -> None: # index should be ignored and indexer dtype should not be coerced # see https://github.com/pydata/xarray/issues/5727 index = PandasIndex(pd.Index([0.0, 2.0, 1.0, 3.0]), "x") actual = index.sel({"x": [False, True, False, True]}) expected_dim_indexers = {"x": [False, True, False, True]} np.testing.assert_array_equal( actual.dim_indexers["x"], expected_dim_indexers["x"] ) def test_sel_datetime(self) -> None: index = PandasIndex( pd.to_datetime(["2000-01-01", "2001-01-01", "2002-01-01"]), "x" ) actual = index.sel({"x": "2001-01-01"}) expected_dim_indexers = {"x": 1} assert actual.dim_indexers == expected_dim_indexers actual = index.sel({"x": index.to_pandas_index().to_numpy()[1]}) assert actual.dim_indexers == expected_dim_indexers def test_sel_unsorted_datetime_index_raises(self) -> None: index = PandasIndex(pd.to_datetime(["2001", "2000", "2002"]), "x") with pytest.raises(KeyError): # pandas will try to convert this into an array indexer. We should # raise instead, so we can be sure the result of indexing with a # slice is always a view. index.sel({"x": slice("2001", "2002")}) def test_equals(self) -> None: index1 = PandasIndex([1, 2, 3], "x") index2 = PandasIndex([1, 2, 3], "x") assert index1.equals(index2) is True def test_join(self) -> None: index1 = PandasIndex(["a", "aa", "aaa"], "x", coord_dtype="<U3") index2 = PandasIndex(["aa", "aaa", "aaaa"], "x", coord_dtype="<U4") expected = PandasIndex(["aa", "aaa"], "x") actual = index1.join(index2) assert actual.equals(expected) assert actual.coord_dtype == "=U4" expected = PandasIndex(["a", "aa", "aaa", "aaaa"], "x") actual = index1.join(index2, how="outer") assert actual.equals(expected) assert actual.coord_dtype == "=U4" def test_reindex_like(self) -> None: index1 = PandasIndex([0, 1, 2], "x") index2 = PandasIndex([1, 2, 3, 4], "x") expected = {"x": [1, 2, -1, -1]} actual = index1.reindex_like(index2) assert actual.keys() == expected.keys() np.testing.assert_array_equal(actual["x"], expected["x"]) index3 = PandasIndex([1, 1, 2], "x") with pytest.raises(ValueError, match=r".*index has duplicate values"): index3.reindex_like(index2) def test_rename(self) -> None: index = PandasIndex(pd.Index([1, 2, 3], name="a"), "x", coord_dtype=np.int32) # shortcut new_index = index.rename({}, {}) assert new_index is index new_index = index.rename({"a": "b"}, {}) assert new_index.index.name == "b" assert new_index.dim == "x" assert new_index.coord_dtype == np.int32 new_index = index.rename({}, {"x": "y"}) assert new_index.index.name == "a" assert new_index.dim == "y" assert new_index.coord_dtype == np.int32 def test_copy(self) -> None: expected = PandasIndex([1, 2, 3], "x", coord_dtype=np.int32) actual = expected.copy() assert actual.index.equals(expected.index) assert actual.index is not expected.index assert actual.dim == expected.dim assert actual.coord_dtype == expected.coord_dtype def test_getitem(self) -> None: pd_idx = pd.Index([1, 2, 3]) expected = PandasIndex(pd_idx, "x", coord_dtype=np.int32) actual = expected[1:] assert actual.index.equals(pd_idx[1:]) assert actual.dim == expected.dim assert actual.coord_dtype == expected.coord_dtype class TestPandasMultiIndex: def test_constructor(self) -> None: foo_data = np.array([0, 0, 1], dtype="int64") bar_data = np.array([1.1, 1.2, 1.3], dtype="float64") pd_idx = pd.MultiIndex.from_arrays([foo_data, bar_data], names=("foo", "bar")) index = PandasMultiIndex(pd_idx, "x") assert index.dim == "x"
assert index.index.equals(pd_idx) assert index.index.names == ("foo", "bar") assert index.index.name == "x" assert index.level_coords_dtype == { "foo": foo_data.dtype, "bar": bar_data.dtype, } with pytest.raises(ValueError, match=r".*conflicting multi-index level name.*"): PandasMultiIndex(pd_idx, "foo") # default level names pd_idx = pd.MultiIndex.from_arrays([foo_data, bar_data]) index = PandasMultiIndex(pd_idx, "x") assert list(index.index.names) == ["x_level_0", "x_level_1"] def test_from_variables(self) -> None: v_level1 = xr.Variable( "x", [1, 2, 3], attrs={"unit": "m"}, encoding={"dtype": np.int32} ) v_level2 = xr.Variable( "x", ["a", "b", "c"], attrs={"unit": "m"}, encoding={"dtype": "U"} ) index = PandasMultiIndex.from_variables( {"level1": v_level1, "level2": v_level2}, options={} ) expected_idx = pd.MultiIndex.from_arrays([v_level1.data, v_level2.data]) assert index.dim == "x" assert index.index.equals(expected_idx) assert index.index.name == "x" assert list(index.index.names) == ["level1", "level2"] var = xr.Variable(("x", "y"), [[1, 2, 3], [4, 5, 6]]) with pytest.raises( ValueError, match=r".*only accepts 1-dimensional variables.*" ): PandasMultiIndex.from_variables({"var": var}, options={}) v_level3 = xr.Variable("y", [4, 5, 6]) with pytest.raises( ValueError, match=r"unmatched dimensions for multi-index variables.*" ): PandasMultiIndex.from_variables( {"level1": v_level1, "level3": v_level3}, options={} ) def test_concat(self) -> None: pd_midx = pd.MultiIndex.from_product( [[0, 1, 2], ["a", "b"]], names=("foo", "bar") ) level_coords_dtype = {"foo": np.int32, "bar": "=U1"} midx1 = PandasMultiIndex( pd_midx[:2], "x", level_coords_dtype=level_coords_dtype ) midx2 = PandasMultiIndex( pd_midx[2:], "x", level_coords_dtype=level_coords_dtype ) expected = PandasMultiIndex(pd_midx, "x", level_coords_dtype=level_coords_dtype) actual = PandasMultiIndex.concat([midx1, midx2], "x") assert actual.equals(expected) assert actual.level_coords_dtype == expected.level_coords_dtype def test_stack(self) -> None: prod_vars = { "x": xr.Variable("x", pd.Index(["b", "a"]), attrs={"foo": "bar"}), "y": xr.Variable("y", pd.Index([1, 3, 2])), } index_xr = PandasMultiIndex.stack(prod_vars, "z") assert index_xr.dim == "z" index_pd = index_xr.index assert isinstance(index_pd, pd.MultiIndex) # TODO: change to tuple when pandas 3 is minimum assert list(index_pd.names) == ["x", "y"] np.testing.assert_array_equal( index_pd.codes, [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]] ) with pytest.raises( ValueError, match=r"conflicting dimensions for multi-index product.*" ): PandasMultiIndex.stack( {"x": xr.Variable("x", ["a", "b"]), "x2": xr.Variable("x", [1, 2])}, "z", ) def test_stack_non_unique(self) -> None: prod_vars = { "x": xr.Variable("x", pd.Index(["b", "a"]), attrs={"foo": "bar"}), "y": xr.Variable("y", pd.Index([1, 1, 2])), } index_xr = PandasMultiIndex.stack(prod_vars, "z") index_pd = index_xr.index assert isinstance(index_pd, pd.MultiIndex) np.testing.assert_array_equal( index_pd.codes, [[0, 0, 0, 1, 1, 1], [0, 0, 1, 0, 0, 1]] ) np.testing.assert_array_equal(index_pd.levels[0], ["b", "a"]) np.testing.assert_array_equal(index_pd.levels[1], [1, 2]) def test_unstack(self) -> None: pd_midx = pd.MultiIndex.from_product( [["a", "b"], [1, 2, 3]], names=["one", "two"] ) index = PandasMultiIndex(pd_midx, "x") new_indexes, new_pd_idx = index.unstack() assert list(new_indexes) == ["one", "two"] assert new_indexes["one"].equals(PandasIndex(["a", "b"], "one")) assert new_indexes["two"].equals(PandasIndex([1, 2, 3], 
"two")) assert new_pd_idx.equals(pd_midx) def test_unstack_requires_unique(self) -> None: pd_midx = pd.MultiIndex.from_product([["a", "a"], [1, 2]], names=["one", "two"]) index = PandasMultiIndex(pd_midx, "x") with pytest.raises( ValueError, match="Cannot unstack MultiIndex containing duplicates" ): index.unstack() def test_create_variables(self) -> None: foo_data = np.array([0, 0, 1], dtype="int64") bar_data = np.array([1.1, 1.2, 1.3], dtype="float64") pd_idx = pd.MultiIndex.from_arrays([foo_data, bar_data], names=("foo", "bar")) index_vars = { "x": IndexVariable("x", pd_idx), "foo": IndexVariable("x", foo_data, attrs={"unit": "m"}), "bar": IndexVariable("x", bar_data, encoding={"fill_value": 0}), } index = PandasMultiIndex(pd_idx, "x") actual = index.create_variables(index_vars) for k, expected in index_vars.items(): assert_identical(actual[k], expected) assert actual[k].dtype == expected.dtype if k != "x": assert actual[k].dtype == index.level_coords_dtype[k] def test_sel(self) -> None: index = PandasMultiIndex( pd.MultiIndex.from_product([["a", "b"], [1, 2]], names=("one", "two")), "x" ) # test tuples inside slice are considered as scalar indexer values actual = index.sel({"x": slice(("a", 1), ("b", 2))}) expected_dim_indexers = {"x": slice(0, 4)} assert actual.dim_indexers == expected_dim_indexers with pytest.raises(KeyError, match=r"not all values found"): index.sel({"x": [0]}) with pytest.raises(KeyError): index.sel({"x": 0}) with pytest.raises(ValueError, match=r"cannot provide labels for both.*"): index.sel({"one": 0, "x": "a"}) with pytest.raises( ValueError, match=r"multi-index level names \('three',\) not found in indexes", ): index.sel({"x": {"three": 0}}) with pytest.raises(IndexError): index.sel({"x": (slice(None), 1, "no_level")}) def test_join(self): midx = pd.MultiIndex.from_product([["a", "aa"], [1, 2]], names=("one", "two")) level_coords_dtype = {"one": "=U2", "two": "i"} index1 = PandasMultiIndex(midx, "x", level_coords_dtype=level_coords_dtype) index2 = PandasMultiIndex(midx[0:2], "x", level_coords_dtype=level_coords_dtype) actual = index1.join(index2) assert actual.equals(index2) assert actual.level_coords_dtype == level_coords_dtype actual = index1.join(index2, how="outer") assert actual.equals(index1) assert actual.level_coords_dtype == level_coords_dtype def test_rename(self) -> None: level_coords_dtype = {"one": " None: level_coords_dtype = {"one": "U<1", "two": np.int32} expected = PandasMultiIndex( pd.MultiIndex.from_product([["a", "b"], [1, 2]], names=("one", "two")), "x", level_coords_dtype=level_coords_dtype, ) actual = expected.copy() assert actual.index.equals(expected.index) assert actual.index is not expected.index assert actual.dim == expected.dim assert actual.level_coords_dtype == expected.level_coords_dtype class TestIndexes: @pytest.fixture def indexes_and_vars(self) -> tuple[list[PandasIndex], dict[Hashable, Variable]]: x_idx = PandasIndex(pd.Index([1, 2, 3], name="x"), "x") y_idx = PandasIndex(pd.Index([4, 5, 6], name="y"), "y") z_pd_midx = pd.MultiIndex.from_product( [["a", "b"], [1, 2]], names=["one", "two"] ) z_midx = PandasMultiIndex(z_pd_midx, "z") indexes = [x_idx, y_idx, z_midx] variables = {} for idx in indexes: variables.update(idx.create_variables()) return indexes, variables @pytest.fixture(params=["pd_index", "xr_index"]) def unique_indexes( self, request, indexes_and_vars ) -> list[PandasIndex] | list[pd.Index]: xr_indexes, _ = indexes_and_vars if request.param == "pd_index": return [idx.index for idx in xr_indexes] else: return 
xr_indexes @pytest.fixture def indexes( self, unique_indexes, indexes_and_vars ) -> Indexes[Index] | Indexes[pd.Index]: x_idx, y_idx, z_midx = unique_indexes indexes: dict[Any, Index] = { "x": x_idx, "y": y_idx, "z": z_midx, "one": z_midx, "two": z_midx, } _, variables = indexes_and_vars index_type = Index if isinstance(x_idx, Index) else pd.Index return Indexes(indexes, variables, index_type=index_type) def test_interface(self, unique_indexes, indexes) -> None: x_idx = unique_indexes[0] assert list(indexes) == ["x", "y", "z", "one", "two"] assert len(indexes) == 5 assert "x" in indexes assert indexes["x"] is x_idx def test_variables(self, indexes) -> None: assert tuple(indexes.variables) == ("x", "y", "z", "one", "two") def test_dims(self, indexes) -> None: assert indexes.dims == {"x": 3, "y": 3, "z": 4} def test_get_unique(self, unique_indexes, indexes) -> None: assert indexes.get_unique() == unique_indexes def test_is_multi(self, indexes) -> None: assert indexes.is_multi("one") is True assert indexes.is_multi("x") is False def test_get_all_coords(self, indexes) -> None: expected = { "z": indexes.variables["z"], "one": indexes.variables["one"], "two": indexes.variables["two"], } assert indexes.get_all_coords("one") == expected with pytest.raises(ValueError, match=r"errors must be.*"): indexes.get_all_coords("x", errors="invalid") with pytest.raises(ValueError, match=r"no index found.*"): indexes.get_all_coords("no_coord") assert indexes.get_all_coords("no_coord", errors="ignore") == {} def test_get_all_dims(self, indexes) -> None: expected = {"z": 4} assert indexes.get_all_dims("one") == expected def test_group_by_index(self, unique_indexes, indexes): expected = [ (unique_indexes[0], {"x": indexes.variables["x"]}), (unique_indexes[1], {"y": indexes.variables["y"]}), ( unique_indexes[2], { "z": indexes.variables["z"], "one": indexes.variables["one"], "two": indexes.variables["two"], }, ), ] assert indexes.group_by_index() == expected def test_to_pandas_indexes(self, indexes) -> None: pd_indexes = indexes.to_pandas_indexes() assert isinstance(pd_indexes, Indexes) assert all(isinstance(idx, pd.Index) for idx in pd_indexes.values()) assert indexes.variables == pd_indexes.variables def test_copy_indexes(self, indexes) -> None: copied, index_vars = indexes.copy_indexes() assert copied.keys() == indexes.keys() for new, original in zip(copied.values(), indexes.values(), strict=True): assert new.equals(original) # check unique index objects preserved assert copied["z"] is copied["one"] is copied["two"] assert index_vars.keys() == indexes.variables.keys() for new, original in zip( index_vars.values(), indexes.variables.values(), strict=True ): assert_identical(new, original) def test_safe_cast_to_index(): dates = pd.date_range("2000-01-01", periods=10) x = np.arange(5) td = x * np.timedelta64(1, "D") for expected, array in [ (dates, dates.values), (pd.Index(x, dtype=object), x.astype(object)), (pd.Index(td), td), (pd.Index(td, dtype=object), td.astype(object)), ]: actual = safe_cast_to_index(array) assert_array_equal(expected, actual) assert expected.dtype == actual.dtype @requires_cftime def test_safe_cast_to_index_cftimeindex(): date_types = _all_cftime_date_types() for date_type in date_types.values(): dates = [date_type(1, 1, day) for day in range(1, 20)] expected = CFTimeIndex(dates) actual = safe_cast_to_index(np.array(dates)) assert_array_equal(expected, actual) assert expected.dtype == actual.dtype assert isinstance(actual, type(expected)) # Test that datetime.datetime objects are never 
used in a CFTimeIndex @requires_cftime def test_safe_cast_to_index_datetime_datetime(): dates = [datetime(1, 1, day) for day in range(1, 20)] expected = pd.Index(dates) actual = safe_cast_to_index(np.array(dates)) assert_array_equal(expected, actual) assert isinstance(actual, pd.Index) @pytest.mark.parametrize("dtype", ["int32", "float32"]) def test_restore_dtype_on_multiindexes(dtype: str) -> None: foo = xr.Dataset(coords={"bar": ("bar", np.array([0, 1], dtype=dtype))}) foo = foo.stack(baz=("bar",)) assert str(foo["bar"].values.dtype) == dtype class IndexWithExtraVariables(Index): @classmethod def from_variables(cls, variables, *, options=None): return cls() def create_variables(self, variables=None): if variables is None: # For Coordinates.from_xindex(), return all variables the index can create return { "time": Variable(dims=("time",), data=[1, 2, 3]), "valid_time": Variable( dims=("time",), data=[2, 3, 4], # time + 1 attrs={"description": "time + 1"}, ), } result = dict(variables) if "time" in variables: result["valid_time"] = Variable( dims=("time",), data=variables["time"].data + 1, attrs={"description": "time + 1"}, ) return result def test_set_xindex_with_extra_variables() -> None: """Test that set_xindex raises an error when custom index creates extra variables.""" ds = xr.Dataset(coords={"time": [1, 2, 3]}).reset_index("time") # Test that set_xindex raises error for extra variables with pytest.raises(ValueError, match="extra variables 'valid_time'"): ds.set_xindex("time", IndexWithExtraVariables) def test_set_xindex_factory_method_pattern() -> None: ds = xr.Dataset(coords={"time": [1, 2, 3]}).reset_index("time") # Test the recommended factory method pattern coord_vars = {"time": ds._variables["time"]} index = IndexWithExtraVariables.from_variables(coord_vars) coords = xr.Coordinates.from_xindex(index) result = ds.assign_coords(coords) assert "time" in result.variables assert "valid_time" in result.variables assert_array_equal(result.valid_time.data, result.time.data + 1) python-xarray-2026.01.0/xarray/tests/test_eval.py0000664000175000017500000004425615136607163022066 0ustar alastairalastair"""Tests for Dataset.eval() functionality.""" from __future__ import annotations import numpy as np import pandas as pd import pytest import xarray as xr from xarray import DataArray, Dataset from xarray.tests import ( assert_equal, assert_identical, raise_if_dask_computes, requires_dask, ) def test_eval(ds) -> None: """Test basic eval functionality.""" actual = ds.eval("z1 + 5") expect = ds["z1"] + 5 assert_identical(expect, actual) # Use bitwise operators for element-wise operations on arrays actual = ds.eval("(z1 > 5) & (z2 > 0)") expect = (ds["z1"] > 5) & (ds["z2"] > 0) assert_identical(expect, actual) def test_eval_parser_deprecated(ds) -> None: """Test that passing parser= raises a FutureWarning.""" with pytest.warns(FutureWarning, match="parser.*deprecated"): ds.eval("z1 + 5", parser="pandas") def test_eval_logical_operators(ds) -> None: """Test that 'and'/'or'/'not' are transformed for query() consistency. These operators are transformed to '&'/'|'/'~' to match pd.eval() behavior, which query() uses. This ensures syntax that works in query() also works in eval(). 
""" # 'and' transformed to '&' actual = ds.eval("(z1 > 5) and (z2 > 0)") expect = (ds["z1"] > 5) & (ds["z2"] > 0) assert_identical(expect, actual) # 'or' transformed to '|' actual = ds.eval("(z1 > 5) or (z2 > 0)") expect = (ds["z1"] > 5) | (ds["z2"] > 0) assert_identical(expect, actual) # 'not' transformed to '~' actual = ds.eval("not (z1 > 5)") expect = ~(ds["z1"] > 5) assert_identical(expect, actual) def test_eval_ndimensional() -> None: """Test that eval works with N-dimensional data where N > 2.""" # Create a 3D dataset - this previously failed with pd.eval rng = np.random.default_rng(42) ds = Dataset( { "x": (["time", "lat", "lon"], rng.random((3, 4, 5))), "y": (["time", "lat", "lon"], rng.random((3, 4, 5))), } ) # Basic arithmetic actual = ds.eval("x + y") expect = ds["x"] + ds["y"] assert_identical(expect, actual) # Assignment actual = ds.eval("z = x + y") assert "z" in actual.data_vars assert_equal(ds["x"] + ds["y"], actual["z"]) # Complex expression actual = ds.eval("x * 2 + y ** 2") expect = ds["x"] * 2 + ds["y"] ** 2 assert_identical(expect, actual) # Comparison actual = ds.eval("x > y") expect = ds["x"] > ds["y"] assert_identical(expect, actual) # Use bitwise operators for element-wise boolean operations actual = ds.eval("(x > 0.5) & (y < 0.5)") expect = (ds["x"] > 0.5) & (ds["y"] < 0.5) assert_identical(expect, actual) def test_eval_chained_comparisons() -> None: """Test that chained comparisons are transformed for query() consistency. Chained comparisons like 'a < b < c' are transformed to '(a < b) & (b < c)' to match pd.eval() behavior, which query() uses. """ ds = Dataset({"x": ("dim", np.arange(10))}) # Basic chained comparison: 2 < x < 7 actual = ds.eval("2 < x < 7") expect = (ds["x"] > 2) & (ds["x"] < 7) assert_identical(expect, actual) # Mixed operators: 0 <= x < 5 actual = ds.eval("0 <= x < 5") expect = (ds["x"] >= 0) & (ds["x"] < 5) assert_identical(expect, actual) # Explicit bitwise operators also work actual = ds.eval("(x > 2) & (x < 7)") expect = (ds["x"] > 2) & (ds["x"] < 7) assert_identical(expect, actual) def test_eval_restricted_syntax() -> None: """Test that eval blocks certain syntax to emulate pd.eval() behavior.""" ds = Dataset({"a": ("x", [1, 2, 3])}) # Private attribute access is not allowed (consistent with pd.eval) with pytest.raises(ValueError, match="Access to private attributes is not allowed"): ds.eval("a.__class__") with pytest.raises(ValueError, match="Access to private attributes is not allowed"): ds.eval("a._private") # Lambda expressions are not allowed (pd.eval: "Only named functions are supported") with pytest.raises(ValueError, match="Lambda expressions are not allowed"): ds.eval("(lambda x: x + 1)(a)") # These builtins are not in the namespace with pytest.raises(NameError): ds.eval("__import__('os')") with pytest.raises(NameError): ds.eval("open('file.txt')") def test_eval_unsupported_statements() -> None: """Test that unsupported statement types produce clear errors.""" ds = Dataset({"a": ("x", [1, 2, 3])}) # Augmented assignment is not supported with pytest.raises(ValueError, match="Unsupported statement type"): ds.eval("a += 1") def test_eval_functions() -> None: """Test that numpy and other functions work in eval.""" ds = Dataset({"a": ("x", [0.0, 1.0, 4.0])}) # numpy functions via np namespace should work result = ds.eval("np.sqrt(a)") assert_equal(result, np.sqrt(ds["a"])) result = ds.eval("np.sin(a) + np.cos(a)") assert_equal(result, np.sin(ds["a"]) + np.cos(ds["a"])) # pandas namespace should work result = ds.eval("pd.isna(a)") # 
pd.isna returns ndarray, not DataArray np.testing.assert_array_equal(result, pd.isna(ds["a"].values)) # xarray namespace should work result = ds.eval("xr.where(a > 1, a, 0)") assert_equal(result, xr.where(ds["a"] > 1, ds["a"], 0)) # Common builtins should work result = ds.eval("abs(a - 2)") assert_equal(result, abs(ds["a"] - 2)) result = ds.eval("round(float(a.mean()))") assert result == round(float(ds["a"].mean())) result = ds.eval("len(a)") assert result == 3 result = ds.eval("pow(a, 2)") assert_equal(result, ds["a"] ** 2) # Attribute access on DataArrays should work result = ds.eval("a.values") assert isinstance(result, np.ndarray) # Method calls on DataArrays should work result = ds.eval("a.mean()") assert float(result) == np.mean([0.0, 1.0, 4.0]) def test_eval_extended_builtins() -> None: """Test extended builtins available in eval namespace. These builtins are safe (no I/O, no code execution) and commonly needed for typical xarray operations like slicing, type conversion, and iteration. """ ds = Dataset( {"a": ("x", [1.0, 2.0, 3.0, 4.0, 5.0])}, coords={"time": pd.date_range("2019-01-01", periods=5)}, ) # slice - essential for .sel() with ranges result = ds.eval("a.sel(x=slice(1, 3))") expected = ds["a"].sel(x=slice(1, 3)) assert_equal(result, expected) # str - type constructor result = ds.eval("str(int(a.mean()))") assert result == "3" # list, tuple - type constructors result = ds.eval("list(range(3))") assert result == [0, 1, 2] result = ds.eval("tuple(range(3))") assert result == (0, 1, 2) # dict, set - type constructors result = ds.eval("dict(x=1, y=2)") assert result == {"x": 1, "y": 2} result = ds.eval("set([1, 2, 2, 3])") assert result == {1, 2, 3} # range - iteration result = ds.eval("list(range(3))") assert result == [0, 1, 2] # zip, enumerate - iteration helpers result = ds.eval("list(zip([1, 2], [3, 4]))") assert result == [(1, 3), (2, 4)] result = ds.eval("list(enumerate(['a', 'b']))") assert result == [(0, "a"), (1, "b")] # map, filter - functional helpers result = ds.eval("list(map(abs, [-1, -2, 3]))") assert result == [1, 2, 3] result = ds.eval("list(filter(bool, [0, 1, 0, 2]))") assert result == [1, 2] # any, all - aggregation result = ds.eval("any([False, True, False])") assert result is True result = ds.eval("all([True, True, True])") assert result is True result = ds.eval("all([True, False, True])") assert result is False def test_eval_data_variable_priority() -> None: """Test that data variables take priority over builtin functions. Users may have data variables named 'sum', 'abs', 'min', etc. When they reference these in eval(), they should get their data, not the Python builtins. The builtins should still be accessible via the np namespace (np.sum, np.abs). 
""" # Create dataset with data variables that shadow builtins ds = Dataset( { "sum": ("x", [10.0, 20.0, 30.0]), # shadows builtin sum "abs": ("x", [1.0, 2.0, 3.0]), # shadows builtin abs "min": ("x", [100.0, 200.0, 300.0]), # shadows builtin min "other": ("x", [5.0, 10.0, 15.0]), } ) # Data variables should take priority - user data wins result = ds.eval("sum + other") expected = ds["sum"] + ds["other"] assert_equal(result, expected) # Should get the data variable, not builtin sum applied to something result = ds.eval("sum * 2") expected = ds["sum"] * 2 assert_equal(result, expected) # abs as data variable should work result = ds.eval("abs + 1") expected = ds["abs"] + 1 assert_equal(result, expected) # min as data variable should work result = ds.eval("min - 50") expected = ds["min"] - 50 assert_equal(result, expected) # np namespace should still provide access to actual functions result = ds.eval("np.abs(other - 10)") expected = abs(ds["other"] - 10) assert_equal(result, expected) # np.sum should work even when 'sum' is a data variable result = ds.eval("np.sum(other)") expected = np.sum(ds["other"]) assert result == expected def test_eval_coordinate_priority() -> None: """Test that coordinates also take priority over builtins.""" ds = Dataset( {"data": ("x", [1.0, 2.0, 3.0])}, coords={"sum": ("x", [10.0, 20.0, 30.0])}, # coordinate named 'sum' ) # Coordinate should be accessible and take priority over builtin result = ds.eval("data + sum") expected = ds["data"] + ds.coords["sum"] assert_equal(result, expected) # Error message tests def test_eval_error_undefined_variable() -> None: """Test error message when referencing an undefined variable.""" ds = Dataset({"a": ("x", [1, 2, 3])}) with pytest.raises(NameError, match="undefined_var"): ds.eval("undefined_var + a") def test_eval_error_syntax() -> None: """Test error message for malformed expressions.""" ds = Dataset({"a": ("x", [1, 2, 3])}) with pytest.raises(ValueError, match="Invalid"): ds.eval("a +") def test_eval_error_invalid_assignment() -> None: """Test error message when assignment target is invalid.""" ds = Dataset({"a": ("x", [1, 2, 3])}) # "1 = a" should fail during parsing - can't assign to a literal with pytest.raises(ValueError, match="Invalid"): ds.eval("1 = a") def test_eval_error_dunder_access() -> None: """Test error message when trying to access dunder attributes.""" ds = Dataset({"a": ("x", [1, 2, 3])}) with pytest.raises(ValueError, match="private attributes"): ds.eval("a.__class__") def test_eval_error_missing_method() -> None: """Test error message when calling a nonexistent method.""" ds = Dataset({"a": ("x", [1, 2, 3])}) # This should raise AttributeError from the DataArray with pytest.raises(AttributeError, match="nonexistent_method"): ds.eval("a.nonexistent_method()") def test_eval_error_type_mismatch() -> None: """Test error message when types are incompatible.""" ds = Dataset({"a": ("x", [1, 2, 3])}) # Adding string to numeric array should raise TypeError or similar with pytest.raises((TypeError, np.exceptions.DTypePromotionError)): ds.eval("a + 'string'") # Edge case tests def test_eval_empty_expression() -> None: """Test handling of empty expression string.""" ds = Dataset({"a": ("x", [1, 2, 3])}) with pytest.raises(ValueError): ds.eval("") def test_eval_whitespace_only_expression() -> None: """Test handling of whitespace-only expression.""" ds = Dataset({"a": ("x", [1, 2, 3])}) with pytest.raises(ValueError): ds.eval(" ") def test_eval_just_variable_name() -> None: """Test that just a variable name returns 
the variable.""" ds = Dataset({"a": ("x", [1, 2, 3])}) result = ds.eval("a") expected = ds["a"] assert_equal(result, expected) def test_eval_unicode_variable_names() -> None: """Test that unicode variable names work in expressions.""" # Greek letters are valid Python identifiers ds = Dataset({"α": ("x", [1.0, 2.0, 3.0]), "β": ("x", [4.0, 5.0, 6.0])}) result = ds.eval("α + β") expected = ds["α"] + ds["β"] assert_equal(result, expected) def test_eval_long_expression() -> None: """Test that very long expressions work correctly.""" ds = Dataset({"a": ("x", [1.0, 2.0, 3.0])}) # Build a long expression: a + a + a + ... (50 times) long_expr = " + ".join(["a"] * 50) result = ds.eval(long_expr) expected = ds["a"] * 50 assert_equal(result, expected) # Dask tests @requires_dask def test_eval_dask_basic_arithmetic() -> None: """Test that basic arithmetic with dask arrays returns dask-backed result.""" from xarray.core.utils import is_duck_dask_array ds = Dataset( {"a": ("x", np.arange(10.0)), "b": ("x", np.linspace(0, 1, 10))} ).chunk({"x": 5}) with raise_if_dask_computes(): result = ds.eval("a + b") assert isinstance(result, DataArray) assert is_duck_dask_array(result.data) # Verify correctness when computed expected = ds["a"] + ds["b"] assert_equal(result, expected) @requires_dask def test_eval_dask_assignment() -> None: """Test that assignments with dask arrays preserve lazy evaluation.""" from xarray.core.utils import is_duck_dask_array ds = Dataset( {"a": ("x", np.arange(10.0)), "b": ("x", np.linspace(0, 1, 10))} ).chunk({"x": 5}) with raise_if_dask_computes(): result = ds.eval("z = a + b") assert isinstance(result, Dataset) assert "z" in result.data_vars assert is_duck_dask_array(result["z"].data) # Verify correctness when computed expected = ds["a"] + ds["b"] assert_equal(result["z"], expected) @requires_dask def test_eval_dask_method_chaining() -> None: """Test that method chaining works with dask arrays.""" ds = Dataset({"a": (("x", "y"), np.arange(20.0).reshape(4, 5))}).chunk( {"x": 2, "y": 5} ) # Calling .mean() should still be lazy result = ds.eval("a.mean(dim='x')") # Calling .compute() should return numpy-backed result computed = result.compute() expected = ds["a"].mean(dim="x").compute() assert_equal(computed, expected) @requires_dask def test_eval_dask_xr_where() -> None: """Test that xr.where() with dask arrays preserves lazy evaluation.""" from xarray.core.utils import is_duck_dask_array ds = Dataset({"a": ("x", np.arange(-5, 5, dtype=float))}).chunk({"x": 5}) with raise_if_dask_computes(): result = ds.eval("xr.where(a > 0, a, 0)") assert isinstance(result, DataArray) assert is_duck_dask_array(result.data) # Verify correctness when computed expected = xr.where(ds["a"] > 0, ds["a"], 0) assert_equal(result, expected) @requires_dask def test_eval_dask_complex_expression() -> None: """Test that complex expressions preserve dask backing.""" from xarray.core.utils import is_duck_dask_array rng = np.random.default_rng(42) ds = Dataset( { "x": (["time", "lat", "lon"], rng.random((3, 4, 5))), "y": (["time", "lat", "lon"], rng.random((3, 4, 5))), } ).chunk({"time": 1, "lat": 2, "lon": 5}) with raise_if_dask_computes(): result = ds.eval("x * 2 + y ** 2") assert is_duck_dask_array(result.data) # Verify correctness when computed expected = ds["x"] * 2 + ds["y"] ** 2 assert_equal(result, expected) @requires_dask def test_eval_dask_mixed_backends() -> None: """Test expressions with mixed dask and numpy arrays.""" from xarray.core.utils import is_duck_dask_array ds = Dataset( { "dask_var": ("x", 
np.arange(10.0)), "numpy_var": ("x", np.linspace(0, 1, 10)), } ) # Only chunk one variable ds["dask_var"] = ds["dask_var"].chunk({"x": 5}) with raise_if_dask_computes(): result = ds.eval("dask_var + numpy_var") # Result should be dask-backed when any input is dask assert is_duck_dask_array(result.data) # Verify correctness expected = ds["dask_var"] + ds["numpy_var"] assert_equal(result, expected) @requires_dask def test_eval_dask_np_functions() -> None: """Test that numpy functions via np namespace preserve dask.""" from xarray.core.utils import is_duck_dask_array ds = Dataset({"a": ("x", np.arange(1.0, 11.0))}).chunk({"x": 5}) with raise_if_dask_computes(): result = ds.eval("np.sqrt(a)") assert is_duck_dask_array(result.data) # Verify correctness expected = np.sqrt(ds["a"]) assert_equal(result, expected) @requires_dask def test_eval_dask_comparison() -> None: """Test that comparison operations preserve dask backing.""" from xarray.core.utils import is_duck_dask_array ds = Dataset( {"a": ("x", np.arange(10.0)), "b": ("x", np.arange(10.0)[::-1])} ).chunk({"x": 5}) with raise_if_dask_computes(): result = ds.eval("a > b") assert is_duck_dask_array(result.data) # Verify correctness expected = ds["a"] > ds["b"] assert_equal(result, expected) @requires_dask def test_eval_dask_boolean_operators() -> None: """Test that bitwise boolean operators preserve dask.""" from xarray.core.utils import is_duck_dask_array ds = Dataset( {"a": ("x", np.arange(10.0)), "b": ("x", np.arange(10.0)[::-1])} ).chunk({"x": 5}) with raise_if_dask_computes(): result = ds.eval("(a > 3) & (b < 7)") assert is_duck_dask_array(result.data) # Verify correctness expected = (ds["a"] > 3) & (ds["b"] < 7) assert_equal(result, expected) @requires_dask def test_eval_dask_chained_comparisons() -> None: """Test that chained comparisons preserve dask backing.""" from xarray.core.utils import is_duck_dask_array ds = Dataset({"x": ("dim", np.arange(10.0))}).chunk({"dim": 5}) with raise_if_dask_computes(): result = ds.eval("2 < x < 7") assert is_duck_dask_array(result.data) # Verify correctness expected = (ds["x"] > 2) & (ds["x"] < 7) assert_equal(result, expected) python-xarray-2026.01.0/xarray/tests/test_coordinates.py0000664000175000017500000002536715136607163023453 0ustar alastairalastairfrom __future__ import annotations from collections.abc import Mapping import numpy as np import pandas as pd import pytest from xarray.core.coordinates import Coordinates from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset from xarray.core.indexes import Index, PandasIndex, PandasMultiIndex from xarray.core.variable import IndexVariable, Variable from xarray.structure.alignment import align from xarray.tests import assert_identical, source_ndarray class TestCoordinates: def test_init_noindex(self) -> None: coords = Coordinates(coords={"foo": ("x", [0, 1, 2])}) expected = Dataset(coords={"foo": ("x", [0, 1, 2])}) assert_identical(coords.to_dataset(), expected) def test_init_default_index(self) -> None: coords = Coordinates(coords={"x": [1, 2]}) expected = Dataset(coords={"x": [1, 2]}) assert_identical(coords.to_dataset(), expected) assert "x" in coords.xindexes @pytest.mark.filterwarnings("error:IndexVariable") def test_init_no_default_index(self) -> None: # dimension coordinate with no default index (explicit) coords = Coordinates(coords={"x": [1, 2]}, indexes={}) assert "x" not in coords.xindexes assert not isinstance(coords["x"], IndexVariable) def test_init_from_coords(self) -> None: expected = 
Dataset(coords={"foo": ("x", [0, 1, 2])}) coords = Coordinates(coords=expected.coords) assert_identical(coords.to_dataset(), expected) # test variables copied assert coords.variables["foo"] is not expected.variables["foo"] # test indexes are extracted expected = Dataset(coords={"x": [0, 1, 2]}) coords = Coordinates(coords=expected.coords) assert_identical(coords.to_dataset(), expected) assert expected.xindexes == coords.xindexes # coords + indexes not supported with pytest.raises( ValueError, match=r"passing both.*Coordinates.*indexes.*not allowed" ): coords = Coordinates( coords=expected.coords, indexes={"x": PandasIndex([0, 1, 2], "x")} ) def test_init_empty(self) -> None: coords = Coordinates() assert len(coords) == 0 def test_init_index_error(self) -> None: idx = PandasIndex([1, 2, 3], "x") with pytest.raises(ValueError, match="no coordinate variables found"): Coordinates(indexes={"x": idx}) with pytest.raises(TypeError, match=r".* is not an `xarray.indexes.Index`"): Coordinates( coords={"x": ("x", [1, 2, 3])}, indexes={"x": "not_an_xarray_index"}, # type: ignore[dict-item] ) def test_init_dim_sizes_conflict(self) -> None: with pytest.raises(ValueError): Coordinates(coords={"foo": ("x", [1, 2]), "bar": ("x", [1, 2, 3, 4])}) def test_from_xindex(self) -> None: idx = PandasIndex([1, 2, 3], "x") coords = Coordinates.from_xindex(idx) assert isinstance(coords.xindexes["x"], PandasIndex) assert coords.xindexes["x"].equals(idx) expected = PandasIndex(idx, "x").create_variables() assert list(coords.variables) == list(expected) assert_identical(expected["x"], coords.variables["x"]) def test_from_xindex_error(self) -> None: class CustomIndexNoCoordsGenerated(Index): def create_variables(self, variables: Mapping | None = None): return {} idx = CustomIndexNoCoordsGenerated() with pytest.raises(ValueError, match=r".*index.*did not create any coordinate"): Coordinates.from_xindex(idx) def test_from_pandas_multiindex(self) -> None: midx = pd.MultiIndex.from_product([["a", "b"], [1, 2]], names=("one", "two")) coords = Coordinates.from_pandas_multiindex(midx, "x") assert isinstance(coords.xindexes["x"], PandasMultiIndex) assert coords.xindexes["x"].index.equals(midx) assert coords.xindexes["x"].dim == "x" expected = PandasMultiIndex(midx, "x").create_variables() assert list(coords.variables) == list(expected) for name in ("x", "one", "two"): assert_identical(expected[name], coords.variables[name]) @pytest.mark.filterwarnings("ignore:return type") def test_dims(self) -> None: coords = Coordinates(coords={"x": [0, 1, 2]}) assert set(coords.dims) == {"x"} def test_sizes(self) -> None: coords = Coordinates(coords={"x": [0, 1, 2]}) assert coords.sizes == {"x": 3} def test_dtypes(self) -> None: coords = Coordinates(coords={"x": [0, 1, 2]}) assert coords.dtypes == {"x": int} def test_getitem(self) -> None: coords = Coordinates(coords={"x": [0, 1, 2]}) assert_identical( coords["x"], DataArray([0, 1, 2], coords={"x": [0, 1, 2]}, name="x"), ) def test_delitem(self) -> None: coords = Coordinates(coords={"x": [0, 1, 2]}) del coords["x"] assert "x" not in coords with pytest.raises( KeyError, match="'nonexistent' is not in coordinate variables" ): del coords["nonexistent"] def test_update(self) -> None: coords = Coordinates(coords={"x": [0, 1, 2]}) coords.update({"y": ("y", [4, 5, 6])}) assert "y" in coords assert "y" in coords.xindexes expected = DataArray([4, 5, 6], coords={"y": [4, 5, 6]}, name="y") assert_identical(coords["y"], expected) def test_equals(self): coords = Coordinates(coords={"x": [0, 1, 2]}) assert 
coords.equals(coords) # Test with a different Coordinates object instead of a string other_coords = Coordinates(coords={"x": [3, 4, 5]}) assert not coords.equals(other_coords) def test_identical(self): coords = Coordinates(coords={"x": [0, 1, 2]}) assert coords.identical(coords) # Test with a different Coordinates object instead of a string other_coords = Coordinates(coords={"x": [3, 4, 5]}) assert not coords.identical(other_coords) def test_assign(self) -> None: coords = Coordinates(coords={"x": [0, 1, 2]}) expected = Coordinates(coords={"x": [0, 1, 2], "y": [3, 4]}) actual = coords.assign(y=[3, 4]) assert_identical(actual, expected) actual = coords.assign({"y": [3, 4]}) assert_identical(actual, expected) def test_copy(self) -> None: no_index_coords = Coordinates({"foo": ("x", [1, 2, 3])}) copied = no_index_coords.copy() assert_identical(no_index_coords, copied) v0 = no_index_coords.variables["foo"] v1 = copied.variables["foo"] assert v0 is not v1 assert source_ndarray(v0.data) is source_ndarray(v1.data) deep_copied = no_index_coords.copy(deep=True) assert_identical(no_index_coords.to_dataset(), deep_copied.to_dataset()) v0 = no_index_coords.variables["foo"] v1 = deep_copied.variables["foo"] assert v0 is not v1 assert source_ndarray(v0.data) is not source_ndarray(v1.data) def test_align(self) -> None: coords = Coordinates(coords={"x": [0, 1, 2]}) left = coords # test Coordinates._reindex_callback right = coords.to_dataset().isel(x=[0, 1]).coords left2, right2 = align(left, right, join="inner") assert_identical(left2, right2) # test Coordinates._overwrite_indexes right.update({"x": ("x", [4, 5, 6])}) left2, right2 = align(left, right, join="override") assert_identical(left2, left) assert_identical(left2, right2) def test_dataset_from_coords_with_multidim_var_same_name(self): # regression test for GH #8883 var = Variable(data=np.arange(6).reshape(2, 3), dims=["x", "y"]) coords = Coordinates(coords={"x": var}, indexes={}) ds = Dataset(coords=coords) assert ds.coords["x"].dims == ("x", "y") def test_drop_vars(self): coords = Coordinates( coords={ "x": Variable("x", range(3)), "y": Variable("y", list("ab")), "a": Variable(["x", "y"], np.arange(6).reshape(3, 2)), }, indexes={}, ) actual = coords.drop_vars("x") assert isinstance(actual, Coordinates) assert set(actual.variables) == {"a", "y"} actual = coords.drop_vars(["x", "y"]) assert isinstance(actual, Coordinates) assert set(actual.variables) == {"a"} def test_drop_dims(self) -> None: coords = Coordinates( coords={ "x": Variable("x", range(3)), "y": Variable("y", list("ab")), "a": Variable(["x", "y"], np.arange(6).reshape(3, 2)), }, indexes={}, ) actual = coords.drop_dims("x") assert isinstance(actual, Coordinates) assert set(actual.variables) == {"y"} actual = coords.drop_dims(["x", "y"]) assert isinstance(actual, Coordinates) assert set(actual.variables) == set() def test_rename_dims(self) -> None: coords = Coordinates( coords={ "x": Variable("x", range(3)), "y": Variable("y", list("ab")), "a": Variable(["x", "y"], np.arange(6).reshape(3, 2)), }, indexes={}, ) actual = coords.rename_dims({"x": "X"}) assert isinstance(actual, Coordinates) assert set(actual.dims) == {"X", "y"} assert set(actual.variables) == {"a", "x", "y"} actual = coords.rename_dims({"x": "u", "y": "v"}) assert isinstance(actual, Coordinates) assert set(actual.dims) == {"u", "v"} assert set(actual.variables) == {"a", "x", "y"} def test_rename_vars(self) -> None: coords = Coordinates( coords={ "x": Variable("x", range(3)), "y": Variable("y", list("ab")), "a": Variable(["x", 
"y"], np.arange(6).reshape(3, 2)), }, indexes={}, ) actual = coords.rename_vars({"x": "X"}) assert isinstance(actual, Coordinates) assert set(actual.dims) == {"x", "y"} assert set(actual.variables) == {"a", "X", "y"} actual = coords.rename_vars({"x": "u", "y": "v"}) assert isinstance(actual, Coordinates) assert set(actual.dims) == {"x", "y"} assert set(actual.variables) == {"a", "u", "v"} def test_operator_merge(self) -> None: coords1 = Coordinates({"x": ("x", [0, 1, 2])}) coords2 = Coordinates({"y": ("y", [3, 4, 5])}) expected = Dataset(coords={"x": [0, 1, 2], "y": [3, 4, 5]}) actual = coords1 | coords2 assert_identical(Dataset(coords=actual), expected) python-xarray-2026.01.0/xarray/tests/test_datatree_mapping.py0000664000175000017500000002263715136607163024442 0ustar alastairalastairimport re import numpy as np import pytest import xarray as xr from xarray.core.datatree_mapping import map_over_datasets from xarray.core.treenode import TreeIsomorphismError from xarray.testing import assert_equal, assert_identical empty = xr.Dataset() class TestMapOverSubTree: def test_no_trees_passed(self): with pytest.raises(TypeError, match="must pass at least one tree object"): map_over_datasets(lambda x: x, "dt") def test_not_isomorphic(self, create_test_datatree): dt1 = create_test_datatree() dt2 = create_test_datatree() dt2["set1/set2/extra"] = xr.DataTree(name="extra") with pytest.raises( TreeIsomorphismError, match=re.escape( r"children at node 'set1/set2' do not match: [] vs ['extra']" ), ): map_over_datasets(lambda x, y: None, dt1, dt2) def test_no_trees_returned(self, create_test_datatree): dt1 = create_test_datatree() dt2 = create_test_datatree() expected = xr.DataTree.from_dict(dict.fromkeys(dt1.to_dict())) actual = map_over_datasets(lambda x, y: None, dt1, dt2) assert_equal(expected, actual) def test_single_tree_arg(self, create_test_datatree): dt = create_test_datatree() expected = create_test_datatree(modify=lambda x: 10.0 * x) result_tree = map_over_datasets(lambda x: 10 * x, dt) assert_equal(result_tree, expected) def test_single_tree_arg_plus_arg(self, create_test_datatree): dt = create_test_datatree() expected = create_test_datatree(modify=lambda ds: (10.0 * ds)) result_tree = map_over_datasets(lambda x, y: x * y, dt, 10.0) assert_equal(result_tree, expected) result_tree = map_over_datasets(lambda x, y: x * y, 10.0, dt) assert_equal(result_tree, expected) def test_single_tree_arg_plus_kwarg(self, create_test_datatree): dt = create_test_datatree() expected = create_test_datatree(modify=lambda ds: (10.0 * ds)) def multiply_by_kwarg(ds, **kwargs): ds = ds * kwargs.pop("multiplier") return ds result_tree = map_over_datasets( multiply_by_kwarg, dt, kwargs=dict(multiplier=10.0) ) assert_equal(result_tree, expected) def test_multiple_tree_args(self, create_test_datatree): dt1 = create_test_datatree() dt2 = create_test_datatree() expected = create_test_datatree(modify=lambda ds: 2.0 * ds) result = map_over_datasets(lambda x, y: x + y, dt1, dt2) assert_equal(result, expected) def test_return_multiple_trees(self, create_test_datatree): dt = create_test_datatree() dt_min, dt_max = map_over_datasets(lambda x: (x.min(), x.max()), dt) expected_min = create_test_datatree(modify=lambda ds: ds.min()) assert_equal(dt_min, expected_min) expected_max = create_test_datatree(modify=lambda ds: ds.max()) assert_equal(dt_max, expected_max) def test_return_wrong_type(self, simple_datatree): dt1 = simple_datatree with pytest.raises( TypeError, match=re.escape( "the result of calling func on the node at position 
'.' is not a " "Dataset or None or a tuple of such types" ), ): map_over_datasets(lambda x: "string", dt1) # type: ignore[arg-type,return-value] def test_return_tuple_of_wrong_types(self, simple_datatree): dt1 = simple_datatree with pytest.raises( TypeError, match=re.escape( "the result of calling func on the node at position '.' is not a " "Dataset or None or a tuple of such types" ), ): map_over_datasets(lambda x: (x, "string"), dt1) # type: ignore[arg-type,return-value] def test_return_inconsistent_number_of_results(self, simple_datatree): dt1 = simple_datatree with pytest.raises( TypeError, match=re.escape( r"Calling func on the nodes at position set1 returns a tuple " "of 0 datasets, whereas calling func on the nodes at position " ". instead returns a tuple of 2 datasets." ), ): # Datasets in simple_datatree have different numbers of dims map_over_datasets(lambda ds: tuple((None,) * len(ds.dims)), dt1) def test_wrong_number_of_arguments_for_func(self, simple_datatree): dt = simple_datatree with pytest.raises( TypeError, match="takes 1 positional argument but 2 were given" ): map_over_datasets(lambda x: 10 * x, dt, dt) def test_map_single_dataset_against_whole_tree(self, create_test_datatree): dt = create_test_datatree() def nodewise_merge(node_ds, fixed_ds): return xr.merge([node_ds, fixed_ds]) other_ds = xr.Dataset({"z": ("z", [0])}) expected = create_test_datatree(modify=lambda ds: xr.merge([ds, other_ds])) result_tree = map_over_datasets(nodewise_merge, dt, other_ds) assert_equal(result_tree, expected) @pytest.mark.xfail def test_trees_with_different_node_names(self): # TODO test this after I've got good tests for renaming nodes raise NotImplementedError def test_tree_method(self, create_test_datatree): dt = create_test_datatree() def multiply(ds, times): return times * ds expected = create_test_datatree(modify=lambda ds: 10.0 * ds) result_tree = dt.map_over_datasets(multiply, 10.0) assert_equal(result_tree, expected) def test_tree_method_with_kwarg(self, create_test_datatree): dt = create_test_datatree() def multiply(ds, **kwargs): return kwargs.pop("times") * ds expected = create_test_datatree(modify=lambda ds: 10.0 * ds) result_tree = dt.map_over_datasets(multiply, kwargs=dict(times=10.0)) assert_equal(result_tree, expected) def test_discard_ancestry(self, create_test_datatree): # Check for datatree GH issue https://github.com/xarray-contrib/datatree/issues/48 dt = create_test_datatree() subtree = dt["set1"] expected = create_test_datatree(modify=lambda ds: 10.0 * ds)["set1"] result_tree = map_over_datasets(lambda x: 10.0 * x, subtree) assert_equal(result_tree, expected) def test_keep_attrs_on_empty_nodes(self, create_test_datatree): # GH278 dt = create_test_datatree() dt["set1/set2"].attrs["foo"] = "bar" def empty_func(ds): return ds result = dt.map_over_datasets(empty_func) assert result["set1/set2"].attrs == dt["set1/set2"].attrs def test_error_contains_path_of_offending_node(self, create_test_datatree): dt = create_test_datatree() dt["set1"]["bad_var"] = 0 print(dt) def fail_on_specific_node(ds): if "bad_var" in ds: raise ValueError("Failed because 'bar_var' present in dataset") with pytest.raises( ValueError, match=re.escape( r"Raised whilst mapping function over node(s) with path 'set1'" ), ): dt.map_over_datasets(fail_on_specific_node) def test_inherited_coordinates_with_index(self): root = xr.Dataset(coords={"x": [1, 2]}) child = xr.Dataset({"foo": ("x", [0, 1])}) # no coordinates tree = xr.DataTree.from_dict({"/": root, "/child": child}) actual = 
tree.map_over_datasets(lambda ds: ds) # identity assert isinstance(actual, xr.DataTree) assert_identical(tree, actual) actual_child = actual.children["child"].to_dataset(inherit=False) assert_identical(actual_child, child) class TestMutableOperations: def test_construct_using_type(self): # from datatree GH issue https://github.com/xarray-contrib/datatree/issues/188 # xarray's .weighted is unusual because it uses type() to create a Dataset/DataArray a = xr.DataArray( np.random.rand(3, 4, 10), dims=["x", "y", "time"], coords={"area": (["x", "y"], np.random.rand(3, 4))}, ).to_dataset(name="data") b = xr.DataArray( np.random.rand(2, 6, 14), dims=["x", "y", "time"], coords={"area": (["x", "y"], np.random.rand(2, 6))}, ).to_dataset(name="data") dt = xr.DataTree.from_dict({"a": a, "b": b}) def weighted_mean(ds): if "area" not in ds.coords: return None return ds.weighted(ds.area).mean(["x", "y"]) dt.map_over_datasets(weighted_mean) def test_alter_inplace_forbidden(self): simpsons = xr.DataTree.from_dict( { "/": xr.Dataset({"age": 83}), "/Herbert": xr.Dataset({"age": 40}), "/Homer": xr.Dataset({"age": 39}), "/Homer/Bart": xr.Dataset({"age": 10}), "/Homer/Lisa": xr.Dataset({"age": 8}), "/Homer/Maggie": xr.Dataset({"age": 1}), }, name="Abe", ) def fast_forward(ds: xr.Dataset, years: float) -> xr.Dataset: """Add some years to the age, but by altering the given dataset""" ds["age"] = ds["age"] + years return ds with pytest.raises(AttributeError): simpsons.map_over_datasets(fast_forward, 10) python-xarray-2026.01.0/xarray/tests/test_sparse.py0000664000175000017500000007142215136607163022427 0ustar alastairalastairfrom __future__ import annotations import math import pickle from textwrap import dedent import numpy as np import pandas as pd import pytest from packaging.version import Version import xarray as xr import xarray.ufuncs as xu from xarray import DataArray, Variable from xarray.namedarray.pycompat import array_type from xarray.tests import assert_equal, assert_identical, requires_dask filterwarnings = pytest.mark.filterwarnings param = pytest.param xfail = pytest.mark.xfail sparse = pytest.importorskip("sparse") sparse_array_type = array_type("sparse") def assert_sparse_equal(a, b): assert isinstance(a, sparse_array_type) assert isinstance(b, sparse_array_type) np.testing.assert_equal(a.todense(), b.todense()) def make_ndarray(shape): return np.arange(math.prod(shape)).reshape(shape) def make_sparray(shape): return sparse.random(shape, density=0.1, random_state=0) def make_xrvar(dim_lengths): return xr.Variable( tuple(dim_lengths.keys()), make_sparray(shape=tuple(dim_lengths.values())) ) def make_xrarray(dim_lengths, coords=None, name="test"): if coords is None: coords = {d: np.arange(n) for d, n in dim_lengths.items()} return xr.DataArray( make_sparray(shape=tuple(dim_lengths.values())), dims=tuple(coords.keys()), coords=coords, name=name, ) class do: def __init__(self, meth, *args, **kwargs): self.meth = meth self.args = args self.kwargs = kwargs def __call__(self, obj): # cannot pass np.sum when using pytest-xdist kwargs = self.kwargs.copy() if "func" in self.kwargs: kwargs["func"] = getattr(np, kwargs["func"]) return getattr(obj, self.meth)(*self.args, **kwargs) def __repr__(self): return f"obj.{self.meth}(*{self.args}, **{self.kwargs})" @pytest.mark.parametrize( "prop", [ "chunks", "data", "dims", "dtype", "encoding", "imag", "nbytes", "ndim", param("values", marks=xfail(reason="Coercion to dense")), ], ) def test_variable_property(prop): var = make_xrvar({"x": 10, "y": 5}) getattr(var, 
prop) @pytest.mark.parametrize( "func,sparse_output", [ (do("all"), False), (do("any"), False), (do("astype", dtype=int), True), (do("clip", min=0, max=1), True), (do("coarsen", windows={"x": 2}, func="sum"), True), (do("compute"), True), (do("conj"), True), (do("copy"), True), (do("count"), False), (do("get_axis_num", dim="x"), False), (do("isel", x=slice(2, 4)), True), (do("isnull"), True), (do("load"), True), (do("mean"), False), (do("notnull"), True), (do("roll"), True), (do("round"), True), (do("set_dims", dim=("x", "y", "z")), True), (do("stack", dim={"flat": ("x", "y")}), True), (do("to_base_variable"), True), (do("transpose"), True), (do("unstack", dim={"x": {"x1": 5, "x2": 2}}), True), (do("broadcast_equals", make_xrvar({"x": 10, "y": 5})), False), (do("equals", make_xrvar({"x": 10, "y": 5})), False), (do("identical", make_xrvar({"x": 10, "y": 5})), False), param( do("argmax"), True, marks=[ xfail(reason="Missing implementation for np.argmin"), filterwarnings("ignore:Behaviour of argmin/argmax"), ], ), param( do("argmin"), True, marks=[ xfail(reason="Missing implementation for np.argmax"), filterwarnings("ignore:Behaviour of argmin/argmax"), ], ), param( do("argsort"), True, marks=xfail(reason="'COO' object has no attribute 'argsort'"), ), param( do( "concat", variables=[ make_xrvar({"x": 10, "y": 5}), make_xrvar({"x": 10, "y": 5}), ], ), True, ), param( do("conjugate"), True, marks=xfail(reason="'COO' object has no attribute 'conjugate'"), ), param( do("cumprod"), True, marks=xfail(reason="Missing implementation for np.nancumprod"), ), param( do("cumsum"), True, marks=xfail(reason="Missing implementation for np.nancumsum"), ), (do("fillna", 0), True), param( do("item", (1, 1)), False, marks=xfail(reason="'COO' object has no attribute 'item'"), ), param( do("median"), False, marks=xfail(reason="Missing implementation for np.nanmedian"), ), param(do("max"), False), param(do("min"), False), param( do("no_conflicts", other=make_xrvar({"x": 10, "y": 5})), True, marks=xfail(reason="mixed sparse-dense operation"), ), param( do("pad", mode="constant", pad_widths={"x": (1, 1)}, fill_value=5), True, marks=xfail(reason="Missing implementation for np.pad"), ), (do("prod"), False), param( do("quantile", q=0.5), True, marks=xfail(reason="Missing implementation for np.nanpercentile"), ), param( do("rank", dim="x"), False, marks=xfail(reason="Only implemented for NumPy arrays (via bottleneck)"), ), param( do("reduce", func="sum", dim="x"), True, ), param( do("rolling_window", dim="x", window=2, window_dim="x_win"), True, marks=xfail(reason="Missing implementation for np.pad"), ), param( do("shift", x=2), True, marks=xfail(reason="mixed sparse-dense operation") ), param( do("std"), False, marks=xfail(reason="Missing implementation for np.nanstd") ), (do("sum"), False), param( do("var"), False, marks=xfail(reason="Missing implementation for np.nanvar") ), param(do("to_dict"), False), (do("where", cond=make_xrvar({"x": 10, "y": 5}) > 0.5), True), ], ids=repr, ) def test_variable_method(func, sparse_output): var_s = make_xrvar({"x": 10, "y": 5}) var_d = xr.Variable(var_s.dims, var_s.data.todense()) ret_s = func(var_s) ret_d = func(var_d) # TODO: figure out how to verify the results of each method if isinstance(ret_d, xr.Variable) and isinstance(ret_d.data, sparse.SparseArray): ret_d = ret_d.copy(data=ret_d.data.todense()) if sparse_output: assert isinstance(ret_s.data, sparse.SparseArray) assert np.allclose(ret_s.data.todense(), ret_d.data, equal_nan=True) elif func.meth != "to_dict": assert 
np.allclose(ret_s, ret_d) else: # pop the arrays from the dict arr_s, arr_d = ret_s.pop("data"), ret_d.pop("data") assert np.allclose(arr_s, arr_d) assert ret_s == ret_d @pytest.mark.parametrize( "func,sparse_output", [ (do("squeeze"), True), param(do("to_index"), False, marks=xfail(reason="Coercion to dense")), param(do("to_index_variable"), False, marks=xfail(reason="Coercion to dense")), param( do("searchsorted", 0.5), True, marks=xfail(reason="'COO' object has no attribute 'searchsorted'"), ), ], ) def test_1d_variable_method(func, sparse_output): var_s = make_xrvar({"x": 10}) var_d = xr.Variable(var_s.dims, var_s.data.todense()) ret_s = func(var_s) ret_d = func(var_d) if sparse_output: assert isinstance(ret_s.data, sparse.SparseArray) assert np.allclose(ret_s.data.todense(), ret_d.data) else: assert np.allclose(ret_s, ret_d) class TestSparseVariable: @pytest.fixture(autouse=True) def setUp(self): self.data = sparse.random((4, 6), random_state=0, density=0.5) self.var = xr.Variable(("x", "y"), self.data) def test_nbytes(self): assert self.var.nbytes == self.data.nbytes def test_unary_op(self): assert_sparse_equal(-self.var.data, -self.data) assert_sparse_equal(abs(self.var).data, abs(self.data)) assert_sparse_equal(self.var.round().data, self.data.round()) @pytest.mark.filterwarnings("ignore::FutureWarning") def test_univariate_ufunc(self): assert_sparse_equal(np.sin(self.data), np.sin(self.var).data) @pytest.mark.filterwarnings("ignore::FutureWarning") def test_bivariate_ufunc(self): assert_sparse_equal(np.maximum(self.data, 0), np.maximum(self.var, 0).data) assert_sparse_equal(np.maximum(self.data, 0), np.maximum(0, self.var).data) def test_univariate_xufunc(self): assert_sparse_equal(xu.sin(self.var).data, np.sin(self.data)) def test_bivariate_xufunc(self): assert_sparse_equal(xu.multiply(self.var, 0).data, np.multiply(self.data, 0)) assert_sparse_equal(xu.multiply(0, self.var).data, np.multiply(0, self.data)) def test_repr(self): expected = dedent( """\ Size: 288B """ ) assert expected == repr(self.var) def test_pickle(self): v1 = self.var v2 = pickle.loads(pickle.dumps(v1)) assert_sparse_equal(v1.data, v2.data) def test_missing_values(self): a = np.array([0, 1, np.nan, 3]) s = sparse.COO.from_numpy(a) var_s = Variable("x", s) assert np.all(var_s.fillna(2).data.todense() == np.arange(4)) assert np.all(var_s.count() == 3) @pytest.mark.parametrize( "prop", [ "attrs", "chunks", "coords", "data", "dims", "dtype", "encoding", "imag", "indexes", "loc", "name", "nbytes", "ndim", "plot", "real", "shape", "size", "sizes", "str", "variable", ], ) def test_dataarray_property(prop): arr = make_xrarray({"x": 10, "y": 5}) getattr(arr, prop) @pytest.mark.parametrize( "func,sparse_output", [ (do("all"), False), (do("any"), False), (do("assign_attrs", {"foo": "bar"}), True), (do("assign_coords", x=make_xrarray({"x": 10}).x + 1), True), (do("astype", int), True), (do("clip", min=0, max=1), True), (do("compute"), True), (do("conj"), True), (do("copy"), True), (do("count"), False), (do("diff", "x"), True), (do("drop_vars", "x"), True), (do("expand_dims", {"z": 2}, axis=2), True), (do("get_axis_num", "x"), False), (do("get_index", "x"), False), (do("identical", make_xrarray({"x": 5, "y": 5})), False), (do("integrate", "x"), True), (do("isel", {"x": slice(0, 3), "y": slice(2, 4)}), True), (do("isnull"), True), (do("load"), True), (do("mean"), False), (do("persist"), True), (do("reindex", {"x": [1, 2, 3]}), True), (do("rename", "foo"), True), (do("reorder_levels"), True), (do("reset_coords", drop=True), 
True), (do("reset_index", "x"), True), (do("round"), True), (do("sel", x=[0, 1, 2]), True), (do("shift"), True), (do("sortby", "x", ascending=False), True), (do("stack", z=["x", "y"]), True), (do("transpose"), True), # TODO # set_index # swap_dims (do("broadcast_equals", make_xrvar({"x": 10, "y": 5})), False), (do("equals", make_xrvar({"x": 10, "y": 5})), False), param( do("argmax"), True, marks=[ xfail(reason="Missing implementation for np.argmax"), filterwarnings("ignore:Behaviour of argmin/argmax"), ], ), param( do("argmin"), True, marks=[ xfail(reason="Missing implementation for np.argmin"), filterwarnings("ignore:Behaviour of argmin/argmax"), ], ), param( do("argsort"), True, marks=xfail(reason="'COO' object has no attribute 'argsort'"), ), param( do("bfill", dim="x"), False, marks=xfail(reason="Missing implementation for np.flip"), ), (do("combine_first", make_xrarray({"x": 10, "y": 5})), True), param( do("conjugate"), False, marks=xfail(reason="'COO' object has no attribute 'conjugate'"), ), param( do("cumprod"), True, marks=xfail(reason="Missing implementation for np.nancumprod"), ), param( do("cumsum"), True, marks=xfail(reason="Missing implementation for np.nancumsum"), ), param( do("differentiate", "x"), False, marks=xfail(reason="Missing implementation for np.gradient"), ), param( do("dot", make_xrarray({"x": 10, "y": 5})), True, marks=xfail(reason="Missing implementation for np.einsum"), ), param(do("dropna", "x"), False, marks=xfail(reason="Coercion to dense")), param(do("ffill", "x"), False, marks=xfail(reason="Coercion to dense")), (do("fillna", 0), True), param( do("interp", coords={"x": np.arange(10) + 0.5}), True, marks=xfail(reason="Coercion to dense"), ), param( do( "interp_like", make_xrarray( {"x": 10, "y": 5}, coords={"x": np.arange(10) + 0.5, "y": np.arange(5) + 0.5}, ), ), True, marks=xfail(reason="Indexing COO with more than one iterable index"), ), param(do("interpolate_na", "x"), True, marks=xfail(reason="Coercion to dense")), param( do("isin", [1, 2, 3]), False, marks=xfail(reason="Missing implementation for np.isin"), ), param( do("item", (1, 1)), False, marks=xfail(reason="'COO' object has no attribute 'item'"), ), param(do("max"), False), param(do("min"), False), param( do("median"), False, marks=xfail(reason="Missing implementation for np.nanmedian"), ), (do("notnull"), True), (do("pipe", func="sum", axis=1), True), (do("prod"), False), param( do("quantile", q=0.5), False, marks=xfail(reason="Missing implementation for np.nanpercentile"), ), param( do("rank", "x"), False, marks=xfail(reason="Only implemented for NumPy arrays (via bottleneck)"), ), param( do("reduce", func="sum", dim="x"), False, marks=xfail(reason="Coercion to dense"), ), param( do( "reindex_like", make_xrarray( {"x": 10, "y": 5}, coords={"x": np.arange(10) + 0.5, "y": np.arange(5) + 0.5}, ), ), True, marks=xfail(reason="Indexing COO with more than one iterable index"), ), (do("roll", x=2, roll_coords=True), True), param( do("sel", x=[0, 1, 2], y=[2, 3]), True, marks=xfail(reason="Indexing COO with more than one iterable index"), ), param( do("std"), False, marks=xfail(reason="Missing implementation for np.nanstd") ), (do("sum"), False), param( do("var"), False, marks=xfail(reason="Missing implementation for np.nanvar") ), param( do("where", make_xrarray({"x": 10, "y": 5}) > 0.5), False, marks=xfail(reason="Conversion of dense to sparse when using sparse mask"), ), ], ids=repr, ) def test_dataarray_method(func, sparse_output): arr_s = make_xrarray( {"x": 10, "y": 5}, coords={"x": 
np.arange(10), "y": np.arange(5)} ) arr_d = xr.DataArray(arr_s.data.todense(), coords=arr_s.coords, dims=arr_s.dims) ret_s = func(arr_s) ret_d = func(arr_d) if sparse_output: assert isinstance(ret_s.data, sparse.SparseArray) assert np.allclose(ret_s.data.todense(), ret_d.data, equal_nan=True) else: assert np.allclose(ret_s, ret_d, equal_nan=True) @pytest.mark.parametrize( "func,sparse_output", [ (do("squeeze"), True), param( do("searchsorted", [1, 2, 3]), False, marks=xfail(reason="'COO' object has no attribute 'searchsorted'"), ), ], ) def test_datarray_1d_method(func, sparse_output): arr_s = make_xrarray({"x": 10}, coords={"x": np.arange(10)}) arr_d = xr.DataArray(arr_s.data.todense(), coords=arr_s.coords, dims=arr_s.dims) ret_s = func(arr_s) ret_d = func(arr_d) if sparse_output: assert isinstance(ret_s.data, sparse.SparseArray) assert np.allclose(ret_s.data.todense(), ret_d.data, equal_nan=True) else: assert np.allclose(ret_s, ret_d, equal_nan=True) class TestSparseDataArrayAndDataset: @pytest.fixture(autouse=True) def setUp(self): self.sp_ar = sparse.random((4, 6), random_state=0, density=0.5) self.sp_xr = xr.DataArray( self.sp_ar, coords={"x": range(4)}, dims=("x", "y"), name="foo" ) self.ds_ar = self.sp_ar.todense() self.ds_xr = xr.DataArray( self.ds_ar, coords={"x": range(4)}, dims=("x", "y"), name="foo" ) def test_to_dataset_roundtrip(self): x = self.sp_xr assert_equal(x, x.to_dataset("x").to_dataarray("x")) def test_align(self): a1 = xr.DataArray( sparse.COO.from_numpy(np.arange(4)), dims=["x"], coords={"x": ["a", "b", "c", "d"]}, ) b1 = xr.DataArray( sparse.COO.from_numpy(np.arange(4)), dims=["x"], coords={"x": ["a", "b", "d", "e"]}, ) a2, b2 = xr.align(a1, b1, join="inner") assert isinstance(a2.data, sparse.SparseArray) assert isinstance(b2.data, sparse.SparseArray) assert np.all(a2.coords["x"].data == ["a", "b", "d"]) assert np.all(b2.coords["x"].data == ["a", "b", "d"]) @pytest.mark.xfail( reason="COO objects currently do not accept more than one " "iterable index at a time" ) def test_align_2d(self): A1 = xr.DataArray( self.sp_ar, dims=["x", "y"], coords={ "x": np.arange(self.sp_ar.shape[0]), "y": np.arange(self.sp_ar.shape[1]), }, ) A2 = xr.DataArray( self.sp_ar, dims=["x", "y"], coords={ "x": np.arange(1, self.sp_ar.shape[0] + 1), "y": np.arange(1, self.sp_ar.shape[1] + 1), }, ) B1, B2 = xr.align(A1, A2, join="inner") assert np.all(B1.coords["x"] == np.arange(1, self.sp_ar.shape[0])) assert np.all(B1.coords["y"] == np.arange(1, self.sp_ar.shape[0])) assert np.all(B1.coords["x"] == B2.coords["x"]) assert np.all(B1.coords["y"] == B2.coords["y"]) def test_align_outer(self): a1 = xr.DataArray( sparse.COO.from_numpy(np.arange(4)), dims=["x"], coords={"x": ["a", "b", "c", "d"]}, ) b1 = xr.DataArray( sparse.COO.from_numpy(np.arange(4)), dims=["x"], coords={"x": ["a", "b", "d", "e"]}, ) a2, b2 = xr.align(a1, b1, join="outer") assert isinstance(a2.data, sparse.SparseArray) assert isinstance(b2.data, sparse.SparseArray) assert np.all(a2.coords["x"].data == ["a", "b", "c", "d", "e"]) assert np.all(b2.coords["x"].data == ["a", "b", "c", "d", "e"]) def test_concat(self): ds1 = xr.Dataset(data_vars={"d": self.sp_xr}) ds2 = xr.Dataset(data_vars={"d": self.sp_xr}) ds3 = xr.Dataset(data_vars={"d": self.sp_xr}) out = xr.concat([ds1, ds2, ds3], dim="x") assert_sparse_equal( out["d"].data, sparse.concatenate([self.sp_ar, self.sp_ar, self.sp_ar], axis=0), ) out_concat = xr.concat([self.sp_xr, self.sp_xr, self.sp_xr], dim="y") assert_sparse_equal( out_concat.data, sparse.concatenate([self.sp_ar, 
self.sp_ar, self.sp_ar], axis=1), ) def test_stack(self): arr = make_xrarray({"w": 2, "x": 3, "y": 4}) stacked = arr.stack(z=("x", "y")) z = pd.MultiIndex.from_product( [list(range(3)), list(range(4))], names=["x", "y"] ) expected = xr.DataArray( arr.data.reshape((2, -1)), {"w": [0, 1], "z": z}, dims=["w", "z"] ) assert_equal(expected, stacked) roundtripped = stacked.unstack() assert_identical(arr, roundtripped) def test_dataarray_repr(self): a = xr.DataArray( sparse.COO.from_numpy(np.ones(4)), dims=["x"], coords={"y": ("x", sparse.COO.from_numpy(np.arange(4, dtype="i8")))}, ) expected = dedent( """\ Size: 64B Coordinates: y (x) int64 48B Dimensions without coordinates: x""" ) assert expected == repr(a) def test_dataset_repr(self): ds = xr.Dataset( data_vars={"a": ("x", sparse.COO.from_numpy(np.ones(4)))}, coords={"y": ("x", sparse.COO.from_numpy(np.arange(4, dtype="i8")))}, ) expected = dedent( """\ Size: 112B Dimensions: (x: 4) Coordinates: y (x) int64 48B Dimensions without coordinates: x Data variables: a (x) float64 64B """ ) assert expected == repr(ds) @requires_dask def test_sparse_dask_dataset_repr(self): ds = xr.Dataset( data_vars={"a": ("x", sparse.COO.from_numpy(np.ones(4)))} ).chunk() if Version(sparse.__version__) >= Version("0.16.0"): meta = "sparse.numba_backend._coo.core.COO" else: meta = "sparse.COO" expected = dedent( f"""\ Size: 32B Dimensions: (x: 4) Dimensions without coordinates: x Data variables: a (x) float64 32B dask.array""" ) assert expected == repr(ds) def test_dataarray_pickle(self): a1 = xr.DataArray( sparse.COO.from_numpy(np.ones(4)), dims=["x"], coords={"y": ("x", sparse.COO.from_numpy(np.arange(4)))}, ) a2 = pickle.loads(pickle.dumps(a1)) assert_identical(a1, a2) def test_dataset_pickle(self): ds1 = xr.Dataset( data_vars={"a": ("x", sparse.COO.from_numpy(np.ones(4)))}, coords={"y": ("x", sparse.COO.from_numpy(np.arange(4)))}, ) ds2 = pickle.loads(pickle.dumps(ds1)) assert_identical(ds1, ds2) def test_coarsen(self): a1 = self.ds_xr a2 = self.sp_xr m1 = a1.coarsen(x=2, boundary="trim").mean() # type: ignore[attr-defined] m2 = a2.coarsen(x=2, boundary="trim").mean() # type: ignore[attr-defined] assert isinstance(m2.data, sparse.SparseArray) assert np.allclose(m1.data, m2.data.todense()) @pytest.mark.xfail(reason="No implementation of np.pad") def test_rolling(self): a1 = self.ds_xr a2 = self.sp_xr m1 = a1.rolling(x=2, center=True).mean() m2 = a2.rolling(x=2, center=True).mean() assert isinstance(m2.data, sparse.SparseArray) assert np.allclose(m1.data, m2.data.todense()) @pytest.mark.xfail(reason="Coercion to dense") def test_rolling_exp(self): a1 = self.ds_xr a2 = self.sp_xr m1 = a1.rolling_exp(x=2, center=True).mean() m2 = a2.rolling_exp(x=2, center=True).mean() assert isinstance(m2.data, sparse.SparseArray) assert np.allclose(m1.data, m2.data.todense()) @pytest.mark.xfail(reason="No implementation of np.einsum") def test_dot(self): a1 = self.sp_xr.dot(self.sp_xr[0]) a2 = self.sp_ar.dot(self.sp_ar[0]) assert_equal(a1, a2) @pytest.mark.xfail(reason="Groupby reductions produce dense output") def test_groupby(self): x1 = self.ds_xr x2 = self.sp_xr m1 = x1.groupby("x").mean(...) m2 = x2.groupby("x").mean(...) 
assert isinstance(m2.data, sparse.SparseArray) assert np.allclose(m1.data, m2.data.todense()) @pytest.mark.xfail(reason="Groupby reductions produce dense output") def test_groupby_first(self): x = self.sp_xr.copy() x.coords["ab"] = ("x", ["a", "a", "b", "b"]) x.groupby("ab").first() x.groupby("ab").first(skipna=False) @pytest.mark.xfail(reason="Groupby reductions produce dense output") def test_groupby_bins(self): x1 = self.ds_xr x2 = self.sp_xr m1 = x1.groupby_bins("x", bins=[0, 3, 7, 10]).sum(...) m2 = x2.groupby_bins("x", bins=[0, 3, 7, 10]).sum(...) assert isinstance(m2.data, sparse.SparseArray) assert np.allclose(m1.data, m2.data.todense()) @pytest.mark.xfail(reason="Resample produces dense output") def test_resample(self): t1 = xr.DataArray( np.linspace(0, 11, num=12), coords=[ pd.date_range("1999-12-15", periods=12, freq=pd.DateOffset(months=1)) ], dims="time", ) t2 = t1.copy() t2.data = sparse.COO(t2.data) m1 = t1.resample(time="QS-DEC").mean() m2 = t2.resample(time="QS-DEC").mean() assert isinstance(m2.data, sparse.SparseArray) assert np.allclose(m1.data, m2.data.todense()) @pytest.mark.xfail def test_reindex(self): x1 = self.ds_xr x2 = self.sp_xr for kwargs in [ {"x": [2, 3, 4]}, {"x": [1, 100, 2, 101, 3]}, {"x": [2.5, 3, 3.5], "y": [2, 2.5, 3]}, ]: m1 = x1.reindex(**kwargs) # type: ignore[arg-type] m2 = x2.reindex(**kwargs) # type: ignore[arg-type] assert np.allclose(m1, m2, equal_nan=True) @pytest.mark.xfail def test_merge(self): x = self.sp_xr y = xr.merge([x, x.rename("bar")]).to_dataarray() assert isinstance(y, sparse.SparseArray) @pytest.mark.xfail def test_where(self): a = np.arange(10) cond = a > 3 xr.DataArray(a).where(cond) s = sparse.COO.from_numpy(a) cond2 = s > 3 xr.DataArray(s).where(cond2) x = xr.DataArray(s) cond3: DataArray = x > 3 x.where(cond3) class TestSparseCoords: @pytest.mark.xfail(reason="Coercion of coords to dense") def test_sparse_coords(self): xr.DataArray( sparse.COO.from_numpy(np.arange(4)), dims=["x"], coords={"x": sparse.COO.from_numpy([1, 2, 3, 4])}, ) @requires_dask def test_chunk(): s = sparse.COO.from_numpy(np.array([0, 0, 1, 2])) a = DataArray(s) ac = a.chunk(2) assert ac.chunks == ((2, 2),) assert isinstance(ac.data._meta, sparse.COO) assert_identical(ac, a) ds = a.to_dataset(name="a") dsc = ds.chunk(2) assert dsc.chunks == {"dim_0": (2, 2)} assert_identical(dsc, ds) @requires_dask def test_dask_token(): import dask s = sparse.COO.from_numpy(np.array([0, 0, 1, 2])) a = DataArray(s) t1 = dask.base.tokenize(a) t2 = dask.base.tokenize(a) t3 = dask.base.tokenize(a + 1) assert t1 == t2 assert t3 != t2 assert isinstance(a.data, sparse.COO) ac = a.chunk(2) t4 = dask.base.tokenize(ac) t5 = dask.base.tokenize(ac + 1) assert t4 != t5 assert isinstance(ac.data._meta, sparse.COO) @requires_dask def test_apply_ufunc_check_meta_coherence(): s = sparse.COO.from_numpy(np.array([0, 0, 1, 2])) a = DataArray(s) ac = a.chunk(2) sparse_meta = ac.data._meta result = xr.apply_ufunc(lambda x: x, ac, dask="parallelized").data._meta assert_sparse_equal(result, sparse_meta) python-xarray-2026.01.0/xarray/tests/test_coding.py0000664000175000017500000001206615136607163022374 0ustar alastairalastairfrom __future__ import annotations from contextlib import suppress import numpy as np import pandas as pd import pytest import xarray as xr from xarray.coding import variables from xarray.conventions import decode_cf_variable, encode_cf_variable from xarray.tests import assert_allclose, assert_equal, assert_identical, requires_dask with suppress(ImportError): import dask.array 
as da def test_CFMaskCoder_decode() -> None: original = xr.Variable(("x",), [0, -1, 1], {"_FillValue": -1}) expected = xr.Variable(("x",), [0, np.nan, 1]) coder = variables.CFMaskCoder() encoded = coder.decode(original) assert_identical(expected, encoded) encoding_with_dtype = { "dtype": np.dtype("float64"), "_FillValue": np.float32(1e20), "missing_value": np.float64(1e20), } encoding_without_dtype = { "_FillValue": np.float32(1e20), "missing_value": np.float64(1e20), } CFMASKCODER_ENCODE_DTYPE_CONFLICT_TESTS = { "numeric-with-dtype": ([0.0, -1.0, 1.0], encoding_with_dtype), "numeric-without-dtype": ([0.0, -1.0, 1.0], encoding_without_dtype), "times-with-dtype": (pd.date_range("2000", periods=3), encoding_with_dtype), } @pytest.mark.parametrize( ("data", "encoding"), CFMASKCODER_ENCODE_DTYPE_CONFLICT_TESTS.values(), ids=list(CFMASKCODER_ENCODE_DTYPE_CONFLICT_TESTS.keys()), ) def test_CFMaskCoder_encode_missing_fill_values_conflict(data, encoding) -> None: original = xr.Variable(("x",), data, encoding=encoding) encoded = encode_cf_variable(original) assert encoded.dtype == encoded.attrs["missing_value"].dtype assert encoded.dtype == encoded.attrs["_FillValue"].dtype roundtripped = decode_cf_variable("foo", encoded) assert_identical(roundtripped, original) def test_CFMaskCoder_missing_value() -> None: expected = xr.DataArray( np.array([[26915, 27755, -9999, 27705], [25595, -9999, 28315, -9999]]), dims=["npts", "ntimes"], name="tmpk", ) expected.attrs["missing_value"] = -9999 decoded = xr.decode_cf(expected.to_dataset()) encoded, _ = xr.conventions.cf_encoder(decoded.variables, decoded.attrs) assert_equal(encoded["tmpk"], expected.variable) decoded.tmpk.encoding["_FillValue"] = -9940 with pytest.raises(ValueError): encoded, _ = xr.conventions.cf_encoder(decoded.variables, decoded.attrs) @requires_dask def test_CFMaskCoder_decode_dask() -> None: original = xr.Variable(("x",), [0, -1, 1], {"_FillValue": -1}).chunk() expected = xr.Variable(("x",), [0, np.nan, 1]) coder = variables.CFMaskCoder() encoded = coder.decode(original) assert isinstance(encoded.data, da.Array) assert_identical(expected, encoded) # TODO(shoyer): port other fill-value tests # TODO(shoyer): parameterize when we have more coders def test_coder_roundtrip() -> None: original = xr.Variable(("x",), [0.0, np.nan, 1.0]) coder = variables.CFMaskCoder() roundtripped = coder.decode(coder.encode(original)) assert_identical(original, roundtripped) @pytest.mark.parametrize("dtype", ["u1", "u2", "i1", "i2", "f2", "f4"]) @pytest.mark.parametrize("dtype2", ["f4", "f8"]) def test_scaling_converts_to_float(dtype: str, dtype2: str) -> None: dt = np.dtype(dtype2) original = xr.Variable( ("x",), np.arange(10, dtype=dtype), encoding=dict(scale_factor=dt.type(10)) ) coder = variables.CFScaleOffsetCoder() encoded = coder.encode(original) assert encoded.dtype == dt roundtripped = coder.decode(encoded) assert_identical(original, roundtripped) assert roundtripped.dtype == dt @pytest.mark.parametrize("scale_factor", (10, [10])) @pytest.mark.parametrize("add_offset", (0.1, [0.1])) def test_scaling_offset_as_list(scale_factor, add_offset) -> None: # test for #4631 encoding = dict(scale_factor=scale_factor, add_offset=add_offset) original = xr.Variable(("x",), np.arange(10.0), encoding=encoding) coder = variables.CFScaleOffsetCoder() encoded = coder.encode(original) roundtripped = coder.decode(encoded) assert_allclose(original, roundtripped) @pytest.mark.parametrize("bits", [1, 2, 4, 8]) def test_decode_unsigned_from_signed(bits) -> None: unsigned_dtype 
= np.dtype(f"u{bits}") signed_dtype = np.dtype(f"i{bits}") original_values = np.array([np.iinfo(unsigned_dtype).max], dtype=unsigned_dtype) encoded = xr.Variable( ("x",), original_values.astype(signed_dtype), attrs={"_Unsigned": "true"} ) coder = variables.CFMaskCoder() decoded = coder.decode(encoded) assert decoded.dtype == unsigned_dtype assert decoded.values == original_values @pytest.mark.parametrize("bits", [1, 2, 4, 8]) def test_decode_signed_from_unsigned(bits) -> None: unsigned_dtype = np.dtype(f"u{bits}") signed_dtype = np.dtype(f"i{bits}") original_values = np.array([-1], dtype=signed_dtype) encoded = xr.Variable( ("x",), original_values.astype(unsigned_dtype), attrs={"_Unsigned": "false"} ) coder = variables.CFMaskCoder() decoded = coder.decode(encoded) assert decoded.dtype == signed_dtype assert decoded.values == original_values python-xarray-2026.01.0/xarray/tests/test_combine.py0000664000175000017500000015227015136607163022547 0ustar alastairalastairfrom __future__ import annotations import re from itertools import product import numpy as np import pytest from xarray import ( DataArray, Dataset, DataTree, MergeError, combine_by_coords, combine_nested, concat, merge, set_options, ) from xarray.core import dtypes from xarray.structure.combine import ( _check_shape_tile_ids, _combine_all_along_first_dim, _combine_nd, _infer_concat_order_from_coords, _infer_concat_order_from_positions, _new_tile_id, ) from xarray.tests import assert_equal, assert_identical, requires_cftime from xarray.tests.test_dataset import create_test_data def assert_combined_tile_ids_equal(dict1: dict, dict2: dict) -> None: assert len(dict1) == len(dict2) for k in dict1.keys(): assert k in dict2.keys() assert_equal(dict1[k], dict2[k]) class TestTileIDsFromNestedList: def test_1d(self): ds = create_test_data input = [ds(0), ds(1)] expected = {(0,): ds(0), (1,): ds(1)} actual: dict[tuple[int, ...], Dataset] = _infer_concat_order_from_positions( input ) assert_combined_tile_ids_equal(expected, actual) def test_2d(self): ds = create_test_data input = [[ds(0), ds(1)], [ds(2), ds(3)], [ds(4), ds(5)]] expected = { (0, 0): ds(0), (0, 1): ds(1), (1, 0): ds(2), (1, 1): ds(3), (2, 0): ds(4), (2, 1): ds(5), } actual: dict[tuple[int, ...], Dataset] = _infer_concat_order_from_positions( input ) assert_combined_tile_ids_equal(expected, actual) def test_3d(self): ds = create_test_data input = [ [[ds(0), ds(1)], [ds(2), ds(3)], [ds(4), ds(5)]], [[ds(6), ds(7)], [ds(8), ds(9)], [ds(10), ds(11)]], ] expected = { (0, 0, 0): ds(0), (0, 0, 1): ds(1), (0, 1, 0): ds(2), (0, 1, 1): ds(3), (0, 2, 0): ds(4), (0, 2, 1): ds(5), (1, 0, 0): ds(6), (1, 0, 1): ds(7), (1, 1, 0): ds(8), (1, 1, 1): ds(9), (1, 2, 0): ds(10), (1, 2, 1): ds(11), } actual: dict[tuple[int, ...], Dataset] = _infer_concat_order_from_positions( input ) assert_combined_tile_ids_equal(expected, actual) def test_single_dataset(self): ds = create_test_data(0) input = [ds] expected = {(0,): ds} actual: dict[tuple[int, ...], Dataset] = _infer_concat_order_from_positions( input ) assert_combined_tile_ids_equal(expected, actual) def test_redundant_nesting(self): ds = create_test_data input = [[ds(0)], [ds(1)]] expected = {(0, 0): ds(0), (1, 0): ds(1)} actual: dict[tuple[int, ...], Dataset] = _infer_concat_order_from_positions( input ) assert_combined_tile_ids_equal(expected, actual) def test_ignore_empty_list(self): ds = create_test_data(0) input: list = [ds, []] expected = {(0,): ds} actual: dict[tuple[int, ...], Dataset] = _infer_concat_order_from_positions( input ) 
assert_combined_tile_ids_equal(expected, actual) def test_uneven_depth_input(self): # Auto_combine won't work on ragged input # but this is just to increase test coverage ds = create_test_data input: list = [ds(0), [ds(1), ds(2)]] expected = {(0,): ds(0), (1, 0): ds(1), (1, 1): ds(2)} actual: dict[tuple[int, ...], Dataset] = _infer_concat_order_from_positions( input ) assert_combined_tile_ids_equal(expected, actual) def test_uneven_length_input(self): # Auto_combine won't work on ragged input # but this is just to increase test coverage ds = create_test_data input = [[ds(0)], [ds(1), ds(2)]] expected = {(0, 0): ds(0), (1, 0): ds(1), (1, 1): ds(2)} actual: dict[tuple[int, ...], Dataset] = _infer_concat_order_from_positions( input ) assert_combined_tile_ids_equal(expected, actual) def test_infer_from_datasets(self): ds = create_test_data input = [ds(0), ds(1)] expected = {(0,): ds(0), (1,): ds(1)} actual: dict[tuple[int, ...], Dataset] = _infer_concat_order_from_positions( input ) assert_combined_tile_ids_equal(expected, actual) class TestTileIDsFromCoords: def test_1d(self): ds0 = Dataset({"x": [0, 1]}) ds1 = Dataset({"x": [2, 3]}) expected = {(0,): ds0, (1,): ds1} actual, concat_dims = _infer_concat_order_from_coords([ds1, ds0]) assert_combined_tile_ids_equal(expected, actual) assert concat_dims == ["x"] def test_2d(self): ds0 = Dataset({"x": [0, 1], "y": [10, 20, 30]}) ds1 = Dataset({"x": [2, 3], "y": [10, 20, 30]}) ds2 = Dataset({"x": [0, 1], "y": [40, 50, 60]}) ds3 = Dataset({"x": [2, 3], "y": [40, 50, 60]}) ds4 = Dataset({"x": [0, 1], "y": [70, 80, 90]}) ds5 = Dataset({"x": [2, 3], "y": [70, 80, 90]}) expected = { (0, 0): ds0, (1, 0): ds1, (0, 1): ds2, (1, 1): ds3, (0, 2): ds4, (1, 2): ds5, } actual, concat_dims = _infer_concat_order_from_coords( [ds1, ds0, ds3, ds5, ds2, ds4] ) assert_combined_tile_ids_equal(expected, actual) assert concat_dims == ["x", "y"] def test_no_dimension_coords(self): ds0 = Dataset({"foo": ("x", [0, 1])}) ds1 = Dataset({"foo": ("x", [2, 3])}) with pytest.raises(ValueError, match=r"Could not find any dimension"): _infer_concat_order_from_coords([ds1, ds0]) def test_coord_not_monotonic(self): ds0 = Dataset({"x": [0, 1]}) ds1 = Dataset({"x": [3, 2]}) with pytest.raises( ValueError, match=r"Coordinate variable x is neither monotonically increasing nor", ): _infer_concat_order_from_coords([ds1, ds0]) def test_coord_monotonically_decreasing(self): ds0 = Dataset({"x": [3, 2]}) ds1 = Dataset({"x": [1, 0]}) expected = {(0,): ds0, (1,): ds1} actual, concat_dims = _infer_concat_order_from_coords([ds1, ds0]) assert_combined_tile_ids_equal(expected, actual) assert concat_dims == ["x"] def test_no_concatenation_needed(self): ds = Dataset({"foo": ("x", [0, 1])}) expected = {(): ds} actual, concat_dims = _infer_concat_order_from_coords([ds]) assert_combined_tile_ids_equal(expected, actual) assert concat_dims == [] def test_2d_plus_bystander_dim(self): ds0 = Dataset({"x": [0, 1], "y": [10, 20, 30], "t": [0.1, 0.2]}) ds1 = Dataset({"x": [2, 3], "y": [10, 20, 30], "t": [0.1, 0.2]}) ds2 = Dataset({"x": [0, 1], "y": [40, 50, 60], "t": [0.1, 0.2]}) ds3 = Dataset({"x": [2, 3], "y": [40, 50, 60], "t": [0.1, 0.2]}) expected = {(0, 0): ds0, (1, 0): ds1, (0, 1): ds2, (1, 1): ds3} actual, concat_dims = _infer_concat_order_from_coords([ds1, ds0, ds3, ds2]) assert_combined_tile_ids_equal(expected, actual) assert concat_dims == ["x", "y"] def test_string_coords(self): ds0 = Dataset({"person": ["Alice", "Bob"]}) ds1 = Dataset({"person": ["Caroline", "Daniel"]}) expected = {(0,): ds0, (1,): 
ds1} actual, concat_dims = _infer_concat_order_from_coords([ds1, ds0]) assert_combined_tile_ids_equal(expected, actual) assert concat_dims == ["person"] # Decided against natural sorting of string coords GH #2616 def test_lexicographic_sort_string_coords(self): ds0 = Dataset({"simulation": ["run8", "run9"]}) ds1 = Dataset({"simulation": ["run10", "run11"]}) expected = {(0,): ds1, (1,): ds0} actual, concat_dims = _infer_concat_order_from_coords([ds1, ds0]) assert_combined_tile_ids_equal(expected, actual) assert concat_dims == ["simulation"] def test_datetime_coords(self): ds0 = Dataset( {"time": np.array(["2000-03-06", "2000-03-07"], dtype="datetime64[ns]")} ) ds1 = Dataset( {"time": np.array(["1999-01-01", "1999-02-04"], dtype="datetime64[ns]")} ) expected = {(0,): ds1, (1,): ds0} actual, concat_dims = _infer_concat_order_from_coords([ds0, ds1]) assert_combined_tile_ids_equal(expected, actual) assert concat_dims == ["time"] @pytest.fixture(scope="module") def create_combined_ids(): return _create_combined_ids def _create_combined_ids(shape): tile_ids = _create_tile_ids(shape) nums = range(len(tile_ids)) return { tile_id: create_test_data(num) for tile_id, num in zip(tile_ids, nums, strict=True) } def _create_tile_ids(shape): tile_ids = product(*(range(i) for i in shape)) return list(tile_ids) class TestNewTileIDs: @pytest.mark.parametrize( "old_id, new_id", [((3, 0, 1), (0, 1)), ((0, 0), (0,)), ((1,), ()), ((0,), ()), ((1, 0), (0,))], ) def test_new_tile_id(self, old_id, new_id): ds = create_test_data assert _new_tile_id((old_id, ds)) == new_id def test_get_new_tile_ids(self, create_combined_ids): shape = (1, 2, 3) combined_ids = create_combined_ids(shape) expected_tile_ids = sorted(combined_ids.keys()) actual_tile_ids = _create_tile_ids(shape) assert expected_tile_ids == actual_tile_ids class TestCombineND: @pytest.mark.parametrize( "concat_dim, kwargs", [("dim1", {}), ("new_dim", {"data_vars": "all"})] ) def test_concat_once(self, create_combined_ids, concat_dim, kwargs): shape = (2,) combined_ids = create_combined_ids(shape) ds = create_test_data result = _combine_all_along_first_dim( combined_ids, dim=concat_dim, data_vars="all", coords="different", compat="no_conflicts", fill_value=dtypes.NA, join="outer", combine_attrs="drop", ) expected_ds = concat([ds(0), ds(1)], dim=concat_dim, **kwargs) assert_combined_tile_ids_equal(result, {(): expected_ds}) def test_concat_only_first_dim(self, create_combined_ids): shape = (2, 3) combined_ids = create_combined_ids(shape) result = _combine_all_along_first_dim( combined_ids, dim="dim1", data_vars="all", coords="different", compat="no_conflicts", fill_value=dtypes.NA, join="outer", combine_attrs="drop", ) ds = create_test_data partway1 = concat([ds(0), ds(3)], dim="dim1") partway2 = concat([ds(1), ds(4)], dim="dim1") partway3 = concat([ds(2), ds(5)], dim="dim1") expected_datasets = [partway1, partway2, partway3] expected = {(i,): ds for i, ds in enumerate(expected_datasets)} assert_combined_tile_ids_equal(result, expected) @pytest.mark.parametrize( "concat_dim, kwargs", [("dim1", {}), ("new_dim", {"data_vars": "all"})] ) def test_concat_twice(self, create_combined_ids, concat_dim, kwargs): shape = (2, 3) combined_ids = create_combined_ids(shape) result = _combine_nd( combined_ids, concat_dims=["dim1", concat_dim], data_vars="all", coords="different", compat="no_conflicts", fill_value=dtypes.NA, join="outer", combine_attrs="drop", ) ds = create_test_data partway1 = concat([ds(0), ds(3)], dim="dim1") partway2 = concat([ds(1), ds(4)], dim="dim1") 
partway3 = concat([ds(2), ds(5)], dim="dim1") expected = concat([partway1, partway2, partway3], **kwargs, dim=concat_dim) assert_equal(result, expected) class TestCheckShapeTileIDs: def test_check_depths(self): ds = create_test_data(0) combined_tile_ids = {(0,): ds, (0, 1): ds} with pytest.raises( ValueError, match=r"sub-lists do not have consistent depths" ): _check_shape_tile_ids(combined_tile_ids) def test_check_lengths(self): ds = create_test_data(0) combined_tile_ids = {(0, 0): ds, (0, 1): ds, (0, 2): ds, (1, 0): ds, (1, 1): ds} with pytest.raises( ValueError, match=r"sub-lists do not have consistent lengths" ): _check_shape_tile_ids(combined_tile_ids) class TestNestedCombine: def test_nested_concat(self): objs = [Dataset({"x": [0]}), Dataset({"x": [1]})] expected = Dataset({"x": [0, 1]}) actual = combine_nested(objs, concat_dim="x") assert_identical(expected, actual) actual = combine_nested(objs, concat_dim=["x"]) assert_identical(expected, actual) actual = combine_nested([actual], concat_dim=None) assert_identical(expected, actual) actual = combine_nested([actual], concat_dim="x") assert_identical(expected, actual) objs = [Dataset({"x": [0, 1]}), Dataset({"x": [2]})] actual = combine_nested(objs, concat_dim="x") expected = Dataset({"x": [0, 1, 2]}) assert_identical(expected, actual) # ensure combine_nested handles non-sorted variables objs = [ Dataset({"x": ("a", [0]), "y": ("a", [0])}), Dataset({"y": ("a", [1]), "x": ("a", [1])}), ] actual = combine_nested(objs, concat_dim="a") expected = Dataset({"x": ("a", [0, 1]), "y": ("a", [0, 1])}) assert_identical(expected, actual) objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [1]})] actual = combine_nested(objs, concat_dim="x") expected = Dataset({"x": [0, 1], "y": [0]}) assert_identical(expected, actual) @pytest.mark.parametrize( "join, expected", [ ("outer", Dataset({"x": [0, 1], "y": [0, 1]})), ("inner", Dataset({"x": [0, 1], "y": []})), ("left", Dataset({"x": [0, 1], "y": [0]})), ("right", Dataset({"x": [0, 1], "y": [1]})), ], ) def test_combine_nested_join(self, join, expected): objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [1], "y": [1]})] actual = combine_nested(objs, concat_dim="x", join=join) assert_identical(expected, actual) def test_combine_nested_join_exact(self): objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [1], "y": [1]})] with pytest.raises(ValueError, match=r"cannot align.*join.*exact"): combine_nested(objs, concat_dim="x", join="exact") def test_empty_input(self): assert_identical(Dataset(), combine_nested([], concat_dim="x")) # Fails because of concat's weird treatment of dimension coords, see #2975 @pytest.mark.xfail def test_nested_concat_too_many_dims_at_once(self): objs = [Dataset({"x": [0], "y": [1]}), Dataset({"y": [0], "x": [1]})] with pytest.raises(ValueError, match="not equal across datasets"): combine_nested(objs, concat_dim="x", coords="minimal") def test_nested_concat_along_new_dim(self): objs = [ Dataset({"a": ("x", [10]), "x": [0]}), Dataset({"a": ("x", [20]), "x": [0]}), ] expected = Dataset({"a": (("t", "x"), [[10], [20]]), "x": [0]}) actual = combine_nested(objs, data_vars="all", concat_dim="t") assert_identical(expected, actual) # Same but with a DataArray as new dim, see GH #1988 and #2647 dim = DataArray([100, 150], name="baz", dims="baz") expected = Dataset( {"a": (("baz", "x"), [[10], [20]]), "x": [0], "baz": [100, 150]} ) actual = combine_nested(objs, data_vars="all", concat_dim=dim) assert_identical(expected, actual) def test_nested_merge_with_self(self): data = Dataset({"x": 
0}) actual = combine_nested([data, data, data], concat_dim=None) assert_identical(data, actual) def test_nested_merge_with_overlapping_values(self): ds1 = Dataset({"a": ("x", [1, 2]), "x": [0, 1]}) ds2 = Dataset({"a": ("x", [2, 3]), "x": [1, 2]}) expected = Dataset({"a": ("x", [1, 2, 3]), "x": [0, 1, 2]}) with pytest.warns( FutureWarning, match="will change from compat='no_conflicts' to compat='override'", ): actual = combine_nested([ds1, ds2], join="outer", concat_dim=None) assert_identical(expected, actual) actual = combine_nested( [ds1, ds2], join="outer", compat="no_conflicts", concat_dim=None ) assert_identical(expected, actual) actual = combine_nested( [ds1, ds2], join="outer", compat="no_conflicts", concat_dim=[None] ) assert_identical(expected, actual) def test_nested_merge_with_nan_no_conflicts(self): tmp1 = Dataset({"x": 0}) tmp2 = Dataset({"x": np.nan}) actual = combine_nested([tmp1, tmp2], compat="no_conflicts", concat_dim=None) assert_identical(tmp1, actual) with pytest.warns( FutureWarning, match="will change from compat='no_conflicts' to compat='override'", ): combine_nested([tmp1, tmp2], concat_dim=None) actual = combine_nested([tmp1, tmp2], compat="no_conflicts", concat_dim=[None]) assert_identical(tmp1, actual) def test_nested_merge_with_concat_dim_explicitly_provided(self): # Test the issue reported in GH #1988 objs = [Dataset({"x": 0, "y": 1})] dim = DataArray([100], name="baz", dims="baz") actual = combine_nested(objs, concat_dim=[dim], data_vars="all") expected = Dataset({"x": ("baz", [0]), "y": ("baz", [1])}, {"baz": [100]}) assert_identical(expected, actual) def test_nested_merge_with_non_scalars(self): # Just making sure that auto_combine is doing what is # expected for non-scalar values, too. objs = [Dataset({"x": ("z", [0, 1]), "y": ("z", [1, 2])})] dim = DataArray([100], name="baz", dims="baz") actual = combine_nested(objs, concat_dim=[dim], data_vars="all") expected = Dataset( {"x": (("baz", "z"), [[0, 1]]), "y": (("baz", "z"), [[1, 2]])}, {"baz": [100]}, ) assert_identical(expected, actual) def test_concat_multiple_dims(self): objs = [ [Dataset({"a": (("x", "y"), [[0]])}), Dataset({"a": (("x", "y"), [[1]])})], [Dataset({"a": (("x", "y"), [[2]])}), Dataset({"a": (("x", "y"), [[3]])})], ] actual = combine_nested(objs, concat_dim=["x", "y"]) expected = Dataset({"a": (("x", "y"), [[0, 1], [2, 3]])}) assert_identical(expected, actual) def test_concat_name_symmetry(self): """Inspired by the discussion on GH issue #2777""" da1 = DataArray(name="a", data=[[0]], dims=["x", "y"]) da2 = DataArray(name="b", data=[[1]], dims=["x", "y"]) da3 = DataArray(name="a", data=[[2]], dims=["x", "y"]) da4 = DataArray(name="b", data=[[3]], dims=["x", "y"]) x_first = combine_nested([[da1, da2], [da3, da4]], concat_dim=["x", "y"]) y_first = combine_nested([[da1, da3], [da2, da4]], concat_dim=["y", "x"]) assert_identical(x_first, y_first) def test_concat_one_dim_merge_another(self): data = create_test_data(add_attrs=False) data1 = data.copy(deep=True) data2 = data.copy(deep=True) objs = [ [data1.var1.isel(dim2=slice(4)), data2.var1.isel(dim2=slice(4, 9))], [data1.var2.isel(dim2=slice(4)), data2.var2.isel(dim2=slice(4, 9))], ] expected = data[["var1", "var2"]] actual = combine_nested(objs, concat_dim=[None, "dim2"]) assert_identical(expected, actual) def test_auto_combine_2d(self): ds = create_test_data partway1 = concat([ds(0), ds(3)], dim="dim1") partway2 = concat([ds(1), ds(4)], dim="dim1") partway3 = concat([ds(2), ds(5)], dim="dim1") expected = concat([partway1, partway2, partway3], 
data_vars="all", dim="dim2") datasets = [[ds(0), ds(1), ds(2)], [ds(3), ds(4), ds(5)]] result = combine_nested( datasets, data_vars="all", concat_dim=["dim1", "dim2"], ) assert_equal(result, expected) def test_auto_combine_2d_combine_attrs_kwarg(self): ds = lambda x: create_test_data(x, add_attrs=False) partway1 = concat([ds(0), ds(3)], dim="dim1") partway2 = concat([ds(1), ds(4)], dim="dim1") partway3 = concat([ds(2), ds(5)], dim="dim1") expected = concat([partway1, partway2, partway3], data_vars="all", dim="dim2") expected_dict = {} expected_dict["drop"] = expected.copy(deep=True) expected_dict["drop"].attrs = {} expected_dict["no_conflicts"] = expected.copy(deep=True) expected_dict["no_conflicts"].attrs = { "a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6, } expected_dict["override"] = expected.copy(deep=True) expected_dict["override"].attrs = {"a": 1} f = lambda attrs, context: attrs[0] expected_dict[f] = expected.copy(deep=True) # type: ignore[index] expected_dict[f].attrs = f([{"a": 1}], None) # type: ignore[index] datasets = [[ds(0), ds(1), ds(2)], [ds(3), ds(4), ds(5)]] datasets[0][0].attrs = {"a": 1} datasets[0][1].attrs = {"a": 1, "b": 2} datasets[0][2].attrs = {"a": 1, "c": 3} datasets[1][0].attrs = {"a": 1, "d": 4} datasets[1][1].attrs = {"a": 1, "e": 5} datasets[1][2].attrs = {"a": 1, "f": 6} with pytest.raises(ValueError, match=r"combine_attrs='identical'"): result = combine_nested( datasets, concat_dim=["dim1", "dim2"], data_vars="all", combine_attrs="identical", ) for combine_attrs, expected in expected_dict.items(): result = combine_nested( datasets, concat_dim=["dim1", "dim2"], data_vars="all", combine_attrs=combine_attrs, ) # type: ignore[call-overload] assert_identical(result, expected) def test_combine_nested_missing_data_new_dim(self): # Your data includes "time" and "station" dimensions, and each year's # data has a different set of stations. 
datasets = [ Dataset({"a": ("x", [2, 3]), "x": [1, 2]}), Dataset({"a": ("x", [1, 2]), "x": [0, 1]}), ] expected = Dataset( {"a": (("t", "x"), [[np.nan, 2, 3], [1, 2, np.nan]])}, {"x": [0, 1, 2]} ) actual = combine_nested(datasets, data_vars="all", join="outer", concat_dim="t") assert_identical(expected, actual) def test_invalid_hypercube_input(self): ds = create_test_data datasets = [[ds(0), ds(1), ds(2)], [ds(3), ds(4)]] with pytest.raises( ValueError, match=r"sub-lists do not have consistent lengths" ): combine_nested(datasets, concat_dim=["dim1", "dim2"]) datasets2: list = [[ds(0), ds(1)], [[ds(3), ds(4)]]] with pytest.raises( ValueError, match=r"sub-lists do not have consistent depths" ): combine_nested(datasets2, concat_dim=["dim1", "dim2"]) datasets = [[ds(0), ds(1)], [ds(3), ds(4)]] with pytest.raises(ValueError, match=r"concat_dims has length"): combine_nested(datasets, concat_dim=["dim1"]) def test_merge_one_dim_concat_another(self): objs = [ [Dataset({"foo": ("x", [0, 1])}), Dataset({"bar": ("x", [10, 20])})], [Dataset({"foo": ("x", [2, 3])}), Dataset({"bar": ("x", [30, 40])})], ] expected = Dataset({"foo": ("x", [0, 1, 2, 3]), "bar": ("x", [10, 20, 30, 40])}) actual = combine_nested(objs, concat_dim=["x", None], compat="equals") assert_identical(expected, actual) # Proving it works symmetrically objs = [ [Dataset({"foo": ("x", [0, 1])}), Dataset({"foo": ("x", [2, 3])})], [Dataset({"bar": ("x", [10, 20])}), Dataset({"bar": ("x", [30, 40])})], ] actual = combine_nested(objs, concat_dim=[None, "x"], compat="equals") assert_identical(expected, actual) def test_combine_concat_over_redundant_nesting(self): objs = [[Dataset({"x": [0]}), Dataset({"x": [1]})]] actual = combine_nested(objs, concat_dim=[None, "x"]) expected = Dataset({"x": [0, 1]}) assert_identical(expected, actual) objs = [[Dataset({"x": [0]})], [Dataset({"x": [1]})]] actual = combine_nested(objs, concat_dim=["x", None]) expected = Dataset({"x": [0, 1]}) assert_identical(expected, actual) objs = [[Dataset({"x": [0]})]] actual = combine_nested(objs, concat_dim=[None, None]) expected = Dataset({"x": [0]}) assert_identical(expected, actual) @pytest.mark.parametrize("fill_value", [dtypes.NA, 2, 2.0, {"a": 2, "b": 1}]) def test_combine_nested_fill_value(self, fill_value): datasets = [ Dataset({"a": ("x", [2, 3]), "b": ("x", [-2, 1]), "x": [1, 2]}), Dataset({"a": ("x", [1, 2]), "b": ("x", [3, -1]), "x": [0, 1]}), ] if fill_value == dtypes.NA: # if we supply the default, we expect the missing value for a # float array fill_value_a = fill_value_b = np.nan elif isinstance(fill_value, dict): fill_value_a = fill_value["a"] fill_value_b = fill_value["b"] else: fill_value_a = fill_value_b = fill_value expected = Dataset( { "a": (("t", "x"), [[fill_value_a, 2, 3], [1, 2, fill_value_a]]), "b": (("t", "x"), [[fill_value_b, -2, 1], [3, -1, fill_value_b]]), }, {"x": [0, 1, 2]}, ) actual = combine_nested( datasets, concat_dim="t", data_vars="all", join="outer", fill_value=fill_value, ) assert_identical(expected, actual) def test_combine_nested_unnamed_data_arrays(self): unnamed_array = DataArray(data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x") actual = combine_nested([unnamed_array], concat_dim="x") expected = unnamed_array assert_identical(expected, actual) unnamed_array1 = DataArray(data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x") unnamed_array2 = DataArray(data=[3.0, 4.0], coords={"x": [2, 3]}, dims="x") actual = combine_nested([unnamed_array1, unnamed_array2], concat_dim="x") expected = DataArray( data=[1.0, 2.0, 3.0, 4.0], coords={"x": 
[0, 1, 2, 3]}, dims="x" ) assert_identical(expected, actual) da1 = DataArray(data=[[0.0]], coords={"x": [0], "y": [0]}, dims=["x", "y"]) da2 = DataArray(data=[[1.0]], coords={"x": [0], "y": [1]}, dims=["x", "y"]) da3 = DataArray(data=[[2.0]], coords={"x": [1], "y": [0]}, dims=["x", "y"]) da4 = DataArray(data=[[3.0]], coords={"x": [1], "y": [1]}, dims=["x", "y"]) objs = [[da1, da2], [da3, da4]] expected = DataArray( data=[[0.0, 1.0], [2.0, 3.0]], coords={"x": [0, 1], "y": [0, 1]}, dims=["x", "y"], ) actual = combine_nested(objs, concat_dim=["x", "y"]) assert_identical(expected, actual) # TODO aijams - Determine if this test is appropriate. def test_nested_combine_mixed_datasets_arrays(self): objs = [ DataArray([0, 1], dims=("x"), coords=({"x": [0, 1]})), Dataset({"x": [2, 3]}), ] with pytest.raises( ValueError, match=r"Can't combine datasets with unnamed arrays." ): combine_nested(objs, "x") # type: ignore[arg-type] def test_nested_combine_mixed_datatrees_and_datasets(self): objs = [DataTree.from_dict({"foo": 0}), Dataset({"foo": 1})] with pytest.raises( ValueError, match=r"Can't combine a mix of DataTree and non-DataTree objects.", ): combine_nested(objs, concat_dim="x") # type: ignore[arg-type] def test_datatree(self): objs = [DataTree.from_dict({"foo": 0}), DataTree.from_dict({"foo": 1})] expected = DataTree.from_dict({"foo": ("x", [0, 1])}) actual = combine_nested(objs, concat_dim="x") assert expected.identical(actual) class TestCombineDatasetsbyCoords: def test_combine_by_coords(self): objs = [Dataset({"x": [0]}), Dataset({"x": [1]})] actual = combine_by_coords(objs) expected = Dataset({"x": [0, 1]}) assert_identical(expected, actual) actual = combine_by_coords([actual]) assert_identical(expected, actual) objs = [Dataset({"x": [0, 1]}), Dataset({"x": [2]})] actual = combine_by_coords(objs) expected = Dataset({"x": [0, 1, 2]}) assert_identical(expected, actual) def test_combine_by_coords_handles_non_sorted_variables(self): # ensure auto_combine handles non-sorted variables objs = [ Dataset({"x": ("a", [0]), "y": ("a", [0]), "a": [0]}), Dataset({"x": ("a", [1]), "y": ("a", [1]), "a": [1]}), ] actual = combine_by_coords(objs, join="outer") expected = Dataset({"x": ("a", [0, 1]), "y": ("a", [0, 1]), "a": [0, 1]}) assert_identical(expected, actual) def test_combine_by_coords_multiple_variables(self): objs = [Dataset({"x": [0], "y": [0]}), Dataset({"y": [1], "x": [1]})] actual = combine_by_coords(objs, join="outer") expected = Dataset({"x": [0, 1], "y": [0, 1]}) assert_equal(actual, expected) def test_combine_by_coords_for_scalar_variables(self): objs = [Dataset({"x": 0}), Dataset({"x": 1})] with pytest.raises( ValueError, match=r"Could not find any dimension coordinates" ): combine_by_coords(objs) def test_combine_by_coords_requires_coord_or_index(self): objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [0]})] with pytest.raises( ValueError, match=r"Every dimension requires a corresponding 1D coordinate and index", ): combine_by_coords(objs) def test_empty_input(self): assert_identical(Dataset(), combine_by_coords([])) @pytest.mark.parametrize( "join, expected", [ ("outer", Dataset({"x": [0, 1], "y": [0, 1]})), ("inner", Dataset({"x": [0, 1], "y": []})), ("left", Dataset({"x": [0, 1], "y": [0]})), ("right", Dataset({"x": [0, 1], "y": [1]})), ], ) def test_combine_coords_join(self, join, expected): objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [1], "y": [1]})] actual = combine_nested(objs, concat_dim="x", join=join) assert_identical(expected, actual) def 
test_combine_coords_join_exact(self): objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [1], "y": [1]})] with pytest.raises(ValueError, match=r"cannot align.*join.*exact.*"): combine_nested(objs, concat_dim="x", join="exact") @pytest.mark.parametrize( "combine_attrs, expected", [ ("drop", Dataset({"x": [0, 1], "y": [0, 1]}, attrs={})), ( "no_conflicts", Dataset({"x": [0, 1], "y": [0, 1]}, attrs={"a": 1, "b": 2}), ), ("override", Dataset({"x": [0, 1], "y": [0, 1]}, attrs={"a": 1})), ( lambda attrs, context: attrs[1], Dataset({"x": [0, 1], "y": [0, 1]}, attrs={"a": 1, "b": 2}), ), ], ) def test_combine_coords_combine_attrs(self, combine_attrs, expected): objs = [ Dataset({"x": [0], "y": [0]}, attrs={"a": 1}), Dataset({"x": [1], "y": [1]}, attrs={"a": 1, "b": 2}), ] actual = combine_nested( objs, concat_dim="x", join="outer", combine_attrs=combine_attrs ) assert_identical(expected, actual) if combine_attrs == "no_conflicts": objs[1].attrs["a"] = 2 with pytest.raises(ValueError, match=r"combine_attrs='no_conflicts'"): actual = combine_nested( objs, concat_dim="x", join="outer", combine_attrs=combine_attrs ) def test_combine_coords_combine_attrs_identical(self): objs = [ Dataset({"x": [0], "y": [0]}, attrs={"a": 1}), Dataset({"x": [1], "y": [1]}, attrs={"a": 1}), ] expected = Dataset({"x": [0, 1], "y": [0, 1]}, attrs={"a": 1}) actual = combine_nested( objs, concat_dim="x", join="outer", combine_attrs="identical" ) assert_identical(expected, actual) objs[1].attrs["b"] = 2 with pytest.raises(ValueError, match=r"combine_attrs='identical'"): actual = combine_nested( objs, concat_dim="x", join="outer", combine_attrs="identical" ) def test_combine_nested_combine_attrs_drop_conflicts(self): objs = [ Dataset({"x": [0], "y": [0]}, attrs={"a": 1, "b": 2, "c": 3}), Dataset({"x": [1], "y": [1]}, attrs={"a": 1, "b": 0, "d": 3}), ] expected = Dataset({"x": [0, 1], "y": [0, 1]}, attrs={"a": 1, "c": 3, "d": 3}) actual = combine_nested( objs, concat_dim="x", join="outer", combine_attrs="drop_conflicts" ) assert_identical(expected, actual) @pytest.mark.parametrize( "combine_attrs, attrs1, attrs2, expected_attrs, expect_exception", [ ( "no_conflicts", {"a": 1, "b": 2}, {"a": 1, "c": 3}, {"a": 1, "b": 2, "c": 3}, False, ), ("no_conflicts", {"a": 1, "b": 2}, {}, {"a": 1, "b": 2}, False), ("no_conflicts", {}, {"a": 1, "c": 3}, {"a": 1, "c": 3}, False), ( "no_conflicts", {"a": 1, "b": 2}, {"a": 4, "c": 3}, {"a": 1, "b": 2, "c": 3}, True, ), ("drop", {"a": 1, "b": 2}, {"a": 1, "c": 3}, {}, False), ("identical", {"a": 1, "b": 2}, {"a": 1, "b": 2}, {"a": 1, "b": 2}, False), ("identical", {"a": 1, "b": 2}, {"a": 1, "c": 3}, {"a": 1, "b": 2}, True), ( "override", {"a": 1, "b": 2}, {"a": 4, "b": 5, "c": 3}, {"a": 1, "b": 2}, False, ), ( "drop_conflicts", {"a": 1, "b": 2, "c": 3}, {"b": 1, "c": 3, "d": 4}, {"a": 1, "c": 3, "d": 4}, False, ), ], ) def test_combine_nested_combine_attrs_variables( self, combine_attrs, attrs1, attrs2, expected_attrs, expect_exception ): """check that combine_attrs is used on data variables and coords""" data1 = Dataset( { "a": ("x", [1, 2], attrs1), "b": ("x", [3, -1], attrs1), "x": ("x", [0, 1], attrs1), } ) data2 = Dataset( { "a": ("x", [2, 3], attrs2), "b": ("x", [-2, 1], attrs2), "x": ("x", [2, 3], attrs2), } ) if expect_exception: with pytest.raises(MergeError, match="combine_attrs"): combine_by_coords([data1, data2], combine_attrs=combine_attrs) else: actual = combine_by_coords([data1, data2], combine_attrs=combine_attrs) expected = Dataset( { "a": ("x", [1, 2, 2, 3], expected_attrs), 
"b": ("x", [3, -1, -2, 1], expected_attrs), }, {"x": ("x", [0, 1, 2, 3], expected_attrs)}, ) assert_identical(actual, expected) @pytest.mark.parametrize( "combine_attrs, attrs1, attrs2, expected_attrs, expect_exception", [ ( "no_conflicts", {"a": 1, "b": 2}, {"a": 1, "c": 3}, {"a": 1, "b": 2, "c": 3}, False, ), ("no_conflicts", {"a": 1, "b": 2}, {}, {"a": 1, "b": 2}, False), ("no_conflicts", {}, {"a": 1, "c": 3}, {"a": 1, "c": 3}, False), ( "no_conflicts", {"a": 1, "b": 2}, {"a": 4, "c": 3}, {"a": 1, "b": 2, "c": 3}, True, ), ("drop", {"a": 1, "b": 2}, {"a": 1, "c": 3}, {}, False), ("identical", {"a": 1, "b": 2}, {"a": 1, "b": 2}, {"a": 1, "b": 2}, False), ("identical", {"a": 1, "b": 2}, {"a": 1, "c": 3}, {"a": 1, "b": 2}, True), ( "override", {"a": 1, "b": 2}, {"a": 4, "b": 5, "c": 3}, {"a": 1, "b": 2}, False, ), ( "drop_conflicts", {"a": 1, "b": 2, "c": 3}, {"b": 1, "c": 3, "d": 4}, {"a": 1, "c": 3, "d": 4}, False, ), ], ) def test_combine_by_coords_combine_attrs_variables( self, combine_attrs, attrs1, attrs2, expected_attrs, expect_exception ): """check that combine_attrs is used on data variables and coords""" data1 = Dataset( {"x": ("a", [0], attrs1), "y": ("a", [0], attrs1), "a": ("a", [0], attrs1)} ) data2 = Dataset( {"x": ("a", [1], attrs2), "y": ("a", [1], attrs2), "a": ("a", [1], attrs2)} ) if expect_exception: with pytest.raises(MergeError, match="combine_attrs"): combine_by_coords([data1, data2], combine_attrs=combine_attrs) else: actual = combine_by_coords([data1, data2], combine_attrs=combine_attrs) expected = Dataset( { "x": ("a", [0, 1], expected_attrs), "y": ("a", [0, 1], expected_attrs), "a": ("a", [0, 1], expected_attrs), } ) assert_identical(actual, expected) def test_infer_order_from_coords(self): data = create_test_data() objs = [data.isel(dim2=slice(4, 9)), data.isel(dim2=slice(4))] actual = combine_by_coords(objs, data_vars="all") expected = data assert expected.broadcast_equals(actual) # type: ignore[arg-type] with set_options(use_new_combine_kwarg_defaults=True): actual = combine_by_coords(objs) assert_identical(actual, expected) def test_combine_leaving_bystander_dimensions(self): # Check non-monotonic bystander dimension coord doesn't raise # ValueError on combine (https://github.com/pydata/xarray/issues/3150) ycoord = ["a", "c", "b"] data = np.random.rand(7, 3) ds1 = Dataset( data_vars=dict(data=(["x", "y"], data[:3, :])), coords=dict(x=[1, 2, 3], y=ycoord), ) ds2 = Dataset( data_vars=dict(data=(["x", "y"], data[3:, :])), coords=dict(x=[4, 5, 6, 7], y=ycoord), ) expected = Dataset( data_vars=dict(data=(["x", "y"], data)), coords=dict(x=[1, 2, 3, 4, 5, 6, 7], y=ycoord), ) actual = combine_by_coords((ds1, ds2)) assert_identical(expected, actual) def test_combine_by_coords_previously_failed(self): # In the above scenario, one file is missing, containing the data for # one year's data for one variable. 
datasets = [ Dataset({"a": ("x", [0]), "x": [0]}), Dataset({"b": ("x", [0]), "x": [0]}), Dataset({"a": ("x", [1]), "x": [1]}), ] expected = Dataset({"a": ("x", [0, 1]), "b": ("x", [0, np.nan])}, {"x": [0, 1]}) actual = combine_by_coords(datasets, join="outer") assert_identical(expected, actual) def test_combine_by_coords_still_fails(self): # concat can't handle new variables (yet): # https://github.com/pydata/xarray/issues/508 datasets = [Dataset({"x": 0}, {"y": 0}), Dataset({"x": 1}, {"y": 1, "z": 1})] with pytest.raises(ValueError): combine_by_coords(datasets, "y") # type: ignore[arg-type] def test_combine_by_coords_no_concat(self): objs = [Dataset({"x": 0}), Dataset({"y": 1})] actual = combine_by_coords(objs) expected = Dataset({"x": 0, "y": 1}) assert_identical(expected, actual) objs = [Dataset({"x": 0, "y": 1}), Dataset({"y": np.nan, "z": 2})] actual = combine_by_coords(objs, compat="no_conflicts") expected = Dataset({"x": 0, "y": 1, "z": 2}) assert_identical(expected, actual) def test_check_for_impossible_ordering(self): ds0 = Dataset({"x": [0, 1, 5]}) ds1 = Dataset({"x": [2, 3]}) with pytest.raises( ValueError, match=r"does not have monotonic global indexes along dimension x", ): combine_by_coords([ds1, ds0]) def test_combine_by_coords_incomplete_hypercube(self): # test that this succeeds with default fill_value x1 = Dataset({"a": (("y", "x"), [[1]])}, coords={"y": [0], "x": [0]}) x2 = Dataset({"a": (("y", "x"), [[1]])}, coords={"y": [1], "x": [0]}) x3 = Dataset({"a": (("y", "x"), [[1]])}, coords={"y": [0], "x": [1]}) actual = combine_by_coords([x1, x2, x3], join="outer") expected = Dataset( {"a": (("y", "x"), [[1, 1], [1, np.nan]])}, coords={"y": [0, 1], "x": [0, 1]}, ) assert_identical(expected, actual) # test that this fails if fill_value is None with pytest.raises( ValueError, match="supplied objects do not form a hypercube" ): combine_by_coords([x1, x2, x3], join="outer", fill_value=None) def test_combine_by_coords_override_order(self) -> None: # regression test for https://github.com/pydata/xarray/issues/8828 x1 = Dataset({"a": (("y", "x"), [[1]])}, coords={"y": [0], "x": [0]}) x2 = Dataset( {"a": (("y", "x"), [[2]]), "b": (("y", "x"), [[1]])}, coords={"y": [0], "x": [0]}, ) actual = combine_by_coords([x1, x2], compat="override") assert_equal(actual["a"], actual["b"]) assert_equal(actual["a"], x1["a"]) actual = combine_by_coords([x2, x1], compat="override") assert_equal(actual["a"], x2["a"]) class TestCombineMixedObjectsbyCoords: def test_combine_by_coords_mixed_unnamed_dataarrays(self): named_da = DataArray(name="a", data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x") unnamed_da = DataArray(data=[3.0, 4.0], coords={"x": [2, 3]}, dims="x") with pytest.raises( ValueError, match="Can't automatically combine unnamed DataArrays with" ): combine_by_coords([named_da, unnamed_da]) da = DataArray([0, 1], dims="x", coords=({"x": [0, 1]})) ds = Dataset({"x": [2, 3]}) with pytest.raises( ValueError, match="Can't automatically combine unnamed DataArrays with", ): combine_by_coords([da, ds]) def test_combine_coords_mixed_datasets_named_dataarrays(self): da = DataArray(name="a", data=[4, 5], dims="x", coords=({"x": [0, 1]})) ds = Dataset({"b": ("x", [2, 3])}) actual = combine_by_coords([da, ds]) expected = Dataset( {"a": ("x", [4, 5]), "b": ("x", [2, 3])}, coords={"x": ("x", [0, 1])} ) assert_identical(expected, actual) def test_combine_by_coords_all_unnamed_dataarrays(self): unnamed_array = DataArray(data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x") actual = combine_by_coords([unnamed_array]) 
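# A list holding a single unnamed DataArray should round-trip unchanged: the result stays a DataArray instead of being promoted to a Dataset.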
expected = unnamed_array assert_identical(expected, actual) unnamed_array1 = DataArray(data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x") unnamed_array2 = DataArray(data=[3.0, 4.0], coords={"x": [2, 3]}, dims="x") actual = combine_by_coords([unnamed_array1, unnamed_array2]) expected = DataArray( data=[1.0, 2.0, 3.0, 4.0], coords={"x": [0, 1, 2, 3]}, dims="x" ) assert_identical(expected, actual) def test_combine_by_coords_all_named_dataarrays(self): named_da = DataArray(name="a", data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x") actual = combine_by_coords([named_da]) expected = named_da.to_dataset() assert_identical(expected, actual) named_da1 = DataArray(name="a", data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x") named_da2 = DataArray(name="b", data=[3.0, 4.0], coords={"x": [2, 3]}, dims="x") actual = combine_by_coords([named_da1, named_da2], join="outer") expected = Dataset( { "a": DataArray(data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x"), "b": DataArray(data=[3.0, 4.0], coords={"x": [2, 3]}, dims="x"), } ) assert_identical(expected, actual) def test_combine_by_coords_all_dataarrays_with_the_same_name(self): named_da1 = DataArray(name="a", data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x") named_da2 = DataArray(name="a", data=[3.0, 4.0], coords={"x": [2, 3]}, dims="x") actual = combine_by_coords([named_da1, named_da2], join="outer") expected = merge([named_da1, named_da2], compat="no_conflicts", join="outer") assert_identical(expected, actual) def test_combine_by_coords_datatree(self): tree = DataTree.from_dict({"/nested/foo": ("x", [10])}, coords={"x": [1]}) with pytest.raises( NotImplementedError, match=re.escape( "combine_by_coords() does not yet support DataTree objects." ), ): combine_by_coords([tree]) # type: ignore[list-item] class TestNewDefaults: def test_concat_along_existing_dim(self): concat_dim = "dim1" ds = create_test_data with set_options(use_new_combine_kwarg_defaults=False): old = concat([ds(0), ds(1)], dim=concat_dim) with set_options(use_new_combine_kwarg_defaults=True): new = concat([ds(0), ds(1)], dim=concat_dim) assert_identical(old, new) def test_concat_along_new_dim(self): concat_dim = "new_dim" ds = create_test_data with set_options(use_new_combine_kwarg_defaults=False): old = concat([ds(0), ds(1)], dim=concat_dim) with set_options(use_new_combine_kwarg_defaults=True): new = concat([ds(0), ds(1)], dim=concat_dim) assert concat_dim in old.dims assert concat_dim in new.dims def test_nested_merge_with_overlapping_values(self): ds1 = Dataset({"a": ("x", [1, 2]), "x": [0, 1]}) ds2 = Dataset({"a": ("x", [2, 3]), "x": [1, 2]}) expected = Dataset({"a": ("x", [1, 2, 3]), "x": [0, 1, 2]}) with set_options(use_new_combine_kwarg_defaults=False): with pytest.warns( FutureWarning, match="will change from join='outer' to join='exact'" ): with pytest.warns( FutureWarning, match="will change from compat='no_conflicts' to compat='override'", ): old = combine_nested([ds1, ds2], concat_dim=None) with set_options(use_new_combine_kwarg_defaults=True): with pytest.raises(ValueError, match="might be related to new default"): combine_nested([ds1, ds2], concat_dim=None) assert_identical(old, expected) def test_nested_merge_with_nan_order_matters(self): ds1 = Dataset({"x": 0}) ds2 = Dataset({"x": np.nan}) with set_options(use_new_combine_kwarg_defaults=False): with pytest.warns( FutureWarning, match="will change from compat='no_conflicts' to compat='override'", ): old = combine_nested([ds1, ds2], concat_dim=None) with set_options(use_new_combine_kwarg_defaults=True): new = combine_nested([ds1, 
ds2], concat_dim=None) assert_identical(ds1, old) assert_identical(old, new) with set_options(use_new_combine_kwarg_defaults=False): with pytest.warns( FutureWarning, match="will change from compat='no_conflicts' to compat='override'", ): old = combine_nested([ds2, ds1], concat_dim=None) with set_options(use_new_combine_kwarg_defaults=True): new = combine_nested([ds2, ds1], concat_dim=None) assert_identical(ds1, old) with pytest.raises(AssertionError): assert_identical(old, new) def test_nested_merge_with_concat_dim_explicitly_provided(self): # Test the issue reported in GH #1988 objs = [Dataset({"x": 0, "y": 1})] dim = DataArray([100], name="baz", dims="baz") expected = Dataset({"x": ("baz", [0]), "y": ("baz", [1])}, {"baz": [100]}) with set_options(use_new_combine_kwarg_defaults=False): old = combine_nested(objs, concat_dim=dim) with set_options(use_new_combine_kwarg_defaults=True): new = combine_nested(objs, concat_dim=dim) assert_identical(expected, old) assert_identical(old, new) def test_combine_nested_missing_data_new_dim(self): # Your data includes "time" and "station" dimensions, and each year's # data has a different set of stations. datasets = [ Dataset({"a": ("x", [2, 3]), "x": [1, 2]}), Dataset({"a": ("x", [1, 2]), "x": [0, 1]}), ] expected = Dataset( {"a": (("t", "x"), [[np.nan, 2, 3], [1, 2, np.nan]])}, {"x": [0, 1, 2]} ) with set_options(use_new_combine_kwarg_defaults=False): with pytest.warns( FutureWarning, match="will change from join='outer' to join='exact'" ): old = combine_nested(datasets, concat_dim="t") with set_options(use_new_combine_kwarg_defaults=True): with pytest.raises(ValueError, match="might be related to new default"): combine_nested(datasets, concat_dim="t") new = combine_nested(datasets, concat_dim="t", join="outer") assert_identical(expected, old) assert_identical(expected, new) def test_combine_by_coords_multiple_variables(self): objs = [Dataset({"x": [0], "y": [0]}), Dataset({"y": [1], "x": [1]})] expected = Dataset({"x": [0, 1], "y": [0, 1]}) with set_options(use_new_combine_kwarg_defaults=False): with pytest.warns( FutureWarning, match="will change from join='outer' to join='exact'" ): old = combine_by_coords(objs) with set_options(use_new_combine_kwarg_defaults=True): with pytest.raises(ValueError, match="might be related to new default"): combine_by_coords(objs) assert_identical(old, expected) @requires_cftime def test_combine_by_coords_distant_cftime_dates(): # Regression test for https://github.com/pydata/xarray/issues/3535 import cftime time_1 = [cftime.DatetimeGregorian(4500, 12, 31)] time_2 = [cftime.DatetimeGregorian(4600, 12, 31)] time_3 = [cftime.DatetimeGregorian(5100, 12, 31)] da_1 = DataArray([0], dims=["time"], coords=[time_1], name="a").to_dataset() da_2 = DataArray([1], dims=["time"], coords=[time_2], name="a").to_dataset() da_3 = DataArray([2], dims=["time"], coords=[time_3], name="a").to_dataset() result = combine_by_coords([da_1, da_2, da_3]) expected_time = np.concatenate([time_1, time_2, time_3]) expected = DataArray( [0, 1, 2], dims=["time"], coords=[expected_time], name="a" ).to_dataset() assert_identical(result, expected) @requires_cftime def test_combine_by_coords_raises_for_differing_calendars(): # previously failed with uninformative StopIteration instead of TypeError # https://github.com/pydata/xarray/issues/4495 import cftime time_1 = [cftime.DatetimeGregorian(2000, 1, 1)] time_2 = [cftime.DatetimeProlepticGregorian(2001, 1, 1)] da_1 = DataArray([0], dims=["time"], coords=[time_1], name="a").to_dataset() da_2 = 
DataArray([1], dims=["time"], coords=[time_2], name="a").to_dataset() error_msg = ( "Cannot combine along dimension 'time' with mixed types." " Found:.*" " If importing data directly from a file then setting" " `use_cftime=True` may fix this issue." ) with pytest.raises(TypeError, match=error_msg): combine_by_coords([da_1, da_2]) def test_combine_by_coords_raises_for_differing_types(): # str and byte cannot be compared da_1 = DataArray([0], dims=["time"], coords=[["a"]], name="a").to_dataset() da_2 = DataArray([1], dims=["time"], coords=[[b"b"]], name="a").to_dataset() with pytest.raises( TypeError, match=r"Cannot combine along dimension 'time' with mixed types." ): combine_by_coords([da_1, da_2]) python-xarray-2026.01.0/xarray/tests/test_extensions.py0000664000175000017500000000570615136607163023333 0ustar alastairalastairfrom __future__ import annotations import pickle import pytest import xarray as xr from xarray.core.extensions import register_datatree_accessor from xarray.tests import assert_identical @register_datatree_accessor("example_accessor") @xr.register_dataset_accessor("example_accessor") @xr.register_dataarray_accessor("example_accessor") class ExampleAccessor: """For the pickling tests below.""" def __init__(self, xarray_obj): self.obj = xarray_obj class TestAccessor: def test_register(self) -> None: @register_datatree_accessor("demo") @xr.register_dataset_accessor("demo") @xr.register_dataarray_accessor("demo") class DemoAccessor: """Demo accessor.""" def __init__(self, xarray_obj): self._obj = xarray_obj @property def foo(self): return "bar" dt: xr.DataTree = xr.DataTree() assert dt.demo.foo == "bar" ds = xr.Dataset() assert ds.demo.foo == "bar" da = xr.DataArray(0) assert da.demo.foo == "bar" # accessor is cached assert ds.demo is ds.demo # check descriptor assert ds.demo.__doc__ == "Demo accessor." # TODO: typing doesn't seem to work with accessors assert xr.Dataset.demo.__doc__ == "Demo accessor." 
# type: ignore[attr-defined] assert isinstance(ds.demo, DemoAccessor) assert xr.Dataset.demo is DemoAccessor # type: ignore[attr-defined] # ensure we can remove it del xr.Dataset.demo # type: ignore[attr-defined] assert not hasattr(xr.Dataset, "demo") with pytest.warns(Warning, match="overriding a preexisting attribute"): @xr.register_dataarray_accessor("demo") class Foo: pass # it didn't get registered again assert not hasattr(xr.Dataset, "demo") def test_pickle_dataset(self) -> None: ds = xr.Dataset() ds_restored = pickle.loads(pickle.dumps(ds)) assert_identical(ds, ds_restored) # state save on the accessor is restored assert ds.example_accessor is ds.example_accessor ds.example_accessor.value = "foo" ds_restored = pickle.loads(pickle.dumps(ds)) assert_identical(ds, ds_restored) assert ds_restored.example_accessor.value == "foo" def test_pickle_dataarray(self) -> None: array = xr.Dataset() assert array.example_accessor is array.example_accessor array_restored = pickle.loads(pickle.dumps(array)) assert_identical(array, array_restored) def test_broken_accessor(self) -> None: # regression test for GH933 @xr.register_dataset_accessor("stupid_accessor") class BrokenAccessor: def __init__(self, xarray_obj): raise AttributeError("broken") with pytest.raises(RuntimeError, match=r"error initializing"): _ = xr.Dataset().stupid_accessor python-xarray-2026.01.0/xarray/tests/test_backends_api.py0000664000175000017500000002720715136607163023537 0ustar alastairalastairfrom __future__ import annotations import io import re import sys from numbers import Number import numpy as np import pytest import xarray as xr from xarray.backends.writers import get_default_netcdf_write_engine from xarray.tests import ( assert_identical, assert_no_warnings, requires_dask, requires_h5netcdf, requires_netCDF4, requires_scipy, ) @requires_netCDF4 @requires_scipy @requires_h5netcdf def test_get_default_netcdf_write_engine() -> None: assert xr.get_options()["netcdf_engine_order"] == ("netcdf4", "h5netcdf", "scipy") engine = get_default_netcdf_write_engine("", format=None) assert engine == "netcdf4" engine = get_default_netcdf_write_engine("", format="NETCDF4") assert engine == "netcdf4" engine = get_default_netcdf_write_engine("", format="NETCDF4_CLASSIC") assert engine == "netcdf4" engine = get_default_netcdf_write_engine("", format="NETCDF3_CLASSIC") assert engine == "netcdf4" engine = get_default_netcdf_write_engine(io.BytesIO(), format=None) assert engine == "h5netcdf" engine = get_default_netcdf_write_engine(io.BytesIO(), format="NETCDF4") assert engine == "h5netcdf" engine = get_default_netcdf_write_engine(io.BytesIO(), format="NETCDF3_CLASSIC") assert engine == "scipy" engine = get_default_netcdf_write_engine("path.zarr#mode=nczarr", format=None) assert engine == "netcdf4" with xr.set_options(netcdf_engine_order=["netcdf4", "scipy", "h5netcdf"]): engine = get_default_netcdf_write_engine(io.BytesIO(), format=None) assert engine == "scipy" engine = get_default_netcdf_write_engine(io.BytesIO(), format="NETCDF4") assert engine == "h5netcdf" engine = get_default_netcdf_write_engine(io.BytesIO(), format="NETCDF3_CLASSIC") assert engine == "scipy" with xr.set_options(netcdf_engine_order=["h5netcdf", "scipy", "netcdf4"]): engine = get_default_netcdf_write_engine("", format=None) assert engine == "h5netcdf" engine = get_default_netcdf_write_engine("", format="NETCDF4") assert engine == "h5netcdf" engine = get_default_netcdf_write_engine("", format="NETCDF4_CLASSIC") assert engine == "netcdf4" engine = 
get_default_netcdf_write_engine(io.BytesIO(), format="NETCDF4") assert engine == "h5netcdf" engine = get_default_netcdf_write_engine("", format="NETCDF3_CLASSIC") assert engine == "scipy" engine = get_default_netcdf_write_engine(io.BytesIO(), format="NETCDF3_CLASSIC") assert engine == "scipy" @requires_h5netcdf def test_default_engine_h5netcdf(monkeypatch): """Test the default netcdf engine when h5netcdf is the only importable module.""" monkeypatch.delitem(sys.modules, "netCDF4", raising=False) monkeypatch.delitem(sys.modules, "scipy", raising=False) monkeypatch.setattr(sys, "meta_path", []) engine = get_default_netcdf_write_engine("", format=None) assert engine == "h5netcdf" with pytest.raises( ValueError, match=re.escape( "cannot write NetCDF files with format='NETCDF3_CLASSIC' because " "none of the suitable backend libraries (SUITABLE_BACKENDS) are installed" ).replace("SUITABLE_BACKENDS", r"(scipy, netCDF4)|(netCDF4, scipy)"), ): get_default_netcdf_write_engine("", format="NETCDF3_CLASSIC") def test_default_engine_nczarr_no_netcdf4_python(monkeypatch): monkeypatch.delitem(sys.modules, "netCDF4", raising=False) monkeypatch.setattr(sys, "meta_path", []) with pytest.raises( ValueError, match=re.escape( "cannot write NetCDF files in NCZarr format because " "none of the suitable backend libraries (netCDF4) are installed" ), ): get_default_netcdf_write_engine("#mode=nczarr", format=None) def test_custom_engine() -> None: expected = xr.Dataset( dict(a=2 * np.arange(5)), coords=dict(x=("x", np.arange(5), dict(units="s"))) ) class CustomBackend(xr.backends.BackendEntrypoint): def open_dataset( self, filename_or_obj, drop_variables=None, **kwargs, ) -> xr.Dataset: return expected.copy(deep=True) actual = xr.open_dataset("fake_filename", engine=CustomBackend) assert_identical(expected, actual) def test_multiindex() -> None: # GH7139 # Check that we properly handle backends that change index variables dataset = xr.Dataset(coords={"coord1": ["A", "B"], "coord2": [1, 2]}) dataset = dataset.stack(z=["coord1", "coord2"]) class MultiindexBackend(xr.backends.BackendEntrypoint): def open_dataset( self, filename_or_obj, drop_variables=None, **kwargs, ) -> xr.Dataset: return dataset.copy(deep=True) loaded = xr.open_dataset("fake_filename", engine=MultiindexBackend) assert_identical(dataset, loaded) class PassThroughBackendEntrypoint(xr.backends.BackendEntrypoint): """Access an object passed to the `open_dataset` method.""" def open_dataset(self, dataset, *, drop_variables=None): """Return the first argument.""" return dataset def explicit_chunks(chunks, shape): """Return explicit chunks, expanding any integer member to a tuple of integers.""" # Emulate `dask.array.core.normalize_chunks` but for simpler inputs. 
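# Illustrative (added) example: explicit_chunks((2, (1, 1, 2, 1)), (5, 5)) returns ((2, 2, 1), (1, 1, 2, 1)); integer chunks expand to repeated full chunks plus any remainder, while tuple chunks pass through unchanged.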
return tuple( ( ( (size // chunk) * (chunk,) + ((size % chunk,) if size % chunk or size == 0 else ()) ) if isinstance(chunk, Number) else chunk ) for chunk, size in zip(chunks, shape, strict=True) ) @requires_dask class TestPreferredChunks: """Test behaviors related to the backend's preferred chunks.""" var_name = "data" def create_dataset(self, shape, pref_chunks): """Return a dataset with a variable with the given shape and preferred chunks.""" dims = tuple(f"dim_{idx}" for idx in range(len(shape))) return xr.Dataset( { self.var_name: xr.Variable( dims, np.empty(shape, dtype=np.dtype("V1")), encoding={ "preferred_chunks": dict(zip(dims, pref_chunks, strict=True)) }, ) } ) def check_dataset(self, initial, final, expected_chunks): assert_identical(initial, final) assert final[self.var_name].chunks == expected_chunks @pytest.mark.parametrize( "shape,pref_chunks", [ # Represent preferred chunking with int. ((5,), (2,)), # Represent preferred chunking with tuple. ((5,), ((2, 2, 1),)), # Represent preferred chunking with int in two dims. ((5, 6), (4, 2)), # Represent preferred chunking with tuple in second dim. ((5, 6), (4, (2, 2, 2))), ], ) @pytest.mark.parametrize("request_with_empty_map", [False, True]) def test_honor_chunks(self, shape, pref_chunks, request_with_empty_map): """Honor the backend's preferred chunks when opening a dataset.""" initial = self.create_dataset(shape, pref_chunks) # To keep the backend's preferred chunks, the `chunks` argument must be an # empty mapping or map dimensions to `None`. chunks = ( {} if request_with_empty_map else dict.fromkeys(initial[self.var_name].dims, None) ) final = xr.open_dataset( initial, engine=PassThroughBackendEntrypoint, chunks=chunks ) self.check_dataset(initial, final, explicit_chunks(pref_chunks, shape)) @pytest.mark.parametrize( "shape,pref_chunks,req_chunks", [ # Preferred chunking is int; requested chunking is int. ((5,), (2,), (3,)), # Preferred chunking is int; requested chunking is tuple. ((5,), (2,), ((2, 1, 1, 1),)), # Preferred chunking is tuple; requested chunking is int. ((5,), ((2, 2, 1),), (3,)), # Preferred chunking is tuple; requested chunking is tuple. ((5,), ((2, 2, 1),), ((2, 1, 1, 1),)), # Split chunks along a dimension other than the first. ((1, 5), (1, 2), (1, 3)), ], ) def test_split_chunks(self, shape, pref_chunks, req_chunks): """Warn when the requested chunks separate the backend's preferred chunks.""" initial = self.create_dataset(shape, pref_chunks) with pytest.warns(UserWarning): final = xr.open_dataset( initial, engine=PassThroughBackendEntrypoint, chunks=dict(zip(initial[self.var_name].dims, req_chunks, strict=True)), ) self.check_dataset(initial, final, explicit_chunks(req_chunks, shape)) @pytest.mark.parametrize( "shape,pref_chunks,req_chunks", [ # Keep preferred chunks using int representation. ((5,), (2,), (2,)), # Keep preferred chunks using tuple representation. ((5,), (2,), ((2, 2, 1),)), # Join chunks, leaving a final short chunk. ((5,), (2,), (4,)), # Join all chunks with an int larger than the dimension size. ((5,), (2,), (6,)), # Join one chunk using tuple representation. ((5,), (1,), ((1, 1, 2, 1),)), # Join one chunk using int representation. ((5,), ((1, 1, 2, 1),), (2,)), # Join multiple chunks using tuple representation. ((5,), ((1, 1, 2, 1),), ((2, 3),)), # Join chunks in multiple dimensions. 
((5, 5), (2, (1, 1, 2, 1)), (4, (2, 3))), ], ) def test_join_chunks(self, shape, pref_chunks, req_chunks): """Don't warn when the requested chunks join or keep the preferred chunks.""" initial = self.create_dataset(shape, pref_chunks) with assert_no_warnings(): final = xr.open_dataset( initial, engine=PassThroughBackendEntrypoint, chunks=dict(zip(initial[self.var_name].dims, req_chunks, strict=True)), ) self.check_dataset(initial, final, explicit_chunks(req_chunks, shape)) @pytest.mark.parametrize("create_default_indexes", [True, False]) def test_default_indexes(self, create_default_indexes): """Create default indexes if the backend does not create them.""" coords = xr.Coordinates({"x": ("x", [0, 1]), "y": list("abc")}, indexes={}) initial = xr.Dataset({"a": ("x", [1, 2])}, coords=coords) with assert_no_warnings(): final = xr.open_dataset( initial, engine=PassThroughBackendEntrypoint, create_default_indexes=create_default_indexes, ) if create_default_indexes: assert all(name in final.xindexes for name in ["x", "y"]) else: assert len(final.xindexes) == 0 @pytest.mark.parametrize("create_default_indexes", [True, False]) def test_default_indexes_passthrough(self, create_default_indexes): """Allow creating indexes in the backend.""" initial = xr.Dataset( {"a": (["x", "y"], [[1, 2, 3], [4, 5, 6]])}, coords={"x": ("x", [0, 1]), "y": ("y", list("abc"))}, ).stack(z=["x", "y"]) with assert_no_warnings(): final = xr.open_dataset( initial, engine=PassThroughBackendEntrypoint, create_default_indexes=create_default_indexes, ) assert initial.coords.equals(final.coords) python-xarray-2026.01.0/xarray/tests/test_conventions.py0000664000175000017500000006476715136607163023515 0ustar alastairalastairfrom __future__ import annotations import contextlib import warnings import numpy as np import pandas as pd import pytest from xarray import ( Dataset, SerializationWarning, Variable, coding, conventions, date_range, open_dataset, ) from xarray.backends.common import WritableCFDataStore from xarray.backends.memory import InMemoryDataStore from xarray.coders import CFDatetimeCoder, CFTimedeltaCoder from xarray.conventions import decode_cf from xarray.testing import assert_identical from xarray.tests import ( assert_array_equal, requires_cftime, requires_dask, requires_netCDF4, ) from xarray.tests.test_backends import CFEncodedBase class TestBoolTypeArray: def test_booltype_array(self) -> None: x = np.array([1, 0, 1, 1, 0], dtype="i1") bx = coding.variables.BoolTypeArray(x) assert bx.dtype == bool assert_array_equal(bx, np.array([True, False, True, True, False], dtype=bool)) x = np.array([[1, 0, 1], [0, 1, 0]], dtype="i1") bx = coding.variables.BoolTypeArray(x) assert_array_equal(bx.transpose((1, 0)), x.transpose((1, 0))) class TestNativeEndiannessArray: def test(self) -> None: x = np.arange(5, dtype=">i8") expected = np.arange(5, dtype="int64") a = coding.variables.NativeEndiannessArray(x) assert a.dtype == expected.dtype assert a.dtype == expected[:].dtype assert_array_equal(a, expected) y = np.arange(6, dtype=">i8").reshape((2, 3)) b = coding.variables.NativeEndiannessArray(y) expected2 = np.arange(6, dtype="int64").reshape((2, 3)) assert_array_equal(b.transpose((1, 0)), expected2.transpose((1, 0))) def test_decode_cf_with_conflicting_fill_missing_value() -> None: expected = Variable(["t"], [np.nan, np.nan, 2], {"units": "foobar"}) var = Variable( ["t"], np.arange(3), {"units": "foobar", "missing_value": 0, "_FillValue": 1} ) with pytest.warns(SerializationWarning, match="has multiple fill"): actual = 
conventions.decode_cf_variable("t", var) assert_identical(actual, expected) expected = Variable(["t"], np.arange(10), {"units": "foobar"}) var = Variable( ["t"], np.arange(10), {"units": "foobar", "missing_value": np.nan, "_FillValue": np.nan}, ) # the following code issues two warnings, so we need to check for both with pytest.warns(SerializationWarning) as winfo: actual = conventions.decode_cf_variable("t", var) for aw in winfo: assert "non-conforming" in str(aw.message) assert_identical(actual, expected) var = Variable( ["t"], np.arange(10), { "units": "foobar", "missing_value": np.float32(np.nan), "_FillValue": np.float32(np.nan), }, ) # the following code issues two warnings, so we need to check for both with pytest.warns(SerializationWarning) as winfo: actual = conventions.decode_cf_variable("t", var) for aw in winfo: assert "non-conforming" in str(aw.message) assert_identical(actual, expected) def test_decode_cf_variable_with_mismatched_coordinates() -> None: # tests for decoding mismatched coordinates attributes # see GH #1809 zeros1 = np.zeros((1, 5, 3)) orig = Dataset( { "XLONG": (["x", "y"], zeros1.squeeze(0), {}), "XLAT": (["x", "y"], zeros1.squeeze(0), {}), "foo": (["time", "x", "y"], zeros1, {"coordinates": "XTIME XLONG XLAT"}), "time": ("time", [0.0], {"units": "hours since 2017-01-01"}), } ) decoded = conventions.decode_cf(orig, decode_coords=True) assert decoded["foo"].encoding["coordinates"] == "XTIME XLONG XLAT" assert list(decoded.coords.keys()) == ["XLONG", "XLAT", "time"] decoded = conventions.decode_cf(orig, decode_coords=False) assert "coordinates" not in decoded["foo"].encoding assert decoded["foo"].attrs.get("coordinates") == "XTIME XLONG XLAT" assert list(decoded.coords.keys()) == ["time"] @requires_cftime class TestEncodeCFVariable: def test_incompatible_attributes(self) -> None: invalid_vars = [ Variable( ["t"], pd.date_range("2000-01-01", periods=3), {"units": "foobar"} ), Variable(["t"], pd.to_timedelta(["1 day"]), {"units": "foobar"}), # type: ignore[arg-type, unused-ignore] Variable(["t"], [0, 1, 2], {"add_offset": 0}, {"add_offset": 2}), Variable(["t"], [0, 1, 2], {"_FillValue": 0}, {"_FillValue": 2}), ] for var in invalid_vars: with pytest.raises(ValueError): conventions.encode_cf_variable(var) def test_missing_fillvalue(self) -> None: v = Variable(["x"], np.array([np.nan, 1, 2, 3])) v.encoding = {"dtype": "int16"} # Expect both the SerializationWarning and the RuntimeWarning from numpy with pytest.warns(Warning) as record: conventions.encode_cf_variable(v) # Check we got the expected warnings warning_messages = [str(w.message) for w in record] assert any( "floating point data as an integer" in msg for msg in warning_messages ) assert any( "invalid value encountered in cast" in msg for msg in warning_messages ) def test_multidimensional_coordinates(self) -> None: # regression test for GH1763 # Set up test case with coordinates that have overlapping (but not # identical) dimensions. 
zeros1 = np.zeros((1, 5, 3)) zeros2 = np.zeros((1, 6, 3)) zeros3 = np.zeros((1, 5, 4)) orig = Dataset( { "lon1": (["x1", "y1"], zeros1.squeeze(0), {}), "lon2": (["x2", "y1"], zeros2.squeeze(0), {}), "lon3": (["x1", "y2"], zeros3.squeeze(0), {}), "lat1": (["x1", "y1"], zeros1.squeeze(0), {}), "lat2": (["x2", "y1"], zeros2.squeeze(0), {}), "lat3": (["x1", "y2"], zeros3.squeeze(0), {}), "foo1": (["time", "x1", "y1"], zeros1, {"coordinates": "lon1 lat1"}), "foo2": (["time", "x2", "y1"], zeros2, {"coordinates": "lon2 lat2"}), "foo3": (["time", "x1", "y2"], zeros3, {"coordinates": "lon3 lat3"}), "time": ("time", [0.0], {"units": "hours since 2017-01-01"}), } ) orig = conventions.decode_cf(orig) # Encode the coordinates, as they would be in a netCDF output file. enc, attrs = conventions.encode_dataset_coordinates(orig) # Make sure we have the right coordinates for each variable. foo1_coords = enc["foo1"].attrs.get("coordinates", "") foo2_coords = enc["foo2"].attrs.get("coordinates", "") foo3_coords = enc["foo3"].attrs.get("coordinates", "") assert foo1_coords == "lon1 lat1" assert foo2_coords == "lon2 lat2" assert foo3_coords == "lon3 lat3" # Should not have any global coordinates. assert "coordinates" not in attrs def test_var_with_coord_attr(self) -> None: # regression test for GH6310 # don't overwrite user-defined "coordinates" attributes orig = Dataset( {"values": ("time", np.zeros(2), {"coordinates": "time lon lat"})}, coords={ "time": ("time", np.zeros(2)), "lat": ("time", np.zeros(2)), "lon": ("time", np.zeros(2)), }, ) # Encode the coordinates, as they would be in a netCDF output file. enc, attrs = conventions.encode_dataset_coordinates(orig) # Make sure we have the right coordinates for each variable. values_coords = enc["values"].attrs.get("coordinates", "") assert values_coords == "time lon lat" # Should not have any global coordinates. assert "coordinates" not in attrs def test_do_not_overwrite_user_coordinates(self) -> None: # don't overwrite user-defined "coordinates" encoding orig = Dataset( coords={"x": [0, 1, 2], "y": ("x", [5, 6, 7]), "z": ("x", [8, 9, 10])}, data_vars={"a": ("x", [1, 2, 3]), "b": ("x", [3, 5, 6])}, ) orig["a"].encoding["coordinates"] = "y" orig["b"].encoding["coordinates"] = "z" enc, _ = conventions.encode_dataset_coordinates(orig) assert enc["a"].attrs["coordinates"] == "y" assert enc["b"].attrs["coordinates"] == "z" orig["a"].attrs["coordinates"] = "foo" with pytest.raises(ValueError, match=r"'coordinates' found in both attrs"): conventions.encode_dataset_coordinates(orig) def test_deterministic_coords_encoding(self) -> None: # the coordinates attribute is sorted when set by xarray.conventions ... # ... on a variable's coordinates attribute ds = Dataset({"foo": 0}, coords={"baz": 0, "bar": 0}) vars, attrs = conventions.encode_dataset_coordinates(ds) assert vars["foo"].attrs["coordinates"] == "bar baz" assert attrs.get("coordinates") is None # ... 
on the global coordinates attribute ds = ds.drop_vars("foo") vars, attrs = conventions.encode_dataset_coordinates(ds) assert attrs["coordinates"] == "bar baz" def test_emit_coordinates_attribute_in_attrs(self) -> None: orig = Dataset( {"a": 1, "b": 1}, coords={"t": np.array("2004-11-01T00:00:00", dtype=np.datetime64)}, ) orig["a"].attrs["coordinates"] = None enc, _ = conventions.encode_dataset_coordinates(orig) # check coordinate attribute emitted for 'a' assert "coordinates" not in enc["a"].attrs assert "coordinates" not in enc["a"].encoding # check coordinate attribute not emitted for 'b' assert enc["b"].attrs.get("coordinates") == "t" assert "coordinates" not in enc["b"].encoding def test_emit_coordinates_attribute_in_encoding(self) -> None: orig = Dataset( {"a": 1, "b": 1}, coords={"t": np.array("2004-11-01T00:00:00", dtype=np.datetime64)}, ) orig["a"].encoding["coordinates"] = None enc, _ = conventions.encode_dataset_coordinates(orig) # check coordinate attribute emitted for 'a' assert "coordinates" not in enc["a"].attrs assert "coordinates" not in enc["a"].encoding # check coordinate attribute not emitted for 'b' assert enc["b"].attrs.get("coordinates") == "t" assert "coordinates" not in enc["b"].encoding @requires_cftime class TestDecodeCF: def test_dataset(self) -> None: original = Dataset( { "t": ("t", [0, 1, 2], {"units": "days since 2000-01-01"}), "foo": ("t", [0, 0, 0], {"coordinates": "y", "units": "bar"}), "y": ("t", [5, 10, -999], {"_FillValue": -999}), } ) expected = Dataset( {"foo": ("t", [0, 0, 0], {"units": "bar"})}, { "t": pd.date_range("2000-01-01", periods=3, unit="ns"), "y": ("t", [5.0, 10.0, np.nan]), }, ) actual = conventions.decode_cf(original) assert_identical(expected, actual) def test_invalid_coordinates(self) -> None: # regression test for GH308, GH1809 original = Dataset({"foo": ("t", [1, 2], {"coordinates": "invalid"})}) decoded = Dataset({"foo": ("t", [1, 2], {}, {"coordinates": "invalid"})}) actual = conventions.decode_cf(original) assert_identical(decoded, actual) actual = conventions.decode_cf(original, decode_coords=False) assert_identical(original, actual) def test_decode_coordinates(self) -> None: # regression test for GH610 original = Dataset( {"foo": ("t", [1, 2], {"coordinates": "x"}), "x": ("t", [4, 5])} ) actual = conventions.decode_cf(original) assert actual.foo.encoding["coordinates"] == "x" def test_decode_coordinates_with_key_values(self) -> None: # regression test for GH9761 original = Dataset( { "temp": ( ("y", "x"), np.random.rand(2, 2), { "long_name": "temperature", "units": "K", "coordinates": "lat lon", "grid_mapping": "crs", }, ), "x": ( ("x"), np.arange(2), {"standard_name": "projection_x_coordinate", "units": "m"}, ), "y": ( ("y"), np.arange(2), {"standard_name": "projection_y_coordinate", "units": "m"}, ), "lat": ( ("y", "x"), np.random.rand(2, 2), {"standard_name": "latitude", "units": "degrees_north"}, ), "lon": ( ("y", "x"), np.random.rand(2, 2), {"standard_name": "longitude", "units": "degrees_east"}, ), "crs": ( (), None, { "grid_mapping_name": "transverse_mercator", "longitude_of_central_meridian": -2.0, }, ), "crs2": ( (), None, { "grid_mapping_name": "longitude_latitude", "longitude_of_central_meridian": -2.0, }, ), }, ) original.temp.attrs["grid_mapping"] = "crs: x y" _vars, _attrs, coords = conventions.decode_cf_variables( original.variables, {}, decode_coords="all" ) assert coords == {"lat", "lon", "crs"} original.temp.attrs["grid_mapping"] = "crs: x y crs2: lat lon" _vars, _attrs, coords = 
conventions.decode_cf_variables( original.variables, {}, decode_coords="all" ) assert coords == {"lat", "lon", "crs", "crs2"} # stray colon original.temp.attrs["grid_mapping"] = "crs: x y crs2 : lat lon" _vars, _attrs, coords = conventions.decode_cf_variables( original.variables, {}, decode_coords="all" ) assert coords == {"lat", "lon", "crs", "crs2"} original.temp.attrs["grid_mapping"] = "crs x y crs2: lat lon" with pytest.raises(ValueError, match="misses ':'"): conventions.decode_cf_variables(original.variables, {}, decode_coords="all") del original.temp.attrs["grid_mapping"] original.temp.attrs["formula_terms"] = "A: lat D: lon E: crs2" _vars, _attrs, coords = conventions.decode_cf_variables( original.variables, {}, decode_coords="all" ) assert coords == {"lat", "lon", "crs2"} original.temp.attrs["formula_terms"] = "A: lat lon D: crs E: crs2" with pytest.warns(UserWarning, match="has malformed content"): _vars, _attrs, coords = conventions.decode_cf_variables( original.variables, {}, decode_coords="all" ) assert coords == {"lat", "lon", "crs", "crs2"} def test_0d_int32_encoding(self) -> None: original = Variable((), np.int32(0), encoding={"dtype": "int64"}) expected = Variable((), np.int64(0)) actual = coding.variables.NonStringCoder().encode(original) assert_identical(expected, actual) def test_decode_cf_with_multiple_missing_values(self) -> None: original = Variable(["t"], [0, 1, 2], {"missing_value": np.array([0, 1])}) expected = Variable(["t"], [np.nan, np.nan, 2], {}) with pytest.warns(SerializationWarning, match="has multiple fill"): actual = conventions.decode_cf_variable("t", original) assert_identical(expected, actual) def test_decode_cf_with_drop_variables(self) -> None: original = Dataset( { "t": ("t", [0, 1, 2], {"units": "days since 2000-01-01"}), "x": ("x", [9, 8, 7], {"units": "km"}), "foo": ( ("t", "x"), [[0, 0, 0], [1, 1, 1], [2, 2, 2]], {"units": "bar"}, ), "y": ("t", [5, 10, -999], {"_FillValue": -999}), } ) expected = Dataset( { "t": pd.date_range("2000-01-01", periods=3, unit="ns"), "foo": ( ("t", "x"), [[0, 0, 0], [1, 1, 1], [2, 2, 2]], {"units": "bar"}, ), "y": ("t", [5, 10, np.nan]), } ) actual = conventions.decode_cf(original, drop_variables=("x",)) actual2 = conventions.decode_cf(original, drop_variables="x") assert_identical(expected, actual) assert_identical(expected, actual2) @pytest.mark.filterwarnings("ignore:Ambiguous reference date string") def test_invalid_time_units_raises_eagerly(self) -> None: ds = Dataset({"time": ("time", [0, 1], {"units": "foobar since 123"})}) with pytest.raises(ValueError, match=r"unable to decode time"): decode_cf(ds) @pytest.mark.parametrize("decode_times", [True, False]) def test_invalid_timedelta_units_do_not_decode(self, decode_times) -> None: # regression test for #8269 ds = Dataset( {"time": ("time", [0, 1, 20], {"units": "days invalid", "_FillValue": 20})} ) expected = Dataset( {"time": ("time", [0.0, 1.0, np.nan], {"units": "days invalid"})} ) assert_identical(expected, decode_cf(ds, decode_times=decode_times)) @requires_cftime @pytest.mark.parametrize("time_unit", ["s", "ms", "us", "ns"]) def test_dataset_repr_with_netcdf4_datetimes(self, time_unit) -> None: # regression test for #347 attrs = {"units": "days since 0001-01-01", "calendar": "noleap"} with warnings.catch_warnings(): warnings.filterwarnings("ignore", "unable to decode time") ds = decode_cf(Dataset({"time": ("time", [0, 1], attrs)})) assert "(time) object" in repr(ds) attrs = {"units": "days since 1900-01-01"} ds = decode_cf( Dataset({"time": ("time", [0, 
1], attrs)}), decode_times=CFDatetimeCoder(time_unit=time_unit), ) assert f"(time) datetime64[{time_unit}]" in repr(ds) @requires_cftime def test_decode_cf_datetime_transition_to_invalid(self) -> None: # manually create dataset with not-decoded date from datetime import datetime ds = Dataset(coords={"time": [0, 266 * 365]}) units = "days since 2000-01-01 00:00:00" ds.time.attrs = dict(units=units) with warnings.catch_warnings(): warnings.filterwarnings("ignore", "unable to decode time") ds_decoded = conventions.decode_cf(ds) expected = np.array([datetime(2000, 1, 1, 0, 0), datetime(2265, 10, 28, 0, 0)]) assert_array_equal(ds_decoded.time.values, expected) @requires_dask def test_decode_cf_with_dask(self) -> None: import dask.array as da original = Dataset( { "t": ("t", [0, 1, 2], {"units": "days since 2000-01-01"}), "foo": ("t", [0, 0, 0], {"coordinates": "y", "units": "bar"}), "bar": ("string2", [b"a", b"b"]), "baz": (("x"), [b"abc"], {"_Encoding": "utf-8"}), "y": ("t", [5, 10, -999], {"_FillValue": -999}), } ).chunk() decoded = conventions.decode_cf(original) assert all( isinstance(var.data, da.Array) for name, var in decoded.variables.items() if name not in decoded.xindexes ) assert_identical(decoded, conventions.decode_cf(original).compute()) @requires_dask def test_decode_dask_times(self) -> None: original = Dataset.from_dict( { "coords": {}, "dims": {"time": 5}, "data_vars": { "average_T1": { "dims": ("time",), "attrs": {"units": "days since 1958-01-01 00:00:00"}, "data": [87659.0, 88024.0, 88389.0, 88754.0, 89119.0], } }, } ) assert_identical( conventions.decode_cf(original.chunk()), conventions.decode_cf(original).chunk(), ) @pytest.mark.parametrize("time_unit", ["s", "ms", "us", "ns"]) def test_decode_cf_time_kwargs(self, time_unit) -> None: ds = Dataset.from_dict( { "coords": { "timedelta": { "data": np.array([1, 2, 3], dtype="int64"), "dims": "timedelta", "attrs": {"units": "days"}, }, "time": { "data": np.array([1, 2, 3], dtype="int64"), "dims": "time", "attrs": {"units": "days since 2000-01-01"}, }, }, "dims": {"time": 3, "timedelta": 3}, "data_vars": { "a": {"dims": ("time", "timedelta"), "data": np.ones((3, 3))}, }, } ) dsc = conventions.decode_cf( ds, decode_times=CFDatetimeCoder(time_unit=time_unit), decode_timedelta=CFTimedeltaCoder(time_unit=time_unit), ) assert dsc.timedelta.dtype == np.dtype(f"m8[{time_unit}]") assert dsc.time.dtype == np.dtype(f"M8[{time_unit}]") dsc = conventions.decode_cf(ds, decode_times=False) assert dsc.timedelta.dtype == np.dtype("int64") assert dsc.time.dtype == np.dtype("int64") dsc = conventions.decode_cf( ds, decode_times=CFDatetimeCoder(time_unit=time_unit), decode_timedelta=False, ) assert dsc.timedelta.dtype == np.dtype("int64") assert dsc.time.dtype == np.dtype(f"M8[{time_unit}]") dsc = conventions.decode_cf(ds, decode_times=False, decode_timedelta=True) assert dsc.timedelta.dtype == np.dtype("m8[ns]") assert dsc.time.dtype == np.dtype("int64") class CFEncodedInMemoryStore(WritableCFDataStore, InMemoryDataStore): def encode_variable(self, var, name=None): """encode one variable""" coder = coding.strings.EncodedStringCoder(allows_unicode=True) var = coder.encode(var, name=name) return var @requires_netCDF4 class TestCFEncodedDataStore(CFEncodedBase): @contextlib.contextmanager def create_store(self): yield CFEncodedInMemoryStore() @contextlib.contextmanager def roundtrip( self, data, save_kwargs=None, open_kwargs=None, allow_cleanup_failure=False ): if save_kwargs is None: save_kwargs = {} if open_kwargs is None: open_kwargs = {} store = 
CFEncodedInMemoryStore() data.dump_to_store(store, **save_kwargs) yield open_dataset(store, **open_kwargs) @pytest.mark.skip("cannot roundtrip coordinates yet for CFEncodedInMemoryStore") def test_roundtrip_coordinates(self) -> None: pass def test_invalid_dataarray_names_raise(self) -> None: # only relevant for on-disk file formats pass def test_encoding_kwarg(self) -> None: # we haven't bothered to raise errors yet for unexpected encodings in # this test dummy pass def test_encoding_kwarg_fixed_width_string(self) -> None: # CFEncodedInMemoryStore doesn't support explicit string encodings. pass def test_encoding_unlimited_dims(self) -> None: # CFEncodedInMemoryStore doesn't support unlimited_dims. pass class TestDecodeCFVariableWithArrayUnits: def test_decode_cf_variable_with_array_units(self) -> None: v = Variable(["t"], [1, 2, 3], {"units": np.array(["foobar"], dtype=object)}) v_decoded = conventions.decode_cf_variable("test2", v) assert_identical(v, v_decoded) def test_decode_cf_variable_timedelta64(): variable = Variable(["time"], pd.timedelta_range("1D", periods=2)) decoded = conventions.decode_cf_variable("time", variable) assert decoded.encoding == {} assert_identical(decoded, variable) def test_decode_cf_variable_datetime64(): variable = Variable(["time"], pd.date_range("2000", periods=2)) decoded = conventions.decode_cf_variable("time", variable) assert decoded.encoding == {} assert_identical(decoded, variable) @requires_cftime def test_decode_cf_variable_cftime(): variable = Variable(["time"], date_range("2000", periods=2, use_cftime=True)) decoded = conventions.decode_cf_variable("time", variable) assert decoded.encoding == {} assert_identical(decoded, variable) def test_scalar_units() -> None: # test that scalar units does not raise an exception var = Variable(["t"], [np.nan, np.nan, 2], {"units": np.nan}) actual = conventions.decode_cf_variable("t", var) assert_identical(actual, var) def test_decode_cf_error_includes_variable_name(): ds = Dataset({"my_invalid_var": ([], 1e36, {"units": "days since 2000-01-01"})}) with pytest.raises( ValueError, match=r"unable to decode(?s:.*)my_invalid_var", ): decode_cf(ds) def test_encode_cf_variable_with_vlen_dtype() -> None: v = Variable( ["x"], np.array(["a", "b"], dtype=coding.strings.create_vlen_dtype(str)) ) encoded_v = conventions.encode_cf_variable(v) assert encoded_v.data.dtype.kind == "O" assert coding.strings.check_vlen_dtype(encoded_v.data.dtype) is str # empty array v = Variable(["x"], np.array([], dtype=coding.strings.create_vlen_dtype(str))) encoded_v = conventions.encode_cf_variable(v) assert encoded_v.data.dtype.kind == "O" assert coding.strings.check_vlen_dtype(encoded_v.data.dtype) is str def test_decode_cf_variables_decode_timedelta_warning() -> None: v = Variable(["time"], [1, 2], attrs={"units": "seconds"}) variables = {"a": v} with warnings.catch_warnings(): warnings.filterwarnings("error", "decode_timedelta", FutureWarning) conventions.decode_cf_variables(variables, {}, decode_timedelta=True) with pytest.warns(FutureWarning, match="decode_timedelta"): conventions.decode_cf_variables(variables, {}) python-xarray-2026.01.0/xarray/tests/test_namedarray.py0000664000175000017500000005742215136607163023261 0ustar alastairalastairfrom __future__ import annotations import copy import sys from abc import abstractmethod from collections.abc import Mapping from typing import TYPE_CHECKING, Any, Generic, cast, overload import numpy as np import pytest from packaging.version import Version from xarray.core.indexing import 
ExplicitlyIndexed from xarray.namedarray._typing import ( _arrayfunction_or_api, _default, _DType_co, _ShapeType_co, ) from xarray.namedarray.core import NamedArray, from_array from xarray.namedarray.utils import fake_target_chunksize from xarray.tests import requires_cftime if TYPE_CHECKING: from types import ModuleType from numpy.typing import ArrayLike, DTypeLike, NDArray from xarray.namedarray._typing import ( Default, DuckArray, _AttrsLike, _Dim, _DimsLike, _DType, _IndexKeyLike, _IntOrUnknown, _Shape, _ShapeLike, duckarray, ) class CustomArrayBase(Generic[_ShapeType_co, _DType_co]): def __init__(self, array: duckarray[Any, _DType_co]) -> None: self.array: duckarray[Any, _DType_co] = array @property def dtype(self) -> _DType_co: return self.array.dtype @property def shape(self) -> _Shape: return self.array.shape class CustomArray( CustomArrayBase[_ShapeType_co, _DType_co], Generic[_ShapeType_co, _DType_co] ): def __array__( self, dtype: DTypeLike | None = None, /, *, copy: bool | None = None ) -> np.ndarray[Any, np.dtype[np.generic]]: if Version(np.__version__) >= Version("2.0.0"): return np.asarray(self.array, dtype=dtype, copy=copy) else: return np.asarray(self.array, dtype=dtype) class CustomArrayIndexable( CustomArrayBase[_ShapeType_co, _DType_co], ExplicitlyIndexed, Generic[_ShapeType_co, _DType_co], ): def __getitem__( self, key: _IndexKeyLike | CustomArrayIndexable[Any, Any], / ) -> CustomArrayIndexable[Any, _DType_co]: if isinstance(key, CustomArrayIndexable): if isinstance(key.array, type(self.array)): # TODO: key.array is duckarray here, can it be narrowed down further? # an _arrayapi cannot be used on a _arrayfunction for example. return type(self)(array=self.array[key.array]) # type: ignore[index] else: raise TypeError("key must have the same array type as self") else: return type(self)(array=self.array[key]) def __array_namespace__(self) -> ModuleType: return np def check_duck_array_typevar(a: duckarray[Any, _DType]) -> duckarray[Any, _DType]: # Mypy checks a is valid: b: duckarray[Any, _DType] = a # Runtime check if valid: if isinstance(b, _arrayfunction_or_api): return b else: missing_attrs = "" actual_attrs = set(dir(b)) for t in _arrayfunction_or_api: if sys.version_info >= (3, 13): # https://github.com/python/cpython/issues/104873 from typing import get_protocol_members expected_attrs = get_protocol_members(t) elif sys.version_info >= (3, 12): expected_attrs = t.__protocol_attrs__ else: from typing import _get_protocol_attrs # type: ignore[attr-defined] expected_attrs = _get_protocol_attrs(t) missing_attrs_ = expected_attrs - actual_attrs if missing_attrs_: missing_attrs += f"{t.__name__} - {missing_attrs_}\n" raise TypeError( f"a ({type(a)}) is not a valid _arrayfunction or _arrayapi. 
" "Missing following attrs:\n" f"{missing_attrs}" ) class NamedArraySubclassobjects: @pytest.fixture def target(self, data: np.ndarray[Any, Any]) -> Any: """Fixture that needs to be overridden""" raise NotImplementedError @abstractmethod def cls(self, *args: Any, **kwargs: Any) -> Any: """Method that needs to be overridden""" raise NotImplementedError @pytest.fixture def data(self) -> np.ndarray[Any, np.dtype[Any]]: return 0.5 * np.arange(10).reshape(2, 5) @pytest.fixture def random_inputs(self) -> np.ndarray[Any, np.dtype[np.float32]]: return np.arange(3 * 4 * 5, dtype=np.float32).reshape((3, 4, 5)) def test_properties(self, target: Any, data: Any) -> None: assert target.dims == ("x", "y") assert np.array_equal(target.data, data) assert target.dtype == float assert target.shape == (2, 5) assert target.ndim == 2 assert target.sizes == {"x": 2, "y": 5} assert target.size == 10 assert target.nbytes == 80 assert len(target) == 2 def test_attrs(self, target: Any) -> None: assert target.attrs == {} attrs = {"foo": "bar"} target.attrs = attrs assert target.attrs == attrs assert isinstance(target.attrs, dict) target.attrs["foo"] = "baz" assert target.attrs["foo"] == "baz" @pytest.mark.parametrize( "expected", [np.array([1, 2], dtype=np.dtype(np.int8)), [1, 2]] ) def test_init(self, expected: Any) -> None: actual = self.cls(("x",), expected) assert np.array_equal(np.asarray(actual.data), expected) actual = self.cls(("x",), expected) assert np.array_equal(np.asarray(actual.data), expected) def test_data(self, random_inputs: Any) -> None: expected = self.cls(["x", "y", "z"], random_inputs) assert np.array_equal(np.asarray(expected.data), random_inputs) with pytest.raises(ValueError): expected.data = np.random.random((3, 4)).astype(np.float64) d2 = np.arange(3 * 4 * 5, dtype=np.float32).reshape((3, 4, 5)) expected.data = d2 assert np.array_equal(np.asarray(expected.data), d2) class TestNamedArray(NamedArraySubclassobjects): def cls(self, *args: Any, **kwargs: Any) -> NamedArray[Any, Any]: return NamedArray(*args, **kwargs) @pytest.fixture def target(self, data: np.ndarray[Any, Any]) -> NamedArray[Any, Any]: return NamedArray(["x", "y"], data) @pytest.mark.parametrize( "expected", [ np.array([1, 2], dtype=np.dtype(np.int8)), pytest.param( [1, 2], marks=pytest.mark.xfail( reason="NamedArray only supports array-like objects" ), ), ], ) def test_init(self, expected: Any) -> None: super().test_init(expected) @pytest.mark.parametrize( "dims, data, expected, raise_error", [ (("x",), [1, 2, 3], np.array([1, 2, 3]), False), ((1,), np.array([4, 5, 6]), np.array([4, 5, 6]), False), ((), 2, np.array(2), False), # Fail: ( ("x",), NamedArray("time", np.array([1, 2, 3], dtype=np.dtype(np.int64))), np.array([1, 2, 3]), True, ), ], ) def test_from_array( self, dims: _DimsLike, data: ArrayLike, expected: np.ndarray[Any, Any], raise_error: bool, ) -> None: actual: NamedArray[Any, Any] if raise_error: with pytest.raises(TypeError, match="already a Named array"): actual = from_array(dims, data) # Named arrays are not allowed: from_array(actual) # type: ignore[call-overload] else: actual = from_array(dims, data) assert np.array_equal(np.asarray(actual.data), expected) def test_from_array_with_masked_array(self) -> None: masked_array: np.ndarray[Any, np.dtype[np.generic]] masked_array = np.ma.array([1, 2, 3], mask=[False, True, False]) # type: ignore[no-untyped-call] with pytest.raises(NotImplementedError): from_array(("x",), masked_array) def test_from_array_with_0d_object(self) -> None: data = np.empty((), dtype=object) 
data[()] = (10, 12, 12) narr = from_array((), data) np.array_equal(np.asarray(narr.data), data) # TODO: Make xr.core.indexing.ExplicitlyIndexed pass as a subclass of_arrayfunction_or_api # and remove this test. def test_from_array_with_explicitly_indexed( self, random_inputs: np.ndarray[Any, Any] ) -> None: array: CustomArray[Any, Any] array = CustomArray(random_inputs) output: NamedArray[Any, Any] output = from_array(("x", "y", "z"), array) assert isinstance(output.data, np.ndarray) array2: CustomArrayIndexable[Any, Any] array2 = CustomArrayIndexable(random_inputs) output2: NamedArray[Any, Any] output2 = from_array(("x", "y", "z"), array2) assert isinstance(output2.data, CustomArrayIndexable) def test_real_and_imag(self) -> None: expected_real: np.ndarray[Any, np.dtype[np.float64]] expected_real = np.arange(3, dtype=np.float64) expected_imag: np.ndarray[Any, np.dtype[np.float64]] expected_imag = -np.arange(3, dtype=np.float64) arr: np.ndarray[Any, np.dtype[np.complex128]] arr = expected_real + 1j * expected_imag named_array: NamedArray[Any, np.dtype[np.complex128]] named_array = NamedArray(["x"], arr) actual_real: duckarray[Any, np.dtype[np.float64]] = named_array.real.data assert np.array_equal(np.asarray(actual_real), expected_real) assert actual_real.dtype == expected_real.dtype actual_imag: duckarray[Any, np.dtype[np.float64]] = named_array.imag.data assert np.array_equal(np.asarray(actual_imag), expected_imag) assert actual_imag.dtype == expected_imag.dtype # Additional tests as per your original class-based code @pytest.mark.parametrize( "data, dtype", [ ("foo", np.dtype("U3")), (b"foo", np.dtype("S3")), ], ) def test_from_array_0d_string(self, data: Any, dtype: DTypeLike | None) -> None: named_array: NamedArray[Any, Any] named_array = from_array([], data) assert named_array.data == data assert named_array.dims == () assert named_array.sizes == {} assert named_array.attrs == {} assert named_array.ndim == 0 assert named_array.size == 1 assert named_array.dtype == dtype def test_from_array_0d_object(self) -> None: named_array: NamedArray[Any, Any] named_array = from_array([], (10, 12, 12)) expected_data = np.empty((), dtype=object) expected_data[()] = (10, 12, 12) assert np.array_equal(np.asarray(named_array.data), expected_data) assert named_array.dims == () assert named_array.sizes == {} assert named_array.attrs == {} assert named_array.ndim == 0 assert named_array.size == 1 assert named_array.dtype == np.dtype("O") def test_from_array_0d_datetime(self) -> None: named_array: NamedArray[Any, Any] named_array = from_array([], np.datetime64("2000-01-01")) assert named_array.dtype == np.dtype("datetime64[D]") @pytest.mark.parametrize( "timedelta, expected_dtype", [ (np.timedelta64(1, "D"), np.dtype("timedelta64[D]")), (np.timedelta64(1, "s"), np.dtype("timedelta64[s]")), (np.timedelta64(1, "m"), np.dtype("timedelta64[m]")), (np.timedelta64(1, "h"), np.dtype("timedelta64[h]")), (np.timedelta64(1, "us"), np.dtype("timedelta64[us]")), (np.timedelta64(1, "ns"), np.dtype("timedelta64[ns]")), (np.timedelta64(1, "ps"), np.dtype("timedelta64[ps]")), (np.timedelta64(1, "fs"), np.dtype("timedelta64[fs]")), (np.timedelta64(1, "as"), np.dtype("timedelta64[as]")), ], ) def test_from_array_0d_timedelta( self, timedelta: np.timedelta64, expected_dtype: np.dtype[np.timedelta64] ) -> None: named_array: NamedArray[Any, Any] named_array = from_array([], timedelta) assert named_array.dtype == expected_dtype assert named_array.data == timedelta @pytest.mark.parametrize( "dims, data_shape, new_dims, raises", 
[ (["x", "y", "z"], (2, 3, 4), ["a", "b", "c"], False), (["x", "y", "z"], (2, 3, 4), ["a", "b"], True), (["x", "y", "z"], (2, 4, 5), ["a", "b", "c", "d"], True), ([], [], (), False), ([], [], ("x",), True), ], ) def test_dims_setter( self, dims: Any, data_shape: Any, new_dims: Any, raises: bool ) -> None: named_array: NamedArray[Any, Any] named_array = NamedArray(dims, np.asarray(np.random.random(data_shape))) assert named_array.dims == tuple(dims) if raises: with pytest.raises(ValueError): named_array.dims = new_dims else: named_array.dims = new_dims assert named_array.dims == tuple(new_dims) def test_duck_array_class(self) -> None: numpy_a: NDArray[np.int64] numpy_a = np.array([2.1, 4], dtype=np.dtype(np.int64)) check_duck_array_typevar(numpy_a) masked_a: np.ma.MaskedArray[Any, np.dtype[np.int64]] masked_a = np.ma.asarray([2.1, 4], dtype=np.dtype(np.int64)) # type: ignore[no-untyped-call] check_duck_array_typevar(masked_a) # type: ignore[arg-type] # MaskedArray not in duckarray union custom_a: CustomArrayIndexable[Any, np.dtype[np.int64]] custom_a = CustomArrayIndexable(numpy_a) check_duck_array_typevar(custom_a) def test_duck_array_class_array_api(self) -> None: # Test numpy's array api: nxp = pytest.importorskip("array_api_strict", minversion="1.0") # TODO: nxp doesn't use dtype typevars, so can only use Any for the moment: arrayapi_a: duckarray[Any, Any] # duckarray[Any, np.dtype[np.int64]] arrayapi_a = nxp.asarray([2.1, 4], dtype=nxp.int64) check_duck_array_typevar(arrayapi_a) def test_new_namedarray(self) -> None: dtype_float = np.dtype(np.float32) narr_float: NamedArray[Any, np.dtype[np.float32]] narr_float = NamedArray(("x",), np.array([1.5, 3.2], dtype=dtype_float)) assert narr_float.dtype == dtype_float dtype_int = np.dtype(np.int8) narr_int: NamedArray[Any, np.dtype[np.int8]] narr_int = narr_float._new(("x",), np.array([1, 3], dtype=dtype_int)) assert narr_int.dtype == dtype_int class Variable( NamedArray[_ShapeType_co, _DType_co], Generic[_ShapeType_co, _DType_co] ): @overload def _new( self, dims: _DimsLike | Default = ..., data: duckarray[Any, _DType] = ..., attrs: _AttrsLike | Default = ..., ) -> Variable[Any, _DType]: ... @overload def _new( self, dims: _DimsLike | Default = ..., data: Default = ..., attrs: _AttrsLike | Default = ..., ) -> Variable[_ShapeType_co, _DType_co]: ... 
def _new( self, dims: _DimsLike | Default = _default, data: duckarray[Any, _DType] | Default = _default, attrs: _AttrsLike | Default = _default, ) -> Variable[Any, _DType] | Variable[_ShapeType_co, _DType_co]: dims_ = copy.copy(self._dims) if dims is _default else dims attrs_: Mapping[Any, Any] | None if attrs is _default: attrs_ = None if self._attrs is None else self._attrs.copy() else: attrs_ = attrs if data is _default: return type(self)(dims_, copy.copy(self._data), attrs_) cls_ = cast("type[Variable[Any, _DType]]", type(self)) return cls_(dims_, data, attrs_) var_float: Variable[Any, np.dtype[np.float32]] var_float = Variable(("x",), np.array([1.5, 3.2], dtype=dtype_float)) assert var_float.dtype == dtype_float var_int: Variable[Any, np.dtype[np.int8]] var_int = var_float._new(("x",), np.array([1, 3], dtype=dtype_int)) assert var_int.dtype == dtype_int def test_replace_namedarray(self) -> None: dtype_float = np.dtype(np.float32) np_val: np.ndarray[Any, np.dtype[np.float32]] np_val = np.array([1.5, 3.2], dtype=dtype_float) np_val2: np.ndarray[Any, np.dtype[np.float32]] np_val2 = 2 * np_val narr_float: NamedArray[Any, np.dtype[np.float32]] narr_float = NamedArray(("x",), np_val) assert narr_float.dtype == dtype_float narr_float2: NamedArray[Any, np.dtype[np.float32]] narr_float2 = NamedArray(("x",), np_val2) assert narr_float2.dtype == dtype_float class Variable( NamedArray[_ShapeType_co, _DType_co], Generic[_ShapeType_co, _DType_co] ): @overload def _new( self, dims: _DimsLike | Default = ..., data: duckarray[Any, _DType] = ..., attrs: _AttrsLike | Default = ..., ) -> Variable[Any, _DType]: ... @overload def _new( self, dims: _DimsLike | Default = ..., data: Default = ..., attrs: _AttrsLike | Default = ..., ) -> Variable[_ShapeType_co, _DType_co]: ... 
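            # Editorial note (not in the original test): overriding ``_new`` is
            # what lets operations that build fresh objects, such as the
            # ``_replace`` call exercised below, return this Variable subclass
            # instead of a plain NamedArray.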
def _new( self, dims: _DimsLike | Default = _default, data: duckarray[Any, _DType] | Default = _default, attrs: _AttrsLike | Default = _default, ) -> Variable[Any, _DType] | Variable[_ShapeType_co, _DType_co]: dims_ = copy.copy(self._dims) if dims is _default else dims attrs_: Mapping[Any, Any] | None if attrs is _default: attrs_ = None if self._attrs is None else self._attrs.copy() else: attrs_ = attrs if data is _default: return type(self)(dims_, copy.copy(self._data), attrs_) cls_ = cast("type[Variable[Any, _DType]]", type(self)) return cls_(dims_, data, attrs_) var_float: Variable[Any, np.dtype[np.float32]] var_float = Variable(("x",), np_val) assert var_float.dtype == dtype_float var_float2: Variable[Any, np.dtype[np.float32]] var_float2 = var_float._replace(("x",), np_val2) assert var_float2.dtype == dtype_float @pytest.mark.parametrize( "dim,expected_ndim,expected_shape,expected_dims", [ (None, 3, (1, 2, 5), (None, "x", "y")), (_default, 3, (1, 2, 5), ("dim_2", "x", "y")), ("z", 3, (1, 2, 5), ("z", "x", "y")), ], ) def test_expand_dims( self, target: NamedArray[Any, np.dtype[np.float32]], dim: _Dim | Default, expected_ndim: int, expected_shape: _ShapeLike, expected_dims: _DimsLike, ) -> None: result = target.expand_dims(dim=dim) assert result.ndim == expected_ndim assert result.shape == expected_shape assert result.dims == expected_dims @pytest.mark.parametrize( "dims, expected_sizes", [ ((), {"y": 5, "x": 2}), (["y", "x"], {"y": 5, "x": 2}), (["y", ...], {"y": 5, "x": 2}), ], ) def test_permute_dims( self, target: NamedArray[Any, np.dtype[np.float32]], dims: _DimsLike, expected_sizes: dict[_Dim, _IntOrUnknown], ) -> None: actual = target.permute_dims(*dims) assert actual.sizes == expected_sizes def test_permute_dims_errors( self, target: NamedArray[Any, np.dtype[np.float32]], ) -> None: with pytest.raises(ValueError, match=r"'y'.*permuted list"): dims = ["y"] target.permute_dims(*dims) @pytest.mark.parametrize( "broadcast_dims,expected_ndim", [ ({"x": 2, "y": 5}, 2), ({"x": 2, "y": 5, "z": 2}, 3), ({"w": 1, "x": 2, "y": 5}, 3), ], ) def test_broadcast_to( self, target: NamedArray[Any, np.dtype[np.float32]], broadcast_dims: Mapping[_Dim, int], expected_ndim: int, ) -> None: expand_dims = set(broadcast_dims.keys()) - set(target.dims) # loop over expand_dims and call .expand_dims(dim=dim) in a loop for dim in expand_dims: target = target.expand_dims(dim=dim) result = target.broadcast_to(broadcast_dims) assert result.ndim == expected_ndim assert result.sizes == broadcast_dims def test_broadcast_to_errors( self, target: NamedArray[Any, np.dtype[np.float32]] ) -> None: with pytest.raises( ValueError, match=r"operands could not be broadcast together with remapped shapes", ): target.broadcast_to({"x": 2, "y": 2}) with pytest.raises(ValueError, match=r"Cannot add new dimensions"): target.broadcast_to({"x": 2, "y": 2, "z": 2}) def test_warn_on_repeated_dimension_names(self) -> None: with pytest.warns(UserWarning, match="Duplicate dimension names"): NamedArray(("x", "x"), np.arange(4).reshape(2, 2)) def test_aggregation(self) -> None: x: NamedArray[Any, np.dtype[np.int64]] x = NamedArray(("x", "y"), np.arange(4).reshape(2, 2)) result = x.sum() assert isinstance(result.data, np.ndarray) def test_repr() -> None: x: NamedArray[Any, np.dtype[np.uint64]] x = NamedArray(("x",), np.array([0], dtype=np.uint64)) # Reprs should not crash: r = x.__repr__() x._repr_html_() # Basic comparison: assert r == " Size: 8B\narray([0], dtype=uint64)" @pytest.mark.parametrize( "input_array, 
expected_chunksize_faked, expected_dtype", [ (np.arange(100).reshape(10, 10), 1024, np.int64), (np.arange(100).reshape(10, 10).astype(np.float32), 1024, np.float32), ], ) def test_fake_target_chunksize( input_array: DuckArray[Any], expected_chunksize_faked: int, expected_dtype: DTypeLike, ) -> None: """ Check that `fake_target_chunksize` returns the expected chunksize and dtype. - It pretends to dask we are chunking an array with an 8-byte dtype, ie. a float64. As such, it will *double* the amount of memory a 4-byte dtype (like float32) would try to use, fooling it into actually using the correct amount of memory. For object dtypes, which are generally larger, it will reduce the effective dask configuration chunksize, reducing the size of the arrays per chunk such that we get the same amount of memory used. """ target_chunksize = 1024 faked_chunksize, dtype = fake_target_chunksize(input_array, target_chunksize) assert faked_chunksize == expected_chunksize_faked assert dtype == expected_dtype @requires_cftime def test_fake_target_chunksize_cftime() -> None: """ Check that `fake_target_chunksize` returns the expected chunksize and dtype. - It pretends to dask we are chunking an array with an 8-byte dtype, ie. a float64. - This is the same as the above test, but specifically for a CFTime array case - split for testing reasons """ import cftime target_chunksize = 1024 input_array = np.array( [ cftime.Datetime360Day(2000, month, day, 0, 0, 0, 0) for month in range(1, 11) for day in range(1, 11) ], dtype=object, ).reshape(10, 10) faked_chunksize, dtype = fake_target_chunksize(input_array, target_chunksize) # type: ignore[arg-type,unused-ignore] assert faked_chunksize == 73 assert dtype == np.float64 python-xarray-2026.01.0/xarray/tests/test_print_versions.py0000664000175000017500000000031015136607163024202 0ustar alastairalastairfrom __future__ import annotations import io import xarray def test_show_versions() -> None: f = io.StringIO() xarray.show_versions(file=f) assert "INSTALLED VERSIONS" in f.getvalue() python-xarray-2026.01.0/xarray/tests/test_formatting_html.py0000664000175000017500000003547315136607163024336 0ustar alastairalastairfrom __future__ import annotations import re from functools import partial import numpy as np import pandas as pd import pytest import xarray as xr from xarray.core import formatting_html as fh from xarray.core.coordinates import Coordinates def drop_fallback_text_repr(html: str) -> str: pattern = ( re.escape("
") + "[^<]*" + re.escape("
") ) return re.sub(pattern, "", html) XarrayTypes = xr.DataTree | xr.Dataset | xr.DataArray | xr.Variable def xarray_html_only_repr(obj: XarrayTypes) -> str: return drop_fallback_text_repr(obj._repr_html_()) def assert_consistent_text_and_html( obj: XarrayTypes, section_headers: list[str] ) -> None: actual_html = xarray_html_only_repr(obj) actual_text = repr(obj) for section_header in section_headers: assert actual_html.count(section_header) == actual_text.count(section_header), ( section_header ) assert_consistent_text_and_html_dataarray = partial( assert_consistent_text_and_html, section_headers=[ "Coordinates", "Indexes", "Attributes", ], ) assert_consistent_text_and_html_dataset = partial( assert_consistent_text_and_html, section_headers=[ "Dimensions", "Coordinates", "Data variables", "Indexes", "Attributes", ], ) assert_consistent_text_and_html_datatree = partial( assert_consistent_text_and_html, section_headers=[ "Dimensions", "Coordinates", "Inherited coordinates", "Data variables", "Indexes", "Attributes", ], ) @pytest.fixture def dataarray() -> xr.DataArray: return xr.DataArray(np.random.default_rng(0).random((4, 6))) @pytest.fixture def dask_dataarray(dataarray: xr.DataArray) -> xr.DataArray: pytest.importorskip("dask") return dataarray.chunk() @pytest.fixture def multiindex() -> xr.Dataset: midx = pd.MultiIndex.from_product( [["a", "b"], [1, 2]], names=("level_1", "level_2") ) midx_coords = Coordinates.from_pandas_multiindex(midx, "x") return xr.Dataset({}, midx_coords) @pytest.fixture def dataset() -> xr.Dataset: times = pd.date_range("2000-01-01", "2001-12-31", name="time") annual_cycle = np.sin(2 * np.pi * (times.dayofyear.values / 365.25 - 0.28)) base = 10 + 15 * annual_cycle.reshape(-1, 1) tmin_values = base + 3 * np.random.randn(annual_cycle.size, 3) tmax_values = base + 10 + 3 * np.random.randn(annual_cycle.size, 3) return xr.Dataset( { "tmin": (("time", "location"), tmin_values), "tmax": (("time", "location"), tmax_values), }, {"location": ["", "IN", "IL"], "time": times}, attrs={"description": "Test data."}, ) def test_short_data_repr_html(dataarray: xr.DataArray) -> None: data_repr = fh.short_data_repr_html(dataarray) assert data_repr.startswith("
array")


def test_short_data_repr_html_non_str_keys(dataset: xr.Dataset) -> None:
    ds = dataset.assign({2: lambda x: x["tmin"]})
    fh.dataset_repr(ds)
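    # Smoke test only: building the HTML repr must not raise when a Dataset has
    # non-string variable names; nothing is asserted about the markup itself.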


def test_short_data_repr_html_dask(dask_dataarray: xr.DataArray) -> None:
    assert hasattr(dask_dataarray.data, "_repr_html_")
    data_repr = fh.short_data_repr_html(dask_dataarray)
    assert data_repr == dask_dataarray.data._repr_html_()
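    # The delegation asserted above means chunked data is summarised by dask's
    # own HTML table (chunk layout and sizes) rather than xarray loading the
    # values just to render a repr.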


def test_format_dims_no_dims() -> None:
    dims: dict = {}
    dims_with_index: list = []
    formatted = fh.format_dims(dims, dims_with_index)
    assert formatted == ""


def test_format_dims_unsafe_dim_name() -> None:
    dims = {"": 3, "y": 2}
    dims_with_index: list = []
    formatted = fh.format_dims(dims, dims_with_index)
    assert "<x>" in formatted


def test_format_dims_non_index() -> None:
    dims, dims_with_index = {"x": 3, "y": 2}, ["time"]
    formatted = fh.format_dims(dims, dims_with_index)
    assert "class='xr-has-index'" not in formatted


def test_format_dims_index() -> None:
    dims, dims_with_index = {"x": 3, "y": 2}, ["x"]
    formatted = fh.format_dims(dims, dims_with_index)
    assert "class='xr-has-index'" in formatted


def test_summarize_attrs_with_unsafe_attr_name_and_value() -> None:
    attrs = {"": 3, "y": ""}
    formatted = fh.summarize_attrs(attrs)
    assert "
<x> :
" in formatted assert "
y :
" in formatted assert "
3
" in formatted assert "
<pd.DataFrame>
" in formatted def test_repr_of_dataarray() -> None: dataarray = xr.DataArray(np.random.default_rng(0).random((4, 6))) formatted = xarray_html_only_repr(dataarray) assert "dim_0" in formatted # has an expanded data section assert formatted.count("class='xr-array-in' type='checkbox' checked>") == 1 # coords, indexes and attrs don't have an items so they'll be omitted assert "Coordinates" not in formatted assert "Indexes" not in formatted assert "Attributes" not in formatted assert_consistent_text_and_html_dataarray(dataarray) with xr.set_options(display_expand_data=False): formatted = xarray_html_only_repr(dataarray) assert "dim_0" in formatted # has a collapsed data section assert formatted.count("class='xr-array-in' type='checkbox' checked>") == 0 # coords, indexes and attrs don't have an items so they'll be omitted assert "Coordinates" not in formatted assert "Indexes" not in formatted assert "Attributes" not in formatted def test_repr_coords_order_of_datarray() -> None: da1 = xr.DataArray( np.empty((2, 2)), coords={"foo": [0, 1], "bar": [0, 1]}, dims=["foo", "bar"], ) da2 = xr.DataArray( np.empty((2, 2)), coords={"bar": [0, 1], "foo": [0, 1]}, dims=["bar", "foo"], ) ds = xr.Dataset({"da1": da1, "da2": da2}) bar_line = ( "bar
(bar)" ) foo_line = ( "foo
(foo)" ) formatted_da1 = fh.array_repr(ds.da1) assert formatted_da1.index(foo_line) < formatted_da1.index(bar_line) formatted_da2 = fh.array_repr(ds.da2) assert formatted_da2.index(bar_line) < formatted_da2.index(foo_line) def test_repr_of_multiindex(multiindex: xr.Dataset) -> None: formatted = fh.dataset_repr(multiindex) assert "(x)" in formatted assert_consistent_text_and_html_dataset(multiindex) def test_repr_of_dataset(dataset: xr.Dataset) -> None: formatted = xarray_html_only_repr(dataset) # coords, attrs, and data_vars are expanded assert ( formatted.count("class='xr-section-summary-in' type='checkbox' checked />") == 3 ) # indexes is omitted assert "Indexes" not in formatted assert "<U4" in formatted or ">U4" in formatted assert "<IA>" in formatted assert_consistent_text_and_html_dataset(dataset) with xr.set_options( display_expand_coords=False, display_expand_data_vars=False, display_expand_attrs=False, display_expand_indexes=True, display_default_indexes=True, ): formatted = xarray_html_only_repr(dataset) # coords, attrs, and data_vars are collapsed, indexes is shown & expanded assert ( formatted.count("class='xr-section-summary-in' type='checkbox' checked />") == 1 ) assert "Indexes" in formatted assert "<U4" in formatted or ">U4" in formatted assert "<IA>" in formatted def test_repr_text_fallback(dataset: xr.Dataset) -> None: formatted = fh.dataset_repr(dataset) # Just test that the "pre" block used for fallback to plain text is present. assert "
" in formatted


def test_repr_coords_order_of_dataset() -> None:
    ds = xr.Dataset()
    ds.coords["as"] = 10
    ds["var"] = xr.DataArray(np.ones((10,)), dims="x", coords={"x": np.arange(10)})
    formatted = fh.dataset_repr(ds)

    x_line = "x
(x)" as_line = "as
()" assert formatted.index(x_line) < formatted.index(as_line) def test_variable_repr_html() -> None: v = xr.Variable(["time", "x"], [[1, 2, 3], [4, 5, 6]], {"foo": "bar"}) assert hasattr(v, "_repr_html_") with xr.set_options(display_style="html"): html = v._repr_html_().strip() # We don't do a complete string identity since # html output is probably subject to change, is long and... reasons. # Just test that something reasonable was produced. assert html.startswith("") assert "xarray.Variable" in html def test_repr_of_nonstr_dataset(dataset: xr.Dataset) -> None: ds = dataset.copy() ds.attrs[1] = "Test value" ds[2] = ds["tmin"] formatted = fh.dataset_repr(ds) assert "
1 :
Test value
" in formatted assert "
2" in formatted def test_repr_of_nonstr_dataarray(dataarray: xr.DataArray) -> None: da = dataarray.rename(dim_0=15) da.attrs[1] = "value" formatted = fh.array_repr(da) assert "
1 :
value
" in formatted assert "
  • 15: 4
  • " in formatted def test_nonstr_variable_repr_html() -> None: v = xr.Variable(["time", 10], [[1, 2, 3], [4, 5, 6]], {22: "bar"}) assert hasattr(v, "_repr_html_") with xr.set_options(display_style="html"): html = v._repr_html_().strip() assert "
    22 :
    bar
    " in html assert "
  • 10: 3
  • " in html class TestDataTreeTruncatesNodes: def test_many_nodes(self) -> None: number_of_files = 10 number_of_groups = 10 tree_dict = {} for f in range(number_of_files): for g in range(number_of_groups): tree_dict[f"file_{f}/group_{g}"] = xr.Dataset({"g": f * g}) tree = xr.DataTree.from_dict(tree_dict) with xr.set_options(display_max_html_elements=25): result = xarray_html_only_repr(tree) assert result.count("file_0/group_9") == 1 assert result.count("file_1/group_0") == 0 # disabled assert result.count("Too many items to display") == 9 + 10 with xr.set_options(display_max_html_elements=1000): result = xarray_html_only_repr(tree) assert result.count("Too many items to display") == 0 def test_many_children_truncated(self) -> None: # Create tree with 20 children at root level tree_dict = {f"child_{i:02d}": xr.Dataset({"x": i}) for i in range(20)} tree = xr.DataTree.from_dict(tree_dict) # With max_children=5: show first 3, ellipsis, last 2 with xr.set_options(display_max_children=5, display_max_html_elements=1000): result = xarray_html_only_repr(tree) # First 3 children should appear assert "/child_00" in result assert "/child_01" in result assert "/child_02" in result # Middle children should NOT appear assert "/child_03" not in result assert "/child_10" not in result assert "/child_17" not in result # Last 2 children should appear assert "/child_18" in result assert "/child_19" in result # Vertical ellipsis should appear assert "⋮" in result def test_few_children_not_truncated(self) -> None: # Create tree with 5 children (at the limit) tree_dict = {f"child_{i}": xr.Dataset({"x": i}) for i in range(5)} tree = xr.DataTree.from_dict(tree_dict) with xr.set_options(display_max_children=5, display_max_html_elements=1000): result = xarray_html_only_repr(tree) # All children should appear for i in range(5): assert f"/child_{i}" in result # No ellipsis assert "⋮" not in result def test_nested_children_truncated(self) -> None: # Create tree with nested children: root → 10 children → each with 2 grandchildren tree_dict = {} for i in range(10): for j in range(2): tree_dict[f"child_{i:02d}/grandchild_{j}"] = xr.Dataset({"x": i * j}) tree = xr.DataTree.from_dict(tree_dict) with xr.set_options(display_max_children=5, display_max_html_elements=1000): result = xarray_html_only_repr(tree) # Root level: first 3 and last 2 of 10 children should appear assert "/child_00" in result assert "/child_01" in result assert "/child_02" in result assert "/child_05" not in result # truncated assert "/child_08" in result assert "/child_09" in result # Ellipsis should appear for truncated children assert "⋮" in result def test_node_item_count_displayed(self) -> None: # Create tree with known item counts tree = xr.DataTree.from_dict( { "node_a": xr.Dataset({"var1": 1, "var2": 2}), # 2 vars "node_b": xr.Dataset( {"var1": 1}, attrs={"attr1": "x", "attr2": "y"} ), # 1 var + 2 attrs } ) with xr.set_options(display_max_html_elements=1000): result = xarray_html_only_repr(tree) # Item counts should appear in parentheses assert "(2)" in result # node_a: 2 variables assert "(3)" in result # node_b: 1 variable + 2 attrs def test_collapsible_group_checkbox(self) -> None: # Create simple tree with children tree = xr.DataTree.from_dict( { "child_a": xr.Dataset({"x": 1}), "child_b": xr.Dataset({"y": 2}), } ) with xr.set_options(display_max_html_elements=1000): result = xarray_html_only_repr(tree) # Group nodes should have checkbox inputs for collapsing assert " None: dt = xr.DataTree.from_dict(data={"a/b/c": None}, coords={"x": [1]}) 
root_html = dt._repr_html_() assert "Inherited coordinates" not in root_html child_html = xarray_html_only_repr(dt["a"]) assert child_html.count("Inherited coordinates") == 1 def test_repr_consistency(self) -> None: dt = xr.DataTree.from_dict({"/a/b/c": None}) assert_consistent_text_and_html_datatree(dt) assert_consistent_text_and_html_datatree(dt["a"]) assert_consistent_text_and_html_datatree(dt["a/b"]) assert_consistent_text_and_html_datatree(dt["a/b/c"]) def test_no_repeated_style_or_fallback_text(self) -> None: dt = xr.DataTree.from_dict({"/a/b/c": None}) html = dt._repr_html_() assert html.count("" f"
    {escape(repr(obj))}
    " "" "
    " ) def array_repr(arr) -> str: dims = OrderedDict((k, v) for k, v in zip(arr.dims, arr.shape, strict=True)) if hasattr(arr, "xindexes"): indexed_dims = arr.xindexes.dims else: indexed_dims = {} obj_type = f"xarray.{type(arr).__name__}" arr_name = escape(repr(arr.name)) if getattr(arr, "name", None) else "" header_components = [ f"
    {obj_type}
    ", f"
    {arr_name}
    ", format_dims(dims, indexed_dims), ] sections = [array_section(arr)] if hasattr(arr, "coords"): if arr.coords: sections.append(coord_section(arr.coords)) if hasattr(arr, "xindexes"): display_default_indexes = _get_boolean_with_default( "display_default_indexes", False ) xindexes = filter_nondefault_indexes( _get_indexes_dict(arr.xindexes), not display_default_indexes ) if xindexes: indexes = _get_indexes_dict(arr.xindexes) sections.append(index_section(indexes)) if arr.attrs: sections.append(attr_section(arr.attrs)) return _obj_repr(arr, header_components, sections) def dataset_repr(ds) -> str: obj_type = f"xarray.{type(ds).__name__}" header_components = [f"
    {escape(obj_type)}
    "] sections = [] sections.append(dim_section(ds)) if ds.coords: sections.append(coord_section(ds.coords)) sections.append(datavar_section(ds.data_vars)) display_default_indexes = _get_boolean_with_default( "display_default_indexes", False ) xindexes = filter_nondefault_indexes( _get_indexes_dict(ds.xindexes), not display_default_indexes ) if xindexes: sections.append(index_section(xindexes)) if ds.attrs: sections.append(attr_section(ds.attrs)) return _obj_repr(ds, header_components, sections) inherited_coord_section = partial( _mapping_section, name="Inherited coordinates", details_func=summarize_coords, max_items_collapse=25, expand_option_name="display_expand_coords", ) def _datatree_node_sections(node: DataTree, root: bool) -> tuple[list[str], int]: from xarray.core.coordinates import Coordinates ds = node._to_dataset_view(rebuild_dims=False, inherit=True) node_coords = node.to_dataset(inherit=False).coords # use this class to get access to .xindexes property inherited_coords = Coordinates( coords=inherited_vars(node._coord_variables), indexes=inherited_vars(node._indexes), ) # Only show dimensions if also showing a variable or coordinates section. show_dims = node_coords or (root and inherited_coords) or ds.data_vars display_default_indexes = _get_boolean_with_default( "display_default_indexes", False ) xindexes = filter_nondefault_indexes( _get_indexes_dict(ds.xindexes), not display_default_indexes ) sections = [] if show_dims: sections.append(dim_section(ds)) if node_coords: sections.append(coord_section(node_coords)) if root and inherited_coords: sections.append(inherited_coord_section(inherited_coords)) if ds.data_vars: sections.append(datavar_section(ds.data_vars)) if xindexes: sections.append(index_section(xindexes)) if ds.attrs: sections.append(attr_section(ds.attrs)) displayed_line_count = ( len(node.children) + int(bool(show_dims)) + int(bool(node_coords)) + len(node_coords) + int(root) * (int(bool(inherited_coords)) + len(inherited_coords)) + int(bool(ds.data_vars)) + len(ds.data_vars) + int(bool(xindexes)) + len(xindexes) + int(bool(ds.attrs)) + len(ds.attrs) ) return sections, displayed_line_count def _tree_item_count(node: DataTree, cache: dict[int, int]) -> int: if id(node) in cache: return cache[id(node)] node_ds = node.to_dataset(inherit=False) node_count = len(node_ds.variables) + len(node_ds.attrs) child_count = sum( _tree_item_count(child, cache) for child in node.children.values() ) total = node_count + child_count cache[id(node)] = total return total @dataclass class _DataTreeDisplay: node: DataTree sections: list[str] item_count: int collapsed: bool disabled: bool def _build_datatree_displays(tree: DataTree) -> dict[str, _DataTreeDisplay]: displayed_line_count = 0 html_line_count = 0 displays: dict[str, _DataTreeDisplay] = {} item_count_cache: dict[int, int] = {} root = True collapsed = False disabled = False html_limit = OPTIONS["display_max_html_elements"] uncollapsed_limit = OPTIONS["display_max_items"] too_many_items_section = collapsible_section( "Too many items to display (display_max_html_elements exceeded)", enabled=False, collapsed=True, span_grid=True, ) for node in tree.subtree: # breadth-first parent = node.parent if parent is not None: parent_display = displays.get(parent.path, None) if parent_display is not None and parent_display.disabled: break # no need to build display item_count = _tree_item_count(node, item_count_cache) sections, node_line_count = _datatree_node_sections(node, root) new_displayed_count = displayed_line_count + 
node_line_count new_html_count = html_line_count + node_line_count disabled = not root and (disabled or new_html_count > html_limit) if disabled: sections = [too_many_items_section] collapsed = True else: html_line_count = new_html_count collapsed = not root and (collapsed or new_displayed_count > uncollapsed_limit) if not collapsed: displayed_line_count = new_displayed_count displays[node.path] = _DataTreeDisplay( node, sections, item_count, collapsed, disabled ) root = False # If any node is collapsed, ensure its immediate siblings are also collapsed for display in displays.values(): if not display.disabled: if any( displays[child.path].collapsed for child in display.node.children.values() ): for child in display.node.children.values(): displays[child.path].collapsed = True return displays def _ellipsis_element() -> str: """Create an ellipsis element for truncated children.""" return ( "
    " "
    " "
    " "
    " "
    " "
    " ) def children_section( children: Mapping[str, DataTree], displays: dict[str, _DataTreeDisplay] ) -> str: child_elements = [] children_list = list(children.values()) nchildren = len(children_list) max_children = int(OPTIONS["display_max_children"]) if nchildren <= max_children: # Render all children for i, child in enumerate(children_list): is_last = i == nchildren - 1 child_elements.append(datatree_child_repr(child, displays, end=is_last)) else: # Truncate: show first ceil(max/2), ellipsis, last floor(max/2) first_n = ceil(max_children / 2) last_n = max_children - first_n child_elements.extend( datatree_child_repr(children_list[i], displays, end=False) for i in range(first_n) ) child_elements.append(_ellipsis_element()) child_elements.extend( datatree_child_repr(children_list[i], displays, end=(i == nchildren - 1)) for i in range(nchildren - last_n, nchildren) ) children_html = "".join(child_elements) return f"
    {children_html}
    " def datatree_sections( node: DataTree, displays: dict[str, _DataTreeDisplay] ) -> list[str]: display = displays[node.path] sections = [] if node.children and not display.disabled: sections.append(children_section(node.children, displays)) sections.extend(display.sections) return sections def datatree_child_repr( node: DataTree, displays: dict[str, _DataTreeDisplay], end: bool, ) -> str: # Wrap DataTree HTML representation with a tee to the left of it. # # Enclosing HTML tag is a
    with :code:`display: inline-grid` style. # # Turns: # [ title ] # | details | # |_____________| # # into (A): # |─ [ title ] # | | details | # | |_____________| # # or (B): # └─ [ title ] # | details | # |_____________| vline_height = "1.2em" if end else "100%" path = escape(node.path) display = displays[node.path] group_id = "group-" + str(uuid.uuid4()) collapsed = " checked" if display.collapsed else "" tip = " title='Expand/collapse group'" if not display.disabled else "" sections = datatree_sections(node, displays) sections_html = _sections_repr(sections) if sections else "" html = f"""
    {sections_html}
    """ return "".join(t.strip() for t in html.split("\n")) def datatree_repr(node: DataTree) -> str: displays = _build_datatree_displays(node) header_components = [ f"
    xarray.{type(node).__name__}
    ", ] if node.name is not None: name = escape(repr(node.name)) header_components.append(f"
    {name}
    ") sections = datatree_sections(node, displays) return _obj_repr(node, header_components, sections) python-xarray-2026.01.0/xarray/core/coordinate_transform.py0000664000175000017500000000657015136607163024105 0ustar alastairalastairfrom __future__ import annotations from collections.abc import Hashable, Iterable, Mapping from typing import Any, overload import numpy as np class CoordinateTransform: """Abstract coordinate transform with dimension & coordinate names. .. caution:: This API is experimental and subject to change. Please report any bugs or surprising behaviour you encounter. """ coord_names: tuple[Hashable, ...] dims: tuple[str, ...] dim_size: dict[str, int] dtype: Any def __init__( self, coord_names: Iterable[Hashable], dim_size: Mapping[str, int], dtype: Any = None, ): self.coord_names = tuple(coord_names) self.dims = tuple(dim_size) self.dim_size = dict(dim_size) if dtype is None: dtype = np.dtype(np.float64) self.dtype = dtype def forward(self, dim_positions: dict[str, Any]) -> dict[Hashable, Any]: """Perform grid -> world coordinate transformation. Parameters ---------- dim_positions : dict Grid location(s) along each dimension (axis). Returns ------- coord_labels : dict World coordinate labels. """ # TODO: cache the results in order to avoid re-computing # all labels when accessing the values of each coordinate one at a time raise NotImplementedError def reverse(self, coord_labels: dict[Hashable, Any]) -> dict[str, Any]: """Perform world -> grid coordinate reverse transformation. Parameters ---------- labels : dict World coordinate labels. Returns ------- dim_positions : dict Grid relative location(s) along each dimension (axis). """ raise NotImplementedError @overload def equals(self, other: CoordinateTransform) -> bool: ... @overload def equals( self, other: CoordinateTransform, *, exclude: frozenset[Hashable] | None = None ) -> bool: ... def equals(self, other: CoordinateTransform, **kwargs) -> bool: """Check equality with another CoordinateTransform of the same kind. Parameters ---------- other : CoordinateTransform The other CoordinateTransform object to compare with this object. exclude : frozenset of hashable, optional Dimensions excluded from checking. It is None by default, (i.e., when this method is not called in the context of alignment). For a n-dimensional transform this option allows a CoordinateTransform to optionally ignore any dimension in ``exclude`` when comparing ``self`` with ``other``. For a 1-dimensional transform this kwarg can be safely ignored, as this method is not called when all of the transform's dimensions are also excluded from alignment. """ raise NotImplementedError def generate_coords( self, dims: tuple[str, ...] 
| None = None ) -> dict[Hashable, Any]: """Compute all coordinate labels at once.""" if dims is None: dims = self.dims positions = np.meshgrid( *[np.arange(self.dim_size[d]) for d in dims], indexing="ij", ) dim_positions = {dim: positions[i] for i, dim in enumerate(dims)} return self.forward(dim_positions) python-xarray-2026.01.0/xarray/core/__init__.py0000664000175000017500000000000015136607163021400 0ustar alastairalastairpython-xarray-2026.01.0/xarray/core/parallel.py0000664000175000017500000006074615136607163021464 0ustar alastairalastairfrom __future__ import annotations import collections import itertools import operator from collections.abc import Callable, Hashable, Iterable, Mapping, Sequence from typing import TYPE_CHECKING, Any, Literal, TypedDict import numpy as np from xarray.core.coordinates import Coordinates from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset from xarray.core.indexes import Index from xarray.core.utils import is_dask_collection from xarray.core.variable import Variable from xarray.structure.alignment import align from xarray.structure.merge import merge if TYPE_CHECKING: from xarray.core.types import T_Xarray class ExpectedDict(TypedDict): shapes: dict[Hashable, int] coords: set[Hashable] data_vars: set[Hashable] def unzip(iterable): return zip(*iterable, strict=True) def assert_chunks_compatible(a: Dataset, b: Dataset): a = a.unify_chunks() b = b.unify_chunks() for dim in set(a.chunks).intersection(set(b.chunks)): if a.chunks[dim] != b.chunks[dim]: raise ValueError(f"Chunk sizes along dimension {dim!r} are not equal.") def check_result_variables( result: DataArray | Dataset, expected: ExpectedDict, kind: Literal["coords", "data_vars"], ): if kind == "coords": nice_str = "coordinate" elif kind == "data_vars": nice_str = "data" # check that coords and data variables are as expected missing = expected[kind] - set(getattr(result, kind)) if missing: raise ValueError( "Result from applying user function does not contain " f"{nice_str} variables {missing}." ) extra = set(getattr(result, kind)) - expected[kind] if extra: raise ValueError( "Result from applying user function has unexpected " f"{nice_str} variables {extra}." ) def dataset_to_dataarray(obj: Dataset) -> DataArray: if not isinstance(obj, Dataset): raise TypeError(f"Expected Dataset, got {type(obj)}") if len(obj.data_vars) > 1: raise TypeError( "Trying to convert Dataset with more than one data variable to DataArray" ) return next(iter(obj.data_vars.values())) def dataarray_to_dataset(obj: DataArray) -> Dataset: # only using _to_temp_dataset would break # func = lambda x: x.to_dataset() # since that relies on preserving name. if obj.name is None: dataset = obj._to_temp_dataset() else: dataset = obj.to_dataset() return dataset def make_meta(obj): """If obj is a DataArray or Dataset, return a new object of the same type and with the same variables and dtypes, but where all variables have size 0 and numpy backend. If obj is neither a DataArray nor Dataset, return it unaltered. 
""" if isinstance(obj, DataArray): obj_array = obj obj = dataarray_to_dataset(obj) elif isinstance(obj, Dataset): obj_array = None else: return obj from dask.array.utils import meta_from_array meta = Dataset() for name, variable in obj.variables.items(): meta_obj = meta_from_array(variable.data, ndim=variable.ndim) meta[name] = (variable.dims, meta_obj, variable.attrs) meta.attrs = obj.attrs meta = meta.set_coords(obj.coords) if obj_array is not None: return dataset_to_dataarray(meta) return meta def infer_template( func: Callable[..., T_Xarray], obj: DataArray | Dataset, *args, **kwargs ) -> T_Xarray: """Infer return object by running the function on meta objects.""" meta_args = [make_meta(arg) for arg in (obj,) + args] try: template = func(*meta_args, **kwargs) except Exception as e: raise Exception( "Cannot infer object returned from running user provided function. " "Please supply the 'template' kwarg to map_blocks." ) from e if not isinstance(template, Dataset | DataArray): raise TypeError( "Function must return an xarray DataArray or Dataset. Instead it returned " f"{type(template)}" ) return template def make_dict(x: DataArray | Dataset) -> dict[Hashable, Any]: """Map variable name to numpy(-like) data (Dataset.to_dict() is too complicated). """ if isinstance(x, DataArray): x = x._to_temp_dataset() return {k: v.data for k, v in x.variables.items()} def _get_chunk_slicer(dim: Hashable, chunk_index: Mapping, chunk_bounds: Mapping): if dim in chunk_index: which_chunk = chunk_index[dim] return slice(chunk_bounds[dim][which_chunk], chunk_bounds[dim][which_chunk + 1]) return slice(None) def subset_dataset_to_block( graph: dict, gname: str, dataset: Dataset, input_chunk_bounds, chunk_index ): """ Creates a task that subsets an xarray dataset to a block determined by chunk_index. Block extents are determined by input_chunk_bounds. Also subtasks that subset the constituent variables of a dataset. """ import dask # this will become [[name1, variable1], # [name2, variable2], # ...] # which is passed to dict and then to Dataset data_vars = [] coords = [] chunk_tuple = tuple(chunk_index.values()) chunk_dims_set = set(chunk_index) variable: Variable for name, variable in dataset.variables.items(): # make a task that creates tuple of (dims, chunk) if dask.is_dask_collection(variable.data): # get task name for chunk chunk = ( variable.data.name, *tuple(chunk_index[dim] for dim in variable.dims), ) chunk_variable_task = (f"{name}-{gname}-{chunk[0]!r}",) + chunk_tuple graph[chunk_variable_task] = ( tuple, [variable.dims, chunk, variable.attrs], ) else: assert name in dataset.dims or variable.ndim == 0 # non-dask array possibly with dimensions chunked on other variables # index into variable appropriately subsetter = { dim: _get_chunk_slicer(dim, chunk_index, input_chunk_bounds) for dim in variable.dims } if set(variable.dims) < chunk_dims_set: this_var_chunk_tuple = tuple(chunk_index[dim] for dim in variable.dims) else: this_var_chunk_tuple = chunk_tuple chunk_variable_task = ( f"{name}-{gname}-{dask.base.tokenize(subsetter)}", ) + this_var_chunk_tuple # We are including a dimension coordinate, # minimize duplication by not copying it in the graph for every chunk. 
if variable.ndim == 0 or chunk_variable_task not in graph: subset = variable.isel(subsetter) graph[chunk_variable_task] = ( tuple, [subset.dims, subset._data, subset.attrs], ) # this task creates dict mapping variable name to above tuple if name in dataset._coord_names: coords.append([name, chunk_variable_task]) else: data_vars.append([name, chunk_variable_task]) return (Dataset, (dict, data_vars), (dict, coords), dataset.attrs) def map_blocks( func: Callable[..., T_Xarray], obj: DataArray | Dataset, args: Sequence[Any] = (), kwargs: Mapping[str, Any] | None = None, template: DataArray | Dataset | None = None, ) -> T_Xarray: """Apply a function to each block of a DataArray or Dataset. .. warning:: This function is experimental and its signature may change. Parameters ---------- func : callable User-provided function that accepts a DataArray or Dataset as its first parameter ``obj``. The function will receive a subset or 'block' of ``obj`` (see below), corresponding to one chunk along each chunked dimension. ``func`` will be executed as ``func(subset_obj, *subset_args, **kwargs)``. This function must return either a single DataArray or a single Dataset. This function cannot add a new chunked dimension. obj : DataArray, Dataset Passed to the function as its first argument, one block at a time. args : sequence Passed to func after unpacking and subsetting any xarray objects by blocks. xarray objects in args must be aligned with obj, otherwise an error is raised. kwargs : mapping Passed verbatim to func after unpacking. xarray objects, if any, will not be subset to blocks. Passing dask collections in kwargs is not allowed. template : DataArray or Dataset, optional xarray object representing the final result after compute is called. If not provided, the function will be first run on mocked-up data, that looks like ``obj`` but has sizes 0, to determine properties of the returned object such as dtype, variable names, attributes, new dimensions and new indexes (if any). ``template`` must be provided if the function changes the size of existing dimensions. When provided, ``attrs`` on variables in `template` are copied over to the result. Any ``attrs`` set by ``func`` will be ignored. Returns ------- obj : same as obj A single DataArray or Dataset with dask backend, reassembled from the outputs of the function. Notes ----- This function is designed for when ``func`` needs to manipulate a whole xarray object subset to each block. Each block is loaded into memory. In the more common case where ``func`` can work on numpy arrays, it is recommended to use ``apply_ufunc``. If none of the variables in ``obj`` is backed by dask arrays, calling this function is equivalent to calling ``func(obj, *args, **kwargs)``. See Also -------- dask.array.map_blocks, xarray.apply_ufunc, xarray.Dataset.map_blocks xarray.DataArray.map_blocks Examples -------- Calculate an anomaly from climatology using ``.groupby()``. Using ``xr.map_blocks()`` allows for parallel operations with knowledge of ``xarray``, its indices, and its methods like ``.groupby()``. >>> def calculate_anomaly(da, groupby_type="time.month"): ... gb = da.groupby(groupby_type) ... clim = gb.mean(dim="time") ... return gb - clim ... >>> time = xr.date_range("1990-01", "1992-01", freq="ME", use_cftime=True) >>> month = xr.DataArray(time.month, coords={"time": time}, dims=["time"]) >>> np.random.seed(123) >>> array = xr.DataArray( ... np.random.rand(len(time)), ... dims=["time"], ... coords={"time": time, "month": month}, ... 
).chunk() >>> array.map_blocks(calculate_anomaly, template=array).compute() Size: 192B array([ 0.12894847, 0.11323072, -0.0855964 , -0.09334032, 0.26848862, 0.12382735, 0.22460641, 0.07650108, -0.07673453, -0.22865714, -0.19063865, 0.0590131 , -0.12894847, -0.11323072, 0.0855964 , 0.09334032, -0.26848862, -0.12382735, -0.22460641, -0.07650108, 0.07673453, 0.22865714, 0.19063865, -0.0590131 ]) Coordinates: * time (time) object 192B 1990-01-31 00:00:00 ... 1991-12-31 00:00:00 month (time) int64 192B 1 2 3 4 5 6 7 8 9 10 ... 3 4 5 6 7 8 9 10 11 12 Note that one must explicitly use ``args=[]`` and ``kwargs={}`` to pass arguments to the function being applied in ``xr.map_blocks()``: >>> array.map_blocks( ... calculate_anomaly, ... kwargs={"groupby_type": "time.year"}, ... template=array, ... ) # doctest: +ELLIPSIS Size: 192B dask.array<-calculate_anomaly, shape=(24,), dtype=float64, chunksize=(24,), chunktype=numpy.ndarray> Coordinates: * time (time) object 192B 1990-01-31 00:00:00 ... 1991-12-31 00:00:00 month (time) int64 192B dask.array """ def _wrapper( func: Callable, args: list, kwargs: dict, arg_is_array: Iterable[bool], expected: ExpectedDict, expected_indexes: dict[Hashable, Index], ): """ Wrapper function that receives datasets in args; converts to dataarrays when necessary; passes these to the user function `func` and checks returned objects for expected shapes/sizes/etc. """ converted_args = [ dataset_to_dataarray(arg) if is_array else arg for is_array, arg in zip(arg_is_array, args, strict=True) ] result = func(*converted_args, **kwargs) merged_coordinates = merge( [arg.coords for arg in args if isinstance(arg, Dataset | DataArray)], join="exact", compat="override", ).coords # check all dims are present missing_dimensions = set(expected["shapes"]) - set(result.sizes) if missing_dimensions: raise ValueError( f"Dimensions {missing_dimensions} missing on returned object." ) # check that index lengths and values are as expected for name, index in result._indexes.items(): if ( name in expected["shapes"] and result.sizes[name] != expected["shapes"][name] ): raise ValueError( f"Received dimension {name!r} of length {result.sizes[name]}. " f"Expected length {expected['shapes'][name]}." ) # ChainMap wants MutableMapping, but xindexes is Mapping merged_indexes = collections.ChainMap( expected_indexes, merged_coordinates.xindexes, # type: ignore[arg-type] ) expected_index = merged_indexes.get(name, None) if expected_index is not None and not index.equals(expected_index): raise ValueError( f"Expected index {name!r} to be {expected_index!r}. Received {index!r} instead." ) # check that all expected variables were returned check_result_variables(result, expected, "coords") if isinstance(result, Dataset): check_result_variables(result, expected, "data_vars") return make_dict(result) if template is not None and not isinstance(template, DataArray | Dataset): raise TypeError( f"template must be a DataArray or Dataset. Received {type(template).__name__} instead." ) if not isinstance(args, Sequence): raise TypeError("args must be a sequence (for example, a list or tuple).") if kwargs is None: kwargs = {} elif not isinstance(kwargs, Mapping): raise TypeError("kwargs must be a mapping (for example, a dict)") for value in kwargs.values(): if is_dask_collection(value): raise TypeError( "Cannot pass dask collections in kwargs yet. Please compute or " "load values before passing to map_blocks." 
) if not is_dask_collection(obj): return func(obj, *args, **kwargs) try: import dask import dask.array from dask.base import tokenize from dask.highlevelgraph import HighLevelGraph except ImportError: pass all_args = [obj] + list(args) is_xarray = [isinstance(arg, Dataset | DataArray) for arg in all_args] is_array = [isinstance(arg, DataArray) for arg in all_args] # there should be a better way to group this. partition? xarray_indices, xarray_objs = unzip( (index, arg) for index, arg in enumerate(all_args) if is_xarray[index] ) others = [ (index, arg) for index, arg in enumerate(all_args) if not is_xarray[index] ] # all xarray objects must be aligned. This is consistent with apply_ufunc. aligned = align(*xarray_objs, join="exact") xarray_objs = tuple( dataarray_to_dataset(arg) if isinstance(arg, DataArray) else arg for arg in aligned ) # rechunk any numpy variables appropriately xarray_objs = tuple(arg.chunk(arg.chunksizes) for arg in xarray_objs) merged_coordinates = merge( [arg.coords for arg in aligned], join="exact", compat="override", ).coords _, npargs = unzip( sorted( list(zip(xarray_indices, xarray_objs, strict=True)) + others, key=lambda x: x[0], ) ) # check that chunk sizes are compatible input_chunks = dict(npargs[0].chunks) for arg in xarray_objs[1:]: assert_chunks_compatible(npargs[0], arg) input_chunks.update(arg.chunks) coordinates: Coordinates if template is None: # infer template by providing zero-shaped arrays template = infer_template(func, aligned[0], *args, **kwargs) template_coords = set(template.coords) preserved_coord_vars = template_coords & set(merged_coordinates) new_coord_vars = template_coords - set(merged_coordinates) preserved_coords = merged_coordinates.to_dataset()[preserved_coord_vars] # preserved_coords contains all coordinates variables that share a dimension # with any index variable in preserved_indexes # Drop any unneeded vars in a second pass, this is required for e.g. # if the mapped function were to drop a non-dimension coordinate variable. preserved_coords = preserved_coords.drop_vars( tuple(k for k in preserved_coords.variables if k not in template_coords) ) coordinates = merge( (preserved_coords, template.coords.to_dataset()[new_coord_vars]), # FIXME: this should be join="exact", but breaks a test join="outer", compat="override", ).coords output_chunks: Mapping[Hashable, tuple[int, ...]] = { dim: input_chunks[dim] for dim in template.dims if dim in input_chunks } else: # template xarray object has been provided with proper sizes and chunk shapes coordinates = template.coords output_chunks = template.chunksizes if not output_chunks: raise ValueError( "Provided template has no dask arrays. " " Please construct a template with appropriately chunked dask arrays." ) new_indexes = set(template.xindexes) - set(merged_coordinates) modified_indexes = set( name for name, xindex in coordinates.xindexes.items() if not xindex.equals(merged_coordinates.xindexes.get(name, None)) ) for dim in output_chunks: if dim in input_chunks and len(input_chunks[dim]) != len(output_chunks[dim]): raise ValueError( "map_blocks requires that one block of the input maps to one block of output. " f"Expected number of output chunks along dimension {dim!r} to be {len(input_chunks[dim])}. " f"Received {len(output_chunks[dim])} instead. Please provide template if not provided, or " "fix the provided template." 
) if isinstance(template, DataArray): result_is_array = True template_name = template.name template = template._to_temp_dataset() elif isinstance(template, Dataset): result_is_array = False else: raise TypeError( f"func output must be DataArray or Dataset; got {type(template)}" ) # We're building a new HighLevelGraph hlg. We'll have one new layer # for each variable in the dataset, which is the result of the # func applied to the values. graph: dict[Any, Any] = {} new_layers: collections.defaultdict[str, dict[Any, Any]] = collections.defaultdict( dict ) gname = f"{dask.utils.funcname(func)}-{dask.base.tokenize(npargs[0], args, kwargs)}" # map dims to list of chunk indexes ichunk = {dim: range(len(chunks_v)) for dim, chunks_v in input_chunks.items()} # mapping from chunk index to slice bounds input_chunk_bounds = { dim: np.cumsum((0,) + chunks_v) for dim, chunks_v in input_chunks.items() } output_chunk_bounds = { dim: np.cumsum((0,) + chunks_v) for dim, chunks_v in output_chunks.items() } computed_variables = set(template.variables) - set(coordinates.indexes) # iterate over all possible chunk combinations for chunk_tuple in itertools.product(*ichunk.values()): # mapping from dimension name to chunk index chunk_index = dict(zip(ichunk.keys(), chunk_tuple, strict=True)) blocked_args = [ ( subset_dataset_to_block( graph, gname, arg, input_chunk_bounds, chunk_index ) if isxr else arg ) for isxr, arg in zip(is_xarray, npargs, strict=True) ] # only include new or modified indexes to minimize duplication of data indexes = { dim: coordinates.xindexes[dim][ _get_chunk_slicer(dim, chunk_index, output_chunk_bounds) ] for dim in (new_indexes | modified_indexes) } tokenized_indexes: dict[Hashable, str] = {} for k, v in indexes.items(): tokenized_v = tokenize(v) graph[f"{k}-coordinate-{tokenized_v}"] = v tokenized_indexes[k] = f"{k}-coordinate-{tokenized_v}" # raise nice error messages in _wrapper expected: ExpectedDict = { # input chunk 0 along a dimension maps to output chunk 0 along the same dimension # even if length of dimension is changed by the applied function "shapes": { k: output_chunks[k][v] for k, v in chunk_index.items() if k in output_chunks }, "data_vars": set(template.data_vars.keys()), "coords": set(template.coords.keys()), } from_wrapper = (gname,) + chunk_tuple graph[from_wrapper] = ( _wrapper, func, blocked_args, kwargs, is_array, expected, (dict, [[k, v] for k, v in tokenized_indexes.items()]), ) # mapping from variable name to dask graph key var_key_map: dict[Hashable, str] = {} for name in computed_variables: variable = template.variables[name] gname_l = f"{name}-{gname}" var_key_map[name] = gname_l # unchunked dimensions in the input have one chunk in the result # output can have new dimensions with exactly one chunk key: tuple[Any, ...] = (gname_l,) + tuple( chunk_index.get(dim, 0) for dim in variable.dims ) # We're adding multiple new layers to the graph: # The first new layer is the result of the computation on # the array. # Then we add one layer per variable, which extracts the # result for that variable, and depends on just the first new # layer. new_layers[gname_l][key] = (operator.getitem, from_wrapper, name) hlg = HighLevelGraph.from_collections( gname, graph, dependencies=[arg for arg in npargs if dask.is_dask_collection(arg)], ) # This adds in the getitems for each variable in the dataset. 
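    # Editorial note: every per-variable layer built above depends only on the
    # single wrapper layer ``gname``; the merged ``dependencies`` mapping below
    # records exactly that, which should let dask cull the layers of variables
    # that are never computed.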
hlg = HighLevelGraph( {**hlg.layers, **new_layers}, dependencies={ **hlg.dependencies, **{name: {gname} for name in new_layers.keys()}, }, ) result = Dataset(coords=coordinates, attrs=template.attrs) for index in result._indexes: result[index].attrs = template[index].attrs result[index].encoding = template[index].encoding for name, gname_l in var_key_map.items(): dims = template[name].dims var_chunks = [] for dim in dims: if dim in output_chunks: var_chunks.append(output_chunks[dim]) elif dim in result._indexes: var_chunks.append((result.sizes[dim],)) elif dim in template.dims: # new unindexed dimension var_chunks.append((template.sizes[dim],)) data = dask.array.Array( hlg, name=gname_l, chunks=var_chunks, dtype=template[name].dtype ) result[name] = (dims, data, template[name].attrs) result[name].encoding = template[name].encoding result = result.set_coords(template._coord_names) if result_is_array: da = dataset_to_dataarray(result) da.name = template_name return da # type: ignore[return-value] return result # type: ignore[return-value] python-xarray-2026.01.0/xarray/groupers.py0000664000175000017500000012012715136607163020574 0ustar alastairalastair""" This module provides Grouper objects that encapsulate the "factorization" process - conversion of value we are grouping by to integer codes (one per group). """ from __future__ import annotations import datetime import functools import itertools import operator from abc import ABC, abstractmethod from collections import defaultdict from collections.abc import Callable, Hashable, Mapping, Sequence from dataclasses import dataclass, field from functools import partial from itertools import chain, pairwise from typing import TYPE_CHECKING, Any, Literal, cast import numpy as np import pandas as pd from numpy.typing import ArrayLike from xarray.coding.cftime_offsets import BaseCFTimeOffset, _new_to_legacy_freq from xarray.coding.cftimeindex import CFTimeIndex from xarray.compat.toolzcompat import sliding_window from xarray.computation.apply_ufunc import apply_ufunc from xarray.core.common import ( _contains_cftime_datetimes, _contains_datetime_like_objects, ) from xarray.core.coordinates import Coordinates, coordinates_from_variable from xarray.core.dataarray import DataArray from xarray.core.duck_array_ops import array_all, isnull from xarray.core.formatting import first_n_items from xarray.core.groupby import T_Group, _DummyGroup from xarray.core.indexes import safe_cast_to_index from xarray.core.resample_cftime import CFTimeGrouper from xarray.core.types import ( Bins, CFTimeDatetime, DatetimeLike, GroupIndices, PDDatetimeUnitOptions, ResampleCompatible, Self, SideOptions, ) from xarray.core.variable import Variable from xarray.namedarray.pycompat import is_chunked_array __all__ = [ "BinGrouper", "EncodedGroups", "Grouper", "Resampler", "SeasonGrouper", "SeasonResampler", "TimeResampler", "UniqueGrouper", ] RESAMPLE_DIM = "__resample_dim__" def _datetime64_via_timestamp(unit: PDDatetimeUnitOptions, **kwargs) -> np.datetime64: """Construct a numpy.datetime64 object through the pandas.Timestamp constructor with a specific resolution.""" # TODO: when pandas 3 is our minimum requirement we will no longer need to # convert to np.datetime64 values prior to passing to the DatetimeIndex # constructor. With pandas < 3 the DatetimeIndex constructor does not # infer the resolution from the resolution of the Timestamp values. 
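    # Illustrative example (editorial): _datetime64_via_timestamp("s", year=2000, month=1, day=2)
    # should yield np.datetime64("2000-01-02T00:00:00") at second resolution,
    # because the Timestamp is coerced to the requested unit before conversion.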
return pd.Timestamp(**kwargs).as_unit(unit).to_numpy() @dataclass(init=False) class EncodedGroups: """ Dataclass for storing intermediate values for GroupBy operation. Returned by the ``factorize`` method on Grouper objects. Attributes ---------- codes : DataArray Same shape as the DataArray to group by. Values consist of a unique integer code for each group. full_index : pd.Index Pandas Index for the group coordinate containing unique group labels. This can differ from ``unique_coord`` in the case of resampling and binning, where certain groups in the output need not be present in the input. group_indices : tuple of int or slice or list of int, optional List of indices of array elements belonging to each group. Inferred if not provided. unique_coord : Variable, optional Unique group values present in dataset. Inferred if not provided """ codes: DataArray full_index: pd.Index group_indices: GroupIndices = field(init=False, repr=False) unique_coord: Variable | _DummyGroup = field(init=False, repr=False) coords: Coordinates = field(init=False, repr=False) def __init__( self, codes: DataArray, full_index: pd.Index, group_indices: GroupIndices | None = None, unique_coord: Variable | _DummyGroup | None = None, coords: Coordinates | None = None, ): from xarray.core.groupby import _codes_to_group_indices assert isinstance(codes, DataArray) if codes.name is None: raise ValueError("Please set a name on the array you are grouping by.") self.codes = codes assert isinstance(full_index, pd.Index) self.full_index = full_index if group_indices is None: if not is_chunked_array(codes.data): self.group_indices = tuple( g for g in _codes_to_group_indices( codes.data.ravel(), len(full_index) ) if g ) else: # We will not use this when grouping by a chunked array self.group_indices = tuple() else: self.group_indices = group_indices if unique_coord is None: unique_codes = np.sort(pd.unique(codes.data)) # Skip the -1 sentinel unique_codes = unique_codes[unique_codes >= 0] unique_values = full_index[unique_codes] self.unique_coord = Variable( dims=codes.name, data=unique_values, attrs=codes.attrs ) else: self.unique_coord = unique_coord if coords is None: assert not isinstance(self.unique_coord, _DummyGroup) self.coords = coordinates_from_variable(self.unique_coord) else: self.coords = coords class Grouper(ABC): """Abstract base class for Grouper objects that allow specializing GroupBy instructions.""" @abstractmethod def factorize(self, group: T_Group) -> EncodedGroups: """ Creates intermediates necessary for GroupBy. Parameters ---------- group : DataArray DataArray we are grouping by. Returns ------- EncodedGroups """ pass @abstractmethod def reset(self) -> Self: """ Creates a new version of this Grouper clearing any caches. """ pass class Resampler(Grouper): """ Abstract base class for Grouper objects that allow specializing resampling-type GroupBy instructions. Currently only used for TimeResampler, but could be used for SpaceResampler in the future. """ def compute_chunks(self, variable: Variable, *, dim: Hashable) -> tuple[int, ...]: """ Compute chunk sizes for this resampler. This method should be implemented by subclasses to provide appropriate chunking behavior for their specific resampling strategy. Parameters ---------- variable : Variable The variable being chunked. dim : Hashable The name of the dimension being chunked. Returns ------- tuple[int, ...] A tuple of chunk sizes for the dimension. 
""" raise NotImplementedError("Subclasses must implement compute_chunks method") @dataclass class UniqueGrouper(Grouper): """ Grouper object for grouping by a categorical variable. Parameters ---------- labels: array-like, optional Group labels to aggregate on. This is required when grouping by a chunked array type (e.g. dask or cubed) since it is used to construct the coordinate on the output. Grouped operations will only be run on the specified group labels. Any group that is not present in ``labels`` will be ignored. """ _group_as_index: pd.Index | None = field(default=None, repr=False, init=False) labels: ArrayLike | None = field(default=None) @property def group_as_index(self) -> pd.Index: """Caches the group DataArray as a pandas Index.""" if self._group_as_index is None: if self.group.ndim == 1: self._group_as_index = self.group.to_index() else: self._group_as_index = pd.Index(np.array(self.group).ravel()) return self._group_as_index def reset(self) -> Self: return type(self)() def factorize(self, group: T_Group) -> EncodedGroups: self.group = group if is_chunked_array(group.data) and self.labels is None: raise ValueError( "When grouping by a dask array, `labels` must be passed using " "a UniqueGrouper object." ) if self.labels is not None: return self._factorize_given_labels(group) index = self.group_as_index is_unique_and_monotonic = isinstance(self.group, _DummyGroup) or ( index.is_unique and (index.is_monotonic_increasing or index.is_monotonic_decreasing) ) is_dimension = self.group.dims == (self.group.name,) can_squeeze = is_dimension and is_unique_and_monotonic if can_squeeze: return self._factorize_dummy() else: return self._factorize_unique() def _factorize_given_labels(self, group: T_Group) -> EncodedGroups: codes = apply_ufunc( _factorize_given_labels, group, kwargs={"labels": self.labels}, dask="parallelized", output_dtypes=[np.int64], keep_attrs=True, ) return EncodedGroups( codes=codes, full_index=pd.Index(self.labels), # type: ignore[arg-type] unique_coord=Variable( dims=codes.name, data=self.labels, attrs=self.group.attrs, ), ) def _factorize_unique(self) -> EncodedGroups: # look through group to find the unique values sort = not isinstance(self.group_as_index, pd.MultiIndex) unique_values, codes_ = unique_value_groups(self.group_as_index, sort=sort) if array_all(codes_ == -1): raise ValueError( "Failed to group data. Are you grouping by a variable that is all NaN?" 
) codes = self.group.copy(data=codes_.reshape(self.group.shape), deep=False) unique_coord = Variable( dims=codes.name, data=unique_values, attrs=self.group.attrs ) full_index = ( unique_values if isinstance(unique_values, pd.MultiIndex) else pd.Index(unique_values) ) return EncodedGroups( codes=codes, full_index=full_index, unique_coord=unique_coord, coords=coordinates_from_variable(unique_coord), ) def _factorize_dummy(self) -> EncodedGroups: size = self.group.size # no need to factorize # use slices to do views instead of fancy indexing # equivalent to: group_indices = group_indices.reshape(-1, 1) group_indices: GroupIndices = tuple(slice(i, i + 1) for i in range(size)) size_range = np.arange(size) full_index: pd.Index unique_coord: _DummyGroup | Variable if isinstance(self.group, _DummyGroup): codes = self.group.to_dataarray().copy(data=size_range) unique_coord = self.group full_index = pd.RangeIndex(self.group.size) coords = Coordinates() else: codes = self.group.copy(data=size_range, deep=False) unique_coord = self.group.variable.to_base_variable() full_index = self.group_as_index if isinstance(full_index, pd.MultiIndex): coords = Coordinates.from_pandas_multiindex( full_index, dim=self.group.name ) else: if TYPE_CHECKING: assert isinstance(unique_coord, Variable) coords = coordinates_from_variable(unique_coord) return EncodedGroups( codes=codes, group_indices=group_indices, full_index=full_index, unique_coord=unique_coord, coords=coords, ) @dataclass class BinGrouper(Grouper): """ Grouper object for binning numeric data. Attributes ---------- bins : int, sequence of scalars, or IntervalIndex The criteria to bin by. * int : Defines the number of equal-width bins in the range of `x`. The range of `x` is extended by .1% on each side to include the minimum and maximum values of `x`. * sequence of scalars : Defines the bin edges allowing for non-uniform width. No extension of the range of `x` is done. * IntervalIndex : Defines the exact bins to be used. Note that IntervalIndex for `bins` must be non-overlapping. right : bool, default True Indicates whether `bins` includes the rightmost edge or not. If ``right == True`` (the default), then the `bins` ``[1, 2, 3, 4]`` indicate (1,2], (2,3], (3,4]. This argument is ignored when `bins` is an IntervalIndex. labels : array or False, default None Specifies the labels for the returned bins. Must be the same length as the resulting bins. If False, returns only integer indicators of the bins. This affects the type of the output container (see below). This argument is ignored when `bins` is an IntervalIndex. If True, raises an error. retbins : bool, default False Whether to return the bins or not. Useful when bins is provided as a scalar. precision : int, default 3 The precision at which to store and display the bins labels. include_lowest : bool, default False Whether the first interval should be left-inclusive or not. duplicates : {"raise", "drop"}, default: "raise" If bin edges are not unique, raise ValueError or drop non-uniques. 
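    Examples
    --------
    Illustrative sketch (assumes a 1-D DataArray with an ``x`` coordinate;
    outputs omitted):

    >>> import numpy as np
    >>> import xarray as xr
    >>> da = xr.DataArray(np.arange(4.0), dims="x", coords={"x": [0.0, 1.0, 2.0, 3.0]})
    >>> binned = da.groupby(x=BinGrouper(bins=[0, 1.5, 3])).mean()  # doctest: +SKIP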
""" bins: Bins # The rest are copied from pandas right: bool = True labels: Any = None precision: int = 3 include_lowest: bool = False duplicates: Literal["raise", "drop"] = "raise" def reset(self) -> Self: return type(self)( bins=self.bins, right=self.right, labels=self.labels, precision=self.precision, include_lowest=self.include_lowest, duplicates=self.duplicates, ) def __post_init__(self) -> None: if array_all(isnull(self.bins)): raise ValueError("All bin edges are NaN.") def _cut(self, data): return pd.cut( np.asarray(data).ravel(), bins=self.bins, right=self.right, labels=self.labels, precision=self.precision, include_lowest=self.include_lowest, duplicates=self.duplicates, retbins=True, ) def _pandas_cut_wrapper(self, data, **kwargs): binned, bins = self._cut(data) if isinstance(self.bins, int): # we are running eagerly, update self.bins with actual edges instead self.bins = bins return binned.codes.reshape(data.shape) def factorize(self, group: T_Group) -> EncodedGroups: if isinstance(group, _DummyGroup): group = DataArray(group.data, dims=group.dims, name=group.name) by_is_chunked = is_chunked_array(group.data) if isinstance(self.bins, int) and by_is_chunked: raise ValueError( f"Bin edges must be provided when grouping by chunked arrays. Received {self.bins=!r} instead" ) codes = apply_ufunc( self._pandas_cut_wrapper, group, dask="parallelized", keep_attrs=True, output_dtypes=[np.int64], ) if not by_is_chunked and array_all(codes == -1): raise ValueError( f"None of the data falls within bins with edges {self.bins!r}" ) new_dim_name = f"{group.name}_bins" codes.name = new_dim_name # This seems silly, but it lets us have Pandas handle the complexity # of `labels`, `precision`, and `include_lowest`, even when group is a chunked array # Pandas ignores labels when IntervalIndex is passed if self.labels is None or not isinstance(self.bins, pd.IntervalIndex): dummy, _ = self._cut(np.array([0]).astype(group.dtype)) full_index = dummy.categories else: full_index = pd.Index(self.labels) if not by_is_chunked: uniques = np.sort(pd.unique(codes.data.ravel())) unique_values = full_index[uniques[uniques != -1]] else: unique_values = full_index unique_coord = Variable( dims=new_dim_name, data=unique_values, attrs=group.attrs ) return EncodedGroups( codes=codes, full_index=full_index, unique_coord=unique_coord, coords=coordinates_from_variable(unique_coord), ) @dataclass(repr=False) class TimeResampler(Resampler): """ Grouper object specialized to resampling the time coordinate. Attributes ---------- freq : str, datetime.timedelta, pandas.Timestamp, or pandas.DateOffset Frequency to resample to. See `Pandas frequency aliases `_ for a list of possible values. closed : {"left", "right"}, optional Side of each interval to treat as closed. label : {"left", "right"}, optional Side of each interval to use for labeling. origin : {'epoch', 'start', 'start_day', 'end', 'end_day'}, pandas.Timestamp, datetime.datetime, numpy.datetime64, or cftime.datetime, default 'start_day' The datetime on which to adjust the grouping. The timezone of origin must match the timezone of the index. 
If a datetime is not used, these values are also supported: - 'epoch': `origin` is 1970-01-01 - 'start': `origin` is the first value of the timeseries - 'start_day': `origin` is the first day at midnight of the timeseries - 'end': `origin` is the last value of the timeseries - 'end_day': `origin` is the ceiling midnight of the last day offset : pd.Timedelta, datetime.timedelta, or str, default is None An offset timedelta added to the origin. """ freq: ResampleCompatible closed: SideOptions | None = field(default=None) label: SideOptions | None = field(default=None) origin: str | DatetimeLike = field(default="start_day") offset: pd.Timedelta | datetime.timedelta | str | None = field(default=None) index_grouper: CFTimeGrouper | pd.Grouper = field(init=False, repr=False) group_as_index: pd.Index = field(init=False, repr=False) def reset(self) -> Self: return type(self)( freq=self.freq, closed=self.closed, label=self.label, origin=self.origin, offset=self.offset, ) def _init_properties(self, group: T_Group) -> None: group_as_index = safe_cast_to_index(group) offset = self.offset if not group_as_index.is_monotonic_increasing: # TODO: sort instead of raising an error raise ValueError("Index must be monotonic for resampling") if isinstance(group_as_index, CFTimeIndex): self.index_grouper = CFTimeGrouper( freq=self.freq, closed=self.closed, label=self.label, origin=self.origin, offset=offset, ) else: if isinstance(self.freq, BaseCFTimeOffset): raise ValueError( "'BaseCFTimeOffset' resample frequencies are only supported " "when resampling a 'CFTimeIndex'" ) self.index_grouper = pd.Grouper( # TODO remove once requiring pandas >= 2.2 freq=_new_to_legacy_freq(self.freq), closed=self.closed, label=self.label, origin=self.origin, offset=offset, ) self.group_as_index = group_as_index def _get_index_and_items(self) -> tuple[pd.Index, pd.Series, np.ndarray]: first_items, codes = self.first_items() full_index = first_items.index if first_items.isnull().any(): first_items = first_items.dropna() full_index = full_index.rename("__resample_dim__") return full_index, first_items, codes def first_items(self) -> tuple[pd.Series, np.ndarray]: if isinstance(self.index_grouper, CFTimeGrouper): return self.index_grouper.first_items( cast(CFTimeIndex, self.group_as_index) ) else: s = pd.Series(np.arange(self.group_as_index.size), self.group_as_index) grouped = s.groupby(self.index_grouper) first_items = grouped.first() counts = grouped.count() # This way we generate codes for the final output index: full_index. # So for _flox_reduce we avoid one reindex and copy by avoiding # _maybe_reindex codes = np.repeat(np.arange(len(first_items)), counts) return first_items, codes def factorize(self, group: T_Group) -> EncodedGroups: self._init_properties(group) full_index, first_items, codes_ = self._get_index_and_items() sbins = first_items.values.astype(np.int64) group_indices: GroupIndices = tuple( list(itertools.starmap(slice, pairwise(sbins))) + [slice(sbins[-1], None)] ) unique_coord = Variable( dims=group.name, data=first_items.index, attrs=group.attrs ) codes = group.copy(data=codes_.reshape(group.shape), deep=False) return EncodedGroups( codes=codes, group_indices=group_indices, full_index=full_index, unique_coord=unique_coord, coords=coordinates_from_variable(unique_coord), ) def compute_chunks(self, variable: Variable, *, dim: Hashable) -> tuple[int, ...]: """ Compute chunk sizes for this time resampler. 
This method is used during chunking operations to determine appropriate chunk sizes for the given variable when using this resampler. Parameters ---------- name : Hashable The name of the dimension being chunked. variable : Variable The variable being chunked. Returns ------- tuple[int, ...] A tuple of chunk sizes for the dimension. """ if not _contains_datetime_like_objects(variable): raise ValueError( f"Computing chunks with {type(self)!r} only supported for datetime variables. " f"Received variable with dtype {variable.dtype!r} instead." ) chunks = ( DataArray( np.ones(variable.shape, dtype=int), dims=(dim,), coords={dim: variable}, ) .resample({dim: self}) .sum() ) # When bins (binning) or time periods are missing (resampling) # we can end up with NaNs. Drop them. if chunks.dtype.kind == "f": chunks = chunks.dropna(dim).astype(int) chunks_tuple: tuple[int, ...] = tuple(chunks.data.tolist()) return chunks_tuple def _factorize_given_labels(data: np.ndarray, labels: np.ndarray) -> np.ndarray: # Copied from flox sorter = np.argsort(labels) is_sorted = array_all(sorter == np.arange(sorter.size)) codes = np.searchsorted(labels, data, sorter=sorter) mask = ~np.isin(data, labels) | isnull(data) | (codes == len(labels)) # codes is the index in to the sorted array. # if we didn't want sorting, unsort it back if not is_sorted: codes[codes == len(labels)] = -1 codes = sorter[(codes,)] codes[mask] = -1 return codes def unique_value_groups( ar, sort: bool = True ) -> tuple[np.ndarray | pd.Index, np.ndarray]: """Group an array by its unique values. Parameters ---------- ar : array-like Input array. This will be flattened if it is not already 1-D. sort : bool, default: True Whether or not to sort unique values. Returns ------- values : np.ndarray Sorted, unique values as returned by `np.unique`. indices : list of lists of int Each element provides the integer indices in `ar` with values given by the corresponding value in `unique_values`. 
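    Examples
    --------
    Illustrative sketch: the first element of the result holds the (sorted)
    unique values, the second the integer code of each input element.

    >>> unique_value_groups(np.array(["b", "a", "b"]))  # doctest: +SKIP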
""" inverse, values = pd.factorize(ar, sort=sort) if isinstance(values, pd.MultiIndex): values.names = ar.names return values, inverse def season_to_month_tuple(seasons: Sequence[str]) -> tuple[tuple[int, ...], ...]: """ >>> season_to_month_tuple(["DJF", "MAM", "JJA", "SON"]) ((12, 1, 2), (3, 4, 5), (6, 7, 8), (9, 10, 11)) >>> season_to_month_tuple(["DJFM", "MAMJ", "JJAS", "SOND"]) ((12, 1, 2, 3), (3, 4, 5, 6), (6, 7, 8, 9), (9, 10, 11, 12)) >>> season_to_month_tuple(["DJFM", "SOND"]) ((12, 1, 2, 3), (9, 10, 11, 12)) """ initials = "JFMAMJJASOND" starts = { "".join(s): i + 1 for s, i in zip(sliding_window(2, initials + "J"), range(12), strict=True) } result: list[tuple[int, ...]] = [] for i, season in enumerate(seasons): if len(season) == 1: if i < len(seasons) - 1: suffix = seasons[i + 1][0] else: suffix = seasons[0][0] else: suffix = season[1] start = starts[season[0] + suffix] month_append = [] for i in range(len(season[1:])): elem = start + i + 1 month_append.append(elem - 12 * (elem > 12)) result.append((start,) + tuple(month_append)) return tuple(result) def inds_to_season_string(asints: tuple[tuple[int, ...], ...]) -> tuple[str, ...]: inits = "JFMAMJJASOND" return tuple("".join([inits[i_ - 1] for i_ in t]) for t in asints) def is_sorted_periodic(lst): """Used to verify that seasons provided to SeasonResampler are in order.""" n = len(lst) # Find the wraparound point where the list decreases wrap_point = -1 for i in range(1, n): if lst[i] < lst[i - 1]: wrap_point = i break # If no wraparound point is found, the list is already sorted if wrap_point == -1: return True # Check if both parts around the wrap point are sorted for i in range(1, wrap_point): if lst[i] < lst[i - 1]: return False for i in range(wrap_point + 1, n): if lst[i] < lst[i - 1]: return False # Check wraparound condition return lst[-1] <= lst[0] @dataclass(kw_only=True, frozen=True) class SeasonsGroup: seasons: tuple[str, ...] # tuple[integer months] corresponding to each season inds: tuple[tuple[int, ...], ...] # integer code for each season, this is not simply range(len(seasons)) # when the seasons have overlaps codes: Sequence[int] def find_independent_seasons(seasons: Sequence[str]) -> Sequence[SeasonsGroup]: """ Iterates though a list of seasons e.g. ["DJF", "FMA", ...], and splits that into multiple sequences of non-overlapping seasons. >>> find_independent_seasons( ... ["DJF", "FMA", "AMJ", "JJA", "ASO", "OND"] ... 
) # doctest: +NORMALIZE_WHITESPACE [SeasonsGroup(seasons=('DJF', 'AMJ', 'ASO'), inds=((12, 1, 2), (4, 5, 6), (8, 9, 10)), codes=[0, 2, 4]), SeasonsGroup(seasons=('FMA', 'JJA', 'OND'), inds=((2, 3, 4), (6, 7, 8), (10, 11, 12)), codes=[1, 3, 5])] >>> find_independent_seasons(["DJF", "MAM", "JJA", "SON"]) [SeasonsGroup(seasons=('DJF', 'MAM', 'JJA', 'SON'), inds=((12, 1, 2), (3, 4, 5), (6, 7, 8), (9, 10, 11)), codes=[0, 1, 2, 3])] """ season_inds = season_to_month_tuple(seasons) grouped = defaultdict(list) codes = defaultdict(list) seen: set[tuple[int, ...]] = set() # This is quadratic, but the number of seasons is at most 12 for i, current in enumerate(season_inds): # Start with a group if current not in seen: grouped[i].append(current) codes[i].append(i) seen.add(current) # Loop through remaining groups, and look for overlaps for j, second in enumerate(season_inds[i:]): if not (set(chain(*grouped[i])) & set(second)) and second not in seen: grouped[i].append(second) codes[i].append(j + i) seen.add(second) if len(seen) == len(seasons): break # found all non-overlapping groups for this row start over grouped_ints = tuple(tuple(idx) for idx in grouped.values() if idx) return [ SeasonsGroup(seasons=inds_to_season_string(inds), inds=inds, codes=codes) for inds, codes in zip(grouped_ints, codes.values(), strict=False) ] @dataclass class SeasonGrouper(Grouper): """Allows grouping using a custom definition of seasons. Parameters ---------- seasons: sequence of str List of strings representing seasons. E.g. ``"JF"`` or ``"JJA"`` etc. Overlapping seasons are allowed (e.g. ``["DJFM", "MAMJ", "JJAS", "SOND"]``) Examples -------- >>> SeasonGrouper(["JF", "MAM", "JJAS", "OND"]) SeasonGrouper(seasons=['JF', 'MAM', 'JJAS', 'OND']) The ordering is preserved >>> SeasonGrouper(["MAM", "JJAS", "OND", "JF"]) SeasonGrouper(seasons=['MAM', 'JJAS', 'OND', 'JF']) Overlapping seasons are allowed >>> SeasonGrouper(["DJFM", "MAMJ", "JJAS", "SOND"]) SeasonGrouper(seasons=['DJFM', 'MAMJ', 'JJAS', 'SOND']) """ seasons: Sequence[str] # drop_incomplete: bool = field(default=True) # TODO def factorize(self, group: T_Group) -> EncodedGroups: if TYPE_CHECKING: assert not isinstance(group, _DummyGroup) if not _contains_datetime_like_objects(group.variable): raise ValueError( "SeasonGrouper can only be used to group by datetime-like arrays." ) months = group.dt.month.data seasons_groups = find_independent_seasons(self.seasons) codes_ = np.full((len(seasons_groups),) + group.shape, -1, dtype=np.int8) group_indices: list[list[int]] = [[]] * len(self.seasons) for axis_index, seasgroup in enumerate(seasons_groups): for season_tuple, code in zip( seasgroup.inds, seasgroup.codes, strict=False ): mask = np.isin(months, season_tuple) codes_[axis_index, mask] = code (indices,) = mask.nonzero() group_indices[code] = indices.tolist() if np.all(codes_ == -1): raise ValueError( "Failed to group data. Are you grouping by a variable that is all NaN?" 
) needs_dummy_dim = len(seasons_groups) > 1 codes = DataArray( dims=(("__season_dim__",) if needs_dummy_dim else tuple()) + group.dims, data=codes_ if needs_dummy_dim else codes_.squeeze(), attrs=group.attrs, name="season", ) unique_coord = Variable("season", self.seasons, attrs=group.attrs) full_index = pd.Index(self.seasons) return EncodedGroups( codes=codes, group_indices=tuple(group_indices), unique_coord=unique_coord, full_index=full_index, ) def reset(self) -> Self: return type(self)(self.seasons) @dataclass class SeasonResampler(Resampler): """Allows grouping using a custom definition of seasons. Parameters ---------- seasons: Sequence[str] An ordered list of seasons. drop_incomplete: bool Whether to drop seasons that are not completely included in the data. For example, if a time series starts in Jan-2001, and seasons includes `"DJF"` then observations from Jan-2001, and Feb-2001 are ignored in the grouping since Dec-2000 isn't present. Examples -------- >>> SeasonResampler(["JF", "MAM", "JJAS", "OND"]) SeasonResampler(seasons=['JF', 'MAM', 'JJAS', 'OND'], drop_incomplete=True) >>> SeasonResampler(["DJFM", "AM", "JJA", "SON"]) SeasonResampler(seasons=['DJFM', 'AM', 'JJA', 'SON'], drop_incomplete=True) """ seasons: Sequence[str] drop_incomplete: bool = field(default=True, kw_only=True) season_inds: Sequence[Sequence[int]] = field(init=False, repr=False) season_tuples: Mapping[str, Sequence[int]] = field(init=False, repr=False) def __post_init__(self): self.season_inds = season_to_month_tuple(self.seasons) all_inds = functools.reduce(operator.add, self.season_inds) if len(all_inds) > len(set(all_inds)): raise ValueError( f"Overlapping seasons are not allowed. Received {self.seasons!r}" ) self.season_tuples = dict(zip(self.seasons, self.season_inds, strict=True)) if not is_sorted_periodic(list(itertools.chain(*self.season_inds))): raise ValueError( "Resampling is only supported with sorted seasons. " f"Provided seasons {self.seasons!r} are not sorted." ) def factorize(self, group: T_Group) -> EncodedGroups: if group.ndim != 1: raise ValueError( "SeasonResampler can only be used to resample by 1D arrays." ) if not isinstance(group, DataArray) or not _contains_datetime_like_objects( group.variable ): raise ValueError( "SeasonResampler can only be used to group by datetime-like DataArrays." ) seasons = self.seasons season_inds = self.season_inds season_tuples = self.season_tuples nstr = max(len(s) for s in seasons) year = group.dt.year.astype(int) month = group.dt.month.astype(int) season_label = np.full(group.shape, "", dtype=f"U{nstr}") # offset years for seasons with December and January for season_str, season_ind in zip(seasons, season_inds, strict=True): season_label[month.isin(season_ind)] = season_str if "DJ" in season_str: after_dec = season_ind[season_str.index("D") + 1 :] # important: this is assuming non-overlapping seasons year[month.isin(after_dec)] -= 1 # Allow users to skip one or more months? # present_seasons is a mask that is True for months that are requested in the output present_seasons = season_label != "" if present_seasons.all(): # avoid copies if we can. 
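            # slice(None) stands in for "select everything", so the indexing
            # below (e.g. ``group[present_seasons]``) produces cheap views
            # rather than boolean-masked copies.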
present_seasons = slice(None) frame = pd.DataFrame( data={ "index": np.arange(group[present_seasons].size), "month": month[present_seasons], }, index=pd.MultiIndex.from_arrays( [year.data[present_seasons], season_label[present_seasons]], names=["year", "season"], ), ) agged = ( frame["index"] .groupby(["year", "season"], sort=False) .agg(["first", "count"]) ) first_items = agged["first"] counts = agged["count"] index_class: type[CFTimeIndex | pd.DatetimeIndex] datetime_class: CFTimeDatetime | Callable[..., np.datetime64] if _contains_cftime_datetimes(group.data): index_class = CFTimeIndex datetime_class = type(first_n_items(group.data, 1).item()) else: index_class = pd.DatetimeIndex unit, _ = np.datetime_data(group.dtype) unit = cast(PDDatetimeUnitOptions, unit) datetime_class = partial(_datetime64_via_timestamp, unit) # these are the seasons that are present # TODO: when pandas 3 is our minimum requirement we will no longer need # to cast the list to a NumPy array prior to passing to the index # constructor. unique_coord = index_class( np.array( [ datetime_class(year=year, month=season_tuples[season][0], day=1) for year, season in first_items.index ] ) ) # This sorted call is a hack. It's hard to figure out how # to start the iteration for arbitrary season ordering # for example "DJF" as first entry or last entry # So we construct the largest possible index and slice it to the # range present in the data. # TODO: when pandas 3 is our minimum requirement we will no longer need # to cast the list to a NumPy array prior to passing to the index # constructor. complete_index = index_class( np.array( sorted( [ datetime_class(year=y, month=m, day=1) for y, m in itertools.product( range(year[0].item(), year[-1].item() + 1), [s[0] for s in season_inds], ) ] ) ) ) # all years and seasons def get_label(year, season): month, *_ = season_tuples[season] return f"{year}-{month:02d}-01" unique_codes = np.arange(len(unique_coord)) valid_season_mask = season_label != "" first_valid_season, last_valid_season = season_label[valid_season_mask][[0, -1]] first_year, last_year = year.data[[0, -1]] if self.drop_incomplete: if month.data[valid_season_mask][0] != season_tuples[first_valid_season][0]: if "DJ" in first_valid_season: first_year += 1 first_valid_season = seasons[ (seasons.index(first_valid_season) + 1) % len(seasons) ] unique_codes -= 1 if ( month.data[valid_season_mask][-1] != season_tuples[last_valid_season][-1] ): last_valid_season = seasons[seasons.index(last_valid_season) - 1] if "DJ" in last_valid_season: last_year -= 1 unique_codes[-1] = -1 first_label = get_label(first_year, first_valid_season) last_label = get_label(last_year, last_valid_season) slicer = complete_index.slice_indexer(first_label, last_label) full_index = complete_index[slicer] final_codes = np.full(group.data.size, -1) final_codes[present_seasons] = np.repeat(unique_codes, counts) codes = group.copy(data=final_codes, deep=False) return EncodedGroups(codes=codes, full_index=full_index) def compute_chunks(self, variable: Variable, *, dim: Hashable) -> tuple[int, ...]: """ Compute chunk sizes for this season resampler. This method is used during chunking operations to determine appropriate chunk sizes for the given variable when using this resampler. Parameters ---------- name : Hashable The name of the dimension being chunked. variable : Variable The variable being chunked. Returns ------- tuple[int, ...] A tuple of chunk sizes for the dimension. 
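        Notes
        -----
        ``drop_incomplete`` is deliberately ignored here: chunk sizes are
        computed with a temporary ``drop_incomplete=False`` resampler so that
        rechunking never silently drops data at the season boundaries.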
""" if not _contains_datetime_like_objects(variable): raise ValueError( f"Computing chunks with {type(self)!r} only supported for datetime variables. " f"Received variable with dtype {variable.dtype!r} instead." ) if len("".join(self.seasons)) != 12: raise ValueError( "Cannot rechunk with a SeasonResampler that does not cover all 12 months. " f"Received `seasons={self.seasons!r}`." ) # Create a temporary resampler that ignores drop_incomplete for chunking # This prevents data from being silently dropped during chunking resampler_for_chunking = type(self)(seasons=self.seasons, drop_incomplete=False) chunks = ( DataArray( np.ones(variable.shape, dtype=int), dims=(dim,), coords={dim: variable}, ) .resample({dim: resampler_for_chunking}) .sum() ) # When bins (binning) or time periods are missing (resampling) # we can end up with NaNs. Drop them. if chunks.dtype.kind == "f": chunks = chunks.dropna(dim).astype(int) chunks_tuple: tuple[int, ...] = tuple(chunks.data.tolist()) return chunks_tuple def reset(self) -> Self: return type(self)(seasons=self.seasons, drop_incomplete=self.drop_incomplete) python-xarray-2026.01.0/xarray/static/0000775000175000017500000000000015136607163017640 5ustar alastairalastairpython-xarray-2026.01.0/xarray/static/html/0000775000175000017500000000000015136607163020604 5ustar alastairalastairpython-xarray-2026.01.0/xarray/static/html/icons-svg-inline.html0000664000175000017500000000247715136607163024670 0ustar alastairalastair python-xarray-2026.01.0/xarray/static/html/__init__.py0000664000175000017500000000000015136607163022703 0ustar alastairalastairpython-xarray-2026.01.0/xarray/static/css/0000775000175000017500000000000015136607163020430 5ustar alastairalastairpython-xarray-2026.01.0/xarray/static/css/style.css0000664000175000017500000002347715136607163022317 0ustar alastairalastair/* CSS stylesheet for displaying xarray objects in notebooks */ :root { --xr-font-color0: var( --jp-content-font-color0, var(--pst-color-text-base rgba(0, 0, 0, 1)) ); --xr-font-color2: var( --jp-content-font-color2, var(--pst-color-text-base, rgba(0, 0, 0, 0.54)) ); --xr-font-color3: var( --jp-content-font-color3, var(--pst-color-text-base, rgba(0, 0, 0, 0.38)) ); --xr-border-color: var( --jp-border-color2, hsl(from var(--pst-color-on-background, white) h s calc(l - 10)) ); --xr-disabled-color: var( --jp-layout-color3, hsl(from var(--pst-color-on-background, white) h s calc(l - 40)) ); --xr-background-color: var( --jp-layout-color0, var(--pst-color-on-background, white) ); --xr-background-color-row-even: var( --jp-layout-color1, hsl(from var(--pst-color-on-background, white) h s calc(l - 5)) ); --xr-background-color-row-odd: var( --jp-layout-color2, hsl(from var(--pst-color-on-background, white) h s calc(l - 15)) ); } html[theme="dark"], html[data-theme="dark"], body[data-theme="dark"], body.vscode-dark { --xr-font-color0: var( --jp-content-font-color0, var(--pst-color-text-base, rgba(255, 255, 255, 1)) ); --xr-font-color2: var( --jp-content-font-color2, var(--pst-color-text-base, rgba(255, 255, 255, 0.54)) ); --xr-font-color3: var( --jp-content-font-color3, var(--pst-color-text-base, rgba(255, 255, 255, 0.38)) ); --xr-border-color: var( --jp-border-color2, hsl(from var(--pst-color-on-background, #111111) h s calc(l + 10)) ); --xr-disabled-color: var( --jp-layout-color3, hsl(from var(--pst-color-on-background, #111111) h s calc(l + 40)) ); --xr-background-color: var( --jp-layout-color0, var(--pst-color-on-background, #111111) ); --xr-background-color-row-even: var( --jp-layout-color1, 
hsl(from var(--pst-color-on-background, #111111) h s calc(l + 5)) ); --xr-background-color-row-odd: var( --jp-layout-color2, hsl(from var(--pst-color-on-background, #111111) h s calc(l + 15)) ); } .xr-wrap { display: block !important; min-width: 300px; max-width: 700px; line-height: 1.6; padding-bottom: 4px; } .xr-text-repr-fallback { /* fallback to plain text repr when CSS is not injected (untrusted notebook) */ display: none; } .xr-header { padding-top: 6px; padding-bottom: 6px; } .xr-header { border-bottom: solid 1px var(--xr-border-color); margin-bottom: 4px; } .xr-header > div, .xr-header > ul { display: inline; margin-top: 0; margin-bottom: 0; } .xr-obj-type, .xr-obj-name { margin-left: 2px; margin-right: 10px; } .xr-obj-type, .xr-group-box-contents > label { color: var(--xr-font-color2); display: block; } .xr-sections { padding-left: 0 !important; display: grid; grid-template-columns: 150px auto auto 1fr 0 20px 0 20px; margin-block-start: 0; margin-block-end: 0; } .xr-section-item { display: contents; } .xr-section-item > input, .xr-group-box-contents > input, .xr-array-wrap > input { display: block; opacity: 0; height: 0; margin: 0; } .xr-section-item > input + label, .xr-var-item > input + label { color: var(--xr-disabled-color); } .xr-section-item > input:enabled + label, .xr-var-item > input:enabled + label, .xr-array-wrap > input:enabled + label, .xr-group-box-contents > input:enabled + label { cursor: pointer; color: var(--xr-font-color2); } .xr-section-item > input:focus-visible + label, .xr-var-item > input:focus-visible + label, .xr-array-wrap > input:focus-visible + label, .xr-group-box-contents > input:focus-visible + label { outline: auto; } .xr-section-item > input:enabled + label:hover, .xr-var-item > input:enabled + label:hover, .xr-array-wrap > input:enabled + label:hover, .xr-group-box-contents > input:enabled + label:hover { color: var(--xr-font-color0); } .xr-section-summary { grid-column: 1; color: var(--xr-font-color2); font-weight: 500; white-space: nowrap; } .xr-section-summary > em { font-weight: normal; } .xr-span-grid { grid-column-end: -1; } .xr-section-summary > span { display: inline-block; padding-left: 0.3em; } .xr-group-box-contents > input:checked + label > span { display: inline-block; padding-left: 0.6em; } .xr-section-summary-in:disabled + label { color: var(--xr-font-color2); } .xr-section-summary-in + label:before { display: inline-block; content: "►"; font-size: 11px; width: 15px; text-align: center; } .xr-section-summary-in:disabled + label:before { color: var(--xr-disabled-color); } .xr-section-summary-in:checked + label:before { content: "▼"; } .xr-section-summary-in:checked + label > span { display: none; } .xr-section-summary, .xr-section-inline-details, .xr-group-box-contents > label { padding-top: 4px; } .xr-section-inline-details { grid-column: 2 / -1; } .xr-section-details { grid-column: 1 / -1; margin-top: 4px; margin-bottom: 5px; } .xr-section-summary-in ~ .xr-section-details { display: none; } .xr-section-summary-in:checked ~ .xr-section-details { display: contents; } .xr-children { display: inline-grid; grid-template-columns: 100%; grid-column: 1 / -1; padding-top: 4px; } .xr-group-box { display: inline-grid; grid-template-columns: 0px 30px auto; } .xr-group-box-vline { grid-column-start: 1; border-right: 0.2em solid; border-color: var(--xr-border-color); width: 0px; } .xr-group-box-hline { grid-column-start: 2; grid-row-start: 1; height: 1em; width: 26px; border-bottom: 0.2em solid; border-color: var(--xr-border-color); } 
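/* The .xr-group-box-vline and .xr-group-box-hline rules above draw the
   connector lines for nested group entries in the HTML repr, using the
   --xr-border-color theme variable. */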
.xr-group-box-contents { grid-column-start: 3; padding-bottom: 4px; } .xr-group-box-contents > label::before { content: "📂"; padding-right: 0.3em; } .xr-group-box-contents > input:checked + label::before { content: "📁"; } .xr-group-box-contents > input:checked + label { padding-bottom: 0px; } .xr-group-box-contents > input:checked ~ .xr-sections { display: none; } .xr-group-box-contents > input + label > span { display: none; } .xr-group-box-ellipsis { font-size: 1.4em; font-weight: 900; color: var(--xr-font-color2); letter-spacing: 0.15em; cursor: default; } .xr-array-wrap { grid-column: 1 / -1; display: grid; grid-template-columns: 20px auto; } .xr-array-wrap > label { grid-column: 1; vertical-align: top; } .xr-preview { color: var(--xr-font-color3); } .xr-array-preview, .xr-array-data { padding: 0 5px !important; grid-column: 2; } .xr-array-data, .xr-array-in:checked ~ .xr-array-preview { display: none; } .xr-array-in:checked ~ .xr-array-data, .xr-array-preview { display: inline-block; } .xr-dim-list { display: inline-block !important; list-style: none; padding: 0 !important; margin: 0; } .xr-dim-list li { display: inline-block; padding: 0; margin: 0; } .xr-dim-list:before { content: "("; } .xr-dim-list:after { content: ")"; } .xr-dim-list li:not(:last-child):after { content: ","; padding-right: 5px; } .xr-has-index { font-weight: bold; } .xr-var-list, .xr-var-item { display: contents; } .xr-var-item > div, .xr-var-item label, .xr-var-item > .xr-var-name span { background-color: var(--xr-background-color-row-even); border-color: var(--xr-background-color-row-odd); margin-bottom: 0; padding-top: 2px; } .xr-var-item > .xr-var-name:hover span { padding-right: 5px; } .xr-var-list > li:nth-child(odd) > div, .xr-var-list > li:nth-child(odd) > label, .xr-var-list > li:nth-child(odd) > .xr-var-name span { background-color: var(--xr-background-color-row-odd); border-color: var(--xr-background-color-row-even); } .xr-var-name { grid-column: 1; } .xr-var-dims { grid-column: 2; } .xr-var-dtype { grid-column: 3; text-align: right; color: var(--xr-font-color2); } .xr-var-preview { grid-column: 4; } .xr-index-preview { grid-column: 2 / 5; color: var(--xr-font-color2); } .xr-var-name, .xr-var-dims, .xr-var-dtype, .xr-preview, .xr-attrs dt { white-space: nowrap; overflow: hidden; text-overflow: ellipsis; padding-right: 10px; } .xr-var-name:hover, .xr-var-dims:hover, .xr-var-dtype:hover, .xr-attrs dt:hover { overflow: visible; width: auto; z-index: 1; } .xr-var-attrs, .xr-var-data, .xr-index-data { display: none; border-top: 2px dotted var(--xr-background-color); padding-bottom: 20px !important; padding-top: 10px !important; } .xr-var-attrs-in + label, .xr-var-data-in + label, .xr-index-data-in + label { padding: 0 1px; } .xr-var-attrs-in:checked ~ .xr-var-attrs, .xr-var-data-in:checked ~ .xr-var-data, .xr-index-data-in:checked ~ .xr-index-data { display: block; } .xr-var-data > table { float: right; } .xr-var-data > pre, .xr-index-data > pre, .xr-var-data > table > tbody > tr { background-color: transparent !important; } .xr-var-name span, .xr-var-data, .xr-index-name div, .xr-index-data, .xr-attrs { padding-left: 25px !important; } .xr-attrs, .xr-var-attrs, .xr-var-data, .xr-index-data { grid-column: 1 / -1; } dl.xr-attrs { padding: 0; margin: 0; display: grid; grid-template-columns: 125px auto; } .xr-attrs dt, .xr-attrs dd { padding: 0; margin: 0; float: left; padding-right: 10px; width: auto; } .xr-attrs dt { font-weight: normal; grid-column: 1; } .xr-attrs dt:hover span { display: inline-block; 
background: var(--xr-background-color); padding-right: 10px; } .xr-attrs dd { grid-column: 2; white-space: pre-wrap; word-break: break-all; } .xr-icon-database, .xr-icon-file-text2, .xr-no-icon { display: inline-block; vertical-align: middle; width: 1em; height: 1.5em !important; stroke-width: 0; stroke: currentColor; fill: currentColor; } .xr-var-attrs-in:checked + label > .xr-icon-file-text2, .xr-var-data-in:checked + label > .xr-icon-database, .xr-index-data-in:checked + label > .xr-icon-database { color: var(--xr-font-color0); filter: drop-shadow(1px 1px 5px var(--xr-font-color2)); stroke-width: 0.8px; } python-xarray-2026.01.0/xarray/static/css/__init__.py0000664000175000017500000000000015136607163022527 0ustar alastairalastairpython-xarray-2026.01.0/xarray/static/__init__.py0000664000175000017500000000000015136607163021737 0ustar alastairalastairpython-xarray-2026.01.0/xarray/compat/0000775000175000017500000000000015136607163017634 5ustar alastairalastairpython-xarray-2026.01.0/xarray/compat/npcompat.py0000664000175000017500000000631515136607163022034 0ustar alastairalastair# Copyright (c) 2005-2011, NumPy Developers. # All rights reserved. # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above # copyright notice, this list of conditions and the following # disclaimer in the documentation and/or other materials provided # with the distribution. # * Neither the name of the NumPy Developers nor the names of any # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. from __future__ import annotations from typing import Any try: # requires numpy>=2.0 from numpy import isdtype # type: ignore[attr-defined,unused-ignore] HAS_STRING_DTYPE = True except ImportError: import numpy as np from numpy.typing import DTypeLike kind_mapping = { "bool": np.bool_, "signed integer": np.signedinteger, "unsigned integer": np.unsignedinteger, "integral": np.integer, "real floating": np.floating, "complex floating": np.complexfloating, "numeric": np.number, } def isdtype( # type: ignore[misc] dtype: np.dtype[Any] | type[Any], kind: DTypeLike | tuple[DTypeLike, ...] 
) -> bool: kinds = kind if isinstance(kind, tuple) else (kind,) str_kinds = {k for k in kinds if isinstance(k, str)} type_kinds = {k.type for k in kinds if isinstance(k, np.dtype)} if unknown_kind_types := set(kinds) - str_kinds - type_kinds: raise TypeError( f"kind must be str, np.dtype or a tuple of these, got {unknown_kind_types}" ) if unknown_kinds := {k for k in str_kinds if k not in kind_mapping}: raise ValueError( f"unknown kind: {unknown_kinds}, must be an np.dtype or one of {list(kind_mapping)}" ) # verified the dtypes already, no need to check again translated_kinds = {kind_mapping[k] for k in str_kinds} | type_kinds if isinstance(dtype, np.generic): return isinstance(dtype, translated_kinds) else: return any(np.issubdtype(dtype, k) for k in translated_kinds) HAS_STRING_DTYPE = False python-xarray-2026.01.0/xarray/compat/array_api_compat.py0000664000175000017500000000476715136607163023536 0ustar alastairalastairimport numpy as np from xarray.namedarray.pycompat import array_type def is_weak_scalar_type(t): return isinstance(t, bool | int | float | complex | str | bytes) def _future_array_api_result_type(*arrays_and_dtypes, xp): # fallback implementation for `xp.result_type` with python scalars. Can be removed once a # version of the Array API that includes https://github.com/data-apis/array-api/issues/805 # can be required strongly_dtyped = [t for t in arrays_and_dtypes if not is_weak_scalar_type(t)] weakly_dtyped = [t for t in arrays_and_dtypes if is_weak_scalar_type(t)] if not strongly_dtyped: strongly_dtyped = [ xp.asarray(x) if not isinstance(x, type) else x for x in weakly_dtyped ] weakly_dtyped = [] dtype = xp.result_type(*strongly_dtyped) if not weakly_dtyped: return dtype possible_dtypes = { complex: "complex64", float: "float32", int: "int8", bool: "bool", str: "str", bytes: "bytes", } dtypes = [possible_dtypes.get(type(x), "object") for x in weakly_dtyped] return xp.result_type(dtype, *dtypes) def result_type(*arrays_and_dtypes, xp) -> np.dtype: if xp is np or any( isinstance(getattr(t, "dtype", t), np.dtype) for t in arrays_and_dtypes ): return xp.result_type(*arrays_and_dtypes) else: return _future_array_api_result_type(*arrays_and_dtypes, xp=xp) def get_array_namespace(*values): def _get_single_namespace(x): if hasattr(x, "__array_namespace__"): return x.__array_namespace__() elif isinstance(x, array_type("cupy")): # cupy is fully compliant from xarray's perspective, but will not expose # __array_namespace__ until at least v14. Special case it for now import cupy as cp return cp else: return np namespaces = {_get_single_namespace(t) for t in values} non_numpy = namespaces - {np} if len(non_numpy) > 1: names = [module.__name__ for module in non_numpy] raise TypeError(f"Mixed array types {names} are not supported.") elif non_numpy: [xp] = non_numpy else: xp = np return xp def to_like_array(array, like): # Mostly for cupy compatibility, because cupy binary ops require all cupy arrays xp = get_array_namespace(like) if xp is not np: return xp.asarray(array) # avoid casting things like pint quantities to numpy arrays return array python-xarray-2026.01.0/xarray/compat/pdcompat.py0000664000175000017500000000662215136607163022023 0ustar alastairalastair# For reference, here is a copy of the pandas copyright notice: # BSD 3-Clause License # Copyright (c) 2008-2011, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team # All rights reserved. # Copyright (c) 2011-2025, Open source contributors. 
# Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # * Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # * Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. from __future__ import annotations from enum import Enum from typing import Literal import pandas as pd from xarray.core.types import PDDatetimeUnitOptions def count_not_none(*args) -> int: """Compute the number of non-None arguments. Copied from pandas.core.common.count_not_none (not part of the public API) """ return sum(arg is not None for arg in args) class _NoDefault(Enum): """Used by pandas to specify a default value for a deprecated argument. Copied from pandas._libs.lib._NoDefault. See also: - pandas-dev/pandas#30788 - pandas-dev/pandas#40684 - pandas-dev/pandas#40715 - pandas-dev/pandas#47045 """ no_default = "NO_DEFAULT" def __repr__(self) -> str: return "" no_default = ( _NoDefault.no_default ) # Sentinel indicating the default value following pandas NoDefault = Literal[_NoDefault.no_default] # For typing following pandas def timestamp_as_unit(date: pd.Timestamp, unit: PDDatetimeUnitOptions) -> pd.Timestamp: """Convert the underlying int64 representation to the given unit. Compatibility function for pandas issue where "as_unit" is not defined for pandas.Timestamp in pandas versions < 2.2. Can be removed minimum pandas version is >= 2.2. """ if hasattr(date, "as_unit"): date = date.as_unit(unit) elif hasattr(date, "_as_unit"): date = date._as_unit(unit) return date def default_precision_timestamp(*args, **kwargs) -> pd.Timestamp: """Return a Timestamp object with the default precision. Xarray default is "ns". """ dt = pd.Timestamp(*args, **kwargs) if dt.unit != "ns": dt = timestamp_as_unit(dt, "ns") return dt python-xarray-2026.01.0/xarray/compat/dask_array_compat.py0000664000175000017500000000203215136607163023666 0ustar alastairalastairfrom typing import Any from xarray.namedarray.utils import module_available def reshape_blockwise( x: Any, shape: int | tuple[int, ...], chunks: tuple[tuple[int, ...], ...] 
| None = None, ): if module_available("dask", "2024.08.2"): from dask.array import reshape_blockwise return reshape_blockwise(x, shape=shape, chunks=chunks) else: return x.reshape(shape) def sliding_window_view( x, window_shape, axis=None, *, automatic_rechunk=True, **kwargs ): # Backcompat for handling `automatic_rechunk`, delete when dask>=2024.11.0 # Note that subok, writeable are unsupported by dask, so we ignore those in kwargs from dask.array.lib.stride_tricks import sliding_window_view if module_available("dask", "2024.11.0"): return sliding_window_view( x, window_shape=window_shape, axis=axis, automatic_rechunk=automatic_rechunk ) else: # automatic_rechunk is not supported return sliding_window_view(x, window_shape=window_shape, axis=axis) python-xarray-2026.01.0/xarray/compat/dask_array_ops.py0000664000175000017500000001102215136607163023203 0ustar alastairalastairfrom __future__ import annotations import math from xarray.compat.dask_array_compat import reshape_blockwise from xarray.core import dtypes, nputils def dask_rolling_wrapper(moving_func, a, window, min_count=None, axis=-1): """Wrapper to apply bottleneck moving window funcs on dask arrays""" dtype, _ = dtypes.maybe_promote(a.dtype) return a.data.map_overlap( moving_func, depth={axis: (window - 1, 0)}, axis=axis, dtype=dtype, window=window, min_count=min_count, ) def least_squares(lhs, rhs, rcond=None, skipna=False): import dask.array as da # The trick here is that the core dimension is axis 0. # All other dimensions need to be reshaped down to one axis for `lstsq` # (which only accepts 2D input) # and this needs to be undone after running `lstsq` # The order of values in the reshaped axes is irrelevant. # There are big gains to be had by simply reshaping the blocks on a blockwise # basis, and then undoing that transform. # We use a specific `reshape_blockwise` method in dask for this optimization if rhs.ndim > 2: out_shape = rhs.shape reshape_chunks = rhs.chunks rhs = reshape_blockwise(rhs, (rhs.shape[0], math.prod(rhs.shape[1:]))) else: out_shape = None lhs_da = da.from_array(lhs, chunks=(rhs.chunks[0], lhs.shape[1])) if skipna: added_dim = rhs.ndim == 1 if added_dim: rhs = rhs.reshape(rhs.shape[0], 1) results = da.apply_along_axis( nputils._nanpolyfit_1d, 0, rhs, lhs_da, dtype=float, shape=(lhs.shape[1] + 1,), rcond=rcond, ) coeffs = results[:-1, ...] residuals = results[-1, ...] 
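        # _nanpolyfit_1d appends the residual to the fitted coefficients in a
        # single stacked result; the slicing above separates them again.  If a
        # dummy column was added for a 1-D rhs, it is squeezed back out below.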
if added_dim: coeffs = coeffs.reshape(coeffs.shape[0]) residuals = residuals.reshape(residuals.shape[0]) else: # Residuals here are (1, 1) but should be (K,) as rhs is (N, K) # See issue dask/dask#6516 coeffs, residuals, _, _ = da.linalg.lstsq(lhs_da, rhs) if out_shape is not None: coeffs = reshape_blockwise( coeffs, shape=(coeffs.shape[0], *out_shape[1:]), chunks=((coeffs.shape[0],), *reshape_chunks[1:]), ) residuals = reshape_blockwise( residuals, shape=out_shape[1:], chunks=reshape_chunks[1:] ) return coeffs, residuals def _fill_with_last_one(a, b): import numpy as np # cumreduction apply the push func over all the blocks first so, # the only missing part is filling the missing values using the # last data of the previous chunk return np.where(np.isnan(b), a, b) def _dtype_push(a, axis, dtype=None): from xarray.core.duck_array_ops import _push # Not sure why the blelloch algorithm force to receive a dtype return _push(a, axis=axis) def push(array, n, axis, method="blelloch"): """ Dask-aware bottleneck.push """ import dask.array as da import numpy as np from xarray.core.duck_array_ops import _push from xarray.core.nputils import nanlast if n is not None and all(n <= size for size in array.chunks[axis]): return array.map_overlap(_push, depth={axis: (n, 0)}, n=n, axis=axis) # TODO: Replace all this function # once https://github.com/pydata/xarray/issues/9229 being implemented pushed_array = da.reductions.cumreduction( func=_dtype_push, binop=_fill_with_last_one, ident=np.nan, x=array, axis=axis, dtype=array.dtype, method=method, preop=nanlast, ) if n is not None and 0 < n < array.shape[axis] - 1: # The idea is to calculate a cumulative sum of a bitmask # created from the isnan method, but every time a False is found the sum # must be restarted, and the final result indicates the amount of contiguous # nan values found in the original array on every position nan_bitmask = da.isnan(array, dtype=int) cumsum_nan = nan_bitmask.cumsum(axis=axis, method=method) valid_positions = da.where(nan_bitmask == 0, cumsum_nan, np.nan) valid_positions = push(valid_positions, None, axis, method=method) # All the NaNs at the beginning are converted to 0 valid_positions = da.nan_to_num(valid_positions) valid_positions = cumsum_nan - valid_positions valid_positions = valid_positions <= n pushed_array = da.where(valid_positions, pushed_array, np.nan) return pushed_array python-xarray-2026.01.0/xarray/compat/toolzcompat.py0000664000175000017500000000441615136607163022566 0ustar alastairalastair# This file contains functions copied from the toolz library in accordance # with its license. The original copyright notice is duplicated below. # Copyright (c) 2013 Matthew Rocklin # All rights reserved. # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # a. Redistributions of source code must retain the above copyright notice, # this list of conditions and the following disclaimer. # b. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # c. Neither the name of toolz nor the names of its contributors # may be used to endorse or promote products derived from this software # without specific prior written permission. 
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR # ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH # DAMAGE. def sliding_window(n, seq): """A sequence of overlapping subsequences >>> list(sliding_window(2, [1, 2, 3, 4])) [(1, 2), (2, 3), (3, 4)] This function creates a sliding window suitable for transformations like sliding means / smoothing >>> mean = lambda seq: float(sum(seq)) / len(seq) >>> list(map(mean, sliding_window(2, [1, 2, 3, 4]))) [1.5, 2.5, 3.5] """ import collections import itertools return zip( *( collections.deque(itertools.islice(it, i), 0) or it for i, it in enumerate(itertools.tee(seq, n)) ), strict=False, ) python-xarray-2026.01.0/xarray/compat/__init__.py0000664000175000017500000000000015136607163021733 0ustar alastairalastairpython-xarray-2026.01.0/xarray/indexes/0000775000175000017500000000000015136607163020010 5ustar alastairalastairpython-xarray-2026.01.0/xarray/indexes/nd_point_index.py0000664000175000017500000003251315136607163023367 0ustar alastairalastairfrom __future__ import annotations import abc from collections.abc import Hashable, Iterable, Mapping from typing import TYPE_CHECKING, Any, Generic, TypeVar import numpy as np from xarray.core.dataarray import DataArray from xarray.core.indexes import Index from xarray.core.indexing import IndexSelResult from xarray.core.utils import is_scalar from xarray.core.variable import Variable from xarray.structure.alignment import broadcast if TYPE_CHECKING: from scipy.spatial import KDTree from xarray.core.types import Self class TreeAdapter(abc.ABC): """Lightweight adapter abstract class for plugging in 3rd-party structures like :py:class:`scipy.spatial.KDTree` or :py:class:`sklearn.neighbors.KDTree` into :py:class:`~xarray.indexes.NDPointIndex`. """ @abc.abstractmethod def __init__(self, points: np.ndarray, *, options: Mapping[str, Any]): """ Parameters ---------- points : ndarray of shape (n_points, n_coordinates) Two-dimensional array of points/samples (rows) and their corresponding coordinate labels (columns) to index. """ ... @abc.abstractmethod def query(self, points: np.ndarray) -> tuple[np.ndarray, np.ndarray]: """Query points. Parameters ---------- points: ndarray of shape (n_points, n_coordinates) Two-dimensional array of points/samples (rows) and their corresponding coordinate labels (columns) to query. Returns ------- distances : ndarray of shape (n_points) Distances to the nearest neighbors. indices : ndarray of shape (n_points) Indices of the nearest neighbors in the array of the indexed points. """ ... def equals(self, other: Self) -> bool: """Check equality with another TreeAdapter of the same kind. Parameters ---------- other : The other TreeAdapter object to compare with this object. 
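        Returns
        -------
        bool
            True if both adapters index the same set of points.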
""" raise NotImplementedError class ScipyKDTreeAdapter(TreeAdapter): """:py:class:`scipy.spatial.KDTree` adapter for :py:class:`~xarray.indexes.NDPointIndex`.""" _kdtree: KDTree def __init__(self, points: np.ndarray, options: Mapping[str, Any]): try: from scipy.spatial import KDTree except ImportError as err: raise ImportError( "`NDPointIndex` requires `scipy` when used with `ScipyKDTreeAdapter`. " "Please ensure that `scipy` is installed and importable." ) from err self._kdtree = KDTree(points, **options) def query(self, points: np.ndarray) -> tuple[np.ndarray, np.ndarray]: return self._kdtree.query(points) # type: ignore[return-value,unused-ignore] def equals(self, other: Self) -> bool: return np.array_equal(self._kdtree.data, other._kdtree.data) def get_points(coords: Iterable[Variable | Any]) -> np.ndarray: """Re-arrange data from a sequence of xarray coordinate variables or labels into a 2-d array of shape (n_points, n_coordinates). """ data = [c.values if isinstance(c, Variable | DataArray) else c for c in coords] return np.stack([np.ravel(d) for d in data]).T T_TreeAdapter = TypeVar("T_TreeAdapter", bound=TreeAdapter) class NDPointIndex(Index, Generic[T_TreeAdapter]): """Xarray index for irregular, n-dimensional data. This index may be associated with a set of coordinate variables representing the arbitrary location of data points in an n-dimensional space. All coordinates must have the same shape and dimensions. The number of associated coordinate variables must correspond to the number of dimensions of the space. This index supports label-based selection (nearest neighbor lookup). It also has limited support for alignment. By default, this index relies on :py:class:`scipy.spatial.KDTree` for fast lookup. Do not use :py:meth:`~xarray.indexes.NDPointIndex.__init__` directly. Instead use :py:meth:`xarray.Dataset.set_xindex` or :py:meth:`xarray.DataArray.set_xindex` to create and set the index from existing coordinates (see the example below). Examples -------- An example using a dataset with 2-dimensional coordinates. >>> xx = [[1.0, 2.0], [3.0, 0.0]] >>> yy = [[11.0, 21.0], [29.0, 9.0]] >>> ds = xr.Dataset(coords={"xx": (("y", "x"), xx), "yy": (("y", "x"), yy)}) >>> ds Size: 64B Dimensions: (y: 2, x: 2) Coordinates: xx (y, x) float64 32B 1.0 2.0 3.0 0.0 yy (y, x) float64 32B 11.0 21.0 29.0 9.0 Dimensions without coordinates: y, x Data variables: *empty* Creation of an NDPointIndex from the "xx" and "yy" coordinate variables: >>> ds = ds.set_xindex(("xx", "yy"), xr.indexes.NDPointIndex) >>> ds Size: 64B Dimensions: (y: 2, x: 2) Coordinates: * xx (y, x) float64 32B 1.0 2.0 3.0 0.0 * yy (y, x) float64 32B 11.0 21.0 29.0 9.0 Dimensions without coordinates: y, x Data variables: *empty* Indexes: ┌ xx NDPointIndex (ScipyKDTreeAdapter) └ yy Point-wise (nearest-neighbor) data selection using Xarray's advanced indexing, i.e., using arbitrary dimension(s) for the Variable objects passed as labels: >>> ds.sel( ... xx=xr.Variable("points", [1.9, 0.1]), ... yy=xr.Variable("points", [13.0, 8.0]), ... method="nearest", ... 
) Size: 32B Dimensions: (points: 2) Coordinates: xx (points) float64 16B 1.0 0.0 yy (points) float64 16B 11.0 9.0 Dimensions without coordinates: points Data variables: *empty* Data selection with scalar labels: >>> ds.sel(xx=1.9, yy=13.0, method="nearest") Size: 16B Dimensions: () Coordinates: xx float64 8B 1.0 yy float64 8B 11.0 Data variables: *empty* Data selection with broadcasting the input labels: >>> ds.sel(xx=1.9, yy=xr.Variable("points", [13.0, 8.0]), method="nearest") Size: 32B Dimensions: (points: 2) Coordinates: xx (points) float64 16B 1.0 0.0 yy (points) float64 16B 11.0 9.0 Dimensions without coordinates: points Data variables: *empty* >>> da = xr.DataArray( ... [[45.1, 53.3], [65.4, 78.2]], ... coords={"u": [1.9, 0.1], "v": [13.0, 8.0]}, ... dims=("u", "v"), ... ) >>> ds.sel(xx=da.u, yy=da.v, method="nearest") Size: 64B Dimensions: (u: 2, v: 2) Coordinates: xx (u, v) float64 32B 1.0 0.0 1.0 0.0 yy (u, v) float64 32B 11.0 9.0 11.0 9.0 Dimensions without coordinates: u, v Data variables: *empty* Data selection with array-like labels (implicit dimensions): >>> ds.sel(xx=[[1.9], [0.1]], yy=[[13.0], [8.0]], method="nearest") Size: 32B Dimensions: (y: 2, x: 1) Coordinates: xx (y, x) float64 16B 1.0 0.0 yy (y, x) float64 16B 11.0 9.0 Dimensions without coordinates: y, x Data variables: *empty* """ _tree_obj: T_TreeAdapter _coord_names: tuple[Hashable, ...] _dims: tuple[Hashable, ...] _shape: tuple[int, ...] def __init__( self, tree_obj: T_TreeAdapter, *, coord_names: tuple[Hashable, ...], dims: tuple[Hashable, ...], shape: tuple[int, ...], ): # this constructor is "private" assert isinstance(tree_obj, TreeAdapter) self._tree_obj = tree_obj assert len(coord_names) == len(dims) == len(shape) self._coord_names = coord_names self._dims = dims self._shape = shape @classmethod def from_variables( cls, variables: Mapping[Any, Variable], *, options: Mapping[str, Any], ) -> Self: if len({var.dims for var in variables.values()}) > 1: var_names = ",".join(vn for vn in variables) raise ValueError( f"variables {var_names} must all have the same dimensions and the same shape" ) var0 = next(iter(variables.values())) if len(variables) != len(var0.dims): raise ValueError( f"the number of variables {len(variables)} doesn't match " f"the number of dimensions {len(var0.dims)}" ) opts = dict(options) tree_adapter_cls: type[T_TreeAdapter] = opts.pop("tree_adapter_cls", None) if tree_adapter_cls is None: tree_adapter_cls = ScipyKDTreeAdapter points = get_points(variables.values()) return cls( tree_adapter_cls(points, options=opts), coord_names=tuple(variables), dims=var0.dims, shape=var0.shape, ) def create_variables( self, variables: Mapping[Any, Variable] | None = None ) -> dict[Any, Variable]: if variables is not None: for var in variables.values(): # maybe re-sync variable dimensions with the index object # returned by NDPointIndex.rename() if var.dims != self._dims: var.dims = self._dims return dict(**variables) else: return {} def equals( self, other: Index, *, exclude: frozenset[Hashable] | None = None ) -> bool: if not isinstance(other, NDPointIndex): return False if type(self._tree_obj) is not type(other._tree_obj): return False return self._tree_obj.equals(other._tree_obj) def _get_dim_indexers( self, indices: np.ndarray, label_dims: tuple[Hashable, ...], label_shape: tuple[int, ...], ) -> dict[Hashable, Variable]: """Returns dimension indexers based on the query results (indices) and the original label dimensions and shape. 1. Unravel the flat indices returned from the query 2. 
Reshape the unraveled indices according to indexers shapes 3. Wrap the indices in xarray.Variable objects. """ dim_indexers = {} u_indices = list(np.unravel_index(indices.ravel(), self._shape)) for dim, ind in zip(self._dims, u_indices, strict=False): dim_indexers[dim] = Variable(label_dims, ind.reshape(label_shape)) return dim_indexers def sel( self, labels: dict[Any, Any], method=None, tolerance=None ) -> IndexSelResult: if method != "nearest": raise ValueError( "NDPointIndex only supports selection with method='nearest'" ) missing_labels = set(self._coord_names) - set(labels) if missing_labels: missing_labels_str = ",".join([f"{name}" for name in missing_labels]) raise ValueError(f"missing labels for coordinate(s): {missing_labels_str}.") # maybe convert labels into xarray DataArray objects xr_labels: dict[Any, DataArray] = {} for name, lbl in labels.items(): if isinstance(lbl, DataArray): xr_labels[name] = lbl elif isinstance(lbl, Variable): xr_labels[name] = DataArray(lbl) elif is_scalar(lbl): xr_labels[name] = DataArray(lbl, dims=()) elif np.asarray(lbl).ndim == len(self._dims): xr_labels[name] = DataArray(lbl, dims=self._dims) else: raise ValueError( "invalid label value. NDPointIndex only supports advanced (point-wise) indexing " "with the following label value kinds:\n" "- xarray.DataArray or xarray.Variable objects\n" "- scalar values\n" "- unlabelled array-like objects with the same number of dimensions " f"than the {self._coord_names} coordinate variables ({len(self._dims)})" ) # broadcast xarray labels against one another and determine labels shape and dimensions broadcasted = broadcast(*xr_labels.values()) label_dims = broadcasted[0].dims label_shape = broadcasted[0].shape xr_labels = dict(zip(xr_labels, broadcasted, strict=True)) # get and return dimension indexers points = get_points(xr_labels[name] for name in self._coord_names) _, indices = self._tree_obj.query(points) dim_indexers = self._get_dim_indexers(indices, label_dims, label_shape) return IndexSelResult(dim_indexers=dim_indexers) def rename( self, name_dict: Mapping[Any, Hashable], dims_dict: Mapping[Any, Hashable], ) -> Self: if not set(self._coord_names) & set(name_dict) and not set(self._dims) & set( dims_dict ): return self new_coord_names = tuple(name_dict.get(n, n) for n in self._coord_names) new_dims = tuple(dims_dict.get(d, d) for d in self._dims) return type(self)( self._tree_obj, coord_names=new_coord_names, dims=new_dims, shape=self._shape, ) def _repr_inline_(self, max_width: int) -> str: tree_obj_type = self._tree_obj.__class__.__name__ return f"{self.__class__.__name__} ({tree_obj_type})" python-xarray-2026.01.0/xarray/indexes/range_index.py0000664000175000017500000003664115136607163022657 0ustar alastairalastairimport math from collections.abc import Hashable, Mapping from typing import Any import numpy as np import pandas as pd from xarray.core import duck_array_ops from xarray.core.coordinate_transform import CoordinateTransform from xarray.core.dataarray import DataArray from xarray.core.indexes import CoordinateTransformIndex, Index, PandasIndex from xarray.core.indexing import IndexSelResult from xarray.core.variable import Variable class RangeCoordinateTransform(CoordinateTransform): """1-dimensional coordinate transform representing a simple bounded interval with evenly spaced, floating-point values. 
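The forward/reverse mapping is plain affine arithmetic; a worked sketch with
illustrative numbers::

    import numpy as np

    start, stop, size = 0.0, 1.0, 5
    step = (stop - start) / size          # 0.2, matching the ``step`` property
    positions = np.arange(size)
    labels = start + positions * step     # forward: [0.0, 0.2, 0.4, 0.6, 0.8]
    back = (labels - start) / step        # reverse: [0.0, 1.0, 2.0, 3.0, 4.0]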
""" start: float stop: float _step: float | None __slots__ = ("_step", "start", "stop") def __init__( self, start: float, stop: float, size: int, coord_name: Hashable, dim: str, dtype: Any = None, ): if dtype is None: dtype = np.dtype(np.float64) super().__init__([coord_name], {dim: size}, dtype=dtype) self.start = start self.stop = stop self._step = None # Will be calculated by property @property def coord_name(self) -> Hashable: return self.coord_names[0] @property def dim(self) -> str: return self.dims[0] @property def size(self) -> int: return self.dim_size[self.dim] @property def step(self) -> float: if self._step is not None: return self._step if self.size > 0: return (self.stop - self.start) / self.size else: # For empty arrays, default to 1.0 return 1.0 def forward(self, dim_positions: dict[str, Any]) -> dict[Hashable, Any]: positions = dim_positions[self.dim] labels = self.start + positions * self.step return {self.coord_name: labels} def reverse(self, coord_labels: dict[Hashable, Any]) -> dict[str, Any]: labels = coord_labels[self.coord_name] positions = (labels - self.start) / self.step return {self.dim: positions} def equals( self, other: CoordinateTransform, exclude: frozenset[Hashable] | None = None, *, exact: bool = False, ) -> bool: """Check equality with another RangeCoordinateTransform. Parameters ---------- other : CoordinateTransform The other transform to compare with. exclude : frozenset of hashable, optional Dimensions excluded from checking (unused for 1D RangeIndex). exact : bool, default False If False (default), use np.isclose() for floating point comparisons to handle accumulated floating point errors from slicing operations. If True, require exact equality of start and stop values. Returns ------- bool True if the transforms are equal, False otherwise. """ if not isinstance(other, RangeCoordinateTransform): return False if exact: return ( self.start == other.start and self.stop == other.stop and self.size == other.size ) # Use np.isclose for floating point comparisons to handle accumulated # floating point errors (e.g., from slicing operations) return bool( np.isclose(self.start, other.start) and np.isclose(self.stop, other.stop) and self.size == other.size ) def slice(self, sl: slice) -> "RangeCoordinateTransform": new_range = range(self.size)[sl] new_size = len(new_range) new_start = self.start + new_range.start * self.step new_stop = self.start + new_range.stop * self.step result = type(self)( new_start, new_stop, new_size, self.coord_name, self.dim, dtype=self.dtype, ) if new_size == 0: # For empty slices, preserve step from parent result._step = self.step return result class RangeIndex(CoordinateTransformIndex): """Xarray index implementing a simple bounded 1-dimension interval with evenly spaced, monotonic floating-point values. This index is memory-saving, i.e., the values of its associated coordinate variable are not materialized in memory. Do not use :py:meth:`~xarray.indexes.RangeIndex.__init__` directly. Instead use :py:meth:`~xarray.indexes.RangeIndex.arange` or :py:meth:`~xarray.indexes.RangeIndex.linspace`, which are similar to :py:func:`numpy.arange` and :py:func:`numpy.linspace`. In the case of a monotonic integer range, it is better using a :py:class:`~xarray.indexes.PandasIndex` that wraps a :py:class:`pandas.RangeIndex`. 
""" transform: RangeCoordinateTransform def __init__(self, transform: RangeCoordinateTransform): super().__init__(transform) def equals( self, other: "Index", *, exclude: frozenset[Hashable] | None = None, exact: bool = False, ) -> bool: """Check equality with another RangeIndex. Parameters ---------- other : Index The other index to compare with. exclude : frozenset of hashable, optional Dimensions excluded from checking (unused for 1D RangeIndex). exact : bool, default False If False (default), use np.isclose() for floating point comparisons to handle accumulated floating point errors from slicing operations. If True, require exact equality of start and stop values. Returns ------- bool True if the indexes are equal, False otherwise. """ if not isinstance(other, RangeIndex): return False return self.transform.equals(other.transform, exclude=exclude, exact=exact) @classmethod def arange( cls, start: float | None = None, stop: float | None = None, step: float | None = None, *, coord_name: Hashable | None = None, dim: str, dtype: Any = None, ) -> "RangeIndex": """Create a new RangeIndex from given start, stop and step values. ``RangeIndex.arange`` can be called with a varying number of positional arguments: - ``RangeIndex.arange(stop)``: the index is within the half-open interval [0, stop) (in other words, the interval including start but excluding stop). - ``RangeIndex.arange(start, stop)``: the index is within the half-open interval [start, stop). - ``RangeIndex.arange(start, stop, step)``: the index is within the half-open interval [start, stop), with spacing between values given by step. .. note:: When using a non-integer step, such as 0.1, it is often better to use :py:meth:`~xarray.indexes.RangeIndex.linspace`. .. note:: ``RangeIndex.arange(start=4.0)`` returns a range index in the [0.0, 4.0) interval, i.e., ``start`` is interpreted as ``stop`` even when it is given as a unique keyword argument. Parameters ---------- start : float, optional Start of interval. The interval includes this value. The default start value is 0. If ``stop`` is not given, the value given here is interpreted as the end of the interval. stop : float End of interval. In general the interval does not include this value, except floating point round-off affects the size of the dimension. step : float, optional Spacing between values (default: 1.0). coord_name : Hashable, optional Name of the (lazy) coordinate variable that will be created and associated with the new index. If ``None``, the coordinate is named as the dimension name. dim : str Dimension name. dtype : dtype, optional The dtype of the coordinate variable (default: float64). 
Examples -------- >>> from xarray.indexes import RangeIndex >>> index = RangeIndex.arange(0.0, 1.0, 0.2, dim="x") >>> ds = xr.Dataset(coords=xr.Coordinates.from_xindex(index)) >>> ds Size: 40B Dimensions: (x: 5) Coordinates: * x (x) float64 40B 0.0 0.2 0.4 0.6 0.8 Data variables: *empty* Indexes: x RangeIndex (start=0, stop=1, step=0.2) """ if stop is None: if start is None: raise TypeError("RangeIndex.arange() requires stop to be specified") else: stop = start start = None if start is None: start = 0.0 if step is None: step = 1.0 if coord_name is None: coord_name = dim size = math.ceil((stop - start) / step) transform = RangeCoordinateTransform( start, stop, size, coord_name, dim, dtype=dtype ) return cls(transform) @classmethod def linspace( cls, start: float, stop: float, num: int = 50, endpoint: bool = True, *, coord_name: Hashable | None = None, dim: str, dtype: Any = None, ) -> "RangeIndex": """Create a new RangeIndex from given start / stop values and number of values. Parameters ---------- start : float Start of interval. The interval includes this value. stop : float, optional End of interval. The interval includes this value if ``endpoint=True``. num : float, optional Number of values in the interval, i.e., dimension size (default: 50). endpoint : bool, optional If True (default), the ``stop`` value is included in the interval. coord_name : Hashable, optional Name of the (lazy) coordinate variable that will be created and associated with the new index. If ``None``, the coordinate is named as the dimension name. dim : str Dimension name. dtype : dtype, optional The dtype of the coordinate variable (default: float64). Examples -------- >>> from xarray.indexes import RangeIndex >>> index = RangeIndex.linspace(0.0, 1.0, 5, dim="x") >>> ds = xr.Dataset(coords=xr.Coordinates.from_xindex(index)) >>> ds Size: 40B Dimensions: (x: 5) Coordinates: * x (x) float64 40B 0.0 0.25 0.5 0.75 1.0 Data variables: *empty* Indexes: x RangeIndex (start=0, stop=1.25, step=0.25) """ if coord_name is None: coord_name = dim if endpoint: stop += (stop - start) / (num - 1) transform = RangeCoordinateTransform( start, stop, num, coord_name, dim, dtype=dtype ) return cls(transform) @classmethod def from_variables( cls, variables: Mapping[Any, Variable], *, options: Mapping[str, Any], ) -> "RangeIndex": raise NotImplementedError( "cannot create a new RangeIndex from an existing coordinate. 
Use instead " "either `RangeIndex.arange()` or `RangeIndex.linspace()` together with " "`Coordinates.from_xindex()`" ) @property def start(self) -> float: """Returns the start of the interval (the interval includes this value).""" return self.transform.start @property def stop(self) -> float: """Returns the end of the interval (the interval does not include this value).""" return self.transform.stop @property def step(self) -> float: """Returns the spacing between values.""" return self.transform.step @property def coord_name(self) -> Hashable: return self.transform.coord_names[0] @property def dim(self) -> str: return self.transform.dims[0] @property def size(self) -> int: return self.transform.dim_size[self.dim] def isel( self, indexers: Mapping[Any, int | slice | np.ndarray | Variable] ) -> Index | None: idxer = indexers[self.dim] if isinstance(idxer, slice): return RangeIndex(self.transform.slice(idxer)) elif (isinstance(idxer, Variable) and idxer.ndim > 1) or duck_array_ops.ndim( idxer ) == 0: return None else: values = self.transform.forward({self.dim: np.asarray(idxer)})[ self.coord_name ] if isinstance(idxer, Variable): new_dim = idxer.dims[0] else: new_dim = self.dim pd_index = pd.Index(values, name=self.coord_name) return PandasIndex(pd_index, new_dim, coord_dtype=values.dtype) def sel( self, labels: dict[Any, Any], method=None, tolerance=None ) -> IndexSelResult: label = labels[self.dim] if method != "nearest": raise ValueError("RangeIndex only supports selection with method='nearest'") # TODO: for RangeIndex it might not be too hard to support tolerance if tolerance is not None: raise ValueError( "RangeIndex doesn't support selection with a given tolerance value yet" ) if isinstance(label, slice): if label.step is None: # continuous interval slice indexing (preserves the index) positions = self.transform.reverse( {self.coord_name: np.array([label.start, label.stop])} ) pos = np.round(positions[self.dim]).astype("int") new_start = max(pos[0], 0) new_stop = min(pos[1], self.size) return IndexSelResult({self.dim: slice(new_start, new_stop)}) else: # otherwise convert to basic (array) indexing label = np.arange(label.start, label.stop, label.step) # support basic indexing (in the 1D case basic vs. vectorized indexing # are pretty much similar) unwrap_xr = False if not isinstance(label, Variable | DataArray): # basic indexing -> either scalar or 1-d array try: var = Variable("_", label) except ValueError: var = Variable((), label) labels = {self.dim: var} unwrap_xr = True result = super().sel(labels, method=method, tolerance=tolerance) if unwrap_xr: dim_indexers = {self.dim: result.dim_indexers[self.dim].values} result = IndexSelResult(dim_indexers) return result def to_pandas_index(self) -> pd.Index: values = self.transform.generate_coords() return pd.Index(values[self.dim]) def _repr_inline_(self, max_width) -> str: params_fmt = ( f"start={self.start:.3g}, stop={self.stop:.3g}, step={self.step:.3g}" ) return f"{self.__class__.__name__} ({params_fmt})" def __repr__(self) -> str: params_fmt = ( f"start={self.start:.3g}, stop={self.stop:.3g}, step={self.step:.3g}, " f"size={self.size}, coord_name={self.coord_name!r}, dim={self.dim!r}" ) return f"{self.__class__.__name__} ({params_fmt})" python-xarray-2026.01.0/xarray/indexes/__init__.py0000664000175000017500000000111415136607163022116 0ustar alastairalastair"""Xarray index objects for label-based selection and alignment of Dataset / DataArray objects. 
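A short sketch of inspecting and (re)assigning an index on a dataset::

    import xarray as xr

    ds = xr.Dataset({"v": ("x", [1, 2, 3])}, coords={"x": [10.0, 20.0, 30.0]})
    ds.xindexes                    # coordinate name -> Index (PandasIndex by default)
    ds.sel(x=20.0)                 # label-based selection goes through the index
    ds = ds.drop_indexes("x").set_xindex("x", xr.indexes.PandasIndex)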
""" from xarray.core.coordinate_transform import CoordinateTransform from xarray.core.indexes import ( CoordinateTransformIndex, Index, PandasIndex, PandasMultiIndex, ) from xarray.indexes.nd_point_index import NDPointIndex, TreeAdapter from xarray.indexes.range_index import RangeIndex __all__ = [ "CoordinateTransform", "CoordinateTransformIndex", "Index", "NDPointIndex", "PandasIndex", "PandasMultiIndex", "RangeIndex", "TreeAdapter", ] python-xarray-2026.01.0/xarray/backends/0000775000175000017500000000000015136607163020123 5ustar alastairalastairpython-xarray-2026.01.0/xarray/backends/file_manager.py0000664000175000017500000004171015136607163023111 0ustar alastairalastairfrom __future__ import annotations import atexit import threading import uuid import warnings from collections.abc import Callable, Hashable, Iterator, Mapping, MutableMapping from contextlib import AbstractContextManager, contextmanager from typing import Any, Generic, Literal, TypeVar, cast from xarray.backends.locks import acquire from xarray.backends.lru_cache import LRUCache from xarray.core import utils from xarray.core.options import OPTIONS from xarray.core.types import Closable, Lock # Global cache for storing open files. FILE_CACHE: LRUCache[Any, Closable] = LRUCache( maxsize=OPTIONS["file_cache_maxsize"], on_evict=lambda k, v: v.close() ) assert FILE_CACHE.maxsize, "file cache must be at least size one" T_File = TypeVar("T_File", bound=Closable) REF_COUNTS: dict[Any, int] = {} _OMIT_MODE = utils.ReprObject("") class FileManager(Generic[T_File]): """Manager for acquiring and closing a file object. Use FileManager subclasses (CachingFileManager in particular) on backend storage classes to automatically handle issues related to keeping track of many open files and transferring them between multiple processes. """ def acquire(self, needs_lock: bool = True) -> T_File: """Acquire the file object from this manager.""" raise NotImplementedError() def acquire_context( self, needs_lock: bool = True ) -> AbstractContextManager[T_File]: """Context manager for acquiring a file. Yields a file object. The context manager unwinds any actions taken as part of acquisition (i.e., removes it from any cache) if an exception is raised from the context. It *does not* automatically close the file. """ raise NotImplementedError() def close(self, needs_lock: bool = True) -> None: """Close the file object associated with this manager, if needed.""" raise NotImplementedError() class CachingFileManager(FileManager[T_File]): """Wrapper for automatically opening and closing file objects. Unlike files, CachingFileManager objects can be safely pickled and passed between processes. They should be explicitly closed to release resources, but a per-process least-recently-used cache for open files ensures that you can safely create arbitrarily large numbers of FileManager objects. Don't directly close files acquired from a FileManager. Instead, call FileManager.close(), which ensures that closed files are removed from the cache as well. Example usage:: manager = FileManager(open, "example.txt", mode="w") f = manager.acquire() f.write(...) 
manager.close() # ensures file is closed Note that as long as previous files are still cached, acquiring a file multiple times from the same FileManager is essentially free:: f1 = manager.acquire() f2 = manager.acquire() assert f1 is f2 """ def __init__( self, opener: Callable[..., T_File], *args: Any, mode: Any = _OMIT_MODE, kwargs: Mapping[str, Any] | None = None, lock: Lock | None | Literal[False] = None, cache: MutableMapping[Any, T_File] | None = None, manager_id: Hashable | None = None, ref_counts: dict[Any, int] | None = None, ): """Initialize a CachingFileManager. The cache, manager_id and ref_counts arguments exist solely to facilitate dependency injection, and should only be set for tests. Parameters ---------- opener : callable Function that when called like ``opener(*args, **kwargs)`` returns an open file object. The file object must implement a ``close()`` method. *args Positional arguments for opener. A ``mode`` argument should be provided as a keyword argument (see below). All arguments must be hashable. mode : optional If provided, passed as a keyword argument to ``opener`` along with ``**kwargs``. ``mode='w' `` has special treatment: after the first call it is replaced by ``mode='a'`` in all subsequent function to avoid overriding the newly created file. kwargs : dict, optional Keyword arguments for opener, excluding ``mode``. All values must be hashable. lock : duck-compatible threading.Lock, optional Lock to use when modifying the cache inside acquire() and close(). By default, uses a new threading.Lock() object. If set, this object should be pickleable. cache : MutableMapping, optional Mapping to use as a cache for open files. By default, uses xarray's global LRU file cache. Because ``cache`` typically points to a global variable and contains non-picklable file objects, an unpickled FileManager objects will be restored with the default cache. manager_id : hashable, optional Identifier for this CachingFileManager. ref_counts : dict, optional Optional dict to use for keeping track the number of references to the same file. """ self._opener = opener self._args = args self._mode = mode self._kwargs = {} if kwargs is None else dict(kwargs) if lock is None or lock is False: self._use_default_lock = True self._lock: Lock = threading.Lock() else: self._use_default_lock = False self._lock = lock # cache[self._key] stores the file associated with this object. if cache is None: cache = cast(MutableMapping[Any, T_File], FILE_CACHE) self._cache: MutableMapping[Any, T_File] = cache if manager_id is None: # Each call to CachingFileManager should separately open files. manager_id = str(uuid.uuid4()) self._manager_id = manager_id self._key = self._make_key() # ref_counts[self._key] stores the number of CachingFileManager objects # in memory referencing this same file. We use this to know if we can # close a file when the manager is deallocated. if ref_counts is None: ref_counts = REF_COUNTS self._ref_counter = _RefCounter(ref_counts) self._ref_counter.increment(self._key) def _make_key(self) -> _HashedSequence: """Make a key for caching files in the LRU cache.""" value = ( self._opener, self._args, "a" if self._mode == "w" else self._mode, tuple(sorted(self._kwargs.items())), self._manager_id, ) return _HashedSequence(value) @contextmanager def _optional_lock(self, needs_lock: bool): """Context manager for optionally acquiring a lock.""" if needs_lock: with self._lock: yield else: yield def acquire(self, needs_lock: bool = True) -> T_File: """Acquire a file object from the manager. 
A new file is only opened if it has expired from the least-recently-used cache. This method uses a lock, which ensures that it is thread-safe. You can safely acquire a file in multiple threads at the same time, as long as the underlying file object is thread-safe. Returns ------- file-like An open file object, as returned by ``opener(*args, **kwargs)``. """ file, _ = self._acquire_with_cache_info(needs_lock) return file @contextmanager def acquire_context(self, needs_lock: bool = True) -> Iterator[T_File]: """Context manager for acquiring a file.""" file, cached = self._acquire_with_cache_info(needs_lock) try: yield file except Exception: if not cached: self.close(needs_lock) raise def _acquire_with_cache_info(self, needs_lock: bool = True) -> tuple[T_File, bool]: """Acquire a file, returning the file and whether it was cached.""" with self._optional_lock(needs_lock): try: file = self._cache[self._key] except KeyError: kwargs = self._kwargs if self._mode is not _OMIT_MODE: kwargs = kwargs.copy() kwargs["mode"] = self._mode file = self._opener(*self._args, **kwargs) if self._mode == "w": # ensure file doesn't get overridden when opened again self._mode = "a" self._cache[self._key] = file return file, False else: return file, True def close(self, needs_lock: bool = True) -> None: """Explicitly close any associated file object (if necessary).""" # TODO: remove needs_lock if/when we have a reentrant lock in # dask.distributed: https://github.com/dask/dask/issues/3832 with self._optional_lock(needs_lock): default = None file = self._cache.pop(self._key, default) if file is not None: file.close() def __del__(self) -> None: # If we're the only CachingFileManger referencing an unclosed file, # remove it from the cache upon garbage collection. # # We keep track of our own reference count because we don't want to # close files if another identical file manager needs it. This can # happen if a CachingFileManager is pickled and unpickled without # closing the original file. ref_count = self._ref_counter.decrement(self._key) if not ref_count and self._key in self._cache: if acquire(self._lock, blocking=False): # Only close files if we can do so immediately. try: self.close(needs_lock=False) finally: self._lock.release() if OPTIONS["warn_for_unclosed_files"]: warnings.warn( f"deallocating {self}, but file is not already closed. " "This may indicate a bug.", RuntimeWarning, stacklevel=2, ) def __getstate__(self): """State for pickling.""" # cache is intentionally omitted: we don't want to try to serialize # these global objects. 
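# A pickling round-trip behaves like this (illustrative sketch; "example.txt"
# is a placeholder path):
#
#   import pickle
#   manager = CachingFileManager(open, "example.txt", mode="r")
#   restored = pickle.loads(pickle.dumps(manager))
#   f = restored.acquire()   # reopens via the stored opener/args if not cached
#
# The restored manager falls back to the global FILE_CACHE and, when the
# original lock was the default, to a fresh threading.Lock().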
lock = None if self._use_default_lock else self._lock return ( self._opener, self._args, self._mode, self._kwargs, lock, self._manager_id, ) def __setstate__(self, state) -> None: """Restore from a pickle.""" opener, args, mode, kwargs, lock, manager_id = state self.__init__( # type: ignore[misc] opener, *args, mode=mode, kwargs=kwargs, lock=lock, manager_id=manager_id ) def __repr__(self) -> str: args_string = ", ".join(map(repr, self._args)) if self._mode is not _OMIT_MODE: args_string += f", mode={self._mode!r}" return ( f"{type(self).__name__}({self._opener!r}, {args_string}, " f"kwargs={self._kwargs}, manager_id={self._manager_id!r})" ) class _RefCounter: """Class for keeping track of reference counts.""" def __init__(self, counts): self._counts = counts self._lock = threading.Lock() def increment(self, name): with self._lock: count = self._counts[name] = self._counts.get(name, 0) + 1 return count def decrement(self, name): with self._lock: count = self._counts[name] - 1 if count: self._counts[name] = count else: del self._counts[name] return count class _HashedSequence(list): """Speedup repeated look-ups by caching hash values. Based on what Python uses internally in functools.lru_cache. Python doesn't perform this optimization automatically: https://bugs.python.org/issue1462796 """ def __init__(self, tuple_value): self[:] = tuple_value self.hashvalue = hash(tuple_value) def __hash__(self) -> int: # type: ignore[override] return self.hashvalue def _get_none() -> None: return None class PickleableFileManager(FileManager[T_File]): """File manager that supports pickling by reopening a file object. Use PickleableFileManager for wrapping file-like objects that do not natively support pickling (e.g., netCDF4.Dataset and h5netcdf.File) in cases where a global cache is not desirable (e.g., for netCDF files opened from bytes in memory, or from existing file objects). """ def __init__( self, opener: Callable[..., T_File], *args: Any, mode: Any = _OMIT_MODE, lock: Lock | None | Literal[False] = None, kwargs: Mapping[str, Any] | None = None, ): kwargs = {} if kwargs is None else dict(kwargs) self._opener = opener self._args = args self._mode = "a" if mode == "w" else mode self._kwargs = kwargs self._lock = lock # Note: No need for locking with PickleableFileManager, because all # opening of files happens in the constructor. if mode != _OMIT_MODE: kwargs = kwargs | {"mode": mode} self._file: T_File | None = opener(*args, **kwargs) @property def _closed(self) -> bool: # If opener() raised an error in the constructor, _file may not be set return getattr(self, "_file", None) is None def _get_unclosed_file(self) -> T_File: if self._closed: raise RuntimeError("file is closed") file = self._file assert file is not None return file def acquire(self, needs_lock: bool = True) -> T_File: del needs_lock # unused return self._get_unclosed_file() @contextmanager def acquire_context(self, needs_lock: bool = True) -> Iterator[T_File]: del needs_lock # unused yield self._get_unclosed_file() def close(self, needs_lock: bool = True) -> None: if not self._closed: file = self._get_unclosed_file() if needs_lock and self._lock: with self._lock: file.close() else: file.close() self._file = None # Remove all references to opener arguments, so they can be garbage # collected. self._args = () self._mode = _OMIT_MODE self._kwargs = {} def __del__(self) -> None: if not self._closed: self.close() if OPTIONS["warn_for_unclosed_files"]: warnings.warn( f"deallocating {self}, but file is not already closed. 
" "This may indicate a bug.", RuntimeWarning, stacklevel=2, ) def __getstate__(self): # file is intentionally omitted: we want to open it again opener = _get_none if self._closed else self._opener return (opener, self._args, self._mode, self._lock, self._kwargs) def __setstate__(self, state) -> None: opener, args, mode, lock, kwargs = state self.__init__(opener, *args, mode=mode, lock=lock, kwargs=kwargs) # type: ignore[misc] def __repr__(self) -> str: if self._closed: return f"" args_string = ", ".join(map(repr, self._args)) if self._mode is not _OMIT_MODE: args_string += f", mode={self._mode!r}" kwargs = ( self._kwargs | {"memory": utils.ReprObject("...")} if "memory" in self._kwargs else self._kwargs ) return f"{type(self).__name__}({self._opener!r}, {args_string}, {kwargs=})" @atexit.register def _remove_del_methods(): # We don't need to close unclosed files at program exit, and may not be able # to, because Python is cleaning up imports / globals. del CachingFileManager.__del__ del PickleableFileManager.__del__ class DummyFileManager(FileManager[T_File]): """FileManager that simply wraps an open file in the FileManager interface.""" def __init__( self, value: T_File, *, close: Callable[[], None] | None = None, lock: Lock | None | Literal[False] = None, ): if close is None: close = value.close self._lock = lock self._value = value self._close = close def acquire(self, needs_lock: bool = True) -> T_File: del needs_lock # unused return self._value @contextmanager def acquire_context(self, needs_lock: bool = True) -> Iterator[T_File]: del needs_lock # unused yield self._value def close(self, needs_lock: bool = True) -> None: if needs_lock and self._lock: with self._lock: self._close() else: self._close() python-xarray-2026.01.0/xarray/backends/lru_cache.py0000664000175000017500000000711515136607163022426 0ustar alastairalastairfrom __future__ import annotations import threading from collections import OrderedDict from collections.abc import Callable, Iterator, MutableMapping from typing import Any, TypeVar K = TypeVar("K") V = TypeVar("V") class LRUCache(MutableMapping[K, V]): """Thread-safe LRUCache based on an OrderedDict. All dict operations (__getitem__, __setitem__, __contains__) update the priority of the relevant key and take O(1) time. The dict is iterated over in order from the oldest to newest key, which means that a complete pass over the dict should not affect the order of any entries. When a new item is set and the maximum size of the cache is exceeded, the oldest item is dropped and called with ``on_evict(key, value)``. The ``maxsize`` property can be used to view or adjust the capacity of the cache, e.g., ``cache.maxsize = new_size``. """ _cache: OrderedDict[K, V] _maxsize: int _lock: threading.RLock _on_evict: Callable[[K, V], Any] | None __slots__ = ("_cache", "_lock", "_maxsize", "_on_evict") def __init__(self, maxsize: int, on_evict: Callable[[K, V], Any] | None = None): """ Parameters ---------- maxsize : int Integer maximum number of items to hold in the cache. on_evict : callable, optional Function to call like ``on_evict(key, value)`` when items are evicted. 
""" if not isinstance(maxsize, int): raise TypeError("maxsize must be an integer") if maxsize < 0: raise ValueError("maxsize must be non-negative") self._maxsize = maxsize self._cache = OrderedDict() self._lock = threading.RLock() self._on_evict = on_evict def __getitem__(self, key: K) -> V: # record recent use of the key by moving it to the front of the list with self._lock: value = self._cache[key] self._cache.move_to_end(key) return value def _enforce_size_limit(self, capacity: int) -> None: """Shrink the cache if necessary, evicting the oldest items.""" while len(self._cache) > capacity: key, value = self._cache.popitem(last=False) if self._on_evict is not None: self._on_evict(key, value) def __setitem__(self, key: K, value: V) -> None: with self._lock: if key in self._cache: # insert the new value at the end del self._cache[key] self._cache[key] = value elif self._maxsize: # make room if necessary self._enforce_size_limit(self._maxsize - 1) self._cache[key] = value elif self._on_evict is not None: # not saving, immediately evict self._on_evict(key, value) def __delitem__(self, key: K) -> None: del self._cache[key] def __iter__(self) -> Iterator[K]: # create a list, so accessing the cache during iteration cannot change # the iteration order return iter(list(self._cache)) def __len__(self) -> int: return len(self._cache) @property def maxsize(self) -> int: """Maximum number of items can be held in the cache.""" return self._maxsize @maxsize.setter def maxsize(self, size: int) -> None: """Resize the cache, evicting the oldest items if necessary.""" if size < 0: raise ValueError("maxsize must be non-negative") with self._lock: self._enforce_size_limit(size) self._maxsize = size python-xarray-2026.01.0/xarray/backends/scipy_.py0000664000175000017500000003325315136607163021771 0ustar alastairalastairfrom __future__ import annotations import gzip import io import os from collections.abc import Iterable from typing import TYPE_CHECKING, Any import numpy as np from xarray.backends.common import ( BACKEND_ENTRYPOINTS, BackendArray, BackendEntrypoint, BytesIOProxy, T_PathFileOrDataStore, WritableCFDataStore, _normalize_path, ) from xarray.backends.file_manager import CachingFileManager, DummyFileManager from xarray.backends.locks import ensure_lock, get_write_lock from xarray.backends.netcdf3 import ( encode_nc3_attr_value, encode_nc3_variable, is_valid_nc3_name, ) from xarray.backends.store import StoreBackendEntrypoint from xarray.core import indexing from xarray.core.utils import ( Frozen, FrozenDict, close_on_error, module_available, try_read_magic_number_from_file_or_path, ) from xarray.core.variable import Variable try: from scipy.io import netcdf_file as netcdf_file_base except ImportError: netcdf_file_base = object # type: ignore[assignment,misc,unused-ignore] # scipy is optional if TYPE_CHECKING: import scipy.io from xarray.backends.common import AbstractDataStore from xarray.core.dataset import Dataset from xarray.core.types import ReadBuffer HAS_NUMPY_2_0 = module_available("numpy", minversion="2.0.0.dev0") def _decode_string(s): if isinstance(s, bytes): return s.decode("utf-8", "replace") return s def _decode_attrs(d): # don't decode _FillValue from bytes -> unicode, because we want to ensure # that its type matches the data exactly return {k: v if k == "_FillValue" else _decode_string(v) for (k, v) in d.items()} class ScipyArrayWrapper(BackendArray): def __init__(self, variable_name, datastore): self.datastore = datastore self.variable_name = variable_name array = 
self.get_variable().data self.shape = array.shape self.dtype = np.dtype(array.dtype.kind + str(array.dtype.itemsize)) def get_variable(self, needs_lock=True): ds = self.datastore._manager.acquire(needs_lock) return ds.variables[self.variable_name] def _getitem(self, key): with self.datastore.lock: data = self.get_variable(needs_lock=False).data return data[key] def __getitem__(self, key): data = indexing.explicit_indexing_adapter( key, self.shape, indexing.IndexingSupport.OUTER_1VECTOR, self._getitem ) # Copy data if the source file is mmapped. This makes things consistent # with the netCDF4 library by ensuring we can safely read arrays even # after closing associated files. copy = self.datastore.ds.use_mmap # adapt handling of copy-kwarg to numpy 2.0 # see https://github.com/numpy/numpy/issues/25916 # and https://github.com/numpy/numpy/pull/25922 copy = None if HAS_NUMPY_2_0 and copy is False else copy return np.array(data, dtype=self.dtype, copy=copy) def __setitem__(self, key, value): with self.datastore.lock: data = self.get_variable(needs_lock=False) try: data[key] = value except TypeError: if key is Ellipsis: # workaround for GH: scipy/scipy#6880 data[:] = value else: raise # TODO: Make the scipy import lazy again after upstreaming these fixes. class flush_only_netcdf_file(netcdf_file_base): # scipy.io.netcdf_file.close() incorrectly closes file objects that # were passed in as constructor arguments: # https://github.com/scipy/scipy/issues/13905 # Instead of closing such files, only call flush(), which is # equivalent as long as the netcdf_file object is not mmapped. # This suffices to keep BytesIO objects open long enough to read # their contents from to_netcdf(), but underlying files still get # closed when the netcdf_file is garbage collected (via __del__), # and will need to be fixed upstream in scipy. def close(self): if hasattr(self, "fp") and not self.fp.closed: self.flush() self.fp.seek(0) # allow file to be read again def __del__(self): # Remove the __del__ method, which in scipy is aliased to close(). # These files need to be closed explicitly by xarray. pass def _open_scipy_netcdf(filename, mode, mmap, version, flush_only=False): import scipy.io netcdf_file = flush_only_netcdf_file if flush_only else scipy.io.netcdf_file # if the string ends with .gz, then gunzip and open as netcdf file if isinstance(filename, str) and filename.endswith(".gz"): try: return netcdf_file( gzip.open(filename), mode=mode, mmap=mmap, version=version ) except TypeError as e: # TODO: gzipped loading only works with NetCDF3 files. errmsg = e.args[0] if "is not a valid NetCDF 3 file" in errmsg: raise ValueError( "gzipped file loading only supports NetCDF 3 files." ) from e else: raise try: return netcdf_file(filename, mode=mode, mmap=mmap, version=version) except TypeError as e: # netcdf3 message is obscure in this case errmsg = e.args[0] if "is not a valid NetCDF 3 file" in errmsg: msg = """ If this is a NetCDF4 file, you may need to install the netcdf4 library, e.g., $ pip install netcdf4 """ errmsg += msg raise TypeError(errmsg) from e else: raise class ScipyDataStore(WritableCFDataStore): """Store for reading and writing data via scipy.io.netcdf_file. This store has the advantage of being able to be initialized with a StringIO object, allow for serialization without writing to disk. It only supports the NetCDF3 file-format. 
""" def __init__( self, filename_or_obj, mode="r", format=None, group=None, mmap=None, lock=None ): if group is not None: raise ValueError("cannot save to a group with the scipy.io.netcdf backend") if format is None or format == "NETCDF3_64BIT": version = 2 elif format == "NETCDF3_CLASSIC": version = 1 else: raise ValueError(f"invalid format for scipy.io.netcdf backend: {format!r}") if lock is None and mode != "r" and isinstance(filename_or_obj, str): lock = get_write_lock(filename_or_obj) self.lock = ensure_lock(lock) if isinstance(filename_or_obj, BytesIOProxy): source = filename_or_obj filename_or_obj = io.BytesIO() source.getvalue = filename_or_obj.getbuffer if isinstance(filename_or_obj, str): # path manager = CachingFileManager( _open_scipy_netcdf, filename_or_obj, mode=mode, lock=lock, kwargs=dict(mmap=mmap, version=version), ) elif hasattr(filename_or_obj, "seek"): # file object # Note: checking for .seek matches the check for file objects # in scipy.io.netcdf_file scipy_dataset = _open_scipy_netcdf( filename_or_obj, mode=mode, mmap=mmap, version=version, flush_only=True, ) assert not scipy_dataset.use_mmap # no mmap for file objects manager = DummyFileManager(scipy_dataset) else: raise ValueError( f"cannot open {filename_or_obj=} with scipy.io.netcdf_file" ) self._manager = manager @property def ds(self) -> scipy.io.netcdf_file: return self._manager.acquire() def open_store_variable(self, name, var): return Variable( var.dimensions, indexing.LazilyIndexedArray(ScipyArrayWrapper(name, self)), _decode_attrs(var._attributes), ) def get_variables(self): return FrozenDict( (k, self.open_store_variable(k, v)) for k, v in self.ds.variables.items() ) def get_attrs(self): return Frozen(_decode_attrs(self.ds._attributes)) def get_dimensions(self): return Frozen(self.ds.dimensions) def get_encoding(self): return { "unlimited_dims": {k for k, v in self.ds.dimensions.items() if v is None} } def set_dimension(self, name, length, is_unlimited=False): if name in self.ds.dimensions: raise ValueError( f"{type(self).__name__} does not support modifying dimensions" ) dim_length = length if not is_unlimited else None self.ds.createDimension(name, dim_length) def _validate_attr_key(self, key): if not is_valid_nc3_name(key): raise ValueError("Not a valid attribute name") def set_attribute(self, key, value): self._validate_attr_key(key) value = encode_nc3_attr_value(value) setattr(self.ds, key, value) def encode_variable(self, variable, name=None): variable = encode_nc3_variable(variable, name=name) return variable def prepare_variable( self, name, variable, check_encoding=False, unlimited_dims=None ): if ( check_encoding and variable.encoding and variable.encoding != {"_FillValue": None} ): raise ValueError( f"unexpected encoding for scipy backend: {list(variable.encoding)}" ) data = variable.data # nb. this still creates a numpy array in all memory, even though we # don't write the data yet; scipy.io.netcdf does not support incremental # writes. 
if name not in self.ds.variables: self.ds.createVariable(name, data.dtype, variable.dims) scipy_var = self.ds.variables[name] for k, v in variable.attrs.items(): self._validate_attr_key(k) setattr(scipy_var, k, v) target = ScipyArrayWrapper(name, self) return target, data def sync(self): self.ds.sync() def close(self): self._manager.close() def _normalize_filename_or_obj( filename_or_obj: str | os.PathLike[Any] | ReadBuffer | bytes | memoryview | AbstractDataStore, ) -> str | ReadBuffer | AbstractDataStore: if isinstance(filename_or_obj, bytes | memoryview): return io.BytesIO(filename_or_obj) else: return _normalize_path(filename_or_obj) class ScipyBackendEntrypoint(BackendEntrypoint): """ Backend for netCDF files based on the scipy package. It can open ".nc", ".cdf", and "nc..gz" files but will only be selected as the default if the "netcdf4" and "h5netcdf" engines are not available. It has the advantage that is is a lightweight engine that has no system requirements (unlike netcdf4 and h5netcdf). Additionally it can open gzip compressed (".gz") files. For more information about the underlying library, visit: https://docs.scipy.org/doc/scipy/reference/generated/scipy.io.netcdf_file.html See Also -------- backends.ScipyDataStore backends.NetCDF4BackendEntrypoint backends.H5netcdfBackendEntrypoint """ description = "Open netCDF files (.nc, .cdf and .nc.gz) using scipy in Xarray" url = "https://docs.xarray.dev/en/stable/generated/xarray.backends.ScipyBackendEntrypoint.html" def guess_can_open( self, filename_or_obj: T_PathFileOrDataStore, ) -> bool: from xarray.core.utils import is_remote_uri filename_or_obj = _normalize_filename_or_obj(filename_or_obj) # scipy can only handle local files - check this before trying to read magic number if isinstance(filename_or_obj, str) and is_remote_uri(filename_or_obj): return False magic_number = try_read_magic_number_from_file_or_path(filename_or_obj) if magic_number is not None and magic_number.startswith(b"\x1f\x8b"): with gzip.open(filename_or_obj) as f: # type: ignore[arg-type] magic_number = try_read_magic_number_from_file_or_path(f) if magic_number is not None: return magic_number.startswith(b"CDF") if isinstance(filename_or_obj, str | os.PathLike): from pathlib import Path suffix = "".join(Path(filename_or_obj).suffixes) return suffix in {".nc", ".cdf", ".nc.gz"} return False def open_dataset( self, filename_or_obj: T_PathFileOrDataStore, *, mask_and_scale=True, decode_times=True, concat_characters=True, decode_coords=True, drop_variables: str | Iterable[str] | None = None, use_cftime=None, decode_timedelta=None, mode="r", format=None, group=None, mmap=None, lock=None, ) -> Dataset: filename_or_obj = _normalize_filename_or_obj(filename_or_obj) store = ScipyDataStore( filename_or_obj, mode=mode, format=format, group=group, mmap=mmap, lock=lock ) store_entrypoint = StoreBackendEntrypoint() with close_on_error(store): ds = store_entrypoint.open_dataset( store, mask_and_scale=mask_and_scale, decode_times=decode_times, concat_characters=concat_characters, decode_coords=decode_coords, drop_variables=drop_variables, use_cftime=use_cftime, decode_timedelta=decode_timedelta, ) return ds BACKEND_ENTRYPOINTS["scipy"] = ("scipy", ScipyBackendEntrypoint) python-xarray-2026.01.0/xarray/backends/writers.py0000664000175000017500000010324215136607163022176 0ustar alastairalastairfrom __future__ import annotations import importlib import io import os from collections.abc import Callable, Hashable, Iterable, Mapping, MutableMapping from io import IOBase from 
itertools import starmap from numbers import Number from os import PathLike from typing import TYPE_CHECKING, Any, Literal, get_args, overload import numpy as np from xarray import backends, conventions from xarray.backends.api import ( _normalize_path, delayed_close_after_writes, ) from xarray.backends.common import AbstractWritableDataStore, ArrayWriter, BytesIOProxy from xarray.backends.locks import get_dask_scheduler from xarray.backends.store import AbstractDataStore from xarray.core.dataset import Dataset from xarray.core.datatree import DataTree from xarray.core.options import OPTIONS from xarray.core.types import NetcdfWriteModes, ZarrWriteModes from xarray.core.utils import emit_user_level_warning if TYPE_CHECKING: from dask.delayed import Delayed from xarray.backends import ZarrStore from xarray.backends.api import T_NetcdfEngine, T_NetcdfTypes from xarray.core.types import ZarrStoreLike T_DataTreeNetcdfEngine = Literal["netcdf4", "h5netcdf", "pydap"] T_DataTreeNetcdfTypes = Literal["NETCDF4"] WRITEABLE_STORES: dict[T_NetcdfEngine, Callable] = { "netcdf4": backends.NetCDF4DataStore.open, "scipy": backends.ScipyDataStore, "h5netcdf": backends.H5NetCDFStore.open, } def get_writable_netcdf_store( target, engine: T_NetcdfEngine, *, format: T_NetcdfTypes | None, mode: NetcdfWriteModes, autoclose: bool, invalid_netcdf: bool, auto_complex: bool | None, ) -> AbstractWritableDataStore: """Create a store for writing to a netCDF file.""" try: store_open = WRITEABLE_STORES[engine] except KeyError as err: raise ValueError(f"unrecognized engine for to_netcdf: {engine!r}") from err if format is not None: format = format.upper() # type: ignore[assignment] kwargs = dict(autoclose=True) if autoclose else {} if invalid_netcdf: if engine == "h5netcdf": kwargs["invalid_netcdf"] = invalid_netcdf else: raise ValueError( f"unrecognized option 'invalid_netcdf' for engine {engine}" ) if auto_complex is not None: kwargs["auto_complex"] = auto_complex return store_open(target, mode=mode, format=format, **kwargs) def _validate_dataset_names(dataset: Dataset) -> None: """DataArray.name and Dataset keys must be a string or None""" def check_name(name: Hashable): if isinstance(name, str): if not name: raise ValueError( f"Invalid name {name!r} for DataArray or Dataset key: " "string must be length 1 or greater for " "serialization to netCDF or zarr files" ) elif name is not None: raise TypeError( f"Invalid name {name!r} for DataArray or Dataset key: " "must be either a string or None for serialization to netCDF " "or zarr files" ) for k in dataset.variables: check_name(k) def _validate_attrs(dataset, engine, invalid_netcdf=False): """`attrs` must have a string key and a value which is either: a number, a string, an ndarray, a list/tuple of numbers/strings, or a numpy.bool_. Notes ----- A numpy.bool_ is only allowed when using the h5netcdf engine with `invalid_netcdf=True`. """ valid_types = (str, Number, np.ndarray, np.number, list, tuple, bytes) if invalid_netcdf and engine == "h5netcdf": valid_types += (np.bool_,) def check_attr(name, value, valid_types): if isinstance(name, str): if not name: raise ValueError( f"Invalid name for attr {name!r}: string must be " "length 1 or greater for serialization to " "netCDF files" ) else: raise TypeError( f"Invalid name for attr: {name!r} must be a string for " "serialization to netCDF files" ) if not isinstance(value, valid_types): raise TypeError( f"Invalid value for attr {name!r}: {value!r}. 
For serialization to " "netCDF files, its value must be of one of the following types: " f"{', '.join([vtype.__name__ for vtype in valid_types])}" ) if isinstance(value, bytes) and engine == "h5netcdf": try: value.decode("utf-8") except UnicodeDecodeError as e: raise ValueError( f"Invalid value provided for attribute '{name!r}': {value!r}. " "Only binary data derived from UTF-8 encoded strings is allowed " f"for the '{engine}' engine. Consider using the 'netcdf4' engine." ) from e if b"\x00" in value: raise ValueError( f"Invalid value provided for attribute '{name!r}': {value!r}. " f"Null characters are not permitted for the '{engine}' engine. " "Consider using the 'netcdf4' engine." ) # Check attrs on the dataset itself for k, v in dataset.attrs.items(): check_attr(k, v, valid_types) # Check attrs on each variable within the dataset for variable in dataset.variables.values(): for k, v in variable.attrs.items(): check_attr(k, v, valid_types) def get_default_netcdf_write_engine( path_or_file: str | IOBase | None, format: T_NetcdfTypes | None, ) -> Literal["netcdf4", "h5netcdf", "scipy"]: """Return the default netCDF library to use for writing a netCDF file.""" module_names = { "netcdf4": "netCDF4", "scipy": "scipy", "h5netcdf": "h5netcdf", } candidates = list(OPTIONS["netcdf_engine_order"]) if format is not None: format = format.upper() # type: ignore[assignment] if format not in { "NETCDF4", "NETCDF4_CLASSIC", "NETCDF3_64BIT", "NETCDF3_CLASSIC", }: raise ValueError(f"unexpected {format=}") # TODO: allow format='NETCDF4_CLASSIC' to default to using h5netcdf, # when the oldest supported version of h5netcdf supports it: # https://github.com/h5netcdf/h5netcdf/pull/283 if format != "NETCDF4": candidates.remove("h5netcdf") if format not in {"NETCDF3_64BIT", "NETCDF3_CLASSIC"}: candidates.remove("scipy") nczarr_mode = isinstance(path_or_file, str) and path_or_file.endswith( "#mode=nczarr" ) if nczarr_mode: candidates[:] = ["netcdf4"] if isinstance(path_or_file, IOBase): candidates.remove("netcdf4") for engine in candidates: module_name = module_names[engine] if importlib.util.find_spec(module_name) is not None: return engine if nczarr_mode: format_str = " in NCZarr format" else: format_str = f" with {format=}" if format is not None else "" libraries = ", ".join(module_names[c] for c in candidates) raise ValueError( f"cannot write NetCDF files{format_str} because none of the suitable " f"backend libraries ({libraries}) are installed" ) def _sanitize_unlimited_dims(dataset, unlimited_dims): msg_origin = "unlimited_dims-kwarg" if unlimited_dims is None: unlimited_dims = dataset.encoding.get("unlimited_dims", None) msg_origin = "dataset.encoding" if unlimited_dims is not None: if isinstance(unlimited_dims, str) or not isinstance(unlimited_dims, Iterable): unlimited_dims = [unlimited_dims] else: unlimited_dims = list(unlimited_dims) dataset_dims = set(dataset.dims) unlimited_dims = set(unlimited_dims) if undeclared_dims := (unlimited_dims - dataset_dims): msg = ( f"Unlimited dimension(s) {undeclared_dims!r} declared in {msg_origin!r}, " f"but not part of current dataset dimensions. " f"Consider removing {undeclared_dims!r} from {msg_origin!r}." 
) if msg_origin == "unlimited_dims-kwarg": raise ValueError(msg) else: emit_user_level_warning(msg) return unlimited_dims # multifile=True returns writer and datastore @overload def to_netcdf( dataset: Dataset, path_or_file: str | os.PathLike | None = None, mode: NetcdfWriteModes = "w", format: T_NetcdfTypes | None = None, group: str | None = None, engine: T_NetcdfEngine | None = None, encoding: Mapping[Hashable, Mapping[str, Any]] | None = None, unlimited_dims: Iterable[Hashable] | None = None, compute: bool = True, *, multifile: Literal[True], invalid_netcdf: bool = False, auto_complex: bool | None = None, ) -> tuple[ArrayWriter, AbstractDataStore]: ... # path=None writes to bytes or memoryview, depending on store @overload def to_netcdf( dataset: Dataset, path_or_file: None = None, mode: NetcdfWriteModes = "w", format: T_NetcdfTypes | None = None, group: str | None = None, engine: T_NetcdfEngine | None = None, encoding: Mapping[Hashable, Mapping[str, Any]] | None = None, unlimited_dims: Iterable[Hashable] | None = None, compute: bool = True, multifile: Literal[False] = False, invalid_netcdf: bool = False, auto_complex: bool | None = None, ) -> memoryview: ... # compute=False returns dask.Delayed @overload def to_netcdf( dataset: Dataset, path_or_file: str | os.PathLike, mode: NetcdfWriteModes = "w", format: T_NetcdfTypes | None = None, group: str | None = None, engine: T_NetcdfEngine | None = None, encoding: Mapping[Hashable, Mapping[str, Any]] | None = None, unlimited_dims: Iterable[Hashable] | None = None, *, compute: Literal[False], multifile: Literal[False] = False, invalid_netcdf: bool = False, auto_complex: bool | None = None, ) -> Delayed: ... # default return None @overload def to_netcdf( dataset: Dataset, path_or_file: str | os.PathLike | IOBase, mode: NetcdfWriteModes = "w", format: T_NetcdfTypes | None = None, group: str | None = None, engine: T_NetcdfEngine | None = None, encoding: Mapping[Hashable, Mapping[str, Any]] | None = None, unlimited_dims: Iterable[Hashable] | None = None, compute: Literal[True] = True, multifile: Literal[False] = False, invalid_netcdf: bool = False, auto_complex: bool | None = None, ) -> None: ... # if compute cannot be evaluated at type check time # we may get back either Delayed or None @overload def to_netcdf( dataset: Dataset, path_or_file: str | os.PathLike, mode: NetcdfWriteModes = "w", format: T_NetcdfTypes | None = None, group: str | None = None, engine: T_NetcdfEngine | None = None, encoding: Mapping[Hashable, Mapping[str, Any]] | None = None, unlimited_dims: Iterable[Hashable] | None = None, compute: bool = False, multifile: Literal[False] = False, invalid_netcdf: bool = False, auto_complex: bool | None = None, ) -> Delayed | None: ... # if multifile cannot be evaluated at type check time # we may get back either writer and datastore or Delayed or None @overload def to_netcdf( dataset: Dataset, path_or_file: str | os.PathLike, mode: NetcdfWriteModes = "w", format: T_NetcdfTypes | None = None, group: str | None = None, engine: T_NetcdfEngine | None = None, encoding: Mapping[Hashable, Mapping[str, Any]] | None = None, unlimited_dims: Iterable[Hashable] | None = None, compute: bool = False, multifile: bool = False, invalid_netcdf: bool = False, auto_complex: bool | None = None, ) -> tuple[ArrayWriter, AbstractDataStore] | Delayed | None: ... 
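# Summary of the overloads above (illustrative mapping of arguments to return type):
#   multifile=True                   -> tuple[ArrayWriter, AbstractDataStore]
#   path_or_file=None                -> memoryview
#   compute=False (path given)       -> dask.delayed.Delayed
#   default (path given, compute)    -> None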
# Any @overload def to_netcdf( dataset: Dataset, path_or_file: str | os.PathLike | IOBase | None, mode: NetcdfWriteModes = "w", format: T_NetcdfTypes | None = None, group: str | None = None, engine: T_NetcdfEngine | None = None, encoding: Mapping[Hashable, Mapping[str, Any]] | None = None, unlimited_dims: Iterable[Hashable] | None = None, compute: bool = False, multifile: bool = False, invalid_netcdf: bool = False, auto_complex: bool | None = None, ) -> tuple[ArrayWriter, AbstractDataStore] | memoryview | Delayed | None: ... def to_netcdf( dataset: Dataset, path_or_file: str | os.PathLike | IOBase | None = None, mode: NetcdfWriteModes = "w", format: T_NetcdfTypes | None = None, group: str | None = None, engine: T_NetcdfEngine | None = None, encoding: Mapping[Hashable, Mapping[str, Any]] | None = None, unlimited_dims: Iterable[Hashable] | None = None, compute: bool = True, multifile: bool = False, invalid_netcdf: bool = False, auto_complex: bool | None = None, ) -> tuple[ArrayWriter, AbstractDataStore] | memoryview | Delayed | None: """This function creates an appropriate datastore for writing a dataset to disk as a netCDF file See `Dataset.to_netcdf` for full API docs. The ``multifile`` argument is only for the private use of save_mfdataset. """ if encoding is None: encoding = {} normalized_path = _normalize_path(path_or_file) if engine is None: engine = get_default_netcdf_write_engine(normalized_path, format) # validate Dataset keys, DataArray names, and attr keys/values _validate_dataset_names(dataset) _validate_attrs(dataset, engine, invalid_netcdf) # sanitize unlimited_dims unlimited_dims = _sanitize_unlimited_dims(dataset, unlimited_dims) autoclose = _get_netcdf_autoclose(dataset, engine) if normalized_path is None: if not compute: raise NotImplementedError( "to_netcdf() with compute=False is not yet implemented when " "returning a memoryview" ) target = BytesIOProxy() else: target = normalized_path # type: ignore[assignment] store = get_writable_netcdf_store( target, engine, mode=mode, format=format, autoclose=autoclose, invalid_netcdf=invalid_netcdf, auto_complex=auto_complex, ) if group is not None: store = store.get_child_store(group) writer = ArrayWriter() # TODO: figure out how to refactor this logic (here and in save_mfdataset) # to avoid this mess of conditionals try: # TODO: allow this work (setting up the file for writing array data) # to be parallelized with dask dump_to_store( dataset, store, writer, encoding=encoding, unlimited_dims=unlimited_dims ) if autoclose: store.close() if multifile: return writer, store writes = writer.sync(compute=compute) finally: if not multifile and not autoclose: # type: ignore[redundant-expr,unused-ignore] if compute: store.close() else: store.sync() if path_or_file is None: assert isinstance(target, BytesIOProxy) # created in this function return target.getbuffer() if not compute: return delayed_close_after_writes(writes, store) return None def dump_to_store( dataset, store, writer=None, encoder=None, encoding=None, unlimited_dims=None ): """Store dataset contents to a backends.*DataStore object.""" if writer is None: writer = ArrayWriter() if encoding is None: encoding = {} variables, attrs = conventions.encode_dataset_coordinates(dataset) check_encoding = set() for k, enc in encoding.items(): # no need to shallow copy the variable again; that already happened # in encode_dataset_coordinates variables[k].encoding = enc check_encoding.add(k) if encoder: variables, attrs = encoder(variables, attrs) store.store(variables, attrs, 
check_encoding, writer, unlimited_dims=unlimited_dims) def save_mfdataset( datasets, paths, mode="w", format=None, groups=None, engine=None, compute=True, **kwargs, ): """Write multiple datasets to disk as netCDF files simultaneously. This function is intended for use with datasets consisting of dask.array objects, in which case it can write the multiple datasets to disk simultaneously using a shared thread pool. When not using dask, it is no different than calling ``to_netcdf`` repeatedly. Parameters ---------- datasets : list of Dataset List of datasets to save. paths : list of str or list of path-like objects List of paths to which to save each corresponding dataset. mode : {"w", "a"}, optional Write ("w") or append ("a") mode. If mode="w", any existing file at these locations will be overwritten. format : {"NETCDF4", "NETCDF4_CLASSIC", "NETCDF3_64BIT", \ "NETCDF3_CLASSIC"}, optional File format for the resulting netCDF file: * NETCDF4: Data is stored in an HDF5 file, using netCDF4 API features. * NETCDF4_CLASSIC: Data is stored in an HDF5 file, using only netCDF 3 compatible API features. * NETCDF3_64BIT: 64-bit offset version of the netCDF 3 file format, which fully supports 2+ GB files, but is only compatible with clients linked against netCDF version 3.6.0 or later. * NETCDF3_CLASSIC: The classic netCDF 3 file format. It does not handle 2+ GB files very well. All formats are supported by the netCDF4-python library. scipy.io.netcdf only supports the last two formats. The default format is NETCDF4 if you are saving a file to disk and have the netCDF4-python library available. Otherwise, xarray falls back to using scipy to write netCDF files and defaults to the NETCDF3_64BIT format (scipy does not support netCDF4). groups : list of str, optional Paths to the netCDF4 group in each corresponding file to which to save datasets (only works for format="NETCDF4"). The groups will be created if necessary. engine : {"netcdf4", "h5netcdf", "scipy"}, optional Engine to use when writing netCDF files. If not provided, the default engine is chosen based on available dependencies, by default preferring "netcdf4" over "h5netcdf" over "scipy" (customizable via ``netcdf_engine_order`` in ``xarray.set_options()``). compute : bool If true compute immediately, otherwise return a ``dask.delayed.Delayed`` object that can be computed later. **kwargs : dict, optional Additional arguments are passed along to ``to_netcdf``. Examples -------- Save a dataset into one netCDF per year of data: >>> ds = xr.Dataset( ... {"a": ("time", np.linspace(0, 1, 48))}, ... coords={"time": pd.date_range("2010-01-01", freq="ME", periods=48)}, ... ) >>> ds Size: 768B Dimensions: (time: 48) Coordinates: * time (time) datetime64[ns] 384B 2010-01-31 2010-02-28 ... 2013-12-31 Data variables: a (time) float64 384B 0.0 0.02128 0.04255 ... 
0.9574 0.9787 1.0 >>> years, datasets = zip(*ds.groupby("time.year")) >>> paths = [f"{y}.nc" for y in years] >>> xr.save_mfdataset(datasets, paths) """ if mode == "w" and len(set(paths)) < len(paths): raise ValueError( "cannot use mode='w' when writing multiple datasets to the same path" ) for obj in datasets: if not isinstance(obj, Dataset): raise TypeError( "save_mfdataset only supports writing Dataset " f"objects, received type {type(obj)}" ) if groups is None: groups = [None] * len(datasets) if len({len(datasets), len(paths), len(groups)}) > 1: raise ValueError( "must supply lists of the same length for the " "datasets, paths and groups arguments to " "save_mfdataset" ) writers, stores = zip( *[ to_netcdf( ds, path, mode, format, group, engine, compute=compute, multifile=True, **kwargs, ) for ds, path, group in zip(datasets, paths, groups, strict=True) ], strict=True, ) try: writes = [w.sync(compute=compute) for w in writers] finally: for store in stores: if compute: store.close() else: store.sync() if not compute: import dask return dask.delayed( list(starmap(delayed_close_after_writes, zip(writes, stores, strict=True))) ) def get_writable_zarr_store( store: ZarrStoreLike | None = None, *, chunk_store: MutableMapping | str | os.PathLike | None = None, mode: ZarrWriteModes | None = None, synchronizer=None, group: str | None = None, consolidated: bool | None = None, append_dim: Hashable | None = None, region: Mapping[str, slice | Literal["auto"]] | Literal["auto"] | None = None, safe_chunks: bool = True, align_chunks: bool = False, storage_options: dict[str, str] | None = None, zarr_version: int | None = None, zarr_format: int | None = None, write_empty_chunks: bool | None = None, ) -> backends.ZarrStore: """Create a store for writing to Zarr.""" from xarray.backends.zarr import _choose_default_mode, _get_mappers kwargs, mapper, chunk_mapper = _get_mappers( storage_options=storage_options, store=store, chunk_store=chunk_store ) mode = _choose_default_mode(mode=mode, append_dim=append_dim, region=region) if mode == "r+": already_consolidated = consolidated consolidate_on_close = False else: already_consolidated = False consolidate_on_close = consolidated or consolidated is None return backends.ZarrStore.open_group( store=mapper, mode=mode, synchronizer=synchronizer, group=group, consolidated=already_consolidated, consolidate_on_close=consolidate_on_close, chunk_store=chunk_mapper, append_dim=append_dim, write_region=region, safe_chunks=safe_chunks, align_chunks=align_chunks, zarr_version=zarr_version, zarr_format=zarr_format, write_empty=write_empty_chunks, **kwargs, ) # compute=True returns ZarrStore @overload def to_zarr( dataset: Dataset, store: ZarrStoreLike | None = None, chunk_store: MutableMapping | str | os.PathLike | None = None, mode: ZarrWriteModes | None = None, synchronizer=None, group: str | None = None, encoding: Mapping | None = None, *, compute: Literal[True] = True, consolidated: bool | None = None, append_dim: Hashable | None = None, region: Mapping[str, slice | Literal["auto"]] | Literal["auto"] | None = None, safe_chunks: bool = True, align_chunks: bool = False, storage_options: dict[str, str] | None = None, zarr_version: int | None = None, write_empty_chunks: bool | None = None, chunkmanager_store_kwargs: dict[str, Any] | None = None, ) -> backends.ZarrStore: ... 
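# Hedged usage sketch for the two ``to_zarr`` overloads (the compute=True
# variant above, the compute=False variant below); ``ds`` and "out.zarr" are
# hypothetical names and nothing here is executed:
#
#   zstore = to_zarr(ds, "out.zarr", mode="w")                   # ZarrStore
#   delayed = to_zarr(ds, "out.zarr", mode="w", compute=False)   # Delayed
#   delayed.compute()  # performs the chunked writes and closes the store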
# compute=False returns dask.Delayed @overload def to_zarr( dataset: Dataset, store: ZarrStoreLike | None = None, chunk_store: MutableMapping | str | os.PathLike | None = None, mode: ZarrWriteModes | None = None, synchronizer=None, group: str | None = None, encoding: Mapping | None = None, *, compute: Literal[False], consolidated: bool | None = None, append_dim: Hashable | None = None, region: Mapping[str, slice | Literal["auto"]] | Literal["auto"] | None = None, safe_chunks: bool = True, align_chunks: bool = False, storage_options: dict[str, str] | None = None, zarr_version: int | None = None, write_empty_chunks: bool | None = None, chunkmanager_store_kwargs: dict[str, Any] | None = None, ) -> Delayed: ... def to_zarr( dataset: Dataset, store: ZarrStoreLike | None = None, chunk_store: MutableMapping | str | os.PathLike | None = None, mode: ZarrWriteModes | None = None, synchronizer=None, group: str | None = None, encoding: Mapping | None = None, *, compute: bool = True, consolidated: bool | None = None, append_dim: Hashable | None = None, region: Mapping[str, slice | Literal["auto"]] | Literal["auto"] | None = None, safe_chunks: bool = True, align_chunks: bool = False, storage_options: dict[str, str] | None = None, zarr_version: int | None = None, zarr_format: int | None = None, write_empty_chunks: bool | None = None, chunkmanager_store_kwargs: dict[str, Any] | None = None, ) -> backends.ZarrStore | Delayed: """This function creates an appropriate datastore for writing a dataset to a zarr ztore See `Dataset.to_zarr` for full API docs. """ # validate Dataset keys, DataArray names _validate_dataset_names(dataset) # Load empty arrays to avoid bug saving zero length dimensions (Issue #5741) # TODO: delete when min dask>=2023.12.1 # https://github.com/dask/dask/pull/10506 for v in dataset.variables.values(): if v.size == 0: v.load() if encoding is None: encoding = {} zstore = get_writable_zarr_store( store, chunk_store=chunk_store, mode=mode, synchronizer=synchronizer, group=group, consolidated=consolidated, append_dim=append_dim, region=region, safe_chunks=safe_chunks, align_chunks=align_chunks, storage_options=storage_options, zarr_version=zarr_version, zarr_format=zarr_format, write_empty_chunks=write_empty_chunks, ) dataset = zstore._validate_and_autodetect_region(dataset) zstore._validate_encoding(encoding) writer = ArrayWriter() # TODO: figure out how to properly handle unlimited_dims try: dump_to_store(dataset, zstore, writer, encoding=encoding) writes = writer.sync( compute=compute, chunkmanager_store_kwargs=chunkmanager_store_kwargs ) finally: if compute: zstore.close() if not compute: return delayed_close_after_writes(writes, zstore) return zstore def _datatree_to_netcdf( dt: DataTree, filepath: str | PathLike | io.IOBase | None = None, mode: NetcdfWriteModes = "w", encoding: Mapping[str, Any] | None = None, unlimited_dims: Mapping | None = None, format: T_DataTreeNetcdfTypes | None = None, engine: T_DataTreeNetcdfEngine | None = None, group: str | None = None, write_inherited_coords: bool = False, compute: bool = True, invalid_netcdf: bool = False, auto_complex: bool | None = None, ) -> None | memoryview | Delayed: """Implementation of `DataTree.to_netcdf`.""" if format not in [None, *get_args(T_DataTreeNetcdfTypes)]: raise ValueError("DataTree.to_netcdf only supports the NETCDF4 format") if engine not in [None, *get_args(T_DataTreeNetcdfEngine)]: raise ValueError( "DataTree.to_netcdf only supports the netcdf4 and h5netcdf engines" ) normalized_path = _normalize_path(filepath) if 
engine is None: engine = get_default_netcdf_write_engine( path_or_file=normalized_path, format="NETCDF4", # required for supporting groups ) # type: ignore[assignment] if group is not None: raise NotImplementedError( "specifying a root group for the tree has not been implemented" ) if encoding is None: encoding = {} # In the future, we may want to expand this check to insure all the provided encoding # options are valid. For now, this simply checks that all provided encoding keys are # groups in the datatree. if set(encoding) - set(dt.groups): raise ValueError( f"unexpected encoding group name(s) provided: {set(encoding) - set(dt.groups)}" ) if normalized_path is None: if not compute: raise NotImplementedError( "to_netcdf() with compute=False is not yet implemented when " "returning a memoryview" ) target = BytesIOProxy() else: target = normalized_path # type: ignore[assignment] if unlimited_dims is None: unlimited_dims = {} scheduler = get_dask_scheduler() have_chunks = any( v.chunks is not None for node in dt.subtree for v in node.variables.values() ) autoclose = have_chunks and scheduler in ["distributed", "multiprocessing"] root_store = get_writable_netcdf_store( target, engine, # type: ignore[arg-type] mode=mode, format=format, autoclose=autoclose, invalid_netcdf=invalid_netcdf, auto_complex=auto_complex, ) writer = ArrayWriter() # TODO: allow this work (setting up the file for writing array data) # to be parallelized with dask try: for node in dt.subtree: at_root = node is dt dataset = node.to_dataset(inherit=write_inherited_coords or at_root) node_store = ( root_store if at_root else root_store.get_child_store(node.path) ) dump_to_store( dataset, node_store, writer, encoding=encoding.get(node.path), unlimited_dims=unlimited_dims.get(node.path), ) if autoclose: root_store.close() writes = writer.sync(compute=compute) finally: if compute: root_store.close() else: root_store.sync() if filepath is None: assert isinstance(target, BytesIOProxy) # created in this function return target.getbuffer() if not compute: return delayed_close_after_writes(writes, root_store) return None def _datatree_to_zarr( dt: DataTree, store: ZarrStoreLike, mode: ZarrWriteModes = "w-", encoding: Mapping[str, Any] | None = None, synchronizer=None, group: str | None = None, write_inherited_coords: bool = False, *, chunk_store: MutableMapping | str | PathLike | None = None, compute: bool = True, consolidated: bool | None = None, append_dim: Hashable | None = None, region: Mapping[str, slice | Literal["auto"]] | Literal["auto"] | None = None, safe_chunks: bool = True, align_chunks: bool = False, storage_options: dict[str, str] | None = None, zarr_version: int | None = None, zarr_format: int | None = None, write_empty_chunks: bool | None = None, chunkmanager_store_kwargs: dict[str, Any] | None = None, ) -> ZarrStore | Delayed: """Implementation of `DataTree.to_zarr`.""" if group is not None: raise NotImplementedError( "specifying a root group for the tree has not been implemented" ) if append_dim is not None: raise NotImplementedError( "specifying ``append_dim`` with ``DataTree.to_zarr`` has not been implemented" ) if encoding is None: encoding = {} # In the future, we may want to expand this check to insure all the provided encoding # options are valid. For now, this simply checks that all provided encoding keys are # groups in the datatree. 
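    # For example (hypothetical names; the check below only inspects the keys),
    # an encoding such as
    #     {"/": {"temperature": {"dtype": "float32"}},
    #      "/child": {"pressure": {"dtype": "float32"}}}
    # passes as long as "/" and "/child" appear in ``dt.groups``.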
if set(encoding) - set(dt.groups): raise ValueError( f"unexpected encoding group name(s) provided: {set(encoding) - set(dt.groups)}" ) root_store = get_writable_zarr_store( store, chunk_store=chunk_store, mode=mode, synchronizer=synchronizer, group=group, consolidated=consolidated, append_dim=append_dim, region=region, safe_chunks=safe_chunks, align_chunks=align_chunks, storage_options=storage_options, zarr_version=zarr_version, zarr_format=zarr_format, write_empty_chunks=write_empty_chunks, ) writer = ArrayWriter() try: for rel_path, node in dt.subtree_with_keys: at_root = node is dt dataset = node.to_dataset(inherit=write_inherited_coords or at_root) # Use a relative path for group, because absolute paths are broken # with consolidated metadata in zarr 3.1.2 and earlier: # https://github.com/zarr-developers/zarr-python/pull/3428 node_store = root_store if at_root else root_store.get_child_store(rel_path) dataset = node_store._validate_and_autodetect_region(dataset) node_store._validate_encoding(encoding) dump_to_store( dataset, node_store, writer, encoding=encoding.get(node.path), ) writes = writer.sync( compute=compute, chunkmanager_store_kwargs=chunkmanager_store_kwargs ) finally: if compute: root_store.close() if not compute: return delayed_close_after_writes(writes, root_store) return root_store def _get_netcdf_autoclose(dataset: Dataset, engine: T_NetcdfEngine) -> bool: """Should we close files after each write operations?""" scheduler = get_dask_scheduler() have_chunks = any(v.chunks is not None for v in dataset.variables.values()) autoclose = have_chunks and scheduler in ["distributed", "multiprocessing"] if autoclose and engine == "scipy": raise NotImplementedError( f"Writing netCDF files with the {engine} backend " f"is not currently supported with dask's {scheduler} scheduler" ) return autoclose python-xarray-2026.01.0/xarray/backends/locks.py0000664000175000017500000001725615136607163021623 0ustar alastairalastairfrom __future__ import annotations import multiprocessing import threading import uuid import weakref from collections.abc import Callable, Hashable, MutableMapping, Sequence from typing import Any, ClassVar, Literal from weakref import WeakValueDictionary from xarray.core.types import Lock # SerializableLock is adapted from Dask: # https://github.com/dask/dask/blob/74e898f0ec712e8317ba86cc3b9d18b6b9922be0/dask/utils.py#L1160-L1224 # Used under the terms of Dask's license, see licenses/DASK_LICENSE. class SerializableLock(Lock): """A Serializable per-process Lock This wraps a normal ``threading.Lock`` object and satisfies the same interface. However, this lock can also be serialized and sent to different processes. It will not block concurrent operations between processes (for this you should look at ``dask.multiprocessing.Lock`` or ``locket.lock_file`` but will consistently deserialize into the same lock. So if we make a lock in one process:: lock = SerializableLock() And then send it over to another process multiple times:: bytes = pickle.dumps(lock) a = pickle.loads(bytes) b = pickle.loads(bytes) Then the deserialized objects will operate as though they were the same lock, and collide as appropriate. This is useful for consistently protecting resources on a per-process level. The creation of locks is itself not threadsafe. 
""" _locks: ClassVar[WeakValueDictionary[Hashable, threading.Lock]] = ( WeakValueDictionary() ) token: Hashable lock: threading.Lock def __init__(self, token: Hashable | None = None): self.token = token or str(uuid.uuid4()) if self.token in SerializableLock._locks: self.lock = SerializableLock._locks[self.token] else: self.lock = threading.Lock() SerializableLock._locks[self.token] = self.lock def acquire(self, *args, **kwargs): return self.lock.acquire(*args, **kwargs) def release(self, *args, **kwargs): return self.lock.release(*args, **kwargs) def __enter__(self): self.lock.__enter__() def __exit__(self, *args): self.lock.__exit__(*args) def locked(self): return self.lock.locked() def __getstate__(self): return self.token def __setstate__(self, token): self.__init__(token) def __str__(self): return f"<{self.__class__.__name__}: {self.token}>" __repr__ = __str__ # Locks used by multiple backends. # Neither HDF5 nor the netCDF-C library are thread-safe. HDF5_LOCK = SerializableLock() NETCDFC_LOCK = SerializableLock() _FILE_LOCKS: MutableMapping[Any, threading.Lock] = weakref.WeakValueDictionary() def _get_threaded_lock(key: str) -> threading.Lock: try: lock = _FILE_LOCKS[key] except KeyError: lock = _FILE_LOCKS[key] = threading.Lock() return lock def _get_multiprocessing_lock(key: str) -> Lock: # TODO: make use of the key -- maybe use locket.py? # https://github.com/mwilliamson/locket.py del key # unused return multiprocessing.Lock() def _get_lock_maker(scheduler: str | None = None) -> Callable[..., Lock]: """Returns an appropriate function for creating resource locks. Parameters ---------- scheduler : str or None Dask scheduler being used. See Also -------- dask.utils.get_scheduler_lock """ if scheduler is None or scheduler == "threaded": return _get_threaded_lock elif scheduler == "multiprocessing": return _get_multiprocessing_lock elif scheduler == "distributed": # Lazy import distributed since it is can add a significant # amount of time to import from dask.distributed import Lock as DistributedLock return DistributedLock else: raise KeyError(scheduler) def get_dask_scheduler(get=None, collection=None) -> str | None: """Determine the dask scheduler that is being used. None is returned if no dask scheduler is active. See Also -------- dask.base.get_scheduler """ try: # Fix for bug caused by dask installation that doesn't involve the toolz library # Issue: 4164 import dask from dask.base import get_scheduler actual_get = get_scheduler(get, collection) except ImportError: return None try: from dask.distributed import Client if isinstance(actual_get.__self__, Client): return "distributed" except (ImportError, AttributeError): pass try: # As of dask=2.6, dask.multiprocessing requires cloudpickle to be installed # Dependency removed in https://github.com/dask/dask/pull/5511 if actual_get is dask.multiprocessing.get: return "multiprocessing" except AttributeError: pass return "threaded" def get_write_lock(key: str) -> Lock: """Get a scheduler appropriate lock for writing to the given resource. Parameters ---------- key : str Name of the resource for which to acquire a lock. Typically a filename. Returns ------- Lock object that can be used like a threading.Lock object. """ scheduler = get_dask_scheduler() lock_maker = _get_lock_maker(scheduler) return lock_maker(key) def acquire(lock, blocking=True): """Acquire a lock, possibly in a non-blocking fashion. Includes backwards compatibility hacks for old versions of Python, dask and dask-distributed. 
""" if blocking: # no arguments needed return lock.acquire() else: # "blocking" keyword argument not supported for: # - threading.Lock on Python 2. # - dask.SerializableLock with dask v1.0.0 or earlier. # - multiprocessing.Lock calls the argument "block" instead. # - dask.distributed.Lock uses the blocking argument as the first one return lock.acquire(blocking) class CombinedLock(Lock): """A combination of multiple locks. Like a locked door, a CombinedLock is locked if any of its constituent locks are locked. """ def __init__(self, locks: Sequence[Lock]): self.locks = tuple(set(locks)) # remove duplicates def acquire(self, blocking=True): return all(acquire(lock, blocking=blocking) for lock in self.locks) def release(self): for lock in self.locks: lock.release() def __enter__(self): for lock in self.locks: lock.__enter__() def __exit__(self, *args): for lock in self.locks: lock.__exit__(*args) def locked(self): return any(lock.locked() for lock in self.locks) def __repr__(self): return f"CombinedLock({list(self.locks)!r})" class DummyLock(Lock): """DummyLock provides the lock API without any actual locking.""" def acquire(self, blocking=True): pass def release(self): pass def __enter__(self): pass def __exit__(self, *args): pass def locked(self): return False def combine_locks(locks: Sequence[Lock]) -> Lock: """Combine a sequence of locks into a single lock.""" all_locks: list[Lock] = [] for lock in locks: if isinstance(lock, CombinedLock): all_locks.extend(lock.locks) elif lock is not None: all_locks.append(lock) num_locks = len(all_locks) if num_locks > 1: return CombinedLock(all_locks) elif num_locks == 1: return all_locks[0] else: return DummyLock() def ensure_lock(lock: Lock | None | Literal[False]) -> Lock: """Ensure that the given object is a lock.""" if lock is None or lock is False: return DummyLock() return lock python-xarray-2026.01.0/xarray/backends/chunks.py0000664000175000017500000002622315136607163021775 0ustar alastairalastairimport numpy as np from xarray.core.datatree import Variable def align_nd_chunks( nd_v_chunks: tuple[tuple[int, ...], ...], nd_backend_chunks: tuple[tuple[int, ...], ...], ) -> tuple[tuple[int, ...], ...]: if len(nd_backend_chunks) != len(nd_v_chunks): raise ValueError( "The number of dimensions on the backend and the variable must be the same." ) nd_aligned_chunks: list[tuple[int, ...]] = [] for backend_chunks, v_chunks in zip(nd_backend_chunks, nd_v_chunks, strict=True): # Validate that they have the same number of elements if sum(backend_chunks) != sum(v_chunks): raise ValueError( "The number of elements in the backend does not " "match the number of elements in the variable. " "This inconsistency should never occur at this stage." ) # Validate if the backend_chunks satisfy the condition that all the values # excluding the borders are equal if len(set(backend_chunks[1:-1])) > 1: raise ValueError( f"This function currently supports aligning chunks " f"only when backend chunks are of uniform size, excluding borders. " f"If you encounter this error, please report it—this scenario should never occur " f"unless there is an internal misuse. 
" f"Backend chunks: {backend_chunks}" ) # The algorithm assumes that there are always two borders on the # Backend and the Array if not, the result is going to be the same # as the input, and there is nothing to optimize if len(backend_chunks) == 1: nd_aligned_chunks.append(backend_chunks) continue if len(v_chunks) == 1: nd_aligned_chunks.append(v_chunks) continue # Size of the chunk on the backend fixed_chunk = max(backend_chunks) # The ideal size of the chunks is the maximum of the two; this would avoid # that we use more memory than expected max_chunk = max(fixed_chunk, *v_chunks) # The algorithm assumes that the chunks on this array are aligned except the last one # because it can be considered a partial one aligned_chunks: list[int] = [] # For simplicity of the algorithm, let's transform the Array chunks in such a way that # we remove the partial chunks. To achieve this, we add artificial data to the borders t_v_chunks = list(v_chunks) t_v_chunks[0] += fixed_chunk - backend_chunks[0] t_v_chunks[-1] += fixed_chunk - backend_chunks[-1] # The unfilled_size is the amount of space that has not been filled on the last # processed chunk; this is equivalent to the amount of data that would need to be # added to a partial Zarr chunk to fill it up to the fixed_chunk size unfilled_size = 0 for v_chunk in t_v_chunks: # Ideally, we should try to preserve the original Dask chunks, but this is only # possible if the last processed chunk was aligned (unfilled_size == 0) ideal_chunk = v_chunk if unfilled_size: # If that scenario is not possible, the best option is to merge the chunks ideal_chunk = v_chunk + aligned_chunks[-1] while ideal_chunk: if not unfilled_size: # If the previous chunk is filled, let's add a new chunk # of size 0 that will be used on the merging step to simplify the algorithm aligned_chunks.append(0) if ideal_chunk > max_chunk: # If the ideal_chunk is bigger than the max_chunk, # we need to increase the last chunk as much as possible # but keeping it aligned, and then add a new chunk max_increase = max_chunk - aligned_chunks[-1] max_increase = ( max_increase - (max_increase - unfilled_size) % fixed_chunk ) aligned_chunks[-1] += max_increase else: # Perfect scenario where the chunks can be merged without any split. 
aligned_chunks[-1] = ideal_chunk ideal_chunk -= aligned_chunks[-1] unfilled_size = ( fixed_chunk - aligned_chunks[-1] % fixed_chunk ) % fixed_chunk # Now we have to remove the artificial data added to the borders for order in [-1, 1]: border_size = fixed_chunk - backend_chunks[::order][0] aligned_chunks = aligned_chunks[::order] aligned_chunks[0] -= border_size t_v_chunks = t_v_chunks[::order] t_v_chunks[0] -= border_size if ( len(aligned_chunks) >= 2 and aligned_chunks[0] + aligned_chunks[1] <= max_chunk and aligned_chunks[0] != t_v_chunks[0] ): # The artificial data added to the border can introduce inefficient chunks # on the borders, for that reason, we will check if we can merge them or not # Example: # backend_chunks = [6, 6, 1] # v_chunks = [6, 7] # t_v_chunks = [6, 12] # The ideal output should preserve the same v_chunks, but the previous loop # is going to produce aligned_chunks = [6, 6, 6] # And after removing the artificial data, we will end up with aligned_chunks = [6, 6, 1] # which is not ideal and can be merged into a single chunk aligned_chunks[1] += aligned_chunks[0] aligned_chunks = aligned_chunks[1:] t_v_chunks = t_v_chunks[::order] aligned_chunks = aligned_chunks[::order] nd_aligned_chunks.append(tuple(aligned_chunks)) return tuple(nd_aligned_chunks) def build_grid_chunks( size: int, chunk_size: int, region: slice | None = None, ) -> tuple[int, ...]: if region is None: region = slice(0, size) region_start = region.start or 0 # Generate the zarr chunks inside the region of this dim chunks_on_region = [chunk_size - (region_start % chunk_size)] if chunks_on_region[0] >= size: # This is useful for the scenarios where the chunk_size are bigger # than the variable chunks, which can happens when the user specifies # the enc_chunks manually. return (size,) chunks_on_region.extend([chunk_size] * ((size - chunks_on_region[0]) // chunk_size)) if (size - chunks_on_region[0]) % chunk_size != 0: chunks_on_region.append((size - chunks_on_region[0]) % chunk_size) return tuple(chunks_on_region) def grid_rechunk( v: Variable, enc_chunks: tuple[int, ...], region: tuple[slice, ...], ) -> Variable: nd_v_chunks = v.chunks if not nd_v_chunks: return v nd_grid_chunks = tuple( build_grid_chunks( v_size, region=interval, chunk_size=chunk_size, ) for v_size, chunk_size, interval in zip( v.shape, enc_chunks, region, strict=True ) ) nd_aligned_chunks = align_nd_chunks( nd_v_chunks=nd_v_chunks, nd_backend_chunks=nd_grid_chunks, ) v = v.chunk(dict(zip(v.dims, nd_aligned_chunks, strict=True))) return v def validate_grid_chunks_alignment( nd_v_chunks: tuple[tuple[int, ...], ...] | None, enc_chunks: tuple[int, ...], backend_shape: tuple[int, ...], region: tuple[slice, ...], allow_partial_chunks: bool, name: str, ): if nd_v_chunks is None: return base_error = ( "Specified Zarr chunks encoding['chunks']={enc_chunks!r} for " "variable named {name!r} would overlap multiple Dask chunks. " "Please check the Dask chunks at position {v_chunk_pos} and " "{v_chunk_pos_next}, on axis {axis}, they are overlapped " "on the same Zarr chunk in the region {region}. " "Writing this array in parallel with Dask could lead to corrupted data. " "To resolve this issue, consider one of the following options: " "- Rechunk the array using `chunk()`. " "- Modify or delete `encoding['chunks']`. " "- Set `safe_chunks=False`. " "- Enable automatic chunks alignment with `align_chunks=True`." 
) for axis, chunk_size, v_chunks, interval, size in zip( range(len(enc_chunks)), enc_chunks, nd_v_chunks, region, backend_shape, strict=True, ): for i, chunk in enumerate(v_chunks[1:-1]): if chunk % chunk_size: raise ValueError( base_error.format( v_chunk_pos=i + 1, v_chunk_pos_next=i + 2, v_chunk_size=chunk, axis=axis, name=name, chunk_size=chunk_size, region=interval, enc_chunks=enc_chunks, ) ) interval_start = interval.start or 0 if len(v_chunks) > 1: # The first border size is the amount of data that needs to be updated on the # first chunk taking into account the region slice. first_border_size = chunk_size if allow_partial_chunks: first_border_size = chunk_size - interval_start % chunk_size if (v_chunks[0] - first_border_size) % chunk_size: raise ValueError( base_error.format( v_chunk_pos=0, v_chunk_pos_next=0, v_chunk_size=v_chunks[0], axis=axis, name=name, chunk_size=chunk_size, region=interval, enc_chunks=enc_chunks, ) ) if not allow_partial_chunks: region_stop = interval.stop or size error_on_last_chunk = base_error.format( v_chunk_pos=len(v_chunks) - 1, v_chunk_pos_next=len(v_chunks) - 1, v_chunk_size=v_chunks[-1], axis=axis, name=name, chunk_size=chunk_size, region=interval, enc_chunks=enc_chunks, ) if interval_start % chunk_size: # The last chunk which can also be the only one is a partial chunk # if it is not aligned at the beginning raise ValueError(error_on_last_chunk) if np.ceil(region_stop / chunk_size) == np.ceil(size / chunk_size): # If the region is covering the last chunk then check # if the reminder with the default chunk size # is equal to the size of the last chunk if v_chunks[-1] % chunk_size != size % chunk_size: raise ValueError(error_on_last_chunk) elif v_chunks[-1] % chunk_size: raise ValueError(error_on_last_chunk) python-xarray-2026.01.0/xarray/backends/netcdf3.py0000664000175000017500000001314015136607163022022 0ustar alastairalastairfrom __future__ import annotations import unicodedata import numpy as np from xarray import coding from xarray.core.variable import Variable # Special characters that are permitted in netCDF names except in the # 0th position of the string _specialchars = '_.@+- !"#$%&\\()*,:;<=>?[]^`{|}~' # The following are reserved names in CDL and may not be used as names of # variables, dimension, attributes _reserved_names = { "byte", "char", "short", "ushort", "int", "uint", "int64", "uint64", "float", "real", "double", "bool", "string", } # These data-types aren't supported by netCDF3, so they are automatically # coerced instead as indicated by the "coerce_nc3_dtype" function _nc3_dtype_coercions = { "int64": "int32", "uint64": "int32", "uint32": "int32", "uint16": "int16", "uint8": "int8", "bool": "int8", } # encode all strings as UTF-8 STRING_ENCODING = "utf-8" COERCION_VALUE_ERROR = ( "could not safely cast array from {dtype} to {new_dtype}. While it is not " "always the case, a common reason for this is that xarray has deemed it " "safest to encode np.datetime64[ns] or np.timedelta64[ns] values with " "int64 values representing units of 'nanoseconds'. This is either due to " "the fact that the times are known to require nanosecond precision for an " "accurate round trip, or that the times are unknown prior to writing due " "to being contained in a chunked array. Ways to work around this are " "either to use a backend that supports writing int64 values, or to " "manually specify the encoding['units'] and encoding['dtype'] (e.g. 
" "'seconds since 1970-01-01' and np.dtype('int32')) on the time " "variable(s) such that the times can be serialized in a netCDF3 file " "(note that depending on the situation, however, this latter option may " "result in an inaccurate round trip)." ) def coerce_nc3_dtype(arr): """Coerce an array to a data type that can be stored in a netCDF-3 file This function performs the dtype conversions as specified by the ``_nc3_dtype_coercions`` mapping: int64 -> int32 uint64 -> int32 uint32 -> int32 uint16 -> int16 uint8 -> int8 bool -> int8 Data is checked for equality, or equivalence (non-NaN values) using the ``(cast_array == original_array).all()``. """ dtype = str(arr.dtype) if dtype in _nc3_dtype_coercions: new_dtype = _nc3_dtype_coercions[dtype] # TODO: raise a warning whenever casting the data-type instead? cast_arr = arr.astype(new_dtype) if not (cast_arr == arr).all(): raise ValueError( COERCION_VALUE_ERROR.format(dtype=dtype, new_dtype=new_dtype) ) arr = cast_arr return arr def encode_nc3_attr_value(value): if isinstance(value, bytes): pass elif isinstance(value, str): value = value.encode(STRING_ENCODING) else: value = coerce_nc3_dtype(np.atleast_1d(value)) if value.ndim > 1: raise ValueError("netCDF attributes must be 1-dimensional") return value def encode_nc3_attrs(attrs): return {k: encode_nc3_attr_value(v) for k, v in attrs.items()} def _maybe_prepare_times(var): # checks for integer-based time-like and # replaces np.iinfo(np.int64).min with _FillValue or np.nan # this keeps backwards compatibility data = var.data if data.dtype.kind in "iu": units = var.attrs.get("units", None) if units is not None and coding.variables._is_time_like(units): mask = data == np.iinfo(np.int64).min if mask.any(): data = np.where(mask, var.attrs.get("_FillValue", np.nan), data) return data def encode_nc3_variable(var, name=None): for coder in [ coding.strings.EncodedStringCoder(allows_unicode=False), coding.strings.CharacterArrayCoder(), ]: var = coder.encode(var, name=name) data = _maybe_prepare_times(var) data = coerce_nc3_dtype(data) attrs = encode_nc3_attrs(var.attrs) return Variable(var.dims, data, attrs, var.encoding) def _isalnumMUTF8(c): """Return True if the given UTF-8 encoded character is alphanumeric or multibyte. Input is not checked! """ return c.isalnum() or (len(c.encode("utf-8")) > 1) def is_valid_nc3_name(s): """Test whether an object can be validly converted to a netCDF-3 dimension, variable or attribute name Earlier versions of the netCDF C-library reference implementation enforced a more restricted set of characters in creating new names, but permitted reading names containing arbitrary bytes. This specification extends the permitted characters in names to include multi-byte UTF-8 encoded Unicode and additional printing characters from the US-ASCII alphabet. The first character of a name must be alphanumeric, a multi-byte UTF-8 character, or '_' (reserved for special names with meaning to implementations, such as the "_FillValue" attribute). Subsequent characters may also include printing special characters, except for '/' which is not allowed in names. Names that have trailing space characters are also not permitted. 
""" if not isinstance(s, str): return False num_bytes = len(s.encode("utf-8")) return ( (unicodedata.normalize("NFC", s) == s) and (s not in _reserved_names) and (num_bytes >= 0) and ("/" not in s) and (s[-1] != " ") and (_isalnumMUTF8(s[0]) or (s[0] == "_")) and all(_isalnumMUTF8(c) or c in _specialchars for c in s) ) python-xarray-2026.01.0/xarray/backends/netCDF4_.py0000664000175000017500000007310615136607163022032 0ustar alastairalastairfrom __future__ import annotations import functools import operator import os from collections.abc import Iterable from contextlib import suppress from dataclasses import dataclass from io import IOBase from typing import TYPE_CHECKING, Any, Self import numpy as np from xarray.backends.common import ( BACKEND_ENTRYPOINTS, BackendArray, BackendEntrypoint, BytesIOProxy, T_PathFileOrDataStore, WritableCFDataStore, _normalize_path, collect_ancestor_dimensions, datatree_from_dict_with_io_cleanup, find_root_and_group, robust_getitem, ) from xarray.backends.file_manager import ( CachingFileManager, DummyFileManager, PickleableFileManager, ) from xarray.backends.locks import ( HDF5_LOCK, NETCDFC_LOCK, combine_locks, ensure_lock, get_write_lock, ) from xarray.backends.netcdf3 import encode_nc3_attr_value, encode_nc3_variable from xarray.backends.store import StoreBackendEntrypoint from xarray.coding.strings import ( CharacterArrayCoder, EncodedStringCoder, create_vlen_dtype, is_unicode_dtype, ) from xarray.coding.variables import pop_to from xarray.core import indexing from xarray.core.utils import ( FrozenDict, close_on_error, is_remote_uri, strip_uri_params, try_read_magic_number_from_path, ) from xarray.core.variable import Variable if TYPE_CHECKING: import netCDF4 from h5netcdf.core import EnumType as h5EnumType from netCDF4 import EnumType as ncEnumType from xarray.core.dataset import Dataset from xarray.core.datatree import DataTree # This lookup table maps from dtype.byteorder to a readable endian # string used by netCDF4. 
_endian_lookup = {"=": "native", ">": "big", "<": "little", "|": "native"} NETCDF4_PYTHON_LOCK = combine_locks([NETCDFC_LOCK, HDF5_LOCK]) class BaseNetCDF4Array(BackendArray): __slots__ = ("datastore", "dtype", "shape", "variable_name") def __init__(self, variable_name, datastore): self.datastore = datastore self.variable_name = variable_name array = self.get_array() self.shape = array.shape dtype = array.dtype if dtype is str: # use object dtype (with additional vlen string metadata) because that's # the only way in numpy to represent variable length strings and to # check vlen string dtype in further steps # it also prevents automatic string concatenation via # conventions.decode_cf_variable dtype = create_vlen_dtype(str) self.dtype = dtype def __setitem__(self, key, value): with self.datastore.lock: data = self.get_array(needs_lock=False) data[key] = value if self.datastore.autoclose: self.datastore.close(needs_lock=False) def get_array(self, needs_lock=True): raise NotImplementedError("Virtual Method") class NetCDF4ArrayWrapper(BaseNetCDF4Array): __slots__ = () def get_array(self, needs_lock=True): ds = self.datastore._acquire(needs_lock) variable = ds.variables[self.variable_name] variable.set_auto_maskandscale(False) # only added in netCDF4-python v1.2.8 with suppress(AttributeError): variable.set_auto_chartostring(False) return variable def __getitem__(self, key): return indexing.explicit_indexing_adapter( key, self.shape, indexing.IndexingSupport.OUTER, self._getitem ) def _getitem(self, key): if self.datastore.is_remote: # pragma: no cover getitem = functools.partial(robust_getitem, catch=RuntimeError) else: getitem = operator.getitem try: with self.datastore.lock: original_array = self.get_array(needs_lock=False) array = getitem(original_array, key) except IndexError as err: # Catch IndexError in netCDF4 and return a more informative # error message. This is most often called when an unsorted # indexer is used before the data is loaded from disk. msg = ( "The indexing operation you are attempting to perform " "is not valid on netCDF4.Variable object. Try loading " "your data into memory first by calling .load()." ) raise IndexError(msg) from err return array def _encode_nc4_variable(var, name=None): for coder in [ EncodedStringCoder(allows_unicode=True), CharacterArrayCoder(), ]: var = coder.encode(var, name=name) return var def _check_encoding_dtype_is_vlen_string(dtype): if dtype is not str: raise AssertionError( # pragma: no cover f"unexpected dtype encoding {dtype!r}. This shouldn't happen: please " "file a bug report at github.com/pydata/xarray" ) def _get_datatype( var, nc_format="NETCDF4", raise_on_invalid_encoding=False ) -> np.dtype: if nc_format == "NETCDF4": return _nc4_dtype(var) if "dtype" in var.encoding: encoded_dtype = var.encoding["dtype"] _check_encoding_dtype_is_vlen_string(encoded_dtype) if raise_on_invalid_encoding: raise ValueError( "encoding dtype=str for vlen strings is only supported " "with format='NETCDF4'." 
) return var.dtype def _nc4_dtype(var): if "dtype" in var.encoding: dtype = var.encoding.pop("dtype") _check_encoding_dtype_is_vlen_string(dtype) elif is_unicode_dtype(var.dtype): dtype = str elif var.dtype.kind in ["i", "u", "f", "c", "S"]: dtype = var.dtype else: raise ValueError(f"unsupported dtype for netCDF4 variable: {var.dtype}") return dtype def _netcdf4_create_group(dataset, name): return dataset.createGroup(name) def _nc4_require_group(ds, group, mode, create_group=_netcdf4_create_group): if group in {None, "", "/"}: # use the root group return ds else: # make sure it's a string if not isinstance(group, str): raise ValueError("group must be a string or None") # support path-like syntax path = group.strip("/").split("/") for key in path: try: ds = ds.groups[key] except KeyError as e: if mode != "r": ds = create_group(ds, key) else: # wrap error to provide slightly more helpful message raise OSError(f"group not found: {key}", e) from e return ds def _ensure_no_forward_slash_in_name(name): if "/" in name: raise ValueError( f"Forward slashes '/' are not allowed in variable and dimension names (got {name!r}). " "Forward slashes are used as hierarchy-separators for " "HDF5-based files ('netcdf4'/'h5netcdf')." ) def _ensure_fill_value_valid(data, attributes): # work around for netCDF4/scipy issue where _FillValue has the wrong type: # https://github.com/Unidata/netcdf4-python/issues/271 if data.dtype.kind == "S" and "_FillValue" in attributes: attributes["_FillValue"] = np.bytes_(attributes["_FillValue"]) def _force_native_endianness(var): # possible values for byteorder are: # = native # < little-endian # > big-endian # | not applicable # Below we check if the data type is not native or NA if var.dtype.byteorder not in ["=", "|"]: # if endianness is specified explicitly, convert to the native type data = var.data.astype(var.dtype.newbyteorder("=")) var = Variable(var.dims, data, var.attrs, var.encoding) # if endian exists, remove it from the encoding. var.encoding.pop("endian", None) # check to see if encoding has a value for endian its 'native' if var.encoding.get("endian", "native") != "native": raise NotImplementedError( "Attempt to write non-native endian type, " "this is not supported by the netCDF4 " "python library." ) return var def _extract_nc4_variable_encoding( variable: Variable, raise_on_invalid=False, lsd_okay=True, h5py_okay=False, backend="netCDF4", unlimited_dims=None, ) -> dict[str, Any]: if unlimited_dims is None: unlimited_dims = () encoding = variable.encoding.copy() safe_to_drop = {"source", "original_shape"} valid_encodings = { "zlib", "complevel", "fletcher32", "contiguous", "chunksizes", "shuffle", "_FillValue", "dtype", "compression", "significant_digits", "quantize_mode", "blosc_shuffle", "szip_coding", "szip_pixels_per_block", "endian", } if lsd_okay: valid_encodings.add("least_significant_digit") if h5py_okay: valid_encodings.add("compression_opts") if not raise_on_invalid and encoding.get("chunksizes") is not None: # It's possible to get encoded chunksizes larger than a dimension size # if the original file had an unlimited dimension. This is problematic # if the new file no longer has an unlimited dimension. 
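        # Illustrative case: a source file with an unlimited "time" dimension
        # of current length 10 may record chunksizes=(512,) for a (10,)-shaped
        # variable; copying that encoding to a file where "time" is fixed-size
        # would be invalid, so the stored chunksizes are dropped below.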
chunksizes = encoding["chunksizes"] chunks_too_big = any( c > d and dim not in unlimited_dims for c, d, dim in zip( chunksizes, variable.shape, variable.dims, strict=False ) ) has_original_shape = "original_shape" in encoding changed_shape = ( has_original_shape and encoding.get("original_shape") != variable.shape ) if chunks_too_big or changed_shape: del encoding["chunksizes"] var_has_unlim_dim = any(dim in unlimited_dims for dim in variable.dims) if not raise_on_invalid and var_has_unlim_dim and "contiguous" in encoding.keys(): del encoding["contiguous"] for k in safe_to_drop: if k in encoding: del encoding[k] if raise_on_invalid: invalid = [k for k in encoding if k not in valid_encodings] if invalid: raise ValueError( f"unexpected encoding parameters for {backend!r} backend: {invalid!r}. Valid " f"encodings are: {valid_encodings!r}" ) else: for k in list(encoding): if k not in valid_encodings: del encoding[k] return encoding def _is_list_of_strings(value) -> bool: arr = np.asarray(value) return arr.dtype.kind in ["U", "S"] and arr.size > 1 def _build_and_get_enum( store, var_name: str, dtype: np.dtype, enum_name: str, enum_dict: dict[str, int] ) -> ncEnumType | h5EnumType: """ Add or get the netCDF4 Enum based on the dtype in encoding. The return type should be ``netCDF4.EnumType``, but we avoid importing netCDF4 globally for performances. """ if enum_name not in store.ds.enumtypes: create_func = ( store.ds.createEnumType if isinstance(store, NetCDF4DataStore) else store.ds.create_enumtype ) return create_func( dtype, enum_name, enum_dict, ) datatype = store.ds.enumtypes[enum_name] if datatype.enum_dict != enum_dict: error_msg = ( f"Cannot save variable `{var_name}` because an enum" f" `{enum_name}` already exists in the Dataset but has" " a different definition. To fix this error, make sure" " all variables have a uniquely named enum in their" " `encoding['dtype'].metadata` or, if they should share" " the same enum type, make sure the enums are identical." ) raise ValueError(error_msg) return datatype @dataclass class _Thunk: """Pickleable equivalent of `lambda: value`.""" value: Any def __call__(self): return self.value @dataclass class _CloseWithCopy: """Wrapper around netCDF4's esoteric interface for writing in-memory data.""" proxy: BytesIOProxy nc4_dataset: netCDF4.Dataset def __call__(self): value = self.nc4_dataset.close() self.proxy.getvalue = _Thunk(value) class NetCDF4DataStore(WritableCFDataStore): """Store for reading and writing data via the Python-NetCDF4 library. This store supports NetCDF3, NetCDF4 and OpenDAP datasets. 
""" __slots__ = ( "_filename", "_group", "_manager", "_mode", "autoclose", "format", "is_remote", "lock", ) def __init__( self, manager, group=None, mode=None, lock=NETCDF4_PYTHON_LOCK, autoclose=False ): import netCDF4 if isinstance(manager, netCDF4.Dataset): if group is None: root, group = find_root_and_group(manager) else: if type(manager) is not netCDF4.Dataset: raise ValueError( "must supply a root netCDF4.Dataset if the group " "argument is provided" ) root = manager manager = DummyFileManager(root, lock=NETCDF4_PYTHON_LOCK) self._manager = manager self._group = group self._mode = mode self.format = self.ds.data_model self._filename = self.ds.filepath() self.is_remote = is_remote_uri(self._filename) self.lock = ensure_lock(lock) self.autoclose = autoclose def get_child_store(self, group: str) -> Self: if self._group is not None: group = os.path.join(self._group, group) return type(self)( self._manager, group=group, mode=self._mode, lock=self.lock, autoclose=self.autoclose, ) @classmethod def open( cls, filename, mode="r", format="NETCDF4", group=None, clobber=True, diskless=False, persist=False, auto_complex=None, lock=None, lock_maker=None, autoclose=False, ): import netCDF4 if isinstance(filename, os.PathLike): filename = os.fspath(filename) if isinstance(filename, IOBase): raise TypeError( f"file objects are not supported by the netCDF4 backend: {filename}" ) if not isinstance(filename, str | bytes | memoryview | BytesIOProxy): raise TypeError(f"invalid filename for netCDF4 backend: {filename}") if format is None: format = "NETCDF4" if lock is None: if mode == "r": if isinstance(filename, str) and is_remote_uri(filename): lock = NETCDFC_LOCK else: lock = NETCDF4_PYTHON_LOCK else: if format is None or format.startswith("NETCDF4"): lock = NETCDF4_PYTHON_LOCK else: lock = NETCDFC_LOCK if isinstance(filename, str): lock = combine_locks([lock, get_write_lock(filename)]) kwargs = dict( clobber=clobber, diskless=diskless, persist=persist, format=format, ) if auto_complex is not None: kwargs["auto_complex"] = auto_complex if isinstance(filename, BytesIOProxy): assert mode == "w" # Size hint used for creating netCDF3 files. Per the documentation # for nc__create(), the special value NC_SIZEHINT_DEFAULT (which is # the value 0), lets the netcdf library choose a suitable initial # size. 
memory = 0 kwargs["diskless"] = False nc4_dataset = netCDF4.Dataset( "", mode=mode, memory=memory, **kwargs ) close = _CloseWithCopy(filename, nc4_dataset) manager = DummyFileManager(nc4_dataset, close=close, lock=lock) elif isinstance(filename, bytes | memoryview): assert mode == "r" kwargs["memory"] = filename manager = PickleableFileManager( netCDF4.Dataset, "", mode=mode, kwargs=kwargs, lock=lock, ) else: manager = CachingFileManager( netCDF4.Dataset, filename, mode=mode, kwargs=kwargs, lock=lock ) return cls(manager, group=group, mode=mode, lock=lock, autoclose=autoclose) def _acquire(self, needs_lock=True): with self._manager.acquire_context(needs_lock) as root: ds = _nc4_require_group(root, self._group, self._mode) return ds @property def ds(self): return self._acquire() def open_store_variable(self, name: str, var): import netCDF4 dimensions = var.dimensions attributes = {k: var.getncattr(k) for k in var.ncattrs()} data = indexing.LazilyIndexedArray(NetCDF4ArrayWrapper(name, self)) encoding: dict[str, Any] = {} if isinstance(var.datatype, netCDF4.EnumType): encoding["dtype"] = np.dtype( data.dtype, metadata={ "enum": var.datatype.enum_dict, "enum_name": var.datatype.name, }, ) else: encoding["dtype"] = var.dtype _ensure_fill_value_valid(data, attributes) # netCDF4 specific encoding; save _FillValue for later filters = var.filters() if filters is not None: encoding.update(filters) chunking = var.chunking() if chunking is not None: if chunking == "contiguous": encoding["contiguous"] = True encoding["chunksizes"] = None else: encoding["contiguous"] = False encoding["chunksizes"] = tuple(chunking) encoding["preferred_chunks"] = dict( zip(var.dimensions, chunking, strict=True) ) # TODO: figure out how to round-trip "endian-ness" without raising # warnings from netCDF4 # encoding['endian'] = var.endian() pop_to(attributes, encoding, "least_significant_digit") # save source so __repr__ can detect if it's local or not encoding["source"] = self._filename encoding["original_shape"] = data.shape return Variable(dimensions, data, attributes, encoding) def get_variables(self): return FrozenDict( (k, self.open_store_variable(k, v)) for k, v in self.ds.variables.items() ) def get_attrs(self): return FrozenDict((k, self.ds.getncattr(k)) for k in self.ds.ncattrs()) def get_dimensions(self): return FrozenDict((k, len(v)) for k, v in self.ds.dimensions.items()) def get_parent_dimensions(self): return FrozenDict(collect_ancestor_dimensions(self.ds)) def get_encoding(self): return { "unlimited_dims": { k for k, v in self.ds.dimensions.items() if v.isunlimited() } } def set_dimension(self, name, length, is_unlimited=False): _ensure_no_forward_slash_in_name(name) dim_length = length if not is_unlimited else None self.ds.createDimension(name, size=dim_length) def set_attribute(self, key, value): if self.format != "NETCDF4": value = encode_nc3_attr_value(value) if _is_list_of_strings(value): # encode as NC_STRING if attr is list of strings self.ds.setncattr_string(key, value) else: self.ds.setncattr(key, value) def encode_variable(self, variable, name=None): variable = _force_native_endianness(variable) if self.format == "NETCDF4": variable = _encode_nc4_variable(variable, name=name) else: variable = encode_nc3_variable(variable, name=name) return variable def prepare_variable( self, name, variable: Variable, check_encoding=False, unlimited_dims=None ): _ensure_no_forward_slash_in_name(name) attrs = variable.attrs.copy() fill_value = attrs.pop("_FillValue", None) datatype: np.dtype | ncEnumType | 
h5EnumType datatype = _get_datatype( variable, self.format, raise_on_invalid_encoding=check_encoding ) # check enum metadata and use netCDF4.EnumType if ( (meta := np.dtype(datatype).metadata) and (e_name := meta.get("enum_name")) and (e_dict := meta.get("enum")) ): datatype = _build_and_get_enum(self, name, datatype, e_name, e_dict) encoding = _extract_nc4_variable_encoding( variable, raise_on_invalid=check_encoding, unlimited_dims=unlimited_dims ) if name in self.ds.variables: nc4_var = self.ds.variables[name] else: default_args = dict( varname=name, datatype=datatype, dimensions=variable.dims, zlib=False, complevel=4, shuffle=True, fletcher32=False, contiguous=False, chunksizes=None, endian="native", least_significant_digit=None, fill_value=fill_value, ) default_args.update(encoding) default_args.pop("_FillValue", None) nc4_var = self.ds.createVariable(**default_args) nc4_var.setncatts(attrs) target = NetCDF4ArrayWrapper(name, self) return target, variable.data def sync(self): self.ds.sync() def close(self, **kwargs): self._manager.close(**kwargs) class NetCDF4BackendEntrypoint(BackendEntrypoint): """ Backend for netCDF files based on the netCDF4 package. It can open ".nc", ".nc4", ".cdf" files and will be chosen as default for these files. Additionally it can open valid HDF5 files, see https://h5netcdf.org/#invalid-netcdf-files for more info. It will not be detected as valid backend for such files, so make sure to specify ``engine="netcdf4"`` in ``open_dataset``. For more information about the underlying library, visit: https://unidata.github.io/netcdf4-python See Also -------- backends.NetCDF4DataStore backends.H5netcdfBackendEntrypoint backends.ScipyBackendEntrypoint """ description = ( "Open netCDF (.nc, .nc4 and .cdf) and most HDF5 files using netCDF4 in Xarray" ) url = "https://docs.xarray.dev/en/stable/generated/xarray.backends.NetCDF4BackendEntrypoint.html" supports_groups = True def guess_can_open(self, filename_or_obj: T_PathFileOrDataStore) -> bool: # Helper to check if magic number is netCDF or HDF5 def _is_netcdf_magic(magic: bytes) -> bool: return magic.startswith((b"CDF", b"\211HDF\r\n\032\n")) # Helper to check if extension is netCDF def _has_netcdf_ext(path: str | os.PathLike, is_remote: bool = False) -> bool: path = str(path).rstrip("/") # For remote URIs, strip query parameters and fragments if is_remote: path = strip_uri_params(path) _, ext = os.path.splitext(path) return ext in {".nc", ".nc4", ".cdf"} if isinstance(filename_or_obj, str): if is_remote_uri(filename_or_obj): # For remote URIs, check extension (accounting for query params/fragments) # Remote netcdf-c can handle both regular URLs and DAP URLs if _has_netcdf_ext(filename_or_obj, is_remote=True): return True elif "zarr" in filename_or_obj.lower(): return False # return true for non-zarr URLs so we don't have a breaking change for people relying on this # netcdf backend guessing true for all remote sources. 
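                # For example (hypothetical URLs):
                # "https://example.org/thredds/dodsC/run1" has no recognised
                # netCDF extension but is still claimed here, whereas
                # "https://example.org/store.zarr" is rejected by the branch above.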
# TODO: emit a warning here about deprecation of this behavior # https://github.com/pydata/xarray/pull/10931 return True if isinstance(filename_or_obj, str | os.PathLike): # For local paths, check magic number first, then extension magic_number = try_read_magic_number_from_path(filename_or_obj) if magic_number is not None: return _is_netcdf_magic(magic_number) # No magic number available, fallback to extension return _has_netcdf_ext(filename_or_obj) if isinstance(filename_or_obj, bytes | memoryview): return _is_netcdf_magic(bytes(filename_or_obj[:8])) return False def open_dataset( self, filename_or_obj: T_PathFileOrDataStore, *, mask_and_scale=True, decode_times=True, concat_characters=True, decode_coords=True, drop_variables: str | Iterable[str] | None = None, use_cftime=None, decode_timedelta=None, group=None, mode="r", format="NETCDF4", clobber=True, diskless=False, persist=False, auto_complex=None, lock=None, autoclose=False, ) -> Dataset: filename_or_obj = _normalize_path(filename_or_obj) store = NetCDF4DataStore.open( filename_or_obj, mode=mode, format=format, group=group, clobber=clobber, diskless=diskless, persist=persist, auto_complex=auto_complex, lock=lock, autoclose=autoclose, ) store_entrypoint = StoreBackendEntrypoint() with close_on_error(store): ds = store_entrypoint.open_dataset( store, mask_and_scale=mask_and_scale, decode_times=decode_times, concat_characters=concat_characters, decode_coords=decode_coords, drop_variables=drop_variables, use_cftime=use_cftime, decode_timedelta=decode_timedelta, ) return ds def open_datatree( self, filename_or_obj: T_PathFileOrDataStore, *, mask_and_scale=True, decode_times=True, concat_characters=True, decode_coords=True, drop_variables: str | Iterable[str] | None = None, use_cftime=None, decode_timedelta=None, group: str | None = None, format="NETCDF4", clobber=True, diskless=False, persist=False, auto_complex=None, lock=None, autoclose=False, **kwargs, ) -> DataTree: groups_dict = self.open_groups_as_dict( filename_or_obj, mask_and_scale=mask_and_scale, decode_times=decode_times, concat_characters=concat_characters, decode_coords=decode_coords, drop_variables=drop_variables, use_cftime=use_cftime, decode_timedelta=decode_timedelta, group=group, format=format, clobber=clobber, diskless=diskless, persist=persist, auto_complex=auto_complex, lock=lock, autoclose=autoclose, **kwargs, ) return datatree_from_dict_with_io_cleanup(groups_dict) def open_groups_as_dict( self, filename_or_obj: T_PathFileOrDataStore, *, mask_and_scale=True, decode_times=True, concat_characters=True, decode_coords=True, drop_variables: str | Iterable[str] | None = None, use_cftime=None, decode_timedelta=None, group: str | None = None, format="NETCDF4", clobber=True, diskless=False, persist=False, auto_complex=None, lock=None, autoclose=False, **kwargs, ) -> dict[str, Dataset]: from xarray.backends.common import _iter_nc_groups from xarray.core.treenode import NodePath filename_or_obj = _normalize_path(filename_or_obj) store = NetCDF4DataStore.open( filename_or_obj, group=group, format=format, clobber=clobber, diskless=diskless, persist=persist, auto_complex=auto_complex, lock=lock, autoclose=autoclose, ) # Check for a group and make it a parent if it exists if group: parent = NodePath("/") / NodePath(group) else: parent = NodePath("/") manager = store._manager groups_dict = {} for path_group in _iter_nc_groups(store.ds, parent=parent): group_store = NetCDF4DataStore(manager, group=path_group, **kwargs) store_entrypoint = StoreBackendEntrypoint() with 
close_on_error(group_store): group_ds = store_entrypoint.open_dataset( group_store, mask_and_scale=mask_and_scale, decode_times=decode_times, concat_characters=concat_characters, decode_coords=decode_coords, drop_variables=drop_variables, use_cftime=use_cftime, decode_timedelta=decode_timedelta, ) if group: group_name = str(NodePath(path_group).relative_to(parent)) else: group_name = str(NodePath(path_group)) groups_dict[group_name] = group_ds return groups_dict BACKEND_ENTRYPOINTS["netcdf4"] = ("netCDF4", NetCDF4BackendEntrypoint) python-xarray-2026.01.0/xarray/backends/plugins.py0000664000175000017500000002223115136607163022156 0ustar alastairalastairfrom __future__ import annotations import functools import inspect import itertools import warnings from collections.abc import Callable from importlib.metadata import entry_points from typing import TYPE_CHECKING, Any from xarray.backends.common import BACKEND_ENTRYPOINTS, BackendEntrypoint from xarray.core.options import OPTIONS from xarray.core.utils import module_available if TYPE_CHECKING: import os from importlib.metadata import EntryPoint, EntryPoints from xarray.backends.common import AbstractDataStore from xarray.core.types import ReadBuffer def remove_duplicates(entrypoints: EntryPoints) -> list[EntryPoint]: # sort and group entrypoints by name entrypoints_sorted = sorted(entrypoints, key=lambda ep: ep.name) entrypoints_grouped = itertools.groupby(entrypoints_sorted, key=lambda ep: ep.name) # check if there are multiple entrypoints for the same name unique_entrypoints = [] for name, _matches in entrypoints_grouped: # remove equal entrypoints matches = list(set(_matches)) unique_entrypoints.append(matches[0]) matches_len = len(matches) if matches_len > 1: all_module_names = [e.value.split(":")[0] for e in matches] selected_module_name = all_module_names[0] warnings.warn( f"Found {matches_len} entrypoints for the engine name {name}:" f"\n {all_module_names}.\n " f"The entrypoint {selected_module_name} will be used.", RuntimeWarning, stacklevel=2, ) return unique_entrypoints def detect_parameters(open_dataset: Callable) -> tuple[str, ...]: signature = inspect.signature(open_dataset) parameters = signature.parameters parameters_list = [] for name, param in parameters.items(): if param.kind in ( inspect.Parameter.VAR_KEYWORD, inspect.Parameter.VAR_POSITIONAL, ): raise TypeError( f"All the parameters in {open_dataset!r} signature should be explicit. 
" "*args and **kwargs is not supported" ) if name != "self": parameters_list.append(name) return tuple(parameters_list) def backends_dict_from_pkg( entrypoints: list[EntryPoint], ) -> dict[str, type[BackendEntrypoint]]: backend_entrypoints = {} for entrypoint in entrypoints: name = entrypoint.name try: backend = entrypoint.load() backend_entrypoints[name] = backend except Exception as ex: warnings.warn( f"Engine {name!r} loading failed:\n{ex}", RuntimeWarning, stacklevel=2 ) return backend_entrypoints def set_missing_parameters( backend_entrypoints: dict[str, type[BackendEntrypoint]], ) -> None: for backend in backend_entrypoints.values(): if backend.open_dataset_parameters is None: open_dataset = backend.open_dataset backend.open_dataset_parameters = detect_parameters(open_dataset) def sort_backends( backend_entrypoints: dict[str, type[BackendEntrypoint]], ) -> dict[str, type[BackendEntrypoint]]: ordered_backends_entrypoints: dict[str, type[BackendEntrypoint]] = {} for be_name in OPTIONS["netcdf_engine_order"]: if backend_entrypoints.get(be_name) is not None: ordered_backends_entrypoints[be_name] = backend_entrypoints.pop(be_name) ordered_backends_entrypoints.update( {name: backend_entrypoints[name] for name in sorted(backend_entrypoints)} ) return ordered_backends_entrypoints def build_engines(entrypoints: EntryPoints) -> dict[str, BackendEntrypoint]: backend_entrypoints: dict[str, type[BackendEntrypoint]] = {} for backend_name, (module_name, backend) in BACKEND_ENTRYPOINTS.items(): if module_name is None or module_available(module_name): backend_entrypoints[backend_name] = backend entrypoints_unique = remove_duplicates(entrypoints) external_backend_entrypoints = backends_dict_from_pkg(entrypoints_unique) backend_entrypoints.update(external_backend_entrypoints) backend_entrypoints = sort_backends(backend_entrypoints) set_missing_parameters(backend_entrypoints) return {name: backend() for name, backend in backend_entrypoints.items()} @functools.lru_cache(maxsize=1) def list_engines() -> dict[str, BackendEntrypoint]: """ Return a dictionary of available engines and their BackendEntrypoint objects. Returns ------- dictionary Notes ----- This function lives in the backends namespace (``engs=xr.backends.list_engines()``). If available, more information is available about each backend via ``engs["eng_name"]``. 
""" entrypoints = entry_points(group="xarray.backends") return build_engines(entrypoints) def refresh_engines() -> None: """Refreshes the backend engines based on installed packages.""" list_engines.cache_clear() def guess_engine( store_spec: str | os.PathLike[Any] | ReadBuffer | bytes | memoryview | AbstractDataStore, must_support_groups: bool = False, ) -> str | type[BackendEntrypoint]: engines = list_engines() for engine, backend in engines.items(): if must_support_groups and not backend.supports_groups: continue try: if backend.guess_can_open(store_spec): return engine except PermissionError: raise except Exception: warnings.warn( f"{engine!r} fails while guessing", RuntimeWarning, stacklevel=2 ) compatible_engines = [] for engine, (_, backend_cls) in BACKEND_ENTRYPOINTS.items(): try: backend = backend_cls() if must_support_groups and not backend.supports_groups: continue if backend.guess_can_open(store_spec): compatible_engines.append(engine) except Exception: warnings.warn( f"{engine!r} fails while guessing", RuntimeWarning, stacklevel=2 ) installed_engines = [k for k in engines if k != "store"] if not compatible_engines: if installed_engines: error_msg = ( "did not find a match in any of xarray's currently installed IO " f"backends {installed_engines}. Consider explicitly selecting one of the " "installed engines via the ``engine`` parameter, or installing " "additional IO dependencies, see:\n" "https://docs.xarray.dev/en/stable/getting-started-guide/installing.html\n" "https://docs.xarray.dev/en/stable/user-guide/io.html" ) elif must_support_groups: error_msg = ( "xarray is unable to open this file because it has no currently " "installed IO backends that support reading groups (e.g., h5netcdf " "or netCDF4-python). Xarray's read/write support requires " "installing optional IO dependencies, see:\n" "https://docs.xarray.dev/en/stable/getting-started-guide/installing.html\n" "https://docs.xarray.dev/en/stable/user-guide/io" ) else: error_msg = ( "xarray is unable to open this file because it has no currently " "installed IO backends. Xarray's read/write support requires " "installing optional IO dependencies, see:\n" "https://docs.xarray.dev/en/stable/getting-started-guide/installing.html\n" "https://docs.xarray.dev/en/stable/user-guide/io" ) else: error_msg = ( "found the following matches with the input file in xarray's IO " f"backends: {compatible_engines}. But their dependencies may not be installed, see:\n" "https://docs.xarray.dev/en/stable/user-guide/io.html \n" "https://docs.xarray.dev/en/stable/getting-started-guide/installing.html" ) raise ValueError(error_msg) def get_backend(engine: str | type[BackendEntrypoint]) -> BackendEntrypoint: """Select open_dataset method based on current engine.""" if isinstance(engine, str): if engine in BACKEND_ENTRYPOINTS: # fast path for built-in engines backend_cls = BACKEND_ENTRYPOINTS[engine][1] set_missing_parameters({engine: backend_cls}) backend = backend_cls() else: engines = list_engines() if engine not in engines: raise ValueError( f"unrecognized engine '{engine}' must be one of your download engines: {list(engines)}. 
" "To install additional dependencies, see:\n" "https://docs.xarray.dev/en/stable/user-guide/io.html \n" "https://docs.xarray.dev/en/stable/getting-started-guide/installing.html" ) backend = engines[engine] elif issubclass(engine, BackendEntrypoint): backend = engine() else: raise TypeError( "engine must be a string or a subclass of " f"xarray.backends.BackendEntrypoint: {engine}" ) return backend python-xarray-2026.01.0/xarray/backends/common.py0000664000175000017500000006637315136607163022004 0ustar alastairalastairfrom __future__ import annotations import logging import os import time import traceback from collections.abc import Callable, Hashable, Iterable, Mapping, Sequence from dataclasses import dataclass from glob import glob from typing import ( TYPE_CHECKING, Any, ClassVar, Self, TypeVar, Union, overload, ) import numpy as np import pandas as pd from xarray.coding import strings, variables from xarray.coding.variables import SerializationWarning from xarray.conventions import cf_encoder from xarray.core import indexing from xarray.core.datatree import DataTree, Variable from xarray.core.types import ReadBuffer from xarray.core.utils import ( FrozenDict, NdimSizeLenMixin, attempt_import, emit_user_level_warning, is_remote_uri, ) from xarray.namedarray.parallelcompat import get_chunked_array_type from xarray.namedarray.pycompat import is_chunked_array from xarray.namedarray.utils import is_duck_dask_array if TYPE_CHECKING: from xarray.core.dataset import Dataset from xarray.core.types import NestedSequence T_Name = Union[Hashable, None] # Create a logger object, but don't add any handlers. Leave that to user code. logger = logging.getLogger(__name__) NONE_VAR_NAME = "__values__" T = TypeVar("T") @overload def _normalize_path(path: os.PathLike) -> str: ... @overload def _normalize_path(path: str) -> str: ... @overload def _normalize_path(path: T) -> T: ... def _normalize_path(path: os.PathLike | str | T) -> str | T: """ Normalize pathlikes to string. Parameters ---------- path : Path to file. Examples -------- >>> from pathlib import Path >>> directory = Path(xr.backends.common.__file__).parent >>> paths_path = Path(directory).joinpath("comm*n.py") >>> paths_str = xr.backends.common._normalize_path(paths_path) >>> print([type(p) for p in (paths_str,)]) [] """ if isinstance(path, os.PathLike): path = os.fspath(path) if isinstance(path, str) and not is_remote_uri(path): path = os.path.abspath(os.path.expanduser(path)) return path # type: ignore[return-value] @overload def _find_absolute_paths( paths: str | os.PathLike | Sequence[str | os.PathLike], **kwargs, ) -> list[str]: ... @overload def _find_absolute_paths( paths: ReadBuffer | Sequence[ReadBuffer], **kwargs, ) -> list[ReadBuffer]: ... @overload def _find_absolute_paths( paths: NestedSequence[str | os.PathLike], **kwargs ) -> NestedSequence[str]: ... @overload def _find_absolute_paths( paths: NestedSequence[ReadBuffer], **kwargs ) -> NestedSequence[ReadBuffer]: ... @overload def _find_absolute_paths( paths: str | os.PathLike | ReadBuffer | NestedSequence[str | os.PathLike | ReadBuffer], **kwargs, ) -> NestedSequence[str | ReadBuffer]: ... def _find_absolute_paths( paths: str | os.PathLike | ReadBuffer | NestedSequence[str | os.PathLike | ReadBuffer], **kwargs, ) -> NestedSequence[str | ReadBuffer]: """ Find absolute paths from the pattern. Parameters ---------- paths : Path(s) to file(s). Can include wildcards like * . **kwargs : Extra kwargs. Mainly for fsspec. 
Examples -------- >>> from pathlib import Path >>> directory = Path(xr.backends.common.__file__).parent >>> paths = str(Path(directory).joinpath("comm*n.py")) # Find common with wildcard >>> paths = xr.backends.common._find_absolute_paths(paths) >>> [Path(p).name for p in paths] ['common.py'] """ if isinstance(paths, str): if is_remote_uri(paths) and kwargs.get("engine") == "zarr": if TYPE_CHECKING: import fsspec else: fsspec = attempt_import("fsspec") fs, _, _ = fsspec.core.get_fs_token_paths( paths, mode="rb", storage_options=kwargs.get("backend_kwargs", {}).get( "storage_options", {} ), expand=False, ) tmp_paths = fs.glob(fs._strip_protocol(paths)) # finds directories return [fs.get_mapper(path) for path in tmp_paths] elif is_remote_uri(paths): raise ValueError( "cannot do wild-card matching for paths that are remote URLs " f"unless engine='zarr' is specified. Got paths: {paths}. " "Instead, supply paths as an explicit list of strings." ) else: return sorted(glob(_normalize_path(paths))) elif isinstance(paths, os.PathLike): return [_normalize_path(paths)] elif isinstance(paths, ReadBuffer): return [paths] def _normalize_path_list( lpaths: NestedSequence[str | os.PathLike | ReadBuffer], ) -> NestedSequence[str | ReadBuffer]: paths = [] for p in lpaths: if isinstance(p, str | os.PathLike): paths.append(_normalize_path(p)) elif isinstance(p, list): paths.append(_normalize_path_list(p)) # type: ignore[arg-type] else: paths.append(p) # type: ignore[arg-type] return paths return _normalize_path_list(paths) @dataclass class BytesIOProxy: """Proxy object for a write that a memoryview.""" getvalue: Callable[[], memoryview] | None = None def getbuffer(self) -> memoryview: """Get the value of this write as bytes or memory.""" if self.getvalue is None: raise ValueError("must set getvalue before fetching value") return self.getvalue() def _open_remote_file(file, mode, storage_options=None): import fsspec fs, _, paths = fsspec.get_fs_token_paths( file, mode=mode, storage_options=storage_options ) return fs.open(paths[0], mode=mode) def _encode_variable_name(name): if name is None: name = NONE_VAR_NAME return name def _decode_variable_name(name): if name == NONE_VAR_NAME: name = None return name def _iter_nc_groups(root, parent="/"): from xarray.core.treenode import NodePath parent = NodePath(parent) yield str(parent) for path, group in root.groups.items(): gpath = parent / path yield from _iter_nc_groups(group, parent=gpath) def find_root_and_group(ds): """Find the root and group name of a netCDF4/h5netcdf dataset.""" hierarchy = () while ds.parent is not None: hierarchy = (ds.name.split("/")[-1],) + hierarchy ds = ds.parent group = "/" + "/".join(hierarchy) return ds, group def collect_ancestor_dimensions(group) -> dict[str, int]: """Returns dimensions defined in parent groups. If dimensions are defined in multiple ancestors, use the size of the closest ancestor. 
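
    For example, if the root group defines ``time`` with size 10 and a child
    group redefines ``time`` with size 5, calling this on a group nested inside
    that child returns ``{"time": 5}`` along with any other inherited
    dimensions (group names here are purely illustrative).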
""" dims = {} while (group := group.parent) is not None: for k, v in group.dimensions.items(): if k not in dims: dims[k] = len(v) return dims def datatree_from_dict_with_io_cleanup(groups_dict: Mapping[str, Dataset]) -> DataTree: """DataTree.from_dict with file clean-up.""" try: tree = DataTree.from_dict(groups_dict) except Exception: for ds in groups_dict.values(): ds.close() raise for path, ds in groups_dict.items(): tree[path].set_close(ds._close) return tree def robust_getitem(array, key, catch=Exception, max_retries=6, initial_delay=500): """ Robustly index an array, using retry logic with exponential backoff if any of the errors ``catch`` are raised. The initial_delay is measured in ms. With the default settings, the maximum delay will be in the range of 32-64 seconds. """ assert max_retries >= 0 for n in range(max_retries + 1): try: return array[key] except catch: if n == max_retries: raise base_delay = initial_delay * 2**n next_delay = base_delay + np.random.randint(base_delay) msg = ( f"getitem failed, waiting {next_delay} ms before trying again " f"({max_retries - n} tries remaining). Full traceback: {traceback.format_exc()}" ) logger.debug(msg) time.sleep(1e-3 * next_delay) class BackendArray(NdimSizeLenMixin, indexing.ExplicitlyIndexed): __slots__ = () async def async_getitem(self, key: indexing.ExplicitIndexer) -> np.typing.ArrayLike: raise NotImplementedError("Backend does not support asynchronous loading") def get_duck_array(self, dtype: np.typing.DTypeLike | None = None): key = indexing.BasicIndexer((slice(None),) * self.ndim) return self[key] # type: ignore[index] async def async_get_duck_array(self, dtype: np.typing.DTypeLike | None = None): key = indexing.BasicIndexer((slice(None),) * self.ndim) return await self.async_getitem(key) class AbstractDataStore: __slots__ = () def get_child_store(self, group: str) -> Self: # pragma: no cover """Get a store corresponding to the indicated child group.""" raise NotImplementedError() def get_dimensions(self): # pragma: no cover raise NotImplementedError() def get_parent_dimensions(self): # pragma: no cover return {} def get_attrs(self): # pragma: no cover raise NotImplementedError() def get_variables(self): # pragma: no cover raise NotImplementedError() def get_encoding(self): return {} def load(self): """ This loads the variables and attributes simultaneously. A centralized loading function makes it easier to create data stores that do automatic encoding/decoding. For example:: class SuffixAppendingDataStore(AbstractDataStore): def load(self): variables, attributes = AbstractDataStore.load(self) variables = {"%s_suffix" % k: v for k, v in variables.items()} attributes = {"%s_suffix" % k: v for k, v in attributes.items()} return variables, attributes This function will be called anytime variables or attributes are requested, so care should be taken to make sure its fast. 
""" variables = FrozenDict( (_decode_variable_name(k), v) for k, v in self.get_variables().items() ) attributes = FrozenDict(self.get_attrs()) return variables, attributes def close(self): pass def __enter__(self): return self def __exit__(self, exception_type, exception_value, traceback): self.close() T_PathFileOrDataStore = ( str | os.PathLike[Any] | ReadBuffer | bytes | memoryview | AbstractDataStore ) class ArrayWriter: __slots__ = ("lock", "regions", "sources", "targets") def __init__(self, lock=None): self.sources = [] self.targets = [] self.regions = [] self.lock = lock def add(self, source, target, region=None): if is_chunked_array(source): self.sources.append(source) self.targets.append(target) self.regions.append(region) elif region: target[region] = source else: target[...] = source def sync(self, compute=True, chunkmanager_store_kwargs=None): if self.sources: chunkmanager = get_chunked_array_type(*self.sources) # TODO: consider wrapping targets with dask.delayed, if this makes # for any discernible difference in performance, e.g., # targets = [dask.delayed(t) for t in self.targets] if chunkmanager_store_kwargs is None: chunkmanager_store_kwargs = {} delayed_store = chunkmanager.store( self.sources, self.targets, lock=self.lock, compute=compute, flush=True, regions=self.regions, **chunkmanager_store_kwargs, ) self.sources = [] self.targets = [] self.regions = [] return delayed_store class AbstractWritableDataStore(AbstractDataStore): __slots__ = () def encode(self, variables, attributes): """ Encode the variables and attributes in this store Parameters ---------- variables : dict-like Dictionary of key/value (variable name / xr.Variable) pairs attributes : dict-like Dictionary of key/value (attribute name / attribute) pairs Returns ------- variables : dict-like attributes : dict-like """ encoded_variables = {} for k, v in variables.items(): try: encoded_variables[k] = self.encode_variable(v) except Exception as e: e.add_note(f"Raised while encoding variable {k!r} with value {v!r}") raise encoded_attributes = {} for k, v in attributes.items(): try: encoded_attributes[k] = self.encode_attribute(v) except Exception as e: e.add_note(f"Raised while encoding attribute {k!r} with value {v!r}") raise return encoded_variables, encoded_attributes def encode_variable(self, v, name=None): """encode one variable""" return v def encode_attribute(self, a): """encode one attribute""" return a def prepare_variable(self, name, variable, check_encoding, unlimited_dims): raise NotImplementedError() def set_dimension(self, dim, length, is_unlimited): # pragma: no cover raise NotImplementedError() def set_attribute(self, k, v): # pragma: no cover raise NotImplementedError() def set_variable(self, k, v): # pragma: no cover raise NotImplementedError() def store_dataset(self, dataset): """ in stores, variables are all variables AND coordinates in xarray.Dataset variables are variables NOT coordinates, so here we pass the whole dataset in instead of doing dataset.variables """ self.store(dataset, dataset.attrs) def store( self, variables, attributes, check_encoding_set=frozenset(), writer=None, unlimited_dims=None, ): """ Top level method for putting data on this store, this method: - encodes variables/attributes - sets dimensions - sets variables Parameters ---------- variables : dict-like Dictionary of key/value (variable name / xr.Variable) pairs attributes : dict-like Dictionary of key/value (attribute name / attribute) pairs check_encoding_set : list-like List of variables that should be checked for 
invalid encoding values writer : ArrayWriter unlimited_dims : list-like List of dimension names that should be treated as unlimited dimensions. """ if writer is None: writer = ArrayWriter() variables, attributes = self.encode(variables, attributes) self.set_attributes(attributes) self.set_dimensions(variables, unlimited_dims=unlimited_dims) self.set_variables( variables, check_encoding_set, writer, unlimited_dims=unlimited_dims ) def set_attributes(self, attributes): """ This provides a centralized method to set the dataset attributes on the data store. Parameters ---------- attributes : dict-like Dictionary of key/value (attribute name / attribute) pairs """ for k, v in attributes.items(): self.set_attribute(k, v) def set_variables(self, variables, check_encoding_set, writer, unlimited_dims=None): """ This provides a centralized method to set the variables on the data store. Parameters ---------- variables : dict-like Dictionary of key/value (variable name / xr.Variable) pairs check_encoding_set : list-like List of variables that should be checked for invalid encoding values writer : ArrayWriter unlimited_dims : list-like List of dimension names that should be treated as unlimited dimensions. """ for vn, v in variables.items(): name = _encode_variable_name(vn) check = vn in check_encoding_set target, source = self.prepare_variable( name, v, check, unlimited_dims=unlimited_dims ) writer.add(source, target) def set_dimensions(self, variables, unlimited_dims=None): """ This provides a centralized method to set the dimensions on the data store. Parameters ---------- variables : dict-like Dictionary of key/value (variable name / xr.Variable) pairs unlimited_dims : list-like List of dimension names that should be treated as unlimited dimensions. """ if unlimited_dims is None: unlimited_dims = set() parent_dims = self.get_parent_dimensions() existing_dims = self.get_dimensions() dims = {} for v in unlimited_dims: # put unlimited_dims first dims[v] = None for v in variables.values(): dims |= v.sizes for dim, length in dims.items(): if dim in existing_dims and length != existing_dims[dim]: raise ValueError( "Unable to update size for existing dimension" f"{dim!r} ({length} != {existing_dims[dim]})" ) elif dim not in existing_dims and length != parent_dims.get(dim): is_unlimited = dim in unlimited_dims self.set_dimension(dim, length, is_unlimited) def sync(self): """Write all buffered data to disk.""" raise NotImplementedError() def _infer_dtype(array, name=None): """Given an object array with no missing values, infer its dtype from all elements.""" if array.dtype.kind != "O": raise TypeError("infer_type must be called on a dtype=object array") if array.size == 0: return np.dtype(float) native_dtypes = set(np.vectorize(type, otypes=[object])(array.ravel())) if len(native_dtypes) > 1 and native_dtypes != {bytes, str}: native_dtype_names = ", ".join(x.__name__ for x in native_dtypes) raise ValueError( f"unable to infer dtype on variable {name!r}; object array " f"contains mixed native types: {native_dtype_names}" ) element = array[(0,) * array.ndim] # We use the base types to avoid subclasses of bytes and str (which might # not play nice with e.g. 
hdf5 datatypes), such as those from numpy if isinstance(element, bytes): return strings.create_vlen_dtype(bytes) elif isinstance(element, str): return strings.create_vlen_dtype(str) dtype = np.array(element).dtype if dtype.kind != "O": return dtype raise ValueError( f"unable to infer dtype on variable {name!r}; xarray " "cannot serialize arbitrary Python objects" ) def _copy_with_dtype(data, dtype: np.typing.DTypeLike | None): """Create a copy of an array with the given dtype. We use this instead of np.array() to ensure that custom object dtypes end up on the resulting array. """ result = np.empty(data.shape, dtype) result[...] = data return result def ensure_dtype_not_object(var: Variable, name: T_Name = None) -> Variable: if var.dtype.kind == "O": dims, data, attrs, encoding = variables.unpack_for_encoding(var) # leave vlen dtypes unchanged if strings.check_vlen_dtype(data.dtype) is not None: return var if is_duck_dask_array(data): emit_user_level_warning( f"variable {name} has data in the form of a dask array with " "dtype=object, which means it is being loaded into memory " "to determine a data type that can be safely stored on disk. " "To avoid this, coerce this variable to a fixed-size dtype " "with astype() before saving it.", category=SerializationWarning, ) data = data.compute() missing = pd.isnull(data) if missing.any(): # nb. this will fail for dask.array data non_missing_values = data[~missing] inferred_dtype = _infer_dtype(non_missing_values, name) # There is no safe bit-pattern for NA in typical binary string # formats, we so can't set a fill_value. Unfortunately, this means # we can't distinguish between missing values and empty strings. fill_value: bytes | str if strings.is_bytes_dtype(inferred_dtype): fill_value = b"" elif strings.is_unicode_dtype(inferred_dtype): fill_value = "" else: # insist on using float for numeric values if not np.issubdtype(inferred_dtype, np.floating): inferred_dtype = np.dtype(float) fill_value = inferred_dtype.type(np.nan) data = _copy_with_dtype(data, dtype=inferred_dtype) data[missing] = fill_value else: data = _copy_with_dtype(data, dtype=_infer_dtype(data, name)) assert data.dtype.kind != "O" or data.dtype.metadata var = Variable(dims, data, attrs, encoding, fastpath=True) return var class WritableCFDataStore(AbstractWritableDataStore): __slots__ = () def encode(self, variables, attributes): # All NetCDF files get CF encoded by default, without this attempting # to write times, for example, would fail. variables, attributes = cf_encoder(variables, attributes) variables = { k: ensure_dtype_not_object(v, name=k) for k, v in variables.items() } return super().encode(variables, attributes) class BackendEntrypoint: """ ``BackendEntrypoint`` is a class container and it is the main interface for the backend plugins, see :ref:`RST backend_entrypoint`. It shall implement: - ``open_dataset`` method: it shall implement reading from file, variables decoding and it returns an instance of :py:class:`~xarray.Dataset`. It shall take in input at least ``filename_or_obj`` argument and ``drop_variables`` keyword argument. For more details see :ref:`RST open_dataset`. - ``guess_can_open`` method: it shall return ``True`` if the backend is able to open ``filename_or_obj``, ``False`` otherwise. The implementation of this method is not mandatory. - ``open_datatree`` method: it shall implement reading from file, variables decoding and it returns an instance of :py:class:`~datatree.DataTree`. It shall take in input at least ``filename_or_obj`` argument. 
The implementation of this method is not mandatory. For more details see . Attributes ---------- open_dataset_parameters : tuple, default: None A list of ``open_dataset`` method parameters. The setting of this attribute is not mandatory. description : str, default: "" A short string describing the engine. The setting of this attribute is not mandatory. url : str, default: "" A string with the URL to the backend's documentation. The setting of this attribute is not mandatory. supports_groups : bool, default: False Whether the backend supports opening groups (via open_datatree and open_groups_as_dict) or not. """ open_dataset_parameters: ClassVar[tuple | None] = None description: ClassVar[str] = "" url: ClassVar[str] = "" supports_groups: ClassVar[bool] = False def __repr__(self) -> str: txt = f"<{type(self).__name__}>" if self.description: txt += f"\n {self.description}" if self.url: txt += f"\n Learn more at {self.url}" return txt def open_dataset( self, filename_or_obj: str | os.PathLike[Any] | ReadBuffer | bytes | memoryview | AbstractDataStore, *, drop_variables: str | Iterable[str] | None = None, ) -> Dataset: """ Backend open_dataset method used by Xarray in :py:func:`~xarray.open_dataset`. """ raise NotImplementedError() def guess_can_open( self, filename_or_obj: str | os.PathLike[Any] | ReadBuffer | bytes | memoryview | AbstractDataStore, ) -> bool: """ Backend open_dataset method used by Xarray in :py:func:`~xarray.open_dataset`. """ return False def open_datatree( self, filename_or_obj: str | os.PathLike[Any] | ReadBuffer | bytes | memoryview | AbstractDataStore, *, drop_variables: str | Iterable[str] | None = None, ) -> DataTree: """ Backend open_datatree method used by Xarray in :py:func:`~xarray.open_datatree`. If implemented, set the class variable supports_groups to True. """ raise NotImplementedError() def open_groups_as_dict( self, filename_or_obj: str | os.PathLike[Any] | ReadBuffer | bytes | memoryview | AbstractDataStore, *, drop_variables: str | Iterable[str] | None = None, ) -> dict[str, Dataset]: """ Opens a dictionary mapping from group names to Datasets. Called by :py:func:`~xarray.open_groups`. This function exists to provide a universal way to open all groups in a file, before applying any additional consistency checks or requirements necessary to create a `DataTree` object (typically done using :py:meth:`~xarray.DataTree.from_dict`). If implemented, set the class variable supports_groups to True. """ raise NotImplementedError() # mapping of engine name to (module name, BackendEntrypoint Class) BACKEND_ENTRYPOINTS: dict[str, tuple[str | None, type[BackendEntrypoint]]] = {} def _is_likely_dap_url(url: str) -> bool: """ Determines if a URL is likely an OPeNDAP (DAP) endpoint based on known protocols, server software path patterns, and file extensions. Parameters ---------- url : str Returns ------- True if the URL matches common DAP patterns, False otherwise. 
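
    Examples
    --------
    Hostnames below are purely illustrative:

    >>> _is_likely_dap_url("https://server.example.org/opendap/data/air.nc")
    True
    >>> _is_likely_dap_url("https://server.example.org/files/air.nc")
    False
    >>> _is_likely_dap_url("/local/path/air.nc")
    False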
""" if not url: return False url_lower = url.lower() # For remote URIs, check for DAP server software path patterns if is_remote_uri(url_lower): dap_path_patterns = ( "/dodsc/", # THREDDS Data Server (TDS) DAP endpoint (case-insensitive) "/dods/", # GrADS Data Server (GDS) DAP endpoint "/opendap/", # Generic OPeNDAP/Hyrax server "/erddap/", # ERDDAP data server "/dap2/", # Explicit DAP2 version in path "/dap4/", # Explicit DAP4 version in path "/dap/", ) return any(pattern in url_lower for pattern in dap_path_patterns) return False python-xarray-2026.01.0/xarray/backends/h5netcdf_.py0000664000175000017500000005422415136607163022343 0ustar alastairalastairfrom __future__ import annotations import functools import io import os from collections.abc import Iterable from typing import TYPE_CHECKING, Any, Self import numpy as np from packaging.version import Version from xarray.backends.common import ( BACKEND_ENTRYPOINTS, BackendEntrypoint, BytesIOProxy, T_PathFileOrDataStore, WritableCFDataStore, _normalize_path, _open_remote_file, collect_ancestor_dimensions, datatree_from_dict_with_io_cleanup, find_root_and_group, ) from xarray.backends.file_manager import ( CachingFileManager, DummyFileManager, FileManager, PickleableFileManager, ) from xarray.backends.locks import HDF5_LOCK, combine_locks, ensure_lock, get_write_lock from xarray.backends.netcdf3 import encode_nc3_attr_value, encode_nc3_variable from xarray.backends.netCDF4_ import ( BaseNetCDF4Array, _build_and_get_enum, _encode_nc4_variable, _ensure_no_forward_slash_in_name, _extract_nc4_variable_encoding, _get_datatype, _nc4_require_group, ) from xarray.backends.store import StoreBackendEntrypoint from xarray.core import indexing from xarray.core.utils import ( FrozenDict, emit_user_level_warning, is_remote_uri, read_magic_number_from_file, try_read_magic_number_from_file_or_path, ) from xarray.core.variable import Variable if TYPE_CHECKING: import h5netcdf from xarray.backends.common import AbstractDataStore from xarray.core.dataset import Dataset from xarray.core.datatree import DataTree from xarray.core.types import ReadBuffer class H5NetCDFArrayWrapper(BaseNetCDF4Array): def get_array(self, needs_lock=True): ds = self.datastore._acquire(needs_lock) return ds.variables[self.variable_name] def __getitem__(self, key): return indexing.explicit_indexing_adapter( key, self.shape, indexing.IndexingSupport.OUTER_1VECTOR, self._getitem ) def _getitem(self, key): with self.datastore.lock: array = self.get_array(needs_lock=False) return array[key] def _read_attributes(h5netcdf_var): # GH451 # to ensure conventions decoding works properly on Python 3, decode all # bytes attributes to strings attrs = {} for k, v in h5netcdf_var.attrs.items(): if k not in ["_FillValue", "missing_value"] and isinstance(v, bytes): try: v = v.decode("utf-8") except UnicodeDecodeError: emit_user_level_warning( f"'utf-8' codec can't decode bytes for attribute " f"{k!r} of h5netcdf object {h5netcdf_var.name!r}, " f"returning bytes undecoded.", UnicodeWarning, ) attrs[k] = v return attrs _extract_h5nc_encoding = functools.partial( _extract_nc4_variable_encoding, lsd_okay=False, h5py_okay=True, backend="h5netcdf", unlimited_dims=None, ) def _h5netcdf_create_group(dataset, name): return dataset.create_group(name) class H5NetCDFStore(WritableCFDataStore): """Store for reading and writing data via h5netcdf""" __slots__ = ( "_filename", "_group", "_manager", "_mode", "autoclose", "format", "is_remote", "lock", ) def __init__( self, manager: FileManager | h5netcdf.File | 
h5netcdf.Group, group=None, mode=None, format="NETCDF4", lock=HDF5_LOCK, autoclose=False, ): import h5netcdf if isinstance(manager, h5netcdf.File | h5netcdf.Group): if group is None: root, group = find_root_and_group(manager) else: if type(manager) is not h5netcdf.File: raise ValueError( "must supply a h5netcdf.File if the group argument is provided" ) root = manager manager = DummyFileManager(root) self._manager = manager self._group = group self._mode = mode self.format = format or "NETCDF4" # todo: utilizing find_root_and_group seems a bit clunky # making filename available on h5netcdf.Group seems better self._filename = find_root_and_group(self.ds)[0].filename self.is_remote = is_remote_uri(self._filename) self.lock = ensure_lock(lock) self.autoclose = autoclose def get_child_store(self, group: str) -> Self: if self.format == "NETCDF4_CLASSIC": raise ValueError("Cannot create sub-groups in `NETCDF4_CLASSIC` format.") if self._group is not None: group = os.path.join(self._group, group) return type(self)( self._manager, group=group, mode=self._mode, lock=self.lock, autoclose=self.autoclose, ) @classmethod def open( cls, filename, mode="r", format="NETCDF4", group=None, lock=None, autoclose=False, invalid_netcdf=None, phony_dims=None, decode_vlen_strings=True, driver=None, driver_kwds=None, storage_options: dict[str, Any] | None = None, ): import h5netcdf if isinstance(filename, str) and is_remote_uri(filename) and driver is None: mode_ = "rb" if mode == "r" else mode filename = _open_remote_file( filename, mode=mode_, storage_options=storage_options ) if isinstance(filename, BytesIOProxy): source = filename filename = io.BytesIO() source.getvalue = filename.getbuffer if isinstance(filename, io.IOBase) and mode == "r": magic_number = read_magic_number_from_file(filename) if not magic_number.startswith(b"\211HDF\r\n\032\n"): raise ValueError( f"{magic_number!r} is not the signature of a valid netCDF4 file" ) if format is None: format = "NETCDF4" if format not in ["NETCDF4", "NETCDF4_CLASSIC"]: raise ValueError(f"invalid format for h5netcdf backend: {format}") kwargs = { "invalid_netcdf": invalid_netcdf, "decode_vlen_strings": decode_vlen_strings, "driver": driver, } if driver_kwds is not None: kwargs.update(driver_kwds) if phony_dims is not None: kwargs["phony_dims"] = phony_dims if Version(h5netcdf.__version__) > Version("1.6.4"): kwargs["format"] = format elif format == "NETCDF4_CLASSIC": raise ValueError( "h5netcdf >= 1.7.0 is required to save output in NETCDF4_CLASSIC format." 
) if lock is None: if mode == "r": lock = HDF5_LOCK else: lock = combine_locks([HDF5_LOCK, get_write_lock(filename)]) manager_cls = ( CachingFileManager if isinstance(filename, str) and not is_remote_uri(filename) else PickleableFileManager ) manager = manager_cls(h5netcdf.File, filename, mode=mode, kwargs=kwargs) return cls( manager, group=group, format=format, mode=mode, lock=lock, autoclose=autoclose, ) def _acquire(self, needs_lock=True): with self._manager.acquire_context(needs_lock) as root: ds = _nc4_require_group( root, self._group, self._mode, create_group=_h5netcdf_create_group ) return ds @property def ds(self): return self._acquire() def open_store_variable(self, name, var): import h5netcdf.core import h5py dimensions = var.dimensions data = indexing.LazilyIndexedArray(H5NetCDFArrayWrapper(name, self)) attrs = _read_attributes(var) # netCDF4 specific encoding encoding = { "chunksizes": var.chunks, "fletcher32": var.fletcher32, "shuffle": var.shuffle, } if var.chunks: encoding["preferred_chunks"] = dict( zip(var.dimensions, var.chunks, strict=True) ) # Convert h5py-style compression options to NetCDF4-Python # style, if possible if var.compression == "gzip": encoding["zlib"] = True encoding["complevel"] = var.compression_opts elif var.compression is not None: encoding["compression"] = var.compression encoding["compression_opts"] = var.compression_opts # save source so __repr__ can detect if it's local or not encoding["source"] = self._filename encoding["original_shape"] = data.shape vlen_dtype = h5py.check_dtype(vlen=var.dtype) if vlen_dtype is str: encoding["dtype"] = str elif vlen_dtype is not None: # pragma: no cover # xarray doesn't support writing arbitrary vlen dtypes yet. pass # just check if datatype is available and create dtype # this check can be removed if h5netcdf >= 1.4.0 for any environment elif (datatype := getattr(var, "datatype", None)) and isinstance( datatype, h5netcdf.core.EnumType ): encoding["dtype"] = np.dtype( data.dtype, metadata={ "enum": datatype.enum_dict, "enum_name": datatype.name, }, ) else: encoding["dtype"] = var.dtype return Variable(dimensions, data, attrs, encoding) def get_variables(self): return FrozenDict( (k, self.open_store_variable(k, v)) for k, v in self.ds.variables.items() ) def get_attrs(self): return FrozenDict(_read_attributes(self.ds)) def get_dimensions(self): return FrozenDict((k, len(v)) for k, v in self.ds.dimensions.items()) def get_parent_dimensions(self): return FrozenDict(collect_ancestor_dimensions(self.ds)) def get_encoding(self): return { "unlimited_dims": { k for k, v in self.ds.dimensions.items() if v.isunlimited() } } def set_dimension(self, name, length, is_unlimited=False): _ensure_no_forward_slash_in_name(name) if is_unlimited: self.ds.dimensions[name] = None self.ds.resize_dimension(name, length) else: self.ds.dimensions[name] = length def set_attribute(self, key, value): if self.format == "NETCDF4_CLASSIC": value = encode_nc3_attr_value(value) self.ds.attrs[key] = value def encode_variable(self, variable, name=None): if self.format == "NETCDF4_CLASSIC": return encode_nc3_variable(variable, name=name) else: return _encode_nc4_variable(variable, name=name) def prepare_variable( self, name, variable, check_encoding=False, unlimited_dims=None ): import h5py _ensure_no_forward_slash_in_name(name) attrs = variable.attrs.copy() dtype = _get_datatype( variable, nc_format=self.format, raise_on_invalid_encoding=check_encoding ) fillvalue = attrs.pop("_FillValue", None) if dtype is str: dtype = 
h5py.special_dtype(vlen=str) # check enum metadata and use h5netcdf.core.EnumType if ( hasattr(self.ds, "enumtypes") and (meta := np.dtype(dtype).metadata) and (e_name := meta.get("enum_name")) and (e_dict := meta.get("enum")) ): dtype = _build_and_get_enum(self, name, dtype, e_name, e_dict) encoding = _extract_h5nc_encoding(variable, raise_on_invalid=check_encoding) kwargs = {} # Convert from NetCDF4-Python style compression settings to h5py style # If both styles are used together, h5py takes precedence # If set_encoding=True, raise ValueError in case of mismatch if encoding.pop("zlib", False): if check_encoding and encoding.get("compression") not in (None, "gzip"): raise ValueError("'zlib' and 'compression' encodings mismatch") encoding.setdefault("compression", "gzip") if ( check_encoding and "complevel" in encoding and "compression_opts" in encoding and encoding["complevel"] != encoding["compression_opts"] ): raise ValueError("'complevel' and 'compression_opts' encodings mismatch") complevel = encoding.pop("complevel", 0) if complevel != 0: encoding.setdefault("compression_opts", complevel) encoding["chunks"] = encoding.pop("chunksizes", None) # Do not apply compression, filters or chunking to scalars. if variable.shape: for key in [ "compression", "compression_opts", "shuffle", "chunks", "fletcher32", ]: if key in encoding: kwargs[key] = encoding[key] if name not in self.ds: nc4_var = self.ds.create_variable( name, dtype=dtype, dimensions=variable.dims, fillvalue=fillvalue, **kwargs, ) else: nc4_var = self.ds[name] for k, v in attrs.items(): if self.format == "NETCDF4_CLASSIC": v = encode_nc3_attr_value(v) nc4_var.attrs[k] = v target = H5NetCDFArrayWrapper(name, self) return target, variable.data def sync(self): self.ds.sync() def close(self, **kwargs): self._manager.close(**kwargs) def _check_phony_dims(phony_dims): emit_phony_dims_warning = False if phony_dims is None: emit_phony_dims_warning = True phony_dims = "access" return emit_phony_dims_warning, phony_dims def _emit_phony_dims_warning(): emit_user_level_warning( "The 'phony_dims' kwarg now defaults to 'access'. " "Previously 'phony_dims=None' would raise an error. " "For full netcdf equivalence please use phony_dims='sort'.", UserWarning, ) def _normalize_filename_or_obj( filename_or_obj: T_PathFileOrDataStore, ) -> str | ReadBuffer | AbstractDataStore: if isinstance(filename_or_obj, bytes | memoryview): return io.BytesIO(filename_or_obj) else: return _normalize_path(filename_or_obj) class H5netcdfBackendEntrypoint(BackendEntrypoint): """ Backend for netCDF files based on the h5netcdf package. It can open ".nc", ".nc4", ".cdf" files but will only be selected as the default if the "netcdf4" engine is not available. Additionally it can open valid HDF5 files, see https://h5netcdf.org/#invalid-netcdf-files for more info. It will not be detected as valid backend for such files, so make sure to specify ``engine="h5netcdf"`` in ``open_dataset``. 
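
    For example, to force this backend for an HDF5 file (the path is
    illustrative)::

        import xarray as xr

        ds = xr.open_dataset("data.h5", engine="h5netcdf")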
For more information about the underlying library, visit: https://h5netcdf.org See Also -------- backends.H5NetCDFStore backends.NetCDF4BackendEntrypoint backends.ScipyBackendEntrypoint """ description = ( "Open netCDF (.nc, .nc4 and .cdf) and most HDF5 files using h5netcdf in Xarray" ) url = "https://docs.xarray.dev/en/stable/generated/xarray.backends.H5netcdfBackendEntrypoint.html" supports_groups = True def guess_can_open(self, filename_or_obj: T_PathFileOrDataStore) -> bool: from xarray.core.utils import is_remote_uri filename_or_obj = _normalize_filename_or_obj(filename_or_obj) # Try to read magic number for local files only is_remote = isinstance(filename_or_obj, str) and is_remote_uri(filename_or_obj) if not is_remote: magic_number = try_read_magic_number_from_file_or_path(filename_or_obj) if magic_number is not None: return magic_number.startswith(b"\211HDF\r\n\032\n") if isinstance(filename_or_obj, str | os.PathLike): _, ext = os.path.splitext(filename_or_obj) return ext in {".nc", ".nc4", ".cdf"} return False def open_dataset( self, filename_or_obj: T_PathFileOrDataStore, *, mask_and_scale=True, decode_times=True, concat_characters=True, decode_coords=True, drop_variables: str | Iterable[str] | None = None, use_cftime=None, decode_timedelta=None, format="NETCDF4", group=None, lock=None, invalid_netcdf=None, phony_dims=None, decode_vlen_strings=True, driver=None, driver_kwds=None, storage_options: dict[str, Any] | None = None, ) -> Dataset: # Keep this message for some versions # remove and set phony_dims="access" above emit_phony_dims_warning, phony_dims = _check_phony_dims(phony_dims) filename_or_obj = _normalize_filename_or_obj(filename_or_obj) store = H5NetCDFStore.open( filename_or_obj, format=format, group=group, lock=lock, invalid_netcdf=invalid_netcdf, phony_dims=phony_dims, decode_vlen_strings=decode_vlen_strings, driver=driver, driver_kwds=driver_kwds, storage_options=storage_options, ) store_entrypoint = StoreBackendEntrypoint() ds = store_entrypoint.open_dataset( store, mask_and_scale=mask_and_scale, decode_times=decode_times, concat_characters=concat_characters, decode_coords=decode_coords, drop_variables=drop_variables, use_cftime=use_cftime, decode_timedelta=decode_timedelta, ) # only warn if phony_dims exist in file # remove together with the above check # after some versions if store.ds._root._phony_dim_count > 0 and emit_phony_dims_warning: _emit_phony_dims_warning() return ds def open_datatree( self, filename_or_obj: T_PathFileOrDataStore, *, mask_and_scale=True, decode_times=True, concat_characters=True, decode_coords=True, drop_variables: str | Iterable[str] | None = None, use_cftime=None, decode_timedelta=None, format="NETCDF4", group: str | None = None, lock=None, invalid_netcdf=None, phony_dims=None, decode_vlen_strings=True, driver=None, driver_kwds=None, **kwargs, ) -> DataTree: groups_dict = self.open_groups_as_dict( filename_or_obj, mask_and_scale=mask_and_scale, decode_times=decode_times, concat_characters=concat_characters, decode_coords=decode_coords, drop_variables=drop_variables, use_cftime=use_cftime, decode_timedelta=decode_timedelta, format=format, group=group, lock=lock, invalid_netcdf=invalid_netcdf, phony_dims=phony_dims, decode_vlen_strings=decode_vlen_strings, driver=driver, driver_kwds=driver_kwds, **kwargs, ) return datatree_from_dict_with_io_cleanup(groups_dict) def open_groups_as_dict( self, filename_or_obj: T_PathFileOrDataStore, *, mask_and_scale=True, decode_times=True, concat_characters=True, decode_coords=True, drop_variables: str 
| Iterable[str] | None = None, use_cftime=None, decode_timedelta=None, format="NETCDF4", group: str | None = None, lock=None, invalid_netcdf=None, phony_dims=None, decode_vlen_strings=True, driver=None, driver_kwds=None, **kwargs, ) -> dict[str, Dataset]: from xarray.backends.common import _iter_nc_groups from xarray.core.treenode import NodePath from xarray.core.utils import close_on_error # Keep this message for some versions # remove and set phony_dims="access" above emit_phony_dims_warning, phony_dims = _check_phony_dims(phony_dims) filename_or_obj = _normalize_filename_or_obj(filename_or_obj) store = H5NetCDFStore.open( filename_or_obj, format=format, group=group, lock=lock, invalid_netcdf=invalid_netcdf, phony_dims=phony_dims, decode_vlen_strings=decode_vlen_strings, driver=driver, driver_kwds=driver_kwds, ) # Check for a group and make it a parent if it exists if group: parent = NodePath("/") / NodePath(group) else: parent = NodePath("/") manager = store._manager groups_dict = {} for path_group in _iter_nc_groups(store.ds, parent=parent): group_store = H5NetCDFStore(manager, group=path_group, **kwargs) store_entrypoint = StoreBackendEntrypoint() with close_on_error(group_store): group_ds = store_entrypoint.open_dataset( group_store, mask_and_scale=mask_and_scale, decode_times=decode_times, concat_characters=concat_characters, decode_coords=decode_coords, drop_variables=drop_variables, use_cftime=use_cftime, decode_timedelta=decode_timedelta, ) if group: group_name = str(NodePath(path_group).relative_to(parent)) else: group_name = str(NodePath(path_group)) groups_dict[group_name] = group_ds # only warn if phony_dims exist in file # remove together with the above check # after some versions if store.ds._root._phony_dim_count > 0 and emit_phony_dims_warning: _emit_phony_dims_warning() return groups_dict BACKEND_ENTRYPOINTS["h5netcdf"] = ("h5netcdf", H5netcdfBackendEntrypoint) python-xarray-2026.01.0/xarray/backends/zarr.py0000664000175000017500000021702315136607163021460 0ustar alastairalastairfrom __future__ import annotations import base64 import json import os import struct from collections.abc import Hashable, Iterable, Mapping from typing import TYPE_CHECKING, Any, Literal, Self, cast import numpy as np import pandas as pd from xarray import coding, conventions from xarray.backends.chunks import grid_rechunk, validate_grid_chunks_alignment from xarray.backends.common import ( BACKEND_ENTRYPOINTS, AbstractWritableDataStore, BackendArray, BackendEntrypoint, T_PathFileOrDataStore, _encode_variable_name, _normalize_path, datatree_from_dict_with_io_cleanup, ensure_dtype_not_object, ) from xarray.backends.store import StoreBackendEntrypoint from xarray.core import indexing from xarray.core.treenode import NodePath from xarray.core.types import ZarrWriteModes from xarray.core.utils import ( FrozenDict, HiddenKeyDict, _default, attempt_import, close_on_error, emit_user_level_warning, ) from xarray.core.variable import Variable from xarray.namedarray.parallelcompat import guess_chunkmanager from xarray.namedarray.pycompat import integer_types from xarray.namedarray.utils import module_available if TYPE_CHECKING: from xarray.core.dataset import Dataset from xarray.core.datatree import DataTree from xarray.core.types import ZarrArray, ZarrGroup def _get_mappers(*, storage_options, store, chunk_store): # expand str and path-like arguments store = _normalize_path(store) chunk_store = _normalize_path(chunk_store) kwargs = {} if storage_options is None: mapper = store chunk_mapper = chunk_store 
else: if not isinstance(store, str): raise ValueError( f"store must be a string to use storage_options. Got {type(store)}" ) if _zarr_v3(): kwargs["storage_options"] = storage_options mapper = store chunk_mapper = chunk_store else: from fsspec import get_mapper mapper = get_mapper(store, **storage_options) if chunk_store is not None: chunk_mapper = get_mapper(chunk_store, **storage_options) else: chunk_mapper = chunk_store return kwargs, mapper, chunk_mapper def _choose_default_mode( *, mode: ZarrWriteModes | None, append_dim: Hashable | None, region: Mapping[str, slice | Literal["auto"]] | Literal["auto"] | None, ) -> ZarrWriteModes: if mode is None: if append_dim is not None: mode = "a" elif region is not None: mode = "r+" else: mode = "w-" if mode not in ["a", "a-"] and append_dim is not None: raise ValueError("cannot set append_dim unless mode='a' or mode=None") if mode not in ["a", "a-", "r+"] and region is not None: raise ValueError( "cannot set region unless mode='a', mode='a-', mode='r+' or mode=None" ) if mode not in ["w", "w-", "a", "a-", "r+"]: raise ValueError( "The only supported options for mode are 'w', " f"'w-', 'a', 'a-', and 'r+', but mode={mode!r}" ) return mode def _zarr_v3() -> bool: return module_available("zarr", minversion="3") # need some special secret attributes to tell us the dimensions DIMENSION_KEY = "_ARRAY_DIMENSIONS" ZarrFormat = Literal[2, 3] class FillValueCoder: """Handle custom logic to safely encode and decode fill values in Zarr. Possibly redundant with logic in xarray/coding/variables.py but needs to be isolated from NetCDF-specific logic. """ @classmethod def encode(cls, value: int | float | str | bytes, dtype: np.dtype[Any]) -> Any: if dtype.kind in "S": # byte string, this implies that 'value' must also be `bytes` dtype. assert isinstance(value, bytes) return base64.standard_b64encode(value).decode() elif dtype.kind in "b": # boolean return bool(value) elif dtype.kind in "iu": # todo: do we want to check for decimals? return int(value) elif dtype.kind in "f": return base64.standard_b64encode(struct.pack(" list scalar array -> scalar other -> other (no change) """ if isinstance(value, np.ndarray): encoded = value.tolist() elif isinstance(value, np.generic): encoded = value.item() else: encoded = value return encoded def has_zarr_async_index() -> bool: try: import zarr return hasattr(zarr.AsyncArray, "oindex") except (ImportError, AttributeError): return False class ZarrArrayWrapper(BackendArray): __slots__ = ("_array", "dtype", "shape") def __init__(self, zarr_array): # some callers attempt to evaluate an array if an `array` property exists on the object. # we prefix with _ to avoid this inference. # TODO type hint this? 
self._array = zarr_array self.shape = self._array.shape # preserve vlen string object dtype (GH 7328) if ( not _zarr_v3() and self._array.filters is not None and any(filt.codec_id == "vlen-utf8" for filt in self._array.filters) ) or ( _zarr_v3() and self._array.serializer and self._array.serializer.to_dict()["name"] == "vlen-utf8" ): dtype = coding.strings.create_vlen_dtype(str) else: dtype = self._array.dtype self.dtype = dtype def get_array(self): return self._array def _oindex(self, key): return self._array.oindex[key] def _vindex(self, key): return self._array.vindex[key] def _getitem(self, key): return self._array[key] async def _async_getitem(self, key): if not _zarr_v3(): raise NotImplementedError( "For lazy basic async indexing with zarr, zarr-python=>v3.0.0 is required" ) async_array = self._array._async_array return await async_array.getitem(key) async def _async_oindex(self, key): if not has_zarr_async_index(): raise NotImplementedError( "For lazy orthogonal async indexing with zarr, zarr-python=>v3.1.2 is required" ) async_array = self._array._async_array return await async_array.oindex.getitem(key) async def _async_vindex(self, key): if not has_zarr_async_index(): raise NotImplementedError( "For lazy vectorized async indexing with zarr, zarr-python=>v3.1.2 is required" ) async_array = self._array._async_array return await async_array.vindex.getitem(key) def __getitem__(self, key): array = self._array if isinstance(key, indexing.BasicIndexer): method = self._getitem elif isinstance(key, indexing.VectorizedIndexer): method = self._vindex elif isinstance(key, indexing.OuterIndexer): method = self._oindex return indexing.explicit_indexing_adapter( key, array.shape, indexing.IndexingSupport.VECTORIZED, method ) # if self.ndim == 0: # could possibly have a work-around for 0d data here async def async_getitem(self, key): array = self._array if isinstance(key, indexing.BasicIndexer): method = self._async_getitem elif isinstance(key, indexing.VectorizedIndexer): method = self._async_vindex elif isinstance(key, indexing.OuterIndexer): method = self._async_oindex return await indexing.async_explicit_indexing_adapter( key, array.shape, indexing.IndexingSupport.VECTORIZED, method ) def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name): """ Given encoding chunks (possibly None or []) and variable chunks (possibly None or []). """ # zarr chunk spec: # chunks : int or tuple of ints, optional # Chunk shape. If not provided, will be guessed from shape and dtype. # if there are no chunks in encoding and the variable data is a numpy # array, then we let zarr use its own heuristics to pick the chunks if not var_chunks and not enc_chunks: return None # if there are no chunks in encoding but there are dask chunks, we try to # use the same chunks in zarr # However, zarr chunks needs to be uniform for each array # https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html#chunks # while dask chunks can be variable sized # https://dask.pydata.org/en/latest/array-design.html#chunks if var_chunks and not enc_chunks: if any(len(set(chunks[:-1])) > 1 for chunks in var_chunks): raise ValueError( "Zarr requires uniform chunk sizes except for final chunk. " f"Variable named {name!r} has incompatible dask chunks: {var_chunks!r}. " "Consider rechunking using `chunk()`." ) if any((chunks[0] < chunks[-1]) for chunks in var_chunks): raise ValueError( "Final chunk of Zarr array must be the same size or smaller " f"than the first. Variable named {name!r} has incompatible Dask chunks {var_chunks!r}." 
"Consider either rechunking using `chunk()` or instead deleting " "or modifying `encoding['chunks']`." ) # return the first chunk for each dimension return tuple(chunk[0] for chunk in var_chunks) # From here on, we are dealing with user-specified chunks in encoding # zarr allows chunks to be an integer, in which case it uses the same chunk # size on each dimension. # Here we re-implement this expansion ourselves. That makes the logic of # checking chunk compatibility easier if isinstance(enc_chunks, integer_types): enc_chunks_tuple = ndim * (enc_chunks,) else: enc_chunks_tuple = tuple(enc_chunks) if len(enc_chunks_tuple) != ndim: # throw away encoding chunks, start over return _determine_zarr_chunks( None, var_chunks, ndim, name, ) for x in enc_chunks_tuple: if not isinstance(x, int): raise TypeError( "zarr chunk sizes specified in `encoding['chunks']` " "must be an int or a tuple of ints. " f"Instead found encoding['chunks']={enc_chunks_tuple!r} " f"for variable named {name!r}." ) # if there are chunks in encoding and the variable data is a numpy array, # we use the specified chunks if not var_chunks: return enc_chunks_tuple return enc_chunks_tuple def _get_zarr_dims_and_attrs(zarr_obj, dimension_key, try_nczarr): # Zarr V3 explicitly stores the dimension names in the metadata try: # if this exists, we are looking at a Zarr V3 array # convert None to empty tuple dimensions = zarr_obj.metadata.dimension_names or () except AttributeError: # continue to old code path pass else: attributes = dict(zarr_obj.attrs) if len(zarr_obj.shape) != len(dimensions): raise KeyError( "Zarr object is missing the `dimension_names` metadata which is " "required for xarray to determine variable dimensions." ) return dimensions, attributes # Zarr arrays do not have dimensions. To get around this problem, we add # an attribute that specifies the dimension. We have to hide this attribute # when we send the attributes to the user. # zarr_obj can be either a zarr group or zarr array try: # Xarray-Zarr dimensions = zarr_obj.attrs[dimension_key] except KeyError as e: if not try_nczarr: raise KeyError( f"Zarr object is missing the attribute `{dimension_key}`, which is " "required for xarray to determine variable dimensions." ) from e # NCZarr defines dimensions through metadata in .zarray zarray_path = os.path.join(zarr_obj.path, ".zarray") if _zarr_v3(): import asyncio zarray_str = asyncio.run(zarr_obj.store.get(zarray_path)).to_bytes() else: zarray_str = zarr_obj.store.get(zarray_path) zarray = json.loads(zarray_str) try: # NCZarr uses Fully Qualified Names dimensions = [ os.path.basename(dim) for dim in zarray["_NCZARR_ARRAY"]["dimrefs"] ] except KeyError as e: raise KeyError( f"Zarr object is missing the attribute `{dimension_key}` and the NCZarr metadata, " "which are required for xarray to determine variable dimensions." 
) from e nc_attrs = [attr for attr in zarr_obj.attrs if attr.lower().startswith("_nc")] attributes = HiddenKeyDict(zarr_obj.attrs, [dimension_key] + nc_attrs) return dimensions, attributes def extract_zarr_variable_encoding( variable, raise_on_invalid=False, name=None, *, zarr_format: ZarrFormat, ): """ Extract zarr encoding dictionary from xarray Variable Parameters ---------- variable : Variable raise_on_invalid : bool, optional name: str | Hashable, optional zarr_format: Literal[2,3] Returns ------- encoding : dict Zarr encoding for `variable` """ encoding = variable.encoding.copy() safe_to_drop = {"source", "original_shape", "preferred_chunks"} valid_encodings = { "chunks", "shards", "compressor", # TODO: delete when min zarr >=3 "compressors", "filters", "serializer", "cache_metadata", "write_empty_chunks", "chunk_key_encoding", } if zarr_format == 3: valid_encodings.add("fill_value") for k in safe_to_drop: if k in encoding: del encoding[k] if raise_on_invalid: invalid = [k for k in encoding if k not in valid_encodings] if "fill_value" in invalid and zarr_format == 2: msg = " Use `_FillValue` to set the Zarr array `fill_value`" else: msg = "" if invalid: raise ValueError( f"unexpected encoding parameters for zarr backend: {invalid!r}." + msg ) else: for k in list(encoding): if k not in valid_encodings: del encoding[k] chunks = _determine_zarr_chunks( enc_chunks=encoding.get("chunks"), var_chunks=variable.chunks, ndim=variable.ndim, name=name, ) if _zarr_v3() and chunks is None: chunks = "auto" encoding["chunks"] = chunks return encoding # Function below is copied from conventions.encode_cf_variable. # The only change is to raise an error for object dtypes. def encode_zarr_variable(var, needs_copy=True, name=None): """ Converts a Variable into another Variable which follows some of the CF conventions: - Nans are masked using _FillValue (or the deprecated missing_value) - Rescaling via: scale_factor and add_offset - datetimes are converted to the CF 'units since time' format - dtype encodings are enforced. Parameters ---------- var : Variable A variable holding un-encoded data. Returns ------- out : Variable A variable which has been encoded as described above. """ var = conventions.encode_cf_variable(var, name=name) var = ensure_dtype_not_object(var, name=name) # zarr allows unicode, but not variable-length strings, so it's both # simpler and more compact to always encode as UTF-8 explicitly. # TODO: allow toggling this explicitly via dtype in encoding. # TODO: revisit this now that Zarr _does_ allow variable-length strings coder = coding.strings.EncodedStringCoder(allows_unicode=True) var = coder.encode(var, name=name) var = coding.strings.ensure_fixed_length_bytes(var) return var def _validate_datatypes_for_zarr_append(vname, existing_var, new_var): """If variable exists in the store, confirm dtype of the data to append is compatible with existing dtype. """ if ( np.issubdtype(new_var.dtype, np.number) or np.issubdtype(new_var.dtype, np.datetime64) or np.issubdtype(new_var.dtype, np.bool_) or new_var.dtype == object or (new_var.dtype.kind in ("S", "U") and existing_var.dtype == object) ): # We can skip dtype equality checks under two conditions: (1) if the var to append is # new to the dataset, because in this case there is no existing var to compare it to; # or (2) if var to append's dtype is known to be easy-to-append, because in this case # we can be confident appending won't cause problems. 
Examples of dtypes which are not # easy-to-append include length-specified strings of type `|S*` or ` Self: zarr_group = self.zarr_group.require_group(group) return type(self)( zarr_group=zarr_group, mode=self._mode, consolidate_on_close=self._consolidate_on_close, append_dim=self._append_dim, write_region=self._write_region, safe_chunks=self._safe_chunks, write_empty=self._write_empty, close_store_on_close=self._close_store_on_close, use_zarr_fill_value_as_mask=self._use_zarr_fill_value_as_mask, align_chunks=self._align_chunks, cache_members=self._cache_members, ) @property def members(self) -> dict[str, ZarrArray | ZarrGroup]: """ Model the arrays and groups contained in self.zarr_group as a dict. If `self._cache_members` is true, the dict is cached. Otherwise, it is retrieved from storage. """ if not self._cache_members: return self._fetch_members() else: return self._members def _fetch_members(self) -> dict[str, ZarrArray | ZarrGroup]: """ Get the arrays and groups defined in the zarr group modelled by this Store """ import zarr if zarr.__version__ >= "3": return dict(self.zarr_group.members()) else: return dict(self.zarr_group.items()) def array_keys(self) -> tuple[str, ...]: from zarr import Array as ZarrArray return tuple( key for (key, node) in self.members.items() if isinstance(node, ZarrArray) ) def arrays(self) -> tuple[tuple[str, ZarrArray], ...]: from zarr import Array as ZarrArray return tuple( (key, node) for (key, node) in self.members.items() if isinstance(node, ZarrArray) ) @property def ds(self): # TODO: consider deprecating this in favor of zarr_group return self.zarr_group def open_store_variable(self, name): zarr_array = self.members[name] data = indexing.LazilyIndexedArray(ZarrArrayWrapper(zarr_array)) try_nczarr = self._mode == "r" dimensions, attributes = _get_zarr_dims_and_attrs( zarr_array, DIMENSION_KEY, try_nczarr ) attributes = dict(attributes) encoding = { "chunks": zarr_array.chunks, "preferred_chunks": dict(zip(dimensions, zarr_array.chunks, strict=True)), } if _zarr_v3(): encoding.update( { "compressors": zarr_array.compressors, "filters": zarr_array.filters, "shards": zarr_array.shards, } ) if self.zarr_group.metadata.zarr_format == 3: encoding.update({"serializer": zarr_array.serializer}) else: encoding.update( { "compressor": zarr_array.compressor, "filters": zarr_array.filters, } ) if self._use_zarr_fill_value_as_mask: # Setting this attribute triggers CF decoding for missing values # by interpreting Zarr's fill_value to mean the same as netCDF's _FillValue if zarr_array.fill_value is not None: attributes["_FillValue"] = zarr_array.fill_value elif "_FillValue" in attributes: attributes["_FillValue"] = FillValueCoder.decode( attributes["_FillValue"], zarr_array.dtype ) return Variable(dimensions, data, attributes, encoding) def get_variables(self): return FrozenDict((k, self.open_store_variable(k)) for k in self.array_keys()) def get_attrs(self): return { k: v for k, v in self.zarr_group.attrs.asdict().items() if not k.lower().startswith("_nc") } def get_dimensions(self): try_nczarr = self._mode == "r" dimensions = {} for _k, v in self.arrays(): dim_names, _ = _get_zarr_dims_and_attrs(v, DIMENSION_KEY, try_nczarr) for d, s in zip(dim_names, v.shape, strict=True): if d in dimensions and dimensions[d] != s: raise ValueError( f"found conflicting lengths for dimension {d} " f"({s} != {dimensions[d]})" ) dimensions[d] = s return dimensions def set_dimensions(self, variables, unlimited_dims=None): if unlimited_dims is not None: raise NotImplementedError( 
"Zarr backend doesn't know how to handle unlimited dimensions" ) def set_attributes(self, attributes): _put_attrs(self.zarr_group, attributes) def encode_variable(self, variable, name=None): variable = encode_zarr_variable(variable, name=name) return variable def encode_attribute(self, a): return encode_zarr_attr_value(a) def store( self, variables, attributes, check_encoding_set=frozenset(), writer=None, unlimited_dims=None, ): """ Top level method for putting data on this store, this method: - encodes variables/attributes - sets dimensions - sets variables Parameters ---------- variables : dict-like Dictionary of key/value (variable name / xr.Variable) pairs attributes : dict-like Dictionary of key/value (attribute name / attribute) pairs check_encoding_set : list-like List of variables that should be checked for invalid encoding values writer : ArrayWriter unlimited_dims : list-like List of dimension names that should be treated as unlimited dimensions. dimension on which the zarray will be appended only needed in append mode """ if TYPE_CHECKING: import zarr else: zarr = attempt_import("zarr") if self._mode == "w": # always overwrite, so we don't care about existing names, # and consistency of encoding new_variable_names = set(variables) existing_keys = {} existing_variable_names = {} else: existing_keys = self.array_keys() existing_variable_names = { vn for vn in variables if _encode_variable_name(vn) in existing_keys } new_variable_names = set(variables) - existing_variable_names if self._mode == "r+" and ( new_names := [k for k in variables if k not in existing_keys] ): raise ValueError( f"dataset contains non-pre-existing variables {new_names!r}, " "which is not allowed in ``xarray.Dataset.to_zarr()`` with " "``mode='r+'``. To allow writing new variables, set ``mode='a'``." ) if self._append_dim is not None and self._append_dim not in existing_keys: # For dimensions without coordinate values, we must parse # the _ARRAY_DIMENSIONS attribute on *all* arrays to check if it # is a valid existing dimension name. # TODO: This `get_dimensions` method also does shape checking # which isn't strictly necessary for our check. existing_dims = self.get_dimensions() if self._append_dim not in existing_dims: raise ValueError( f"append_dim={self._append_dim!r} does not match any existing " f"dataset dimensions {existing_dims}" ) variables_encoded, attributes = self.encode( {vn: variables[vn] for vn in new_variable_names}, attributes ) if existing_variable_names: # We make sure that values to be appended are encoded *exactly* # as the current values in the store. # To do so, we decode variables directly to access the proper encoding, # without going via xarray.Dataset to avoid needing to load # index variables into memory. existing_vars, _, _ = conventions.decode_cf_variables( variables={ k: self.open_store_variable(name=k) for k in existing_variable_names }, # attributes = {} since we don't care about parsing the global # "coordinates" attribute attributes={}, ) # Modified variables must use the same encoding as the store. 
vars_with_encoding = {} for vn in existing_variable_names: _validate_datatypes_for_zarr_append( vn, existing_vars[vn], variables[vn] ) vars_with_encoding[vn] = variables[vn].copy(deep=False) vars_with_encoding[vn].encoding = existing_vars[vn].encoding vars_with_encoding, _ = self.encode(vars_with_encoding, {}) variables_encoded.update(vars_with_encoding) for var_name in existing_variable_names: variables_encoded[var_name] = _validate_and_transpose_existing_dims( var_name, variables_encoded[var_name], existing_vars[var_name], self._write_region, self._append_dim, ) if self._mode not in ["r", "r+"]: self.set_attributes(attributes) self.set_dimensions(variables_encoded, unlimited_dims=unlimited_dims) # if we are appending to an append_dim, only write either # - new variables not already present, OR # - variables with the append_dim in their dimensions # We do NOT overwrite other variables. if self._mode == "a-" and self._append_dim is not None: variables_to_set = { k: v for k, v in variables_encoded.items() if (k not in existing_variable_names) or (self._append_dim in v.dims) } else: variables_to_set = variables_encoded self.set_variables( variables_to_set, check_encoding_set, writer, unlimited_dims=unlimited_dims ) if self._consolidate_on_close: kwargs = {} if _zarr_v3(): kwargs["zarr_format"] = self.zarr_group.metadata.zarr_format zarr.consolidate_metadata(self.zarr_group.store, **kwargs) def _open_existing_array(self, *, name) -> ZarrArray: import zarr from zarr import Array as ZarrArray # TODO: if mode="a", consider overriding the existing variable # metadata. This would need some case work properly with region # and append_dim. if self._write_empty is not None: # Write to zarr_group.chunk_store instead of zarr_group.store # See https://github.com/pydata/xarray/pull/8326#discussion_r1365311316 for a longer explanation # The open_consolidated() enforces a mode of r or r+ # (and to_zarr with region provided enforces a read mode of r+), # and this function makes sure the resulting Group has a store of type ConsolidatedMetadataStore # and a 'normal Store subtype for chunk_store. # The exact type depends on if a local path was used, or a URL of some sort, # but the point is that it's not a read-only ConsolidatedMetadataStore. # It is safe to write chunk data to the chunk_store because no metadata would be changed by # to_zarr with the region parameter: # - Because the write mode is enforced to be r+, no new variables can be added to the store # (this is also checked and enforced in xarray.backends.api.py::to_zarr()). # - Existing variables already have their attrs included in the consolidated metadata file. # - The size of dimensions can not be expanded, that would require a call using `append_dim` # which is mutually exclusive with `region` empty: dict[str, bool] | dict[str, dict[str, bool]] if _zarr_v3(): empty = dict(config={"write_empty_chunks": self._write_empty}) else: empty = dict(write_empty_chunks=self._write_empty) zarr_array = zarr.open( store=( self.zarr_group.store if _zarr_v3() else self.zarr_group.chunk_store ), # TODO: see if zarr should normalize these strings. 
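# e.g. a group named "/child" and an array named "temperature" produce the
# path "child/temperature"; for the root group ("/") the result is just
# "temperature", since the leading "/" is stripped before calling zarr.open.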
path="/".join([self.zarr_group.name.rstrip("/"), name]).lstrip("/"), **empty, ) else: zarr_array = self.zarr_group[name] return cast(ZarrArray, zarr_array) def _create_new_array( self, *, name, shape, dtype, fill_value, encoding, attrs ) -> ZarrArray: if coding.strings.check_vlen_dtype(dtype) is str: dtype = str if self._write_empty is not None: if ( "write_empty_chunks" in encoding and encoding["write_empty_chunks"] != self._write_empty ): raise ValueError( 'Differing "write_empty_chunks" values in encoding and parameters' f'Got {encoding["write_empty_chunks"] = } and {self._write_empty = }' ) else: encoding["write_empty_chunks"] = self._write_empty if _zarr_v3(): # zarr v3 deprecated origin and write_empty_chunks # instead preferring to pass them via the config argument encoding["config"] = {} for c in ("write_empty_chunks", "order"): if c in encoding: encoding["config"][c] = encoding.pop(c) zarr_array = self.zarr_group.create( name, shape=shape, dtype=dtype, fill_value=fill_value, **encoding, ) zarr_array = _put_attrs(zarr_array, attrs) return zarr_array def set_variables( self, variables: dict[str, Variable], check_encoding_set, writer, unlimited_dims=None, ): """ This provides a centralized method to set the variables on the data store. Parameters ---------- variables : dict-like Dictionary of key/value (variable name / xr.Variable) pairs check_encoding_set : list-like List of variables that should be checked for invalid encoding values writer unlimited_dims : list-like List of dimension names that should be treated as unlimited dimensions. """ existing_keys = self.array_keys() is_zarr_v3_format = _zarr_v3() and self.zarr_group.metadata.zarr_format == 3 for vn, v in variables.items(): name = _encode_variable_name(vn) attrs = v.attrs.copy() dims = v.dims dtype = v.dtype shape = v.shape if self._use_zarr_fill_value_as_mask: fill_value = attrs.pop("_FillValue", None) else: fill_value = v.encoding.pop("fill_value", None) if fill_value is None and v.dtype.kind == "f": # For floating point data, Xarray defaults to a fill_value # of NaN (unlike Zarr, which uses zero): # https://github.com/pydata/xarray/issues/10646 fill_value = np.nan if "_FillValue" in attrs: # replace with encoded fill value fv = attrs.pop("_FillValue") if fv is not None: attrs["_FillValue"] = FillValueCoder.encode(fv, dtype) # _FillValue is never a valid encoding for Zarr # TODO: refactor this logic so we don't need to check this here if "_FillValue" in v.encoding: if v.encoding.get("_FillValue") is not None: raise ValueError("Zarr does not support _FillValue in encoding.") else: del v.encoding["_FillValue"] zarr_shape = None write_region = self._write_region if self._write_region is not None else {} write_region = {dim: write_region.get(dim, slice(None)) for dim in dims} if self._mode != "w" and name in existing_keys: # existing variable zarr_array = self._open_existing_array(name=name) if self._append_dim is not None and self._append_dim in dims: # resize existing variable append_axis = dims.index(self._append_dim) assert write_region[self._append_dim] == slice(None) write_region[self._append_dim] = slice( zarr_array.shape[append_axis], None ) new_shape = ( zarr_array.shape[:append_axis] + (zarr_array.shape[append_axis] + v.shape[append_axis],) + zarr_array.shape[append_axis + 1 :] ) zarr_array.resize(new_shape) zarr_shape = zarr_array.shape region = tuple(write_region[dim] for dim in dims) # We need to do this for both new and existing variables to ensure we're not # writing to a partial chunk, even though we don't use 
the `encoding` value # when writing to an existing variable. See # https://github.com/pydata/xarray/issues/8371 for details. # Note: Ideally there should be two functions, one for validating the chunks and # another one for extracting the encoding. encoding = extract_zarr_variable_encoding( v, raise_on_invalid=vn in check_encoding_set, name=vn, zarr_format=3 if is_zarr_v3_format else 2, ) if self._align_chunks and isinstance(encoding["chunks"], tuple): v = grid_rechunk( v=v, enc_chunks=encoding["chunks"], region=region, ) if self._safe_chunks and isinstance(encoding["chunks"], tuple): # the hard case # DESIGN CHOICE: do not allow multiple dask chunks on a single zarr chunk # this avoids the need to get involved in zarr synchronization / locking # From zarr docs: # "If each worker in a parallel computation is writing to a # separate region of the array, and if region boundaries are perfectly aligned # with chunk boundaries, then no synchronization is required." # TODO: incorporate synchronizer to allow writes from multiple dask # threads shape = zarr_shape or v.shape validate_grid_chunks_alignment( nd_v_chunks=v.chunks, enc_chunks=encoding["chunks"], region=region, allow_partial_chunks=self._mode != "r+", name=name, backend_shape=shape, ) if self._mode == "w" or name not in existing_keys: # new variable encoded_attrs = {k: self.encode_attribute(v) for k, v in attrs.items()} # the magic for storing the hidden dimension data if is_zarr_v3_format: encoding["dimension_names"] = dims else: encoded_attrs[DIMENSION_KEY] = dims encoding["overwrite"] = self._mode == "w" zarr_array = self._create_new_array( name=name, dtype=dtype, shape=shape, fill_value=fill_value, encoding=encoding, attrs=encoded_attrs, ) writer.add(v.data, zarr_array, region) def sync(self) -> None: pass def close(self) -> None: if self._close_store_on_close: self.zarr_group.store.close() def _auto_detect_regions(self, ds, region): for dim, val in region.items(): if val != "auto": continue if dim not in ds._variables: # unindexed dimension region[dim] = slice(0, ds.sizes[dim]) continue variable = conventions.decode_cf_variable( dim, self.open_store_variable(dim).compute() ) assert variable.dims == (dim,) index = pd.Index(variable.data) idxs = index.get_indexer(ds[dim].data) if (idxs == -1).any(): raise KeyError( f"Not all values of coordinate '{dim}' in the new array were" " found in the original store. Writing to a zarr region slice" " requires that no dimensions or metadata are changed by the write." ) if (np.diff(idxs) != 1).any(): raise ValueError( f"The auto-detected region of coordinate '{dim}' for writing new data" " to the original store had non-contiguous indices. Writing to a zarr" " region slice requires that the new data constitute a contiguous subset" " of the original store." ) region[dim] = slice(idxs[0], idxs[-1] + 1) return region def _validate_and_autodetect_region(self, ds: Dataset) -> Dataset: if self._write_region is None: return ds region = self._write_region if region == "auto": region = dict.fromkeys(ds.dims, "auto") if not isinstance(region, dict): raise TypeError(f"``region`` must be a dict, got {type(region)}") if any(v == "auto" for v in region.values()): if self._mode not in ["r+", "a"]: raise ValueError( f"``mode`` must be 'r+' or 'a' when using ``region='auto'``, got {self._mode!r}" ) region = self._auto_detect_regions(ds, region) # validate before attempting to auto-detect since the auto-detection # should always return a valid slice. 
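# e.g. region={"time": slice(0, 100)} limits the write to the first 100 steps
# along "time"; each value must be a slice with step 1 or None, and every key
# must be an existing dimension of the dataset being written.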
for k, v in region.items(): if k not in ds.dims: raise ValueError( f"all keys in ``region`` are not in Dataset dimensions, got " f"{list(region)} and {list(ds.dims)}" ) if not isinstance(v, slice): raise TypeError( "all values in ``region`` must be slice objects, got " f"region={region}" ) if v.step not in {1, None}: raise ValueError( "step on all slices in ``region`` must be 1 or None, got " f"region={region}" ) non_matching_vars = [ k for k, v in ds.variables.items() if not set(region).intersection(v.dims) ] if region and non_matching_vars: raise ValueError( f"when setting `region` explicitly in to_zarr(), all " f"variables in the dataset to write must have at least " f"one dimension in common with the region's dimensions " f"{list(region.keys())}, but that is not " f"the case for some variables here. To drop these variables " f"from this dataset before exporting to zarr, write: " f".drop_vars({non_matching_vars!r})" ) if self._append_dim is not None and self._append_dim in region: raise ValueError( f"cannot list the same dimension in both ``append_dim`` and " f"``region`` with to_zarr(), got {self._append_dim} in both" ) self._write_region = region # can't modify indexes with region writes return ds.drop_vars(ds.indexes) def _validate_encoding(self, encoding) -> None: if encoding and self._mode in ["a", "a-", "r+"]: existing_var_names = self.array_keys() for var_name in existing_var_names: if var_name in encoding: raise ValueError( f"variable {var_name!r} already exists, but encoding was provided" ) def open_zarr( store, group=None, synchronizer=None, chunks=_default, decode_cf=True, mask_and_scale=True, decode_times=True, concat_characters=True, decode_coords=True, drop_variables=None, consolidated=None, overwrite_encoded_chunks=False, chunk_store=None, storage_options=None, decode_timedelta=None, use_cftime=None, zarr_version=None, zarr_format=None, use_zarr_fill_value_as_mask=None, chunked_array_type: str | None = None, from_array_kwargs: dict[str, Any] | None = None, create_default_indexes=True, **kwargs, ): """Load and decode a dataset from a Zarr store. The `store` object should be a valid store for a Zarr group. `store` variables must contain dimension metadata encoded in the `_ARRAY_DIMENSIONS` attribute or must have NCZarr format. Parameters ---------- store : MutableMapping or str A MutableMapping where a Zarr Group has been stored or a path to a directory in file system where a Zarr DirectoryStore has been stored. synchronizer : object, optional Array synchronizer provided to zarr group : str, optional Group path. (a.k.a. `path` in zarr terminology.) chunks : int, dict, "auto" or None, optional Used to load the data into dask arrays. Default behavior is to use ``chunks={}`` if dask is available, otherwise ``chunks=None``. - ``chunks='auto'`` will use dask ``auto`` chunking taking into account the engine preferred chunks. - ``chunks=None`` skips using dask. This uses xarray's internally private :ref:`lazy indexing classes `, but data is eagerly loaded into memory as numpy arrays when accessed. This can be more efficient for smaller arrays, though results may vary. - ``chunks=-1`` loads the data with dask using a single chunk for all arrays. - ``chunks={}`` loads the data with dask using engine preferred chunks if exposed by the backend, otherwise with a single chunk for all arrays. See dask chunking for more details. 
overwrite_encoded_chunks : bool, optional Whether to drop the zarr chunks encoded for each variable when a dataset is loaded with specified chunk sizes (default: False) decode_cf : bool, optional Whether to decode these variables, assuming they were saved according to CF conventions. mask_and_scale : bool, optional If True, replace array values equal to `_FillValue` with NA and scale values according to the formula `original_values * scale_factor + add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are taken from variable attributes (if they exist). If the `_FillValue` or `missing_value` attribute contains multiple values a warning will be issued and all array values matching one of the multiple values will be replaced by NA. decode_times : bool, optional If True, decode times encoded in the standard NetCDF datetime format into datetime objects. Otherwise, leave them encoded as numbers. concat_characters : bool, optional If True, concatenate along the last dimension of character arrays to form string arrays. Dimensions will only be concatenated over (and removed) if they have no corresponding variable and if they are only used as the last dimension of character arrays. decode_coords : bool, optional If True, decode the 'coordinates' attribute to identify coordinates in the resulting dataset. drop_variables : str or iterable, optional A variable or list of variables to exclude from being parsed from the dataset. This may be useful to drop variables with problems or inconsistent values. consolidated : bool, optional Whether to open the store using zarr's consolidated metadata capability. Only works for stores that have already been consolidated. By default (`consolidate=None`), attempts to read consolidated metadata, falling back to read non-consolidated metadata if that fails. When the experimental ``zarr_version=3``, ``consolidated`` must be either be ``None`` or ``False``. chunk_store : MutableMapping, optional A separate Zarr store only for chunk data. storage_options : dict, optional Any additional parameters for the storage backend (ignored for local paths). decode_timedelta : bool, optional If True, decode variables and coordinates with time units in {'days', 'hours', 'minutes', 'seconds', 'milliseconds', 'microseconds'} into timedelta objects. If False, leave them encoded as numbers. If None (default), assume the same value of decode_time. use_cftime : bool, optional Only relevant if encoded dates come from a standard calendar (e.g. "gregorian", "proleptic_gregorian", "standard", or not specified). If None (default), attempt to decode times to ``np.datetime64[ns]`` objects; if this is not possible, decode times to ``cftime.datetime`` objects. If True, always decode times to ``cftime.datetime`` objects, regardless of whether or not they can be represented using ``np.datetime64[ns]`` objects. If False, always decode times to ``np.datetime64[ns]`` objects; if this is not possible raise an error. zarr_version : int or None, optional .. deprecated:: 2024.9.1 Use ``zarr_format`` instead. zarr_format : int or None, optional The desired zarr format to target (currently 2 or 3). The default of None will attempt to determine the zarr version from ``store`` when possible, otherwise defaulting to the default version used by the zarr-python library installed. use_zarr_fill_value_as_mask : bool, optional If True, use the zarr Array ``fill_value`` to mask the data, the same as done for NetCDF data with ``_FillValue`` or ``missing_value`` attributes. 
If False, the ``fill_value`` is ignored and the data are not masked. If None, this defaults to True for ``zarr_version=2`` and False for ``zarr_version=3``. chunked_array_type: str, optional Which chunked array type to coerce this datasets' arrays to. Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEntryPoint` system. Experimental API that should not be relied upon. from_array_kwargs: dict, optional Additional keyword arguments passed on to the ``ChunkManagerEntrypoint.from_array`` method used to create chunked arrays, via whichever chunk manager is specified through the ``chunked_array_type`` kwarg. Defaults to ``{'manager': 'dask'}``, meaning additional kwargs will be passed eventually to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon. create_default_indexes : bool, default: True If True, create pandas indexes for :term:`dimension coordinates `, which loads the coordinate data into memory. Set it to False if you want to avoid loading data into memory. Note that backends can still choose to create other indexes. If you want to control that, please refer to the backend's documentation. Returns ------- dataset : Dataset The newly created dataset. See Also -------- open_dataset open_mfdataset References ---------- https://zarr.readthedocs.io/ """ from xarray.backends.api import open_dataset if from_array_kwargs is None: from_array_kwargs = {} if chunks is _default: try: guess_chunkmanager( chunked_array_type ) # attempt to import that parallel backend chunks = {} except (ValueError, ImportError): chunks = None if kwargs: raise TypeError( "open_zarr() got unexpected keyword arguments " + ",".join(kwargs.keys()) ) backend_kwargs = { "synchronizer": synchronizer, "consolidated": consolidated, "overwrite_encoded_chunks": overwrite_encoded_chunks, "chunk_store": chunk_store, "storage_options": storage_options, "zarr_version": zarr_version, "zarr_format": zarr_format, } ds = open_dataset( filename_or_obj=store, group=group, decode_cf=decode_cf, mask_and_scale=mask_and_scale, decode_times=decode_times, concat_characters=concat_characters, decode_coords=decode_coords, engine="zarr", chunks=chunks, drop_variables=drop_variables, create_default_indexes=create_default_indexes, chunked_array_type=chunked_array_type, from_array_kwargs=from_array_kwargs, backend_kwargs=backend_kwargs, decode_timedelta=decode_timedelta, use_cftime=use_cftime, zarr_version=zarr_version, use_zarr_fill_value_as_mask=use_zarr_fill_value_as_mask, ) return ds class ZarrBackendEntrypoint(BackendEntrypoint): """ Backend for ".zarr" files based on the zarr package. For more information about the underlying library, visit: https://zarr.readthedocs.io/en/stable See Also -------- backends.ZarrStore """ description = "Open zarr files (.zarr) using zarr in Xarray" url = "https://docs.xarray.dev/en/stable/generated/xarray.backends.ZarrBackendEntrypoint.html" supports_groups = True def guess_can_open(self, filename_or_obj: T_PathFileOrDataStore) -> bool: if isinstance(filename_or_obj, str | os.PathLike): # allow a trailing slash to account for an autocomplete # adding it. 
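# e.g. "data/store.zarr/" becomes "data/store.zarr" after rstrip("/"), so the
# extension check below still sees ".zarr"; paths ending in ".nc" return False.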
_, ext = os.path.splitext(str(filename_or_obj).rstrip("/")) return ext in [".zarr"] return False def open_dataset( self, filename_or_obj: T_PathFileOrDataStore, *, mask_and_scale=True, decode_times=True, concat_characters=True, decode_coords=True, drop_variables: str | Iterable[str] | None = None, use_cftime=None, decode_timedelta=None, group=None, mode="r", synchronizer=None, consolidated=None, chunk_store=None, storage_options=None, zarr_version=None, zarr_format=None, store=None, engine=None, use_zarr_fill_value_as_mask=None, cache_members: bool = True, ) -> Dataset: filename_or_obj = _normalize_path(filename_or_obj) if not store: store = ZarrStore.open_group( filename_or_obj, group=group, mode=mode, synchronizer=synchronizer, consolidated=consolidated, consolidate_on_close=False, chunk_store=chunk_store, storage_options=storage_options, zarr_version=zarr_version, use_zarr_fill_value_as_mask=None, zarr_format=zarr_format, cache_members=cache_members, ) store_entrypoint = StoreBackendEntrypoint() with close_on_error(store): ds = store_entrypoint.open_dataset( store, mask_and_scale=mask_and_scale, decode_times=decode_times, concat_characters=concat_characters, decode_coords=decode_coords, drop_variables=drop_variables, use_cftime=use_cftime, decode_timedelta=decode_timedelta, ) return ds def open_datatree( self, filename_or_obj: T_PathFileOrDataStore, *, mask_and_scale=True, decode_times=True, concat_characters=True, decode_coords=True, drop_variables: str | Iterable[str] | None = None, use_cftime=None, decode_timedelta=None, group: str | None = None, mode="r", synchronizer=None, consolidated=None, chunk_store=None, storage_options=None, zarr_version=None, zarr_format=None, ) -> DataTree: filename_or_obj = _normalize_path(filename_or_obj) groups_dict = self.open_groups_as_dict( filename_or_obj=filename_or_obj, mask_and_scale=mask_and_scale, decode_times=decode_times, concat_characters=concat_characters, decode_coords=decode_coords, drop_variables=drop_variables, use_cftime=use_cftime, decode_timedelta=decode_timedelta, group=group, mode=mode, synchronizer=synchronizer, consolidated=consolidated, chunk_store=chunk_store, storage_options=storage_options, zarr_version=zarr_version, zarr_format=zarr_format, ) return datatree_from_dict_with_io_cleanup(groups_dict) def open_groups_as_dict( self, filename_or_obj: T_PathFileOrDataStore, *, mask_and_scale=True, decode_times=True, concat_characters=True, decode_coords=True, drop_variables: str | Iterable[str] | None = None, use_cftime=None, decode_timedelta=None, group: str | None = None, mode="r", synchronizer=None, consolidated=None, chunk_store=None, storage_options=None, zarr_version=None, zarr_format=None, ) -> dict[str, Dataset]: filename_or_obj = _normalize_path(filename_or_obj) # Check for a group and make it a parent if it exists if group: parent = str(NodePath("/") / NodePath(group)) else: parent = str(NodePath("/")) stores = ZarrStore.open_store( filename_or_obj, group=parent, mode=mode, synchronizer=synchronizer, consolidated=consolidated, consolidate_on_close=False, chunk_store=chunk_store, storage_options=storage_options, zarr_version=zarr_version, zarr_format=zarr_format, ) groups_dict = {} for path_group, store in stores.items(): store_entrypoint = StoreBackendEntrypoint() with close_on_error(store): group_ds = store_entrypoint.open_dataset( store, mask_and_scale=mask_and_scale, decode_times=decode_times, concat_characters=concat_characters, decode_coords=decode_coords, drop_variables=drop_variables, use_cftime=use_cftime, 
decode_timedelta=decode_timedelta, ) if group: group_name = str(NodePath(path_group).relative_to(parent)) else: group_name = str(NodePath(path_group)) groups_dict[group_name] = group_ds return groups_dict def _iter_zarr_groups(root: ZarrGroup, parent: str = "/") -> Iterable[str]: parent_nodepath = NodePath(parent) yield str(parent_nodepath) for path, group in root.groups(): gpath = parent_nodepath / path yield from _iter_zarr_groups(group, parent=str(gpath)) def _get_open_params( store, mode, synchronizer, group, consolidated, consolidate_on_close, chunk_store, storage_options, zarr_version, use_zarr_fill_value_as_mask, zarr_format, ): if TYPE_CHECKING: import zarr else: zarr = attempt_import("zarr") # zarr doesn't support pathlib.Path objects yet. zarr-python#601 if isinstance(store, os.PathLike): store = os.fspath(store) open_kwargs = dict( # mode='a-' is a handcrafted xarray specialty mode="a" if mode == "a-" else mode, synchronizer=synchronizer, path=group, ) open_kwargs["storage_options"] = storage_options zarr_format = _handle_zarr_version_or_format( zarr_version=zarr_version, zarr_format=zarr_format ) if _zarr_v3(): open_kwargs["zarr_format"] = zarr_format else: open_kwargs["zarr_version"] = zarr_format if chunk_store is not None: open_kwargs["chunk_store"] = chunk_store if consolidated is None: consolidated = False if _zarr_v3(): # TODO: replace AssertionError after https://github.com/zarr-developers/zarr-python/issues/2821 is resolved missing_exc = AssertionError else: missing_exc = zarr.errors.GroupNotFoundError if _zarr_v3(): # zarr 3.0.8 and earlier did not support this property - it was effectively assumed true if not getattr(store, "supports_consolidated_metadata", True): consolidated = consolidate_on_close = False if consolidated in [None, True]: # open the root of the store, in case there is metadata consolidated there group = open_kwargs.pop("path") if consolidated: # TODO: an option to pass the metadata_key keyword zarr_root_group = zarr.open_consolidated(store, **open_kwargs) elif consolidated is None: # same but with more error handling in case no consolidated metadata found try: zarr_root_group = zarr.open_consolidated(store, **open_kwargs) except (ValueError, KeyError): # ValueError in zarr-python 3.x, KeyError in 2.x. try: zarr_root_group = zarr.open_group(store, **open_kwargs) emit_user_level_warning( "Failed to open Zarr store with consolidated metadata, " "but successfully read with non-consolidated metadata. " "This is typically much slower for opening a dataset. " "To silence this warning, consider:\n" "1. Consolidating metadata in this existing store with " "zarr.consolidate_metadata().\n" "2. Explicitly setting consolidated=False, to avoid trying " "to read consolidate metadata, or\n" "3. 
Explicitly setting consolidated=True, to raise an " "error in this case instead of falling back to try " "reading non-consolidated metadata.", RuntimeWarning, ) except missing_exc as err: raise FileNotFoundError( f"No such file or directory: '{store}'" ) from err # but the user should still receive a DataTree whose root is the group they asked for if group and group != "/": zarr_group = zarr_root_group[group.removeprefix("/")] else: zarr_group = zarr_root_group else: if _zarr_v3(): # we have determined that we don't want to use consolidated metadata # so we set that to False to avoid trying to read it open_kwargs["use_consolidated"] = False zarr_group = zarr.open_group(store, **open_kwargs) close_store_on_close = zarr_group.store is not store # we use this to determine how to handle fill_value is_zarr_v3_format = _zarr_v3() and zarr_group.metadata.zarr_format == 3 if use_zarr_fill_value_as_mask is None: if is_zarr_v3_format: # for new data, we use a better default use_zarr_fill_value_as_mask = False else: # this was the default for v2 and should apply to most existing Zarr data use_zarr_fill_value_as_mask = True return ( zarr_group, consolidate_on_close, close_store_on_close, use_zarr_fill_value_as_mask, ) def _handle_zarr_version_or_format( *, zarr_version: ZarrFormat | None, zarr_format: ZarrFormat | None ) -> ZarrFormat | None: """handle the deprecated zarr_version kwarg and return zarr_format""" if ( zarr_format is not None and zarr_version is not None and zarr_format != zarr_version ): raise ValueError( f"zarr_format {zarr_format} does not match zarr_version {zarr_version}, please only set one" ) if zarr_version is not None: emit_user_level_warning( "zarr_version is deprecated, use zarr_format", FutureWarning ) return zarr_version return zarr_format BACKEND_ENTRYPOINTS["zarr"] = ("zarr", ZarrBackendEntrypoint) python-xarray-2026.01.0/xarray/backends/memory.py0000664000175000017500000000276515136607163022017 0ustar alastairalastairfrom __future__ import annotations import copy import numpy as np from xarray.backends.common import AbstractWritableDataStore from xarray.core import indexing from xarray.core.variable import Variable class InMemoryDataStore(AbstractWritableDataStore): """ Stores dimensions, variables and attributes in ordered dictionaries, making this store fast compared to stores which save to disk. This store exists purely for internal testing purposes. """ def __init__(self, variables=None, attributes=None): self._variables = {} if variables is None else variables self._attributes = {} if attributes is None else attributes def get_attrs(self): return self._attributes def get_variables(self): res = {} for k, v in self._variables.items(): v = v.copy(deep=True) res[k] = v v._data = indexing.LazilyIndexedArray(v._data) return res def get_dimensions(self): return {d: s for v in self._variables.values() for d, s in v.dims.items()} def prepare_variable(self, k, v, *args, **kwargs): new_var = Variable(v.dims, np.empty_like(v), v.attrs) self._variables[k] = new_var return new_var, v.data def set_attribute(self, k, v): # copy to imitate writing to disk. 
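# a deep copy means later in-place mutation of the caller's object cannot
# retroactively change what this in-memory "file" recorded, mirroring how a
# real on-disk store would behave.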
self._attributes[k] = copy.deepcopy(v) def set_dimension(self, dim, length, unlimited_dims=None): # in this model, dimensions are accounted for in the variables pass python-xarray-2026.01.0/xarray/backends/pydap_.py0000664000175000017500000003345415136607163021762 0ustar alastairalastairfrom __future__ import annotations import os from collections.abc import Iterable from typing import TYPE_CHECKING, Any import numpy as np from xarray.backends.common import ( BACKEND_ENTRYPOINTS, AbstractDataStore, BackendArray, BackendEntrypoint, T_PathFileOrDataStore, _is_likely_dap_url, _normalize_path, datatree_from_dict_with_io_cleanup, robust_getitem, ) from xarray.backends.store import StoreBackendEntrypoint from xarray.core import indexing from xarray.core.utils import ( Frozen, FrozenDict, close_on_error, ) from xarray.core.variable import Variable from xarray.namedarray.pycompat import integer_types if TYPE_CHECKING: import os from xarray.core.dataset import Dataset from xarray.core.datatree import DataTree from xarray.core.types import ReadBuffer class PydapArrayWrapper(BackendArray): def __init__(self, array, checksums=True): self.array = array @property def shape(self) -> tuple[int, ...]: return self.array.shape @property def dtype(self): return self.array.dtype def __getitem__(self, key): return indexing.explicit_indexing_adapter( key, self.shape, indexing.IndexingSupport.BASIC, self._getitem ) def _getitem(self, key): result = robust_getitem(self.array, key, catch=ValueError) result = np.asarray(result.data) axis = tuple(n for n, k in enumerate(key) if isinstance(k, integer_types)) if result.ndim + len(axis) != self.array.ndim and axis: result = np.squeeze(result, axis) return result def get_group(ds, group): if group in {None, "", "/"}: # use the root group return ds else: try: return ds[group] except KeyError as e: # wrap error to provide slightly more helpful message raise KeyError(f"group not found: {group}", e) from e class PydapDataStore(AbstractDataStore): """Store for accessing OpenDAP datasets with pydap. This store provides an alternative way to access OpenDAP datasets that may be useful if the netCDF4 library is not available. """ def __init__( self, dataset, group=None, session=None, protocol=None, checksums=True, ): """ Parameters ---------- ds : pydap DatasetType group: str or None (default None) The group to open. If None, the root group is opened. """ self.dataset = dataset self.group = group self._protocol = protocol self._checksums = checksums # true by default @classmethod def open( cls, url, group=None, application=None, session=None, output_grid=None, timeout=None, verify=None, user_charset=None, checksums=True, ): from pydap.client import open_url from pydap.net import DEFAULT_TIMEOUT if output_grid is not None: # output_grid is no longer passed to pydap.client.open_url from xarray.core.utils import emit_user_level_warning emit_user_level_warning( "`output_grid` is deprecated and will be removed in a future version" " of xarray. Will be set to `None`, the new default. 
", DeprecationWarning, ) output_grid = False # new default behavior kwargs = { "url": url, "application": application, "session": session, "output_grid": output_grid or False, "timeout": timeout or DEFAULT_TIMEOUT, "verify": verify or True, "user_charset": user_charset, } if isinstance(url, str): # check uit begins with an acceptable scheme dataset = open_url(**kwargs) elif hasattr(url, "ds"): # pydap dataset dataset = url.ds args = {"dataset": dataset, "checksums": checksums} if group: args["group"] = group if url.startswith(("http", "dap2")): args["protocol"] = "dap2" elif url.startswith("dap4"): args["protocol"] = "dap4" return cls(**args) def open_store_variable(self, var): if hasattr(var, "dims"): dimensions = [ dim.split("/")[-1] if dim.startswith("/") else dim for dim in var.dims ] else: # GridType does not have a dims attribute - instead get `dimensions` # see https://github.com/pydap/pydap/issues/485 dimensions = var.dimensions if ( self._protocol == "dap4" and var.name in dimensions and hasattr(var, "dataset") # only True for pydap>3.5.5 ): var.dataset.enable_batch_mode() data_array = self._get_data_array(var) data = indexing.LazilyIndexedArray(data_array) var.dataset.disable_batch_mode() else: # all non-dimension variables data = indexing.LazilyIndexedArray(PydapArrayWrapper(var)) return Variable(dimensions, data, var.attributes) def get_variables(self): # get first all variables arrays, excluding any container type like, # `Groups`, `Sequence` or `Structure` types try: _vars = list(self.ds.variables()) _vars += list(self.ds.grids()) # dap2 objects except AttributeError: from pydap.model import GroupType _vars = [ var for var in self.ds.keys() # check the key is not a BaseType or GridType if not isinstance(self.ds[var], GroupType) ] return FrozenDict((k, self.open_store_variable(self.ds[k])) for k in _vars) def get_attrs(self): """Remove any opendap specific attributes""" opendap_attrs = ( "configuration", "build_dmrpp", "bes", "libdap", "invocation", "dimensions", "path", "Maps", ) attrs = dict(self.ds.attributes) list(map(attrs.pop, opendap_attrs, [None] * len(opendap_attrs))) return Frozen(attrs) def get_dimensions(self): return Frozen(sorted(self.ds.dimensions)) @property def ds(self): return get_group(self.dataset, self.group) def _get_data_array(self, var): """gets dimension data all at once, storing the numpy arrays within a cached dictionary """ from pydap.client import get_batch_data if not var._is_data_loaded(): # data has not been deserialized yet # runs only once per store/hierarchy get_batch_data(var, checksums=self._checksums) return self.dataset[var.id].data class PydapBackendEntrypoint(BackendEntrypoint): """ Backend for steaming datasets over the internet using the Data Access Protocol, also known as DODS or OPeNDAP based on the pydap package. This backend is selected by default for urls. 
For more information about the underlying library, visit: https://pydap.github.io/pydap/en/intro.html See Also -------- backends.PydapDataStore """ description = "Open remote datasets via OPeNDAP using pydap in Xarray" url = "https://docs.xarray.dev/en/stable/generated/xarray.backends.PydapBackendEntrypoint.html" def guess_can_open(self, filename_or_obj: T_PathFileOrDataStore) -> bool: if not isinstance(filename_or_obj, str): return False return _is_likely_dap_url(filename_or_obj) def open_dataset( self, filename_or_obj: ( str | os.PathLike[Any] | ReadBuffer | bytes | memoryview | AbstractDataStore ), *, mask_and_scale=True, decode_times=True, concat_characters=True, decode_coords=True, drop_variables: str | Iterable[str] | None = None, use_cftime=None, decode_timedelta=None, group=None, application=None, session=None, output_grid=None, timeout=None, verify=None, user_charset=None, checksums=True, ) -> Dataset: store = PydapDataStore.open( url=filename_or_obj, group=group, application=application, session=session, output_grid=output_grid, timeout=timeout, verify=verify, user_charset=user_charset, checksums=checksums, ) store_entrypoint = StoreBackendEntrypoint() with close_on_error(store): ds = store_entrypoint.open_dataset( store, mask_and_scale=mask_and_scale, decode_times=decode_times, concat_characters=concat_characters, decode_coords=decode_coords, drop_variables=drop_variables, use_cftime=use_cftime, decode_timedelta=decode_timedelta, ) return ds def open_datatree( self, filename_or_obj: T_PathFileOrDataStore, *, mask_and_scale=True, decode_times=True, concat_characters=True, decode_coords=True, drop_variables: str | Iterable[str] | None = None, use_cftime=None, decode_timedelta=None, group: str | None = None, application=None, session=None, timeout=None, verify=None, user_charset=None, checksums=True, ) -> DataTree: groups_dict = self.open_groups_as_dict( filename_or_obj, mask_and_scale=mask_and_scale, decode_times=decode_times, concat_characters=concat_characters, decode_coords=decode_coords, drop_variables=drop_variables, use_cftime=use_cftime, decode_timedelta=decode_timedelta, group=group, application=application, session=session, timeout=timeout, verify=verify, user_charset=user_charset, checksums=checksums, ) return datatree_from_dict_with_io_cleanup(groups_dict) def open_groups_as_dict( self, filename_or_obj: T_PathFileOrDataStore, *, mask_and_scale=True, decode_times=True, concat_characters=True, decode_coords=True, drop_variables: str | Iterable[str] | None = None, use_cftime=None, decode_timedelta=None, group: str | None = None, application=None, session=None, timeout=None, verify=None, user_charset=None, checksums=True, ) -> dict[str, Dataset]: from xarray.core.treenode import NodePath filename_or_obj = _normalize_path(filename_or_obj) store = PydapDataStore.open( url=filename_or_obj, application=application, session=session, timeout=timeout, verify=verify, user_charset=user_charset, checksums=checksums, ) # Check for a group and make it a parent if it exists if group: parent = str(NodePath("/") / NodePath(group)) else: parent = str(NodePath("/")) groups_dict = {} group_names = [parent] # construct fully qualified path to group try: # this works for pydap >= 3.5.1 Groups = store.ds[parent].groups() except AttributeError: # THIS IS ONLY NEEDED FOR `pydap == 3.5.0` # `pydap>= 3.5.1` has a new method `groups()` # that returns a dict of group names and their paths def group_fqn(store, path=None, g_fqn=None) -> dict[str, str]: """To be removed for pydap > 3.5.0. 
Derives the fully qualifying name of a Group.""" from pydap.model import GroupType if not path: path = "/" # parent if not g_fqn: g_fqn = {} groups = [ store[key].id for key in store.keys() if isinstance(store[key], GroupType) ] for g in groups: g_fqn.update({g: path}) subgroups = [ var for var in store[g] if isinstance(store[g][var], GroupType) ] if len(subgroups) > 0: npath = path + g g_fqn = group_fqn(store[g], npath, g_fqn) return g_fqn Groups = group_fqn(store.ds) group_names += [ str(NodePath(path_to_group) / NodePath(group)) for group, path_to_group in Groups.items() ] for path_group in group_names: # get a group from the store store.group = path_group store_entrypoint = StoreBackendEntrypoint() with close_on_error(store): group_ds = store_entrypoint.open_dataset( store, mask_and_scale=mask_and_scale, decode_times=decode_times, concat_characters=concat_characters, decode_coords=decode_coords, drop_variables=drop_variables, use_cftime=use_cftime, decode_timedelta=decode_timedelta, ) if group: group_name = str(NodePath(path_group).relative_to(parent)) else: group_name = str(NodePath(path_group)) groups_dict[group_name] = group_ds return groups_dict BACKEND_ENTRYPOINTS["pydap"] = ("pydap", PydapBackendEntrypoint) python-xarray-2026.01.0/xarray/backends/api.py0000664000175000017500000022132615136607163021254 0ustar alastairalastairfrom __future__ import annotations import os from collections.abc import ( Callable, Iterable, Mapping, Sequence, ) from functools import partial from typing import ( TYPE_CHECKING, Any, Literal, TypeVar, Union, cast, ) from xarray.backends import plugins from xarray.backends.common import ( T_PathFileOrDataStore, _find_absolute_paths, _normalize_path, ) from xarray.coders import CFDatetimeCoder, CFTimedeltaCoder from xarray.core import dtypes, indexing from xarray.core.coordinates import Coordinates from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset from xarray.core.datatree import DataTree from xarray.core.indexes import Index from xarray.core.treenode import group_subtrees from xarray.core.types import ReadBuffer from xarray.core.utils import emit_user_level_warning, is_remote_uri from xarray.namedarray.daskmanager import DaskManager from xarray.namedarray.parallelcompat import guess_chunkmanager from xarray.namedarray.utils import _get_chunk from xarray.structure.chunks import _maybe_chunk from xarray.structure.combine import ( _infer_concat_order_from_positions, _nested_combine, combine_by_coords, ) from xarray.util.deprecation_helpers import ( _COMPAT_DEFAULT, _COORDS_DEFAULT, _DATA_VARS_DEFAULT, _JOIN_DEFAULT, CombineKwargDefault, ) if TYPE_CHECKING: try: from dask.delayed import Delayed except ImportError: Delayed = None # type: ignore[assignment, misc] from xarray.backends.common import BackendEntrypoint from xarray.core.types import ( CombineAttrsOptions, CompatOptions, ErrorOptionsWithWarn, JoinOptions, NestedSequence, T_Chunks, ) T_NetcdfEngine = Literal["netcdf4", "scipy", "h5netcdf"] T_Engine = Union[ T_NetcdfEngine, Literal["pydap", "zarr"], # noqa: PYI051 type[BackendEntrypoint], str, # no nice typing support for custom backends None, ] T_NetcdfTypes = Literal[ "NETCDF4", "NETCDF4_CLASSIC", "NETCDF3_64BIT", "NETCDF3_CLASSIC" ] DATAARRAY_NAME = "__xarray_dataarray_name__" DATAARRAY_VARIABLE = "__xarray_dataarray_variable__" def _resolve_decoders_kwargs(decode_cf, open_backend_dataset_parameters, **decoders): for d in list(decoders): if decode_cf is False and d in open_backend_dataset_parameters: decoders[d] = 
False if decoders[d] is None: decoders.pop(d) return decoders def _get_mtime(filename_or_obj): # if passed an actual file path, augment the token with # the file modification time mtime = None try: path = os.fspath(filename_or_obj) except TypeError: path = None if path and not is_remote_uri(path): mtime = os.path.getmtime(os.path.expanduser(filename_or_obj)) return mtime def _protect_dataset_variables_inplace(dataset: Dataset, cache: bool) -> None: for name, variable in dataset.variables.items(): if name not in dataset._indexes: # no need to protect IndexVariable objects data: indexing.ExplicitlyIndexedNDArrayMixin data = indexing.CopyOnWriteArray(variable._data) if cache: data = indexing.MemoryCachedArray(data) variable.data = data def _protect_datatree_variables_inplace(tree: DataTree, cache: bool) -> None: for node in tree.subtree: _protect_dataset_variables_inplace(node.dataset, cache) def _finalize_store(writes, store): """Finalize this store by explicitly syncing and closing""" del writes # ensure writing is done first store.close() def delayed_close_after_writes(writes, store): import dask return dask.delayed(_finalize_store)(writes, store) def _multi_file_closer(closers): for closer in closers: closer() def load_dataset(filename_or_obj: T_PathFileOrDataStore, **kwargs) -> Dataset: """Open, load into memory, and close a Dataset from a file or file-like object. This is a thin wrapper around :py:meth:`~xarray.open_dataset`. It differs from `open_dataset` in that it loads the Dataset into memory, closes the file, and returns the Dataset. In contrast, `open_dataset` keeps the file handle open and lazy loads its contents. All parameters are passed directly to `open_dataset`. See that documentation for further details. Returns ------- dataset : Dataset The newly created Dataset. See Also -------- open_dataset """ if "cache" in kwargs: raise TypeError("cache has no effect in this context") with open_dataset(filename_or_obj, **kwargs) as ds: return ds.load() def load_dataarray(filename_or_obj: T_PathFileOrDataStore, **kwargs) -> DataArray: """Open, load into memory, and close a DataArray from a file or file-like object containing a single data variable. This is a thin wrapper around :py:meth:`~xarray.open_dataarray`. It differs from `open_dataarray` in that it loads the Dataset into memory, closes the file, and returns the Dataset. In contrast, `open_dataarray` keeps the file handle open and lazy loads its contents. All parameters are passed directly to `open_dataarray`. See that documentation for further details. Returns ------- datarray : DataArray The newly created DataArray. See Also -------- open_dataarray """ if "cache" in kwargs: raise TypeError("cache has no effect in this context") with open_dataarray(filename_or_obj, **kwargs) as da: return da.load() def load_datatree(filename_or_obj: T_PathFileOrDataStore, **kwargs) -> DataTree: """Open, load into memory, and close a DataTree from a file or file-like object. This is a thin wrapper around :py:meth:`~xarray.open_datatree`. It differs from `open_datatree` in that it loads the DataTree into memory, closes the file, and returns the DataTree. In contrast, `open_datatree` keeps the file handle open and lazy loads its contents. All parameters are passed directly to `open_datatree`. See that documentation for further details. Returns ------- datatree : DataTree The newly created DataTree. 
See Also -------- open_datatree """ if "cache" in kwargs: raise TypeError("cache has no effect in this context") with open_datatree(filename_or_obj, **kwargs) as dt: return dt.load() def _chunk_ds( backend_ds, filename_or_obj, engine, chunks, overwrite_encoded_chunks, inline_array, chunked_array_type, from_array_kwargs, **extra_tokens, ): chunkmanager = guess_chunkmanager(chunked_array_type) # TODO refactor to move this dask-specific logic inside the DaskManager class if isinstance(chunkmanager, DaskManager): from dask.base import tokenize mtime = _get_mtime(filename_or_obj) token = tokenize(filename_or_obj, mtime, engine, chunks, **extra_tokens) name_prefix = "open_dataset-" else: # not used token = (None,) name_prefix = None variables = {} for name, var in backend_ds.variables.items(): if var._in_memory: variables[name] = var continue var_chunks = _get_chunk( var._data, chunks, chunkmanager, preferred_chunks=var.encoding.get("preferred_chunks", {}), dims=var.dims, ) variables[name] = _maybe_chunk( name, var, var_chunks, overwrite_encoded_chunks=overwrite_encoded_chunks, name_prefix=name_prefix, token=token, inline_array=inline_array, chunked_array_type=chunkmanager, from_array_kwargs=from_array_kwargs.copy(), ) return backend_ds._replace(variables) def _maybe_create_default_indexes(ds): to_index = { name: coord.variable for name, coord in ds.coords.items() if coord.dims == (name,) and name not in ds.xindexes } return ds.assign_coords(Coordinates(to_index)) def _dataset_from_backend_dataset( backend_ds, filename_or_obj, engine, chunks, cache, overwrite_encoded_chunks, inline_array, chunked_array_type, from_array_kwargs, create_default_indexes, **extra_tokens, ): if not isinstance(chunks, int | dict) and chunks not in {None, "auto"}: raise ValueError( f"chunks must be an int, dict, 'auto', or None. Instead found {chunks}." ) _protect_dataset_variables_inplace(backend_ds, cache) if create_default_indexes: ds = _maybe_create_default_indexes(backend_ds) else: ds = backend_ds if chunks is not None: ds = _chunk_ds( ds, filename_or_obj, engine, chunks, overwrite_encoded_chunks, inline_array, chunked_array_type, from_array_kwargs, **extra_tokens, ) ds.set_close(backend_ds._close) # Ensure source filename always stored in dataset object if "source" not in ds.encoding: path = getattr(filename_or_obj, "path", filename_or_obj) if isinstance(path, str | os.PathLike): ds.encoding["source"] = _normalize_path(path) return ds def _datatree_from_backend_datatree( backend_tree, filename_or_obj, engine, chunks, cache, overwrite_encoded_chunks, inline_array, chunked_array_type, from_array_kwargs, create_default_indexes, **extra_tokens, ): if not isinstance(chunks, int | dict) and chunks not in {None, "auto"}: raise ValueError( f"chunks must be an int, dict, 'auto', or None. Instead found {chunks}." 
) _protect_datatree_variables_inplace(backend_tree, cache) if create_default_indexes: tree = backend_tree.map_over_datasets(_maybe_create_default_indexes) else: tree = backend_tree if chunks is not None: tree = DataTree.from_dict( { path: _chunk_ds( node.dataset, filename_or_obj, engine, chunks, overwrite_encoded_chunks, inline_array, chunked_array_type, from_array_kwargs, node=path, **extra_tokens, ) for path, [node] in group_subtrees(tree) }, name=tree.name, ) if create_default_indexes or chunks is not None: for path, [node] in group_subtrees(backend_tree): tree[path].set_close(node._close) # Ensure source filename always stored in dataset object if "source" not in tree.encoding: path = getattr(filename_or_obj, "path", filename_or_obj) if isinstance(path, str | os.PathLike): tree.encoding["source"] = _normalize_path(path) return tree def open_dataset( filename_or_obj: T_PathFileOrDataStore, *, engine: T_Engine = None, chunks: T_Chunks = None, cache: bool | None = None, decode_cf: bool | None = None, mask_and_scale: bool | Mapping[str, bool] | None = None, decode_times: ( bool | CFDatetimeCoder | Mapping[str, bool | CFDatetimeCoder] | None ) = None, decode_timedelta: ( bool | CFTimedeltaCoder | Mapping[str, bool | CFTimedeltaCoder] | None ) = None, use_cftime: bool | Mapping[str, bool] | None = None, concat_characters: bool | Mapping[str, bool] | None = None, decode_coords: Literal["coordinates", "all"] | bool | None = None, drop_variables: str | Iterable[str] | None = None, create_default_indexes: bool = True, inline_array: bool = False, chunked_array_type: str | None = None, from_array_kwargs: dict[str, Any] | None = None, backend_kwargs: dict[str, Any] | None = None, **kwargs, ) -> Dataset: """Open and decode a dataset from a file or file-like object. Parameters ---------- filename_or_obj : str, Path, file-like, bytes, memoryview or DataStore Strings and Path objects are interpreted as a path to a netCDF file or an OpenDAP URL and opened with python-netCDF4, unless the filename ends with .gz, in which case the file is gunzipped and opened with scipy.io.netcdf (only netCDF3 supported). Bytes, memoryview and file-like objects are opened by scipy.io.netcdf (netCDF3) or h5netcdf (netCDF4). engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "zarr", None}\ , installed backend \ or subclass of xarray.backends.BackendEntrypoint, optional Engine to use when reading files. If not provided, the default engine is chosen based on available dependencies, by default preferring "netcdf4" over "h5netcdf" over "scipy" (customizable via ``netcdf_engine_order`` in ``xarray.set_options()``). A custom backend class (a subclass of ``BackendEntrypoint``) can also be used. chunks : int, dict, 'auto' or None, default: None If provided, used to load the data into dask arrays. - ``chunks="auto"`` will use dask ``auto`` chunking taking into account the engine preferred chunks. - ``chunks=None`` skips using dask. This uses xarray's internally private :ref:`lazy indexing classes `, but data is eagerly loaded into memory as numpy arrays when accessed. This can be more efficient for smaller arrays or when large arrays are sliced before computation. - ``chunks=-1`` loads the data with dask using a single chunk for all arrays. - ``chunks={}`` loads the data with dask using the engine's preferred chunk size, generally identical to the format's chunk size. If not available, a single chunk for all arrays. See dask chunking for more details. 
cache : bool, optional If True, cache data loaded from the underlying datastore in memory as NumPy arrays when accessed to avoid reading from the underlying data- store multiple times. Defaults to True unless you specify the `chunks` argument to use dask, in which case it defaults to False. Does not change the behavior of coordinates corresponding to dimensions, which always load their data from disk into a ``pandas.Index``. decode_cf : bool, optional Whether to decode these variables, assuming they were saved according to CF conventions. mask_and_scale : bool or dict-like, optional If True, replace array values equal to `_FillValue` with NA and scale values according to the formula `original_values * scale_factor + add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are taken from variable attributes (if they exist). If the `_FillValue` or `missing_value` attribute contains multiple values a warning will be issued and all array values matching one of the multiple values will be replaced by NA. Pass a mapping, e.g. ``{"my_variable": False}``, to toggle this feature per-variable individually. This keyword may not be supported by all the backends. decode_times : bool, CFDatetimeCoder or dict-like, optional If True, decode times encoded in the standard NetCDF datetime format into datetime objects. Otherwise, use :py:class:`coders.CFDatetimeCoder` or leave them encoded as numbers. Pass a mapping, e.g. ``{"my_variable": False}``, to toggle this feature per-variable individually. This keyword may not be supported by all the backends. decode_timedelta : bool, CFTimedeltaCoder, or dict-like, optional If True, decode variables and coordinates with time units in {"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"} into timedelta objects. If False, leave them encoded as numbers. If None (default), assume the same value of ``decode_times``; if ``decode_times`` is a :py:class:`coders.CFDatetimeCoder` instance, this takes the form of a :py:class:`coders.CFTimedeltaCoder` instance with a matching ``time_unit``. Pass a mapping, e.g. ``{"my_variable": False}``, to toggle this feature per-variable individually. This keyword may not be supported by all the backends. use_cftime: bool or dict-like, optional Only relevant if encoded dates come from a standard calendar (e.g. "gregorian", "proleptic_gregorian", "standard", or not specified). If None (default), attempt to decode times to ``np.datetime64[ns]`` objects; if this is not possible, decode times to ``cftime.datetime`` objects. If True, always decode times to ``cftime.datetime`` objects, regardless of whether or not they can be represented using ``np.datetime64[ns]`` objects. If False, always decode times to ``np.datetime64[ns]`` objects; if this is not possible raise an error. Pass a mapping, e.g. ``{"my_variable": False}``, to toggle this feature per-variable individually. This keyword may not be supported by all the backends. .. deprecated:: 2025.01.1 Please pass a :py:class:`coders.CFDatetimeCoder` instance initialized with ``use_cftime`` to the ``decode_times`` kwarg instead. concat_characters : bool or dict-like, optional If True, concatenate along the last dimension of character arrays to form string arrays. Dimensions will only be concatenated over (and removed) if they have no corresponding variable and if they are only used as the last dimension of character arrays. Pass a mapping, e.g. ``{"my_variable": False}``, to toggle this feature per-variable individually. 
This keyword may not be supported by all the backends. decode_coords : bool or {"coordinates", "all"}, optional Controls which variables are set as coordinate variables: - "coordinates" or True: Set variables referred to in the ``'coordinates'`` attribute of the datasets or individual variables as coordinate variables. - "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and other attributes as coordinate variables. Only existing variables can be set as coordinates. Missing variables will be silently ignored. drop_variables: str or iterable of str, optional A variable or list of variables to exclude from being parsed from the dataset. This may be useful to drop variables with problems or inconsistent values. create_default_indexes : bool, default: True If True, create pandas indexes for :term:`dimension coordinates `, which loads the coordinate data into memory. Set it to False if you want to avoid loading data into memory. Note that backends can still choose to create other indexes. If you want to control that, please refer to the backend's documentation. inline_array: bool, default: False How to include the array in the dask task graph. By default(``inline_array=False``) the array is included in a task by itself, and each chunk refers to that task by its key. With ``inline_array=True``, Dask will instead inline the array directly in the values of the task graph. See :py:func:`dask.array.from_array`. chunked_array_type: str, optional Which chunked array type to coerce this datasets' arrays to. Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEnetryPoint` system. Experimental API that should not be relied upon. from_array_kwargs: dict Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create chunked arrays, via whichever chunk manager is specified through the `chunked_array_type` kwarg. For example if :py:func:`dask.array.Array` objects are used for chunking, additional kwargs will be passed to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon. backend_kwargs: dict Additional keyword arguments passed on to the engine open function, equivalent to `**kwargs`. **kwargs: dict Additional keyword arguments passed on to the engine open function. For example: - 'group': path to the netCDF4 group in the given file to open given as a str,supported by "netcdf4", "h5netcdf", "zarr". - 'lock': resource lock to use when reading data from disk. Only relevant when using dask or another form of parallelism. By default, appropriate locks are chosen to safely read and write files with the currently active dask scheduler. Supported by "netcdf4", "h5netcdf", "scipy". See engine open function for kwargs accepted by each specific engine. Returns ------- dataset : Dataset The newly created dataset. Notes ----- ``open_dataset`` opens the file with read-only access. When you modify values of a Dataset, even one linked to files on disk, only the in-memory copy you are manipulating in xarray is modified: the original file on disk is never touched. 
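Examples
--------
A hypothetical sketch of typical usage; the file name, engine and chunk
sizes below are placeholder choices for illustration only:

>>> ds = xr.open_dataset(
...     "example.nc", engine="netcdf4", chunks={"time": 100}
... )  # doctest: +SKIP
>>> ds.mean("time").compute()  # work stays lazy until explicitly computed  # doctest: +SKIP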
See Also -------- open_mfdataset """ if cache is None: cache = chunks is None if backend_kwargs is not None: kwargs.update(backend_kwargs) if engine is None: engine = plugins.guess_engine(filename_or_obj) if from_array_kwargs is None: from_array_kwargs = {} backend = plugins.get_backend(engine) decoders = _resolve_decoders_kwargs( decode_cf, open_backend_dataset_parameters=backend.open_dataset_parameters, mask_and_scale=mask_and_scale, decode_times=decode_times, decode_timedelta=decode_timedelta, concat_characters=concat_characters, use_cftime=use_cftime, decode_coords=decode_coords, ) overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None) backend_ds = backend.open_dataset( filename_or_obj, drop_variables=drop_variables, **decoders, **kwargs, ) ds = _dataset_from_backend_dataset( backend_ds, filename_or_obj, engine, chunks, cache, overwrite_encoded_chunks, inline_array, chunked_array_type, from_array_kwargs, drop_variables=drop_variables, create_default_indexes=create_default_indexes, **decoders, **kwargs, ) return ds def open_dataarray( filename_or_obj: T_PathFileOrDataStore, *, engine: T_Engine = None, chunks: T_Chunks = None, cache: bool | None = None, decode_cf: bool | None = None, mask_and_scale: bool | None = None, decode_times: ( bool | CFDatetimeCoder | Mapping[str, bool | CFDatetimeCoder] | None ) = None, decode_timedelta: bool | CFTimedeltaCoder | None = None, use_cftime: bool | None = None, concat_characters: bool | None = None, decode_coords: Literal["coordinates", "all"] | bool | None = None, drop_variables: str | Iterable[str] | None = None, create_default_indexes: bool = True, inline_array: bool = False, chunked_array_type: str | None = None, from_array_kwargs: dict[str, Any] | None = None, backend_kwargs: dict[str, Any] | None = None, **kwargs, ) -> DataArray: """Open a DataArray from a file or file-like object containing a single data variable. This is designed to read netCDF files with only one data variable. If multiple variables are present then a ValueError is raised. Parameters ---------- filename_or_obj : str, Path, file-like, bytes, memoryview or DataStore Strings and Path objects are interpreted as a path to a netCDF file or an OpenDAP URL and opened with python-netCDF4, unless the filename ends with .gz, in which case the file is gunzipped and opened with scipy.io.netcdf (only netCDF3 supported). Bytes, memoryview and file-like objects are opened by scipy.io.netcdf (netCDF3) or h5netcdf (netCDF4). engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "zarr", None}\ , installed backend \ or subclass of xarray.backends.BackendEntrypoint, optional Engine to use when reading files. If not provided, the default engine is chosen based on available dependencies, by default preferring "netcdf4" over "h5netcdf" over "scipy" (customizable via ``netcdf_engine_order`` in ``xarray.set_options()``). A custom backend class (a subclass of ``BackendEntrypoint``) can also be used. chunks : int, dict, 'auto' or None, default: None If provided, used to load the data into dask arrays. - ``chunks='auto'`` will use dask ``auto`` chunking taking into account the engine preferred chunks. - ``chunks=None`` skips using dask. This uses xarray's internally private :ref:`lazy indexing classes `, but data is eagerly loaded into memory as numpy arrays when accessed. This can be more efficient for smaller arrays, though results may vary. - ``chunks=-1`` loads the data with dask using a single chunk for all arrays. 
- ``chunks={}`` loads the data with dask using engine preferred chunks if exposed by the backend, otherwise with a single chunk for all arrays. See dask chunking for more details. cache : bool, optional If True, cache data loaded from the underlying datastore in memory as NumPy arrays when accessed to avoid reading from the underlying data- store multiple times. Defaults to True unless you specify the `chunks` argument to use dask, in which case it defaults to False. Does not change the behavior of coordinates corresponding to dimensions, which always load their data from disk into a ``pandas.Index``. decode_cf : bool, optional Whether to decode these variables, assuming they were saved according to CF conventions. mask_and_scale : bool, optional If True, replace array values equal to `_FillValue` with NA and scale values according to the formula `original_values * scale_factor + add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are taken from variable attributes (if they exist). If the `_FillValue` or `missing_value` attribute contains multiple values a warning will be issued and all array values matching one of the multiple values will be replaced by NA. This keyword may not be supported by all the backends. decode_times : bool, CFDatetimeCoder or dict-like, optional If True, decode times encoded in the standard NetCDF datetime format into datetime objects. Otherwise, use :py:class:`coders.CFDatetimeCoder` or leave them encoded as numbers. Pass a mapping, e.g. ``{"my_variable": False}``, to toggle this feature per-variable individually. This keyword may not be supported by all the backends. decode_timedelta : bool, optional If True, decode variables and coordinates with time units in {"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"} into timedelta objects. If False, leave them encoded as numbers. If None (default), assume the same value of ``decode_times``; if ``decode_times`` is a :py:class:`coders.CFDatetimeCoder` instance, this takes the form of a :py:class:`coders.CFTimedeltaCoder` instance with a matching ``time_unit``. This keyword may not be supported by all the backends. use_cftime: bool, optional Only relevant if encoded dates come from a standard calendar (e.g. "gregorian", "proleptic_gregorian", "standard", or not specified). If None (default), attempt to decode times to ``np.datetime64[ns]`` objects; if this is not possible, decode times to ``cftime.datetime`` objects. If True, always decode times to ``cftime.datetime`` objects, regardless of whether or not they can be represented using ``np.datetime64[ns]`` objects. If False, always decode times to ``np.datetime64[ns]`` objects; if this is not possible raise an error. This keyword may not be supported by all the backends. .. deprecated:: 2025.01.1 Please pass a :py:class:`coders.CFDatetimeCoder` instance initialized with ``use_cftime`` to the ``decode_times`` kwarg instead. concat_characters : bool, optional If True, concatenate along the last dimension of character arrays to form string arrays. Dimensions will only be concatenated over (and removed) if they have no corresponding variable and if they are only used as the last dimension of character arrays. This keyword may not be supported by all the backends. decode_coords : bool or {"coordinates", "all"}, optional Controls which variables are set as coordinate variables: - "coordinates" or True: Set variables referred to in the ``'coordinates'`` attribute of the datasets or individual variables as coordinate variables. 
- "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and other attributes as coordinate variables. Only existing variables can be set as coordinates. Missing variables will be silently ignored. drop_variables: str or iterable of str, optional A variable or list of variables to exclude from being parsed from the dataset. This may be useful to drop variables with problems or inconsistent values. create_default_indexes : bool, default: True If True, create pandas indexes for :term:`dimension coordinates `, which loads the coordinate data into memory. Set it to False if you want to avoid loading data into memory. Note that backends can still choose to create other indexes. If you want to control that, please refer to the backend's documentation. inline_array: bool, default: False How to include the array in the dask task graph. By default(``inline_array=False``) the array is included in a task by itself, and each chunk refers to that task by its key. With ``inline_array=True``, Dask will instead inline the array directly in the values of the task graph. See :py:func:`dask.array.from_array`. chunked_array_type: str, optional Which chunked array type to coerce the underlying data array to. Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEnetryPoint` system. Experimental API that should not be relied upon. from_array_kwargs: dict Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create chunked arrays, via whichever chunk manager is specified through the `chunked_array_type` kwarg. For example if :py:func:`dask.array.Array` objects are used for chunking, additional kwargs will be passed to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon. backend_kwargs: dict Additional keyword arguments passed on to the engine open function, equivalent to `**kwargs`. **kwargs: dict Additional keyword arguments passed on to the engine open function. For example: - 'group': path to the netCDF4 group in the given file to open given as a str,supported by "netcdf4", "h5netcdf", "zarr". - 'lock': resource lock to use when reading data from disk. Only relevant when using dask or another form of parallelism. By default, appropriate locks are chosen to safely read and write files with the currently active dask scheduler. Supported by "netcdf4", "h5netcdf", "scipy". See engine open function for kwargs accepted by each specific engine. Notes ----- This is designed to be fully compatible with `DataArray.to_netcdf`. Saving using `DataArray.to_netcdf` and then loading with this function will produce an identical result. All parameters are passed directly to `xarray.open_dataset`. See that documentation for further details. See also -------- open_dataset """ dataset = open_dataset( filename_or_obj, decode_cf=decode_cf, mask_and_scale=mask_and_scale, decode_times=decode_times, concat_characters=concat_characters, decode_coords=decode_coords, engine=engine, chunks=chunks, cache=cache, drop_variables=drop_variables, create_default_indexes=create_default_indexes, inline_array=inline_array, chunked_array_type=chunked_array_type, from_array_kwargs=from_array_kwargs, backend_kwargs=backend_kwargs, use_cftime=use_cftime, decode_timedelta=decode_timedelta, **kwargs, ) if len(dataset.data_vars) != 1: if len(dataset.data_vars) == 0: msg = "Given file dataset contains no data variables." else: msg = ( "Given file dataset contains more than one data " "variable. 
Please read with xarray.open_dataset and " "then select the variable you want." ) raise ValueError(msg) else: (data_array,) = dataset.data_vars.values() data_array.set_close(dataset._close) # Reset names if they were changed during saving # to ensure that we can 'roundtrip' perfectly if DATAARRAY_NAME in dataset.attrs: data_array.name = dataset.attrs[DATAARRAY_NAME] del dataset.attrs[DATAARRAY_NAME] if data_array.name == DATAARRAY_VARIABLE: data_array.name = None return data_array def open_datatree( filename_or_obj: T_PathFileOrDataStore, *, engine: T_Engine = None, chunks: T_Chunks = None, cache: bool | None = None, decode_cf: bool | None = None, mask_and_scale: bool | Mapping[str, bool] | None = None, decode_times: ( bool | CFDatetimeCoder | Mapping[str, bool | CFDatetimeCoder] | None ) = None, decode_timedelta: ( bool | CFTimedeltaCoder | Mapping[str, bool | CFTimedeltaCoder] | None ) = None, use_cftime: bool | Mapping[str, bool] | None = None, concat_characters: bool | Mapping[str, bool] | None = None, decode_coords: Literal["coordinates", "all"] | bool | None = None, drop_variables: str | Iterable[str] | None = None, create_default_indexes: bool = True, inline_array: bool = False, chunked_array_type: str | None = None, from_array_kwargs: dict[str, Any] | None = None, backend_kwargs: dict[str, Any] | None = None, **kwargs, ) -> DataTree: """ Open and decode a DataTree from a file or file-like object, creating one tree node for each group in the file. Parameters ---------- filename_or_obj : str, Path, file-like, bytes or DataStore Strings and Path objects are interpreted as a path to a netCDF file or Zarr store. Bytes and memoryview objects are interpreted as file contents. engine : {"netcdf4", "h5netcdf", "zarr", None}, \ installed backend or xarray.backends.BackendEntrypoint, optional Engine to use when reading files. If not provided, the default engine is chosen based on available dependencies, by default preferring "h5netcdf" over "netcdf4" (customizable via ``netcdf_engine_order`` in ``xarray.set_options()``). A custom backend class (a subclass of ``BackendEntrypoint``) can also be used. chunks : int, dict, 'auto' or None, default: None If provided, used to load the data into dask arrays. - ``chunks="auto"`` will use dask ``auto`` chunking taking into account the engine preferred chunks. - ``chunks=None`` skips using dask. This uses xarray's internally private :ref:`lazy indexing classes `, but data is eagerly loaded into memory as numpy arrays when accessed. This can be more efficient for smaller arrays, though results may vary. - ``chunks=-1`` loads the data with dask using a single chunk for all arrays. - ``chunks={}`` loads the data with dask using the engine's preferred chunk size, generally identical to the format's chunk size. If not available, a single chunk for all arrays. See dask chunking for more details. cache : bool, optional If True, cache data loaded from the underlying datastore in memory as NumPy arrays when accessed to avoid reading from the underlying data- store multiple times. Defaults to True unless you specify the `chunks` argument to use dask, in which case it defaults to False. Does not change the behavior of coordinates corresponding to dimensions, which always load their data from disk into a ``pandas.Index``. decode_cf : bool, optional Whether to decode these variables, assuming they were saved according to CF conventions. 
mask_and_scale : bool or dict-like, optional If True, replace array values equal to `_FillValue` with NA and scale values according to the formula `original_values * scale_factor + add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are taken from variable attributes (if they exist). If the `_FillValue` or `missing_value` attribute contains multiple values a warning will be issued and all array values matching one of the multiple values will be replaced by NA. Pass a mapping, e.g. ``{"my_variable": False}``, to toggle this feature per-variable individually. This keyword may not be supported by all the backends. decode_times : bool, CFDatetimeCoder or dict-like, optional If True, decode times encoded in the standard NetCDF datetime format into datetime objects. Otherwise, use :py:class:`coders.CFDatetimeCoder` or leave them encoded as numbers. Pass a mapping, e.g. ``{"my_variable": False}``, to toggle this feature per-variable individually. This keyword may not be supported by all the backends. decode_timedelta : bool or dict-like, optional If True, decode variables and coordinates with time units in {"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"} into timedelta objects. If False, leave them encoded as numbers. If None (default), assume the same value of ``decode_times``; if ``decode_times`` is a :py:class:`coders.CFDatetimeCoder` instance, this takes the form of a :py:class:`coders.CFTimedeltaCoder` instance with a matching ``time_unit``. Pass a mapping, e.g. ``{"my_variable": False}``, to toggle this feature per-variable individually. This keyword may not be supported by all the backends. use_cftime: bool or dict-like, optional Only relevant if encoded dates come from a standard calendar (e.g. "gregorian", "proleptic_gregorian", "standard", or not specified). If None (default), attempt to decode times to ``np.datetime64[ns]`` objects; if this is not possible, decode times to ``cftime.datetime`` objects. If True, always decode times to ``cftime.datetime`` objects, regardless of whether or not they can be represented using ``np.datetime64[ns]`` objects. If False, always decode times to ``np.datetime64[ns]`` objects; if this is not possible raise an error. Pass a mapping, e.g. ``{"my_variable": False}``, to toggle this feature per-variable individually. This keyword may not be supported by all the backends. .. deprecated:: 2025.01.1 Please pass a :py:class:`coders.CFDatetimeCoder` instance initialized with ``use_cftime`` to the ``decode_times`` kwarg instead. concat_characters : bool or dict-like, optional If True, concatenate along the last dimension of character arrays to form string arrays. Dimensions will only be concatenated over (and removed) if they have no corresponding variable and if they are only used as the last dimension of character arrays. Pass a mapping, e.g. ``{"my_variable": False}``, to toggle this feature per-variable individually. This keyword may not be supported by all the backends. decode_coords : bool or {"coordinates", "all"}, optional Controls which variables are set as coordinate variables: - "coordinates" or True: Set variables referred to in the ``'coordinates'`` attribute of the datasets or individual variables as coordinate variables. - "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and other attributes as coordinate variables. Only existing variables can be set as coordinates. Missing variables will be silently ignored. 
drop_variables: str or iterable of str, optional A variable or list of variables to exclude from being parsed from the dataset. This may be useful to drop variables with problems or inconsistent values. create_default_indexes : bool, default: True If True, create pandas indexes for :term:`dimension coordinates `, which loads the coordinate data into memory. Set it to False if you want to avoid loading data into memory. Note that backends can still choose to create other indexes. If you want to control that, please refer to the backend's documentation. inline_array: bool, default: False How to include the array in the dask task graph. By default(``inline_array=False``) the array is included in a task by itself, and each chunk refers to that task by its key. With ``inline_array=True``, Dask will instead inline the array directly in the values of the task graph. See :py:func:`dask.array.from_array`. chunked_array_type: str, optional Which chunked array type to coerce this datasets' arrays to. Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEnetryPoint` system. Experimental API that should not be relied upon. from_array_kwargs: dict Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create chunked arrays, via whichever chunk manager is specified through the `chunked_array_type` kwarg. For example if :py:func:`dask.array.Array` objects are used for chunking, additional kwargs will be passed to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon. backend_kwargs: dict Additional keyword arguments passed on to the engine open function, equivalent to `**kwargs`. **kwargs: dict Additional keyword arguments passed on to the engine open function. For example: - 'group': path to the group in the given file to open as the root group as a str. - 'lock': resource lock to use when reading data from disk. Only relevant when using dask or another form of parallelism. By default, appropriate locks are chosen to safely read and write files with the currently active dask scheduler. Supported by "netcdf4", "h5netcdf", "scipy". See engine open function for kwargs accepted by each specific engine. Returns ------- tree : DataTree The newly created datatree. Notes ----- ``open_datatree`` opens the file with read-only access. When you modify values of a DataTree, even one linked to files on disk, only the in-memory copy you are manipulating in xarray is modified: the original file on disk is never touched. 
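Examples
--------
A hypothetical sketch; the Zarr store path and group name below are
placeholders:

>>> tree = xr.open_datatree("example.zarr", engine="zarr", chunks={})  # doctest: +SKIP
>>> tree["/simulation"].dataset  # access one group as a Dataset  # doctest: +SKIP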
See Also -------- xarray.open_groups xarray.open_dataset """ if cache is None: cache = chunks is None if backend_kwargs is not None: kwargs.update(backend_kwargs) if engine is None: engine = plugins.guess_engine(filename_or_obj, must_support_groups=True) if from_array_kwargs is None: from_array_kwargs = {} backend = plugins.get_backend(engine) decoders = _resolve_decoders_kwargs( decode_cf, open_backend_dataset_parameters=backend.open_dataset_parameters, mask_and_scale=mask_and_scale, decode_times=decode_times, decode_timedelta=decode_timedelta, concat_characters=concat_characters, use_cftime=use_cftime, decode_coords=decode_coords, ) overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None) backend_tree = backend.open_datatree( filename_or_obj, drop_variables=drop_variables, **decoders, **kwargs, ) tree = _datatree_from_backend_datatree( backend_tree, filename_or_obj, engine, chunks, cache, overwrite_encoded_chunks, inline_array, chunked_array_type, from_array_kwargs, drop_variables=drop_variables, create_default_indexes=create_default_indexes, **decoders, **kwargs, ) return tree def open_groups( filename_or_obj: T_PathFileOrDataStore, *, engine: T_Engine = None, chunks: T_Chunks = None, cache: bool | None = None, decode_cf: bool | None = None, mask_and_scale: bool | Mapping[str, bool] | None = None, decode_times: ( bool | CFDatetimeCoder | Mapping[str, bool | CFDatetimeCoder] | None ) = None, decode_timedelta: ( bool | CFTimedeltaCoder | Mapping[str, bool | CFTimedeltaCoder] | None ) = None, use_cftime: bool | Mapping[str, bool] | None = None, concat_characters: bool | Mapping[str, bool] | None = None, decode_coords: Literal["coordinates", "all"] | bool | None = None, drop_variables: str | Iterable[str] | None = None, create_default_indexes: bool = True, inline_array: bool = False, chunked_array_type: str | None = None, from_array_kwargs: dict[str, Any] | None = None, backend_kwargs: dict[str, Any] | None = None, **kwargs, ) -> dict[str, Dataset]: """ Open and decode a file or file-like object, creating a dictionary containing one xarray Dataset for each group in the file. Useful for an HDF file ("netcdf4" or "h5netcdf") containing many groups that are not alignable with their parents and cannot be opened directly with ``open_datatree``. It is encouraged to use this function to inspect your data, then make the necessary changes to make the structure coercible to a `DataTree` object before calling `DataTree.from_dict()` and proceeding with your analysis. Parameters ---------- filename_or_obj : str, Path, file-like, bytes, memoryview or DataStore Strings and Path objects are interpreted as a path to a netCDF file or Zarr store. Bytes and memoryview objects are interpreted as file contents. engine : {"netcdf4", "h5netcdf", "zarr", None}, \ installed backend or xarray.backends.BackendEntrypoint, optional Engine to use when reading files. If not provided, the default engine is chosen based on available dependencies, by default preferring "h5netcdf" over "netcdf4" (customizable via ``netcdf_engine_order`` in ``xarray.set_options()``). A custom backend class (a subclass of ``BackendEntrypoint``) can also be used. can also be used. chunks : int, dict, 'auto' or None, default: None If provided, used to load the data into dask arrays. - ``chunks="auto"`` will use dask ``auto`` chunking taking into account the engine preferred chunks. - ``chunks=None`` skips using dask. 
This uses xarray's internally private :ref:`lazy indexing classes `, but data is eagerly loaded into memory as numpy arrays when accessed. This can be more efficient for smaller arrays, though results may vary. - ``chunks=-1`` loads the data with dask using a single chunk for all arrays. - ``chunks={}`` loads the data with dask using the engine's preferred chunk size, generally identical to the format's chunk size. If not available, a single chunk for all arrays. See dask chunking for more details. cache : bool, optional If True, cache data loaded from the underlying datastore in memory as NumPy arrays when accessed to avoid reading from the underlying data- store multiple times. Defaults to True unless you specify the `chunks` argument to use dask, in which case it defaults to False. Does not change the behavior of coordinates corresponding to dimensions, which always load their data from disk into a ``pandas.Index``. decode_cf : bool, optional Whether to decode these variables, assuming they were saved according to CF conventions. mask_and_scale : bool or dict-like, optional If True, replace array values equal to `_FillValue` with NA and scale values according to the formula `original_values * scale_factor + add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are taken from variable attributes (if they exist). If the `_FillValue` or `missing_value` attribute contains multiple values a warning will be issued and all array values matching one of the multiple values will be replaced by NA. Pass a mapping, e.g. ``{"my_variable": False}``, to toggle this feature per-variable individually. This keyword may not be supported by all the backends. decode_times : bool, CFDatetimeCoder or dict-like, optional If True, decode times encoded in the standard NetCDF datetime format into datetime objects. Otherwise, use :py:class:`coders.CFDatetimeCoder` or leave them encoded as numbers. Pass a mapping, e.g. ``{"my_variable": False}``, to toggle this feature per-variable individually. This keyword may not be supported by all the backends. decode_timedelta : bool or dict-like, optional If True, decode variables and coordinates with time units in {"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"} into timedelta objects. If False, leave them encoded as numbers. If None (default), assume the same value of ``decode_times``; if ``decode_times`` is a :py:class:`coders.CFDatetimeCoder` instance, this takes the form of a :py:class:`coders.CFTimedeltaCoder` instance with a matching ``time_unit``. This keyword may not be supported by all the backends. use_cftime: bool or dict-like, optional Only relevant if encoded dates come from a standard calendar (e.g. "gregorian", "proleptic_gregorian", "standard", or not specified). If None (default), attempt to decode times to ``np.datetime64[ns]`` objects; if this is not possible, decode times to ``cftime.datetime`` objects. If True, always decode times to ``cftime.datetime`` objects, regardless of whether or not they can be represented using ``np.datetime64[ns]`` objects. If False, always decode times to ``np.datetime64[ns]`` objects; if this is not possible raise an error. Pass a mapping, e.g. ``{"my_variable": False}``, to toggle this feature per-variable individually. This keyword may not be supported by all the backends. .. deprecated:: 2025.01.1 Please pass a :py:class:`coders.CFDatetimeCoder` instance initialized with ``use_cftime`` to the ``decode_times`` kwarg instead. 
concat_characters : bool or dict-like, optional If True, concatenate along the last dimension of character arrays to form string arrays. Dimensions will only be concatenated over (and removed) if they have no corresponding variable and if they are only used as the last dimension of character arrays. Pass a mapping, e.g. ``{"my_variable": False}``, to toggle this feature per-variable individually. This keyword may not be supported by all the backends. decode_coords : bool or {"coordinates", "all"}, optional Controls which variables are set as coordinate variables: - "coordinates" or True: Set variables referred to in the ``'coordinates'`` attribute of the datasets or individual variables as coordinate variables. - "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and other attributes as coordinate variables. Only existing variables can be set as coordinates. Missing variables will be silently ignored. drop_variables: str or iterable of str, optional A variable or list of variables to exclude from being parsed from the dataset. This may be useful to drop variables with problems or inconsistent values. create_default_indexes : bool, default: True If True, create pandas indexes for :term:`dimension coordinates `, which loads the coordinate data into memory. Set it to False if you want to avoid loading data into memory. Note that backends can still choose to create other indexes. If you want to control that, please refer to the backend's documentation. inline_array: bool, default: False How to include the array in the dask task graph. By default(``inline_array=False``) the array is included in a task by itself, and each chunk refers to that task by its key. With ``inline_array=True``, Dask will instead inline the array directly in the values of the task graph. See :py:func:`dask.array.from_array`. chunked_array_type: str, optional Which chunked array type to coerce this datasets' arrays to. Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEnetryPoint` system. Experimental API that should not be relied upon. from_array_kwargs: dict Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create chunked arrays, via whichever chunk manager is specified through the `chunked_array_type` kwarg. For example if :py:func:`dask.array.Array` objects are used for chunking, additional kwargs will be passed to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon. backend_kwargs: dict Additional keyword arguments passed on to the engine open function, equivalent to `**kwargs`. **kwargs: dict Additional keyword arguments passed on to the engine open function. For example: - 'group': path to the group in the given file to open as the root group as a str. - 'lock': resource lock to use when reading data from disk. Only relevant when using dask or another form of parallelism. By default, appropriate locks are chosen to safely read and write files with the currently active dask scheduler. Supported by "netcdf4", "h5netcdf", "scipy". See engine open function for kwargs accepted by each specific engine. Returns ------- groups : dict of str to xarray.Dataset The groups as Dataset objects Notes ----- ``open_groups`` opens the file with read-only access. When you modify values of a Dataset, even one linked to files on disk, only the in-memory copy you are manipulating in xarray is modified: the original file on disk is never touched. 
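Examples
--------
A hypothetical sketch of the inspect-then-restructure workflow described
above; the file name and group paths are placeholders:

>>> groups = xr.open_groups("example.h5", engine="h5netcdf")  # doctest: +SKIP
>>> list(groups)  # e.g. ["/", "/group_a", "/group_a/child"]  # doctest: +SKIP
>>> tree = xr.DataTree.from_dict(groups)  # doctest: +SKIP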
See Also -------- xarray.open_datatree xarray.open_dataset xarray.DataTree.from_dict """ if cache is None: cache = chunks is None if backend_kwargs is not None: kwargs.update(backend_kwargs) if engine is None: engine = plugins.guess_engine(filename_or_obj, must_support_groups=True) if from_array_kwargs is None: from_array_kwargs = {} backend = plugins.get_backend(engine) decoders = _resolve_decoders_kwargs( decode_cf, open_backend_dataset_parameters=(), mask_and_scale=mask_and_scale, decode_times=decode_times, decode_timedelta=decode_timedelta, concat_characters=concat_characters, use_cftime=use_cftime, decode_coords=decode_coords, ) overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None) backend_groups = backend.open_groups_as_dict( filename_or_obj, drop_variables=drop_variables, **decoders, **kwargs, ) groups = { name: _dataset_from_backend_dataset( backend_ds, filename_or_obj, engine, chunks, cache, overwrite_encoded_chunks, inline_array, chunked_array_type, from_array_kwargs, drop_variables=drop_variables, create_default_indexes=create_default_indexes, **decoders, **kwargs, ) for name, backend_ds in backend_groups.items() } return groups _FLike = TypeVar("_FLike", bound=Union[str, ReadBuffer]) def _remove_path( paths: NestedSequence[_FLike], paths_to_remove: set[_FLike] ) -> NestedSequence[_FLike]: # Initialize an empty list to store the result result: list[Union[_FLike, NestedSequence[_FLike]]] = [] for item in paths: if isinstance(item, list): # If the current item is a list, recursively call remove_elements on it nested_result = _remove_path(item, paths_to_remove) if nested_result: # Only add non-empty lists to avoid adding empty lists result.append(nested_result) elif item not in paths_to_remove: # Add the item to the result if it is not in the set of elements to remove result.append(item) return result def open_mfdataset( paths: ( str | os.PathLike | ReadBuffer | NestedSequence[str | os.PathLike | ReadBuffer] ), chunks: T_Chunks = None, concat_dim: ( str | DataArray | Index | Sequence[str] | Sequence[DataArray] | Sequence[Index] | None ) = None, compat: CompatOptions | CombineKwargDefault = _COMPAT_DEFAULT, preprocess: Callable[[Dataset], Dataset] | None = None, engine: T_Engine = None, data_vars: ( Literal["all", "minimal", "different"] | None | list[str] | CombineKwargDefault ) = _DATA_VARS_DEFAULT, coords=_COORDS_DEFAULT, combine: Literal["by_coords", "nested"] = "by_coords", parallel: bool = False, join: JoinOptions | CombineKwargDefault = _JOIN_DEFAULT, attrs_file: str | os.PathLike | None = None, combine_attrs: CombineAttrsOptions = "override", errors: ErrorOptionsWithWarn = "raise", **kwargs, ) -> Dataset: """Open multiple files as a single dataset. If combine='by_coords' then the function ``combine_by_coords`` is used to combine the datasets into one before returning the result, and if combine='nested' then ``combine_nested`` is used. The filepaths must be structured according to which combining function is used, the details of which are given in the documentation for ``combine_by_coords`` and ``combine_nested``. By default ``combine='by_coords'`` will be used. Requires dask to be installed. See documentation for details on dask [1]_. Global attributes from the ``attrs_file`` are used for the combined dataset. Parameters ---------- paths : str or nested sequence of paths Either a string glob in the form ``"path/to/my/files/*.nc"`` or an explicit list of files to open. Paths can be given as strings or as pathlib Paths. 
If concatenation along more than one dimension is desired, then ``paths`` must be a nested list-of-lists (see ``combine_nested`` for details). (A string glob will be expanded to a 1-dimensional list.) chunks : int, dict, 'auto' or None, optional Dictionary with keys given by dimension names and values given by chunk sizes. In general, these should divide the dimensions of each dataset. If int, chunk each dimension by ``chunks``. By default, chunks will be chosen to match the chunks on disk. This may impact performance: please see the full documentation for more details [2]_. This argument is evaluated on a per-file basis, so chunk sizes that span multiple files will be ignored. concat_dim : str, DataArray, Index or a Sequence of these or None, optional Dimensions to concatenate files along. You only need to provide this argument if ``combine='nested'``, and if any of the dimensions along which you want to concatenate is not a dimension in the original datasets, e.g., if you want to stack a collection of 2D arrays along a third dimension. Set ``concat_dim=[..., None, ...]`` explicitly to disable concatenation along a particular dimension. Default is None, which for a 1D list of filepaths is equivalent to opening the files separately and then merging them with ``xarray.merge``. combine : {"by_coords", "nested"}, optional Whether ``xarray.combine_by_coords`` or ``xarray.combine_nested`` is used to combine all the data. Default is to use ``xarray.combine_by_coords``. compat : {"identical", "equals", "broadcast_equals", \ "no_conflicts", "override"}, default: "no_conflicts" String indicating how to compare variables of the same name for potential conflicts when merging: * "broadcast_equals": all values must be equal when variables are broadcast against each other to ensure common dimensions. * "equals": all values and dimensions must be the same. * "identical": all values, dimensions and attributes must be the same. * "no_conflicts": only values which are not null in both datasets must be equal. The returned dataset then contains the combination of all non-null values. * "override": skip comparing and pick variable from first dataset preprocess : callable, optional If provided, call this function on each dataset prior to concatenation. You can find the file-name from which each dataset was loaded in ``ds.encoding["source"]``. engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "zarr", None}\ , installed backend \ or subclass of xarray.backends.BackendEntrypoint, optional Engine to use when reading files. If not provided, the default engine is chosen based on available dependencies, by default preferring "netcdf4" over "h5netcdf" over "scipy" (customizable via ``netcdf_engine_order`` in ``xarray.set_options()``). A custom backend class (a subclass of ``BackendEntrypoint``) can also be used. data_vars : {"minimal", "different", "all", None} or list of str, default: "all" These data variables will be concatenated together: * "minimal": Only data variables in which the dimension already appears are included. * "different": Data variables which are not equal (ignoring attributes) across all datasets are also concatenated (as well as all for which dimension already appears). Beware: this option may load the data payload of data variables into memory if they are not already loaded. * "all": All data variables will be concatenated. * None: Means ``"all"`` if ``concat_dim`` is not present in any of the ``objs``, and ``"minimal"`` if ``concat_dim`` is present in any of ``objs``. 
* list of str: The listed data variables will be concatenated, in addition to the "minimal" data variables. coords : {"minimal", "different", "all"} or list of str, default: "different" These coordinate variables will be concatenated together: * "minimal": Only coordinates in which the dimension already appears are included. * "different": Coordinates which are not equal (ignoring attributes) across all datasets are also concatenated (as well as all for which dimension already appears). Beware: this option may load the data payload of coordinate variables into memory if they are not already loaded. * "all": All coordinate variables will be concatenated, except those corresponding to other dimensions. * list of str: The listed coordinate variables will be concatenated, in addition the "minimal" coordinates. parallel : bool, default: False If True, the open and preprocess steps of this function will be performed in parallel using ``dask.delayed``. Default is False. join : {"outer", "inner", "left", "right", "exact", "override"}, default: "outer" String indicating how to combine differing indexes (excluding concat_dim) in objects - "outer": use the union of object indexes - "inner": use the intersection of object indexes - "left": use indexes from the first object with each dimension - "right": use indexes from the last object with each dimension - "exact": instead of aligning, raise `ValueError` when indexes to be aligned are not equal - "override": if indexes are of same size, rewrite indexes to be those of the first object with that dimension. Indexes for the same dimension must have the same size in all objects. attrs_file : str or path-like, optional Path of the file used to read global attributes from. By default global attributes are read from the first file provided, with wildcard matches sorted by filename. combine_attrs : {"drop", "identical", "no_conflicts", "drop_conflicts", \ "override"} or callable, default: "override" A callable or a string indicating how to combine attrs of the objects being merged: - "drop": empty attrs on returned Dataset. - "identical": all attrs must be the same on every object. - "no_conflicts": attrs from all objects are combined, any that have the same name must also have the same value. - "drop_conflicts": attrs from all objects are combined, any that have the same name but different values are dropped. - "override": skip comparing and copy attrs from the first dataset to the result. If a callable, it must expect a sequence of ``attrs`` dicts and a context object as its only parameters. errors : {"raise", "warn", "ignore"}, default: "raise" String indicating how to handle errors in opening dataset. - "raise": invalid dataset will raise an exception. - "warn": a warning will be issued for each invalid dataset. - "ignore": invalid dataset will be ignored. **kwargs : optional Additional arguments passed on to :py:func:`xarray.open_dataset`. For an overview of some of the possible options, see the documentation of :py:func:`xarray.open_dataset` Returns ------- xarray.Dataset Notes ----- ``open_mfdataset`` opens files with read-only access. When you modify values of a Dataset, even one linked to files on disk, only the in-memory copy you are manipulating in xarray is modified: the original file on disk is never touched. See Also -------- combine_by_coords combine_nested open_dataset Examples -------- A user might want to pass additional arguments into ``preprocess`` when applying some operation to many individual files that are being opened. 
One route to do this is through the use of ``functools.partial``. >>> from functools import partial >>> def _preprocess(x, lon_bnds, lat_bnds): ... return x.sel(lon=slice(*lon_bnds), lat=slice(*lat_bnds)) ... >>> lon_bnds, lat_bnds = (-110, -105), (40, 45) >>> partial_func = partial(_preprocess, lon_bnds=lon_bnds, lat_bnds=lat_bnds) >>> ds = xr.open_mfdataset( ... "file_*.nc", concat_dim="time", preprocess=partial_func ... ) # doctest: +SKIP It is also possible to use any argument to ``open_dataset`` together with ``open_mfdataset``, such as for example ``drop_variables``: >>> ds = xr.open_mfdataset( ... "file.nc", drop_variables=["varname_1", "varname_2"] # any list of vars ... ) # doctest: +SKIP References ---------- .. [1] https://docs.xarray.dev/en/stable/dask.html .. [2] https://docs.xarray.dev/en/stable/dask.html#chunking-and-performance """ paths = _find_absolute_paths(paths, engine=engine, **kwargs) if not paths: raise OSError("no files to open") paths1d: list[str | ReadBuffer] if combine == "nested": if isinstance(concat_dim, str | DataArray) or concat_dim is None: concat_dim = [concat_dim] # type: ignore[assignment] # This creates a flat list which is easier to iterate over, whilst # encoding the originally-supplied structure as "ids". # The "ids" are not used at all if combine='by_coords`. combined_ids_paths = _infer_concat_order_from_positions(paths) ids, paths1d = ( list(combined_ids_paths.keys()), list(combined_ids_paths.values()), ) elif concat_dim is not None: raise ValueError( "When combine='by_coords', passing a value for `concat_dim` has no " "effect. To manually combine along a specific dimension you should " "instead specify combine='nested' along with a value for `concat_dim`.", ) else: paths1d = paths # type: ignore[assignment] open_kwargs = dict(engine=engine, chunks=chunks or {}, **kwargs) if parallel: import dask # wrap the open_dataset, getattr, and preprocess with delayed open_ = dask.delayed(open_dataset) getattr_ = dask.delayed(getattr) if preprocess is not None: preprocess = dask.delayed(preprocess) else: open_ = open_dataset getattr_ = getattr if errors not in ("raise", "warn", "ignore"): raise ValueError( f"'errors' must be 'raise', 'warn' or 'ignore', got '{errors}'" ) datasets = [] invalid_paths = set() for p in paths1d: try: ds = open_(p, **open_kwargs) datasets.append(ds) except Exception as e: if errors == "raise": raise elif errors == "warn": emit_user_level_warning(f"Could not open {p} due to {e}. 
Ignoring.") # remove invalid paths invalid_paths.add(p) if invalid_paths: paths = _remove_path(paths, invalid_paths) if combine == "nested": # Create new ids and paths based on removed items combined_ids_paths = _infer_concat_order_from_positions(paths) ids = list(combined_ids_paths.keys()) closers = [getattr_(ds, "_close") for ds in datasets] if preprocess is not None: datasets = [preprocess(ds) for ds in datasets] if parallel: # calling compute here will return the datasets/file_objs lists, # the underlying datasets will still be stored as dask arrays datasets, closers = dask.compute(datasets, closers) # Combine all datasets, closing them in case of a ValueError try: if combine == "nested": # Combined nested list by successive concat and merge operations # along each dimension, using structure given by "ids" combined = _nested_combine( datasets, concat_dims=concat_dim, compat=compat, data_vars=data_vars, coords=coords, ids=ids, join=join, combine_attrs=combine_attrs, fill_value=dtypes.NA, ) elif combine == "by_coords": # Redo ordering from coordinates, ignoring how they were ordered # previously combined = combine_by_coords( datasets, compat=compat, data_vars=data_vars, coords=coords, join=join, combine_attrs=combine_attrs, ) else: raise ValueError( f"{combine} is an invalid option for the keyword argument ``combine``" ) except ValueError: for ds in datasets: ds.close() raise combined.set_close(partial(_multi_file_closer, closers)) # read global attributes from the attrs_file or from the first dataset if attrs_file is not None: if isinstance(attrs_file, os.PathLike): attrs_file = cast(str, os.fspath(attrs_file)) combined.attrs = datasets[paths1d.index(attrs_file)].attrs return combined python-xarray-2026.01.0/xarray/backends/store.py0000664000175000017500000000445315136607163021637 0ustar alastairalastairfrom __future__ import annotations from collections.abc import Iterable from typing import TYPE_CHECKING from xarray import conventions from xarray.backends.common import ( BACKEND_ENTRYPOINTS, AbstractDataStore, BackendEntrypoint, T_PathFileOrDataStore, ) from xarray.core.coordinates import Coordinates from xarray.core.dataset import Dataset if TYPE_CHECKING: pass class StoreBackendEntrypoint(BackendEntrypoint): description = "Open AbstractDataStore instances in Xarray" url = "https://docs.xarray.dev/en/stable/generated/xarray.backends.StoreBackendEntrypoint.html" def guess_can_open(self, filename_or_obj: T_PathFileOrDataStore) -> bool: return isinstance(filename_or_obj, AbstractDataStore) def open_dataset( self, filename_or_obj: T_PathFileOrDataStore, *, mask_and_scale=True, decode_times=True, concat_characters=True, decode_coords=True, drop_variables: str | Iterable[str] | None = None, set_indexes: bool = True, use_cftime=None, decode_timedelta=None, ) -> Dataset: assert isinstance(filename_or_obj, AbstractDataStore) vars, attrs = filename_or_obj.load() encoding = filename_or_obj.get_encoding() vars, attrs, coord_names = conventions.decode_cf_variables( vars, attrs, mask_and_scale=mask_and_scale, decode_times=decode_times, concat_characters=concat_characters, decode_coords=decode_coords, drop_variables=drop_variables, use_cftime=use_cftime, decode_timedelta=decode_timedelta, ) # split data and coordinate variables (promote dimension coordinates) data_vars = {} coord_vars = {} for name, var in vars.items(): if name in coord_names or var.dims == (name,): coord_vars[name] = var else: data_vars[name] = var # explicit Coordinates object with no index passed coords = Coordinates(coord_vars, 
indexes={}) ds = Dataset(data_vars, coords=coords, attrs=attrs) ds.set_close(filename_or_obj.close) ds.encoding = encoding return ds BACKEND_ENTRYPOINTS["store"] = (None, StoreBackendEntrypoint) python-xarray-2026.01.0/xarray/backends/__init__.py0000664000175000017500000000267315136607163022244 0ustar alastairalastair"""Backend objects for saving and loading data DataStores provide a uniform interface for saving and loading data in different formats. They should not be used directly, but rather through Dataset objects. """ from xarray.backends.common import AbstractDataStore, BackendArray, BackendEntrypoint from xarray.backends.file_manager import ( CachingFileManager, DummyFileManager, FileManager, ) from xarray.backends.h5netcdf_ import H5netcdfBackendEntrypoint, H5NetCDFStore from xarray.backends.memory import InMemoryDataStore from xarray.backends.netCDF4_ import NetCDF4BackendEntrypoint, NetCDF4DataStore from xarray.backends.plugins import list_engines, refresh_engines from xarray.backends.pydap_ import PydapBackendEntrypoint, PydapDataStore from xarray.backends.scipy_ import ScipyBackendEntrypoint, ScipyDataStore from xarray.backends.store import StoreBackendEntrypoint from xarray.backends.zarr import ZarrBackendEntrypoint, ZarrStore __all__ = [ "AbstractDataStore", "BackendArray", "BackendEntrypoint", "CachingFileManager", "DummyFileManager", "FileManager", "H5NetCDFStore", "H5netcdfBackendEntrypoint", "InMemoryDataStore", "NetCDF4BackendEntrypoint", "NetCDF4DataStore", "PydapBackendEntrypoint", "PydapDataStore", "ScipyBackendEntrypoint", "ScipyDataStore", "StoreBackendEntrypoint", "ZarrBackendEntrypoint", "ZarrStore", "list_engines", "refresh_engines", ] python-xarray-2026.01.0/xarray/coding/0000775000175000017500000000000015136607163017614 5ustar alastairalastairpython-xarray-2026.01.0/xarray/coding/strings.py0000664000175000017500000002464115136607163021666 0ustar alastairalastair"""Coders for strings.""" from __future__ import annotations import re from functools import partial import numpy as np from xarray.coding.variables import ( VariableCoder, lazy_elemwise_func, pop_to, safe_setitem, unpack_for_decoding, unpack_for_encoding, ) from xarray.core import indexing from xarray.core.utils import emit_user_level_warning, module_available from xarray.core.variable import Variable from xarray.namedarray.parallelcompat import get_chunked_array_type from xarray.namedarray.pycompat import is_chunked_array HAS_NUMPY_2_0 = module_available("numpy", minversion="2.0.0.dev0") def create_vlen_dtype(element_type): if element_type not in (str, bytes): raise TypeError(f"unsupported type for vlen_dtype: {element_type!r}") # based on h5py.special_dtype return np.dtype("O", metadata={"element_type": element_type}) def check_vlen_dtype(dtype): if dtype.kind != "O" or dtype.metadata is None: return None else: # check xarray (element_type) as well as h5py (vlen) return dtype.metadata.get("element_type", dtype.metadata.get("vlen")) def is_unicode_dtype(dtype): return dtype.kind == "U" or check_vlen_dtype(dtype) is str def is_bytes_dtype(dtype): return dtype.kind == "S" or check_vlen_dtype(dtype) is bytes class EncodedStringCoder(VariableCoder): """Transforms between unicode strings and fixed-width UTF-8 bytes.""" def __init__(self, allows_unicode=True): self.allows_unicode = allows_unicode def encode(self, variable: Variable, name=None) -> Variable: dims, data, attrs, encoding = unpack_for_encoding(variable) contains_unicode = is_unicode_dtype(data.dtype) encode_as_char = encoding.get("dtype") == "S1" 
if encode_as_char: del encoding["dtype"] # no longer relevant if contains_unicode and (encode_as_char or not self.allows_unicode): if "_FillValue" in attrs: raise NotImplementedError( f"variable {name!r} has a _FillValue specified, but " "_FillValue is not yet supported on unicode strings: " "https://github.com/pydata/xarray/issues/1647" ) string_encoding = encoding.pop("_Encoding", "utf-8") safe_setitem(attrs, "_Encoding", string_encoding, name=name) # TODO: figure out how to handle this in a lazy way with dask data = encode_string_array(data, string_encoding) return Variable(dims, data, attrs, encoding) else: variable.encoding = encoding return variable def decode(self, variable: Variable, name=None) -> Variable: dims, data, attrs, encoding = unpack_for_decoding(variable) if "_Encoding" in attrs: string_encoding = pop_to(attrs, encoding, "_Encoding") func = partial(decode_bytes_array, encoding=string_encoding) data = lazy_elemwise_func(data, func, np.dtype(object)) return Variable(dims, data, attrs, encoding) def decode_bytes_array(bytes_array, encoding="utf-8"): # This is faster than using np.char.decode() or np.vectorize() bytes_array = np.asarray(bytes_array) decoded = [x.decode(encoding) for x in bytes_array.ravel()] return np.array(decoded, dtype=object).reshape(bytes_array.shape) def encode_string_array(string_array, encoding="utf-8"): string_array = np.asarray(string_array) encoded = [x.encode(encoding) for x in string_array.ravel()] return np.array(encoded, dtype=bytes).reshape(string_array.shape) def ensure_fixed_length_bytes(var: Variable) -> Variable: """Ensure that a variable with vlen bytes is converted to fixed width.""" if check_vlen_dtype(var.dtype) is bytes: dims, data, attrs, encoding = unpack_for_encoding(var) # TODO: figure out how to handle this with dask data = np.asarray(data, dtype=np.bytes_) return Variable(dims, data, attrs, encoding) else: return var def validate_char_dim_name(strlen, encoding, name) -> str: """Check character array dimension naming and size and return it.""" if (char_dim_name := encoding.pop("char_dim_name", None)) is not None: # 1 - extract all characters up to last number sequence # 2 - extract last number sequence match = re.search(r"^(.*?)(\d+)(?!.*\d)", char_dim_name) if match: new_dim_name = match.group(1) if int(match.group(2)) != strlen: emit_user_level_warning( f"String dimension naming mismatch on variable {name!r}. {char_dim_name!r} provided by encoding, but data has length of '{strlen}'. Using '{new_dim_name}{strlen}' instead of {char_dim_name!r} to prevent possible naming clash.\n" "To silence this warning either remove 'char_dim_name' from encoding or provide a fitting name." ) char_dim_name = f"{new_dim_name}{strlen}" elif ( original_shape := encoding.get("original_shape", [-1])[-1] ) != -1 and original_shape != strlen: emit_user_level_warning( f"String dimension length mismatch on variable {name!r}. '{original_shape}' provided by encoding, but data has length of '{strlen}'. Using '{char_dim_name}{strlen}' instead of {char_dim_name!r} to prevent possible naming clash.\n" f"To silence this warning remove 'original_shape' from encoding." 
) char_dim_name = f"{char_dim_name}{strlen}" else: char_dim_name = f"string{strlen}" return char_dim_name class CharacterArrayCoder(VariableCoder): """Transforms between arrays containing bytes and character arrays.""" def encode(self, variable, name=None): variable = ensure_fixed_length_bytes(variable) dims, data, attrs, encoding = unpack_for_encoding(variable) if data.dtype.kind == "S" and encoding.get("dtype") is not str: data = bytes_to_char(data) char_dim_name = validate_char_dim_name(data.shape[-1], encoding, name) dims = dims + (char_dim_name,) return Variable(dims, data, attrs, encoding) def decode(self, variable, name=None): dims, data, attrs, encoding = unpack_for_decoding(variable) if data.dtype == "S1" and dims: encoding["char_dim_name"] = dims[-1] dims = dims[:-1] data = char_to_bytes(data) return Variable(dims, data, attrs, encoding) def bytes_to_char(arr): """Convert numpy/dask arrays from fixed width bytes to characters.""" if arr.dtype.kind != "S": raise ValueError("argument must have a fixed-width bytes dtype") if is_chunked_array(arr): chunkmanager = get_chunked_array_type(arr) return chunkmanager.map_blocks( _numpy_bytes_to_char, arr, dtype="S1", chunks=arr.chunks + ((arr.dtype.itemsize,)), new_axis=[arr.ndim], ) return _numpy_bytes_to_char(arr) def _numpy_bytes_to_char(arr): """Like netCDF4.stringtochar, but faster and more flexible.""" # adapt handling of copy-kwarg to numpy 2.0 # see https://github.com/numpy/numpy/issues/25916 # and https://github.com/numpy/numpy/pull/25922 copy = None if HAS_NUMPY_2_0 else False # ensure the array is contiguous arr = np.array(arr, copy=copy, order="C", dtype=np.bytes_) return arr.reshape(arr.shape + (1,)).view("S1") def char_to_bytes(arr): """Convert numpy/dask arrays from characters to fixed width bytes.""" if arr.dtype != "S1": raise ValueError("argument must have dtype='S1'") if not arr.ndim: # no dimension to concatenate along return arr size = arr.shape[-1] if not size: # can't make an S0 dtype return np.zeros(arr.shape[:-1], dtype=np.bytes_) if is_chunked_array(arr): chunkmanager = get_chunked_array_type(arr) if len(arr.chunks[-1]) > 1: raise ValueError( "cannot stacked dask character array with " f"multiple chunks in the last dimension: {arr}" ) dtype = np.dtype("S" + str(arr.shape[-1])) return chunkmanager.map_blocks( _numpy_char_to_bytes, arr, dtype=dtype, chunks=arr.chunks[:-1], drop_axis=[arr.ndim - 1], ) else: return StackedBytesArray(arr) def _numpy_char_to_bytes(arr): """Like netCDF4.chartostring, but faster and more flexible.""" # adapt handling of copy-kwarg to numpy 2.0 # see https://github.com/numpy/numpy/issues/25916 # and https://github.com/numpy/numpy/pull/25922 copy = None if HAS_NUMPY_2_0 else False # based on: https://stackoverflow.com/a/10984878/809705 arr = np.array(arr, copy=copy, order="C") dtype = "S" + str(arr.shape[-1]) return arr.view(dtype).reshape(arr.shape[:-1]) class StackedBytesArray(indexing.ExplicitlyIndexedNDArrayMixin): """Wrapper around array-like objects to create a new indexable object where values, when accessed, are automatically stacked along the last dimension. >>> indexer = indexing.BasicIndexer((slice(None),)) >>> np.array(StackedBytesArray(np.array(["a", "b", "c"], dtype="S1"))[indexer]) array(b'abc', dtype='|S3') """ def __init__(self, array): """ Parameters ---------- array : array-like Original array of values to wrap. 
""" if array.dtype != "S1": raise ValueError( "can only use StackedBytesArray if argument has dtype='S1'" ) self.array = indexing.as_indexable(array) @property def dtype(self): return np.dtype("S" + str(self.array.shape[-1])) @property def shape(self) -> tuple[int, ...]: return self.array.shape[:-1] def __repr__(self): return f"{type(self).__name__}({self.array!r})" def _vindex_get(self, key): return type(self)(self.array.vindex[key]) def _oindex_get(self, key): return type(self)(self.array.oindex[key]) def __getitem__(self, key): # require slicing the last dimension completely key = type(key)(indexing.expanded_indexer(key.tuple, self.array.ndim)) if key.tuple[-1] != slice(None): raise IndexError("too many indices") return type(self)(self.array[key]) def get_duck_array(self): return _numpy_char_to_bytes(self.array.get_duck_array()) python-xarray-2026.01.0/xarray/coding/cftimeindex.py0000664000175000017500000007437415136607163022504 0ustar alastairalastair"""DatetimeIndex analog for cftime.datetime objects""" # The pandas.Index subclass defined here was copied and adapted for # use with cftime.datetime objects based on the source code defining # pandas.DatetimeIndex. # For reference, here is a copy of the pandas copyright notice: # (c) 2011-2012, Lambda Foundry, Inc. and PyData Development Team # All rights reserved. # Copyright (c) 2008-2011 AQR Capital Management, LLC # All rights reserved. # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above # copyright notice, this list of conditions and the following # disclaimer in the documentation and/or other materials provided # with the distribution. # * Neither the name of the copyright holder nor the names of any # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
from __future__ import annotations import math from datetime import timedelta from typing import TYPE_CHECKING, Any import numpy as np import pandas as pd from packaging.version import Version from xarray.coding.times import ( _STANDARD_CALENDARS, _parse_iso8601, cftime_to_nptime, infer_calendar_name, ) from xarray.core.common import _contains_cftime_datetimes from xarray.core.options import OPTIONS from xarray.core.types import PDDatetimeUnitOptions from xarray.core.utils import attempt_import, emit_user_level_warning, is_scalar if TYPE_CHECKING: from xarray.coding.cftime_offsets import BaseCFTimeOffset from xarray.core.types import Self # constants for cftimeindex.repr CFTIME_REPR_LENGTH = 19 ITEMS_IN_REPR_MAX_ELSE_ELLIPSIS = 100 REPR_ELLIPSIS_SHOW_ITEMS_FRONT_END = 10 OUT_OF_BOUNDS_TIMEDELTA_ERRORS: tuple[type[Exception], ...] try: OUT_OF_BOUNDS_TIMEDELTA_ERRORS = (pd.errors.OutOfBoundsTimedelta, OverflowError) except AttributeError: OUT_OF_BOUNDS_TIMEDELTA_ERRORS = (OverflowError,) def _parsed_string_to_bounds(date_type, resolution, parsed): """Generalization of pandas.tseries.index.DatetimeIndex._parsed_string_to_bounds for use with non-standard calendars and cftime.datetime objects. """ if resolution == "year": return ( date_type(parsed.year, 1, 1), date_type(parsed.year + 1, 1, 1) - timedelta(microseconds=1), ) elif resolution == "month": if parsed.month == 12: end = date_type(parsed.year + 1, 1, 1) - timedelta(microseconds=1) else: end = date_type(parsed.year, parsed.month + 1, 1) - timedelta( microseconds=1 ) return date_type(parsed.year, parsed.month, 1), end elif resolution == "day": start = date_type(parsed.year, parsed.month, parsed.day) return start, start + timedelta(days=1, microseconds=-1) elif resolution == "hour": start = date_type(parsed.year, parsed.month, parsed.day, parsed.hour) return start, start + timedelta(hours=1, microseconds=-1) elif resolution == "minute": start = date_type( parsed.year, parsed.month, parsed.day, parsed.hour, parsed.minute ) return start, start + timedelta(minutes=1, microseconds=-1) elif resolution == "second": start = date_type( parsed.year, parsed.month, parsed.day, parsed.hour, parsed.minute, parsed.second, ) return start, start + timedelta(seconds=1, microseconds=-1) else: raise KeyError def get_date_field(datetimes, field): """Adapted from pandas.tslib.get_date_field""" return np.array([getattr(date, field) for date in datetimes], dtype=np.int64) def _field_accessor(name, docstring=None, min_cftime_version="0.0"): """Adapted from pandas.tseries.index._field_accessor""" def f(self, min_cftime_version=min_cftime_version): if TYPE_CHECKING: import cftime else: cftime = attempt_import("cftime") if Version(cftime.__version__) >= Version(min_cftime_version): return get_date_field(self._data, name) else: raise ImportError( f"The {name:!r} accessor requires a minimum " f"version of cftime of {min_cftime_version}. Found an " f"installed version of {cftime.__version__}." ) f.__name__ = name f.__doc__ = docstring return property(f) def get_date_type(self): if self._data.size: return type(self._data[0]) else: return None def assert_all_valid_date_type(data): if TYPE_CHECKING: import cftime else: cftime = attempt_import("cftime") if len(data) > 0: sample = data[0] date_type = type(sample) if not isinstance(sample, cftime.datetime): raise TypeError( "CFTimeIndex requires cftime.datetime " f"objects. Got object of {date_type}." 
) if not all(isinstance(value, date_type) for value in data): raise TypeError( "CFTimeIndex requires using datetime " f"objects of all the same type. Got\n{data}." ) def format_row(times, indent=0, separator=", ", row_end=",\n"): """Format a single row from format_times.""" return indent * " " + separator.join(map(str, times)) + row_end def format_times( index, max_width, offset, separator=", ", first_row_offset=0, intermediate_row_end=",\n", last_row_end="", ): """Format values of cftimeindex as pd.Index.""" n_per_row = max(max_width // (CFTIME_REPR_LENGTH + len(separator)), 1) n_rows = math.ceil(len(index) / n_per_row) representation = "" for row in range(n_rows): indent = first_row_offset if row == 0 else offset row_end = last_row_end if row == n_rows - 1 else intermediate_row_end times_for_row = index[row * n_per_row : (row + 1) * n_per_row] representation += format_row( times_for_row, indent=indent, separator=separator, row_end=row_end ) return representation def format_attrs(index, separator=", "): """Format attributes of CFTimeIndex for __repr__.""" attrs = { "dtype": f"'{index.dtype}'", "length": f"{len(index)}", "calendar": f"{index.calendar!r}", "freq": f"{index.freq!r}", } attrs_str = [f"{k}={v}" for k, v in attrs.items()] attrs_str = f"{separator}".join(attrs_str) return attrs_str class CFTimeIndex(pd.Index): """Custom Index for working with CF calendars and dates All elements of a CFTimeIndex must be cftime.datetime objects. Parameters ---------- data : array or CFTimeIndex Sequence of cftime.datetime objects to use in index name : str, default: None Name of the resulting index See Also -------- date_range """ _data: np.ndarray year = _field_accessor("year", "The year of the datetime") month = _field_accessor("month", "The month of the datetime") day = _field_accessor("day", "The days of the datetime") hour = _field_accessor("hour", "The hours of the datetime") minute = _field_accessor("minute", "The minutes of the datetime") second = _field_accessor("second", "The seconds of the datetime") microsecond = _field_accessor("microsecond", "The microseconds of the datetime") dayofyear = _field_accessor( "dayofyr", "The ordinal day of year of the datetime", "1.0.2.1" ) dayofweek = _field_accessor("dayofwk", "The day of week of the datetime", "1.0.2.1") days_in_month = _field_accessor( "daysinmonth", "The number of days in the month of the datetime", "1.1.0.0" ) date_type = property(get_date_type) def __new__(cls, data, name=None, **kwargs): assert_all_valid_date_type(data) if name is None and hasattr(data, "name"): name = data.name result = object.__new__(cls) result._data = np.array(data, dtype="O") result.name = name result._cache = {} return result def __repr__(self): """ Return a string representation for this object. 
""" klass_name = type(self).__name__ display_width = OPTIONS["display_width"] offset = len(klass_name) + 2 if len(self) <= ITEMS_IN_REPR_MAX_ELSE_ELLIPSIS: datastr = format_times( self.values, display_width, offset=offset, first_row_offset=0 ) else: front_str = format_times( self.values[:REPR_ELLIPSIS_SHOW_ITEMS_FRONT_END], display_width, offset=offset, first_row_offset=0, last_row_end=",", ) end_str = format_times( self.values[-REPR_ELLIPSIS_SHOW_ITEMS_FRONT_END:], display_width, offset=offset, first_row_offset=offset, ) datastr = "\n".join([front_str, f"{' ' * offset}...", end_str]) attrs_str = format_attrs(self) # oneliner only if smaller than display_width full_repr_str = f"{klass_name}([{datastr}], {attrs_str})" if len(full_repr_str) > display_width: # if attrs_str too long, one per line if len(attrs_str) >= display_width - offset: attrs_str = attrs_str.replace(",", f",\n{' ' * (offset - 2)}") full_repr_str = ( f"{klass_name}([{datastr}],\n{' ' * (offset - 1)}{attrs_str})" ) return full_repr_str def _partial_date_slice(self, resolution, parsed): """Adapted from pandas.tseries.index.DatetimeIndex._partial_date_slice Note that when using a CFTimeIndex, if a partial-date selection returns a single element, it will never be converted to a scalar coordinate; this is in slight contrast to the behavior when using a DatetimeIndex, which sometimes will return a DataArray with a scalar coordinate depending on the resolution of the datetimes used in defining the index. For example: >>> from cftime import DatetimeNoLeap >>> da = xr.DataArray( ... [1, 2], ... coords=[[DatetimeNoLeap(2001, 1, 1), DatetimeNoLeap(2001, 2, 1)]], ... dims=["time"], ... ) >>> da.sel(time="2001-01-01") Size: 8B array([1]) Coordinates: * time (time) object 8B 2001-01-01 00:00:00 >>> da = xr.DataArray( ... [1, 2], ... coords=[[pd.Timestamp(2001, 1, 1), pd.Timestamp(2001, 2, 1)]], ... dims=["time"], ... ) >>> da.sel(time="2001-01-01") Size: 8B array(1) Coordinates: time datetime64[ns] 8B 2001-01-01 >>> da = xr.DataArray( ... [1, 2], ... coords=[[pd.Timestamp(2001, 1, 1, 1), pd.Timestamp(2001, 2, 1)]], ... dims=["time"], ... 
) >>> da.sel(time="2001-01-01") Size: 8B array([1]) Coordinates: * time (time) datetime64[ns] 8B 2001-01-01T01:00:00 """ start, end = _parsed_string_to_bounds(self.date_type, resolution, parsed) times = self._data if self.is_monotonic_increasing: if len(times) and ( (start < times[0] and end < times[0]) or (start > times[-1] and end > times[-1]) ): # we are out of range raise KeyError # a monotonic (sorted) series can be sliced left = times.searchsorted(start, side="left") right = times.searchsorted(end, side="right") return slice(left, right) lhs_mask = times >= start rhs_mask = times <= end return np.flatnonzero(lhs_mask & rhs_mask) def _get_string_slice(self, key): """Adapted from pandas.tseries.index.DatetimeIndex._get_string_slice""" parsed, resolution = _parse_iso8601(self.date_type, key) try: loc = self._partial_date_slice(resolution, parsed) except KeyError as err: raise KeyError(key) from err return loc def _get_nearest_indexer(self, target, limit, tolerance): """Adapted from pandas.Index._get_nearest_indexer""" left_indexer = self.get_indexer(target, "pad", limit=limit) right_indexer = self.get_indexer(target, "backfill", limit=limit) left_distances = abs(self.values[left_indexer] - target.values) right_distances = abs(self.values[right_indexer] - target.values) if self.is_monotonic_increasing: condition = (left_distances < right_distances) | (right_indexer == -1) else: condition = (left_distances <= right_distances) | (right_indexer == -1) indexer = np.where(condition, left_indexer, right_indexer) if tolerance is not None: indexer = self._filter_indexer_tolerance(target, indexer, tolerance) return indexer def _filter_indexer_tolerance(self, target, indexer, tolerance): """Adapted from pandas.Index._filter_indexer_tolerance""" if isinstance(target, pd.Index): distance = abs(self.values[indexer] - target.values) else: distance = abs(self.values[indexer] - target) indexer = np.where(distance <= tolerance, indexer, -1) return indexer def get_loc(self, key): """Adapted from pandas.tseries.index.DatetimeIndex.get_loc""" if isinstance(key, str): return self._get_string_slice(key) else: return super().get_loc(key) def _maybe_cast_slice_bound(self, label, side): """Adapted from pandas.tseries.index.DatetimeIndex._maybe_cast_slice_bound """ if not isinstance(label, str): return label parsed, resolution = _parse_iso8601(self.date_type, label) start, end = _parsed_string_to_bounds(self.date_type, resolution, parsed) if self.is_monotonic_decreasing and len(self) > 1: return end if side == "left" else start return start if side == "left" else end # TODO: Add ability to use integer range outside of iloc? # e.g. series[1:5]. 
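# Illustrative sketch of the partial-string indexing implemented by
# get_loc/_partial_date_slice above; not part of the original source, and the
# index values below are assumed for the example:
#
#     import xarray as xr
#     index = xr.date_range("2001-01-01", periods=4, freq="MS", use_cftime=True)
#     index.get_loc("2001-01")  # slice(0, 1): only January 2001 matches
#     index.get_loc("2001")     # slice(0, 4): the whole year matches
#
# For a monotonic index the result is a slice found via searchsorted; for a
# non-monotonic index _partial_date_slice falls back to integer positions.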
def get_value(self, series, key): """Adapted from pandas.tseries.index.DatetimeIndex.get_value""" if np.asarray(key).dtype == np.dtype(bool): return series.iloc[key] elif isinstance(key, slice): return series.iloc[self.slice_indexer(key.start, key.stop, key.step)] else: return series.iloc[self.get_loc(key)] def __contains__(self, key: Any) -> bool: """Adapted from pandas.tseries.base.DatetimeIndexOpsMixin.__contains__""" try: result = self.get_loc(key) return ( is_scalar(result) or isinstance(result, slice) or (isinstance(result, np.ndarray) and result.size > 0) ) except (KeyError, TypeError, ValueError): return False def contains(self, key: Any) -> bool: """Needed for .loc based partial-string indexing""" return self.__contains__(key) def shift( # type: ignore[override,unused-ignore] self, periods: int | float, freq: str | timedelta | BaseCFTimeOffset | None = None, ) -> Self: """Shift the CFTimeIndex a multiple of the given frequency. See the documentation for :py:func:`~xarray.date_range` for a complete listing of valid frequency strings. Parameters ---------- periods : int, float if freq of days or below Periods to shift by freq : str, datetime.timedelta or BaseCFTimeOffset A frequency string or datetime.timedelta object to shift by Returns ------- CFTimeIndex See Also -------- pandas.DatetimeIndex.shift Examples -------- >>> index = xr.date_range("2000", periods=1, freq="ME", use_cftime=True) >>> index CFTimeIndex([2000-01-31 00:00:00], dtype='object', length=1, calendar='standard', freq=None) >>> index.shift(1, "ME") CFTimeIndex([2000-02-29 00:00:00], dtype='object', length=1, calendar='standard', freq=None) >>> index.shift(1.5, "24h") CFTimeIndex([2000-02-01 12:00:00], dtype='object', length=1, calendar='standard', freq=None) """ from xarray.coding.cftime_offsets import BaseCFTimeOffset if freq is None: # None type is required to be compatible with base pd.Index class raise TypeError( f"`freq` argument cannot be None for {type(self).__name__}.shift" ) if isinstance(freq, timedelta): return self + periods * freq if isinstance(freq, str | BaseCFTimeOffset): from xarray.coding.cftime_offsets import to_offset return self + periods * to_offset(freq) raise TypeError( f"'freq' must be of type str or datetime.timedelta, got {type(freq)}." ) # pandas-stubs defines many overloads for Index.__add__/__radd__ with specific # return types, but CFTimeIndex legitimately returns Self for all cases def __add__(self, other) -> Self: # type: ignore[override] if isinstance(other, pd.TimedeltaIndex): other = other.to_pytimedelta() return type(self)(np.array(self) + other) def __radd__(self, other) -> Self: # type: ignore[override] if isinstance(other, pd.TimedeltaIndex): other = other.to_pytimedelta() return type(self)(other + np.array(self)) def __sub__(self, other): if _contains_datetime_timedeltas(other): return type(self)(np.array(self) - other) if isinstance(other, pd.TimedeltaIndex): return type(self)(np.array(self) - other.to_pytimedelta()) if _contains_cftime_datetimes(np.array(other)): try: return pd.TimedeltaIndex(np.array(self) - np.array(other)) except OUT_OF_BOUNDS_TIMEDELTA_ERRORS as err: raise ValueError( "The time difference exceeds the range of values " "that can be expressed at the nanosecond resolution." ) from err return NotImplemented def __rsub__(self, other): try: return pd.TimedeltaIndex(other - np.array(self)) except OUT_OF_BOUNDS_TIMEDELTA_ERRORS as err: raise ValueError( "The time difference exceeds the range of values " "that can be expressed at the nanosecond resolution." 
) from err def to_datetimeindex( self, unsafe: bool = False, time_unit: PDDatetimeUnitOptions | None = None ) -> pd.DatetimeIndex: """If possible, convert this index to a pandas.DatetimeIndex. Parameters ---------- unsafe : bool Flag to turn off calendar mismatch warnings (default ``False``). time_unit : str Time resolution of resulting DatetimeIndex. Can be one of `"s"`, ``"ms"``, ``"us"``, or ``"ns"`` (default ``"ns"``). Returns ------- pandas.DatetimeIndex Raises ------ ValueError If the CFTimeIndex contains dates that are not possible in the standard calendar or outside the range representable by the specified ``time_unit``. Warns ----- RuntimeWarning If converting from a non-standard calendar, or a Gregorian calendar with dates prior to the reform (1582-10-15). Warnings -------- Note that for non-proleptic Gregorian calendars, this will change the calendar type of the index. In that case the result of this method should be used with caution. Examples -------- >>> times = xr.date_range( ... "2000", periods=2, calendar="gregorian", use_cftime=True ... ) >>> times CFTimeIndex([2000-01-01 00:00:00, 2000-01-02 00:00:00], dtype='object', length=2, calendar='standard', freq=None) >>> times.to_datetimeindex(time_unit="ns") DatetimeIndex(['2000-01-01', '2000-01-02'], dtype='datetime64[ns]', freq=None) """ if not self._data.size: return pd.DatetimeIndex([]) if time_unit is None: emit_user_level_warning( "In a future version of xarray to_datetimeindex will default " "to returning a 'us'-resolution DatetimeIndex instead of a " "'ns'-resolution DatetimeIndex. This warning can be silenced " "by explicitly passing the `time_unit` keyword argument.", FutureWarning, ) time_unit = "ns" nptimes = cftime_to_nptime(self, time_unit=time_unit) calendar = infer_calendar_name(self) if calendar not in _STANDARD_CALENDARS and not unsafe: emit_user_level_warning( "Converting a CFTimeIndex with dates from a non-standard " f"calendar, {calendar!r}, to a pandas.DatetimeIndex, which " "uses dates from the standard calendar. This may lead to " "subtle errors in operations that depend on the length of " "time between dates.", RuntimeWarning, ) if calendar == "standard" and not unsafe: reform_date = self.date_type(1582, 10, 15) if self.min() < reform_date: emit_user_level_warning( "Converting a CFTimeIndex with dates from a Gregorian " "calendar that fall before the reform date of 1582-10-15 " "to a pandas.DatetimeIndex. During this time period the " "Gregorian calendar and the proleptic Gregorian calendar " "of the DatetimeIndex do not exactly align. This warning " "can be silenced by setting unsafe=True.", RuntimeWarning, ) return pd.DatetimeIndex(nptimes) def strftime(self, date_format): """ Return an Index of formatted strings specified by date_format, which supports the same string format as the python standard library. Details of the string format can be found in `python string format doc `__ Parameters ---------- date_format : str Date format string (e.g. "%Y-%m-%d") Returns ------- pandas.Index Index of formatted strings Examples -------- >>> rng = xr.date_range( ... start="2000", ... periods=5, ... freq="2MS", ... calendar="noleap", ... use_cftime=True, ... 
) >>> rng.strftime("%B %d, %Y, %r") Index(['January 01, 2000, 12:00:00 AM', 'March 01, 2000, 12:00:00 AM', 'May 01, 2000, 12:00:00 AM', 'July 01, 2000, 12:00:00 AM', 'September 01, 2000, 12:00:00 AM'], dtype='object') """ return pd.Index([date.strftime(date_format) for date in self._data]) @property def asi8(self): """Convert to integers with units of microseconds since 1970-01-01.""" from xarray.core.resample_cftime import exact_cftime_datetime_difference if not self._data.size: return np.array([], dtype=np.int64) epoch = self.date_type(1970, 1, 1) return np.array( [ _total_microseconds(exact_cftime_datetime_difference(epoch, date)) for date in self.values ], dtype=np.int64, ) @property def calendar(self): """The calendar used by the datetimes in the index.""" if not self._data.size: return None return infer_calendar_name(self) @property def freq(self): """The frequency used by the dates in the index.""" from xarray.coding.frequencies import infer_freq # min 3 elemtents required to determine freq if self._data.size < 3: return None return infer_freq(self) def _round_via_method(self, freq, method): """Round dates using a specified method.""" from xarray.coding.cftime_offsets import CFTIME_TICKS, Day, to_offset if not self._data.size: return CFTimeIndex(np.array(self)) offset = to_offset(freq) if isinstance(offset, Day): # Following pandas, "In the 'round' context, Day unambiguously # means 24h, not calendar-day" offset_as_timedelta = timedelta(days=offset.n) elif isinstance(offset, CFTIME_TICKS): offset_as_timedelta = offset.as_timedelta() else: raise ValueError(f"{offset} is a non-fixed frequency") unit = _total_microseconds(offset_as_timedelta) values = self.asi8 rounded = method(values, unit) return _cftimeindex_from_i8(rounded, self.date_type, self.name) def floor(self, freq): """Round dates down to fixed frequency. Parameters ---------- freq : str The frequency level to round the index to. Must be a fixed frequency like 'S' (second) not 'ME' (month end). See `frequency aliases `_ for a list of possible values. Returns ------- CFTimeIndex """ return self._round_via_method(freq, _floor_int) def ceil(self, freq): """Round dates up to fixed frequency. Parameters ---------- freq : str The frequency level to round the index to. Must be a fixed frequency like 'S' (second) not 'ME' (month end). See `frequency aliases `_ for a list of possible values. Returns ------- CFTimeIndex """ return self._round_via_method(freq, _ceil_int) def round(self, freq): """Round dates to a fixed frequency. Parameters ---------- freq : str The frequency level to round the index to. Must be a fixed frequency like 'S' (second) not 'ME' (month end). See `frequency aliases `_ for a list of possible values. Returns ------- CFTimeIndex """ return self._round_via_method(freq, _round_to_nearest_half_even) @property def is_leap_year(self): if TYPE_CHECKING: import cftime else: cftime = attempt_import("cftime") func = np.vectorize(cftime.is_leap_year) return func(self.year, calendar=self.calendar) def _parse_array_of_cftime_strings(strings, date_type): """Create a numpy array from an array of strings. For use in generating dates from strings for use with interp. Assumes the array is either 0-dimensional or 1-dimensional. 
Parameters ---------- strings : array of strings Strings to convert to dates date_type : cftime.datetime type Calendar type to use for dates Returns ------- np.array """ return np.array([_parse_iso8601(date_type, s)[0] for s in strings.ravel()]).reshape( strings.shape ) def _contains_datetime_timedeltas(array): """Check if an input array contains datetime.timedelta objects.""" array = np.atleast_1d(array) return isinstance(array[0], timedelta) def _cftimeindex_from_i8(values, date_type, name): """Construct a CFTimeIndex from an array of integers. Parameters ---------- values : np.array Integers representing microseconds since 1970-01-01. date_type : cftime.datetime Type of date for the index. name : str Name of the index. Returns ------- CFTimeIndex """ epoch = date_type(1970, 1, 1) dates = np.array([epoch + timedelta(microseconds=int(value)) for value in values]) return CFTimeIndex(dates, name=name) def _total_microseconds(delta): """Compute the total number of microseconds of a datetime.timedelta. Parameters ---------- delta : datetime.timedelta Input timedelta. Returns ------- int """ return delta / timedelta(microseconds=1) def _floor_int(values, unit): """Copied from pandas.""" return values - np.remainder(values, unit) def _ceil_int(values, unit): """Copied from pandas.""" return values + np.remainder(-values, unit) def _round_to_nearest_half_even(values, unit): """Copied from pandas.""" if unit % 2: return _ceil_int(values - unit // 2, unit) quotient, remainder = np.divmod(values, unit) mask = np.logical_or( remainder > (unit // 2), np.logical_and(remainder == (unit // 2), quotient % 2) ) quotient[mask] += 1 return quotient * unit python-xarray-2026.01.0/xarray/coding/times.py0000664000175000017500000016742515136607163021326 0ustar alastairalastairfrom __future__ import annotations import contextlib import re import warnings from collections.abc import Callable, Hashable from datetime import datetime, timedelta from functools import partial from typing import TYPE_CHECKING, Union, cast import numpy as np import pandas as pd from pandas.errors import OutOfBoundsDatetime, OutOfBoundsTimedelta from xarray.coding.common import ( SerializationWarning, VariableCoder, lazy_elemwise_func, pop_to, safe_setitem, unpack_for_decoding, unpack_for_encoding, ) from xarray.compat.pdcompat import default_precision_timestamp, timestamp_as_unit from xarray.core import indexing from xarray.core.common import contains_cftime_datetimes, is_np_datetime_like from xarray.core.duck_array_ops import array_all, asarray, ravel, reshape from xarray.core.formatting import first_n_items, format_timestamp, last_item from xarray.core.utils import attempt_import, emit_user_level_warning from xarray.core.variable import Variable from xarray.namedarray.parallelcompat import T_ChunkedArray, get_chunked_array_type from xarray.namedarray.pycompat import is_chunked_array, to_numpy from xarray.namedarray.utils import is_duck_dask_array try: import cftime except ImportError: cftime = None from xarray.core.types import ( CFCalendar, CFTimeDatetime, NPDatetimeUnitOptions, PDDatetimeUnitOptions, T_DuckArray, ) T_Name = Union[Hashable, None] # standard calendars recognized by cftime _STANDARD_CALENDARS = {"standard", "gregorian", "proleptic_gregorian"} _NS_PER_TIME_DELTA = { "ns": 1, "us": int(1e3), "ms": int(1e6), "s": int(1e9), "m": int(1e9) * 60, "h": int(1e9) * 60 * 60, "D": int(1e9) * 60 * 60 * 24, } _US_PER_TIME_DELTA = { "microseconds": 1, "milliseconds": 1_000, "seconds": 1_000_000, "minutes": 60 * 1_000_000, "hours": 60 
* 60 * 1_000_000, "days": 24 * 60 * 60 * 1_000_000, } _NETCDF_TIME_UNITS_CFTIME = [ "days", "hours", "minutes", "seconds", "milliseconds", "microseconds", ] _NETCDF_TIME_UNITS_NUMPY = _NETCDF_TIME_UNITS_CFTIME + ["nanoseconds"] TIME_UNITS = frozenset( [ "days", "hours", "minutes", "seconds", "milliseconds", "microseconds", "nanoseconds", ] ) _INVALID_LITERAL_TIMEDELTA64_ENCODING_KEYS = [ "add_offset", "scale_factor", ] _ORDERED_PANDAS_TIME_RESOLUTIONS: list[PDDatetimeUnitOptions] = ["s", "ms", "us", "ns"] def _is_standard_calendar(calendar: str) -> bool: return calendar.lower() in _STANDARD_CALENDARS def _is_numpy_compatible_time_range(times): if is_np_datetime_like(times.dtype): return True # times array contains cftime objects times = np.asarray(times) tmin = times.min() tmax = times.max() try: # before relaxing the nanosecond constrained # this raised OutOfBoundsDatetime for # times < 1678 and times > 2262 # this isn't the case anymore for other resolutions like "s" # now, we raise for dates before 1582-10-15 _check_date_is_after_shift(tmin, "standard") _check_date_is_after_shift(tmax, "standard") convert_time_or_go_back(tmin, pd.Timestamp) convert_time_or_go_back(tmax, pd.Timestamp) except pd.errors.OutOfBoundsDatetime: return False except ValueError as err: if err.args[0] == "year 0 is out of range": return False raise else: return True def _netcdf_to_numpy_timeunit(units: str) -> NPDatetimeUnitOptions: units = units.lower() if not units.endswith("s"): units = f"{units}s" return cast( NPDatetimeUnitOptions, { "nanoseconds": "ns", "microseconds": "us", "milliseconds": "ms", "seconds": "s", "minutes": "m", "hours": "h", "days": "D", }[units], ) def _numpy_to_netcdf_timeunit(units: NPDatetimeUnitOptions) -> str: return { "ns": "nanoseconds", "us": "microseconds", "ms": "milliseconds", "s": "seconds", "m": "minutes", "h": "hours", "D": "days", }[units] def _numpy_dtype_to_netcdf_timeunit(dtype: np.dtype) -> str: unit, _ = np.datetime_data(dtype) unit = cast(NPDatetimeUnitOptions, unit) return _numpy_to_netcdf_timeunit(unit) def _ensure_padded_year(ref_date: str) -> str: # Reference dates without a padded year (e.g. since 1-1-1 or since 2-3-4) # are ambiguous (is it YMD or DMY?). This can lead to some very odd # behaviour e.g. pandas (via dateutil) passes '1-1-1 00:00:0.0' as # '2001-01-01 00:00:00' (because it assumes a) DMY and b) that year 1 is # shorthand for 2001 (like 02 would be shorthand for year 2002)). # Here we ensure that there is always a four-digit year, with the # assumption being that year comes first if we get something ambiguous. matches_year = re.match(r".*\d{4}.*", ref_date) if matches_year: # all good, return return ref_date # No four-digit strings, assume the first digits are the year and pad # appropriately matches_start_digits = re.match(r"(\d+)(.*)", ref_date) if not matches_start_digits: raise ValueError(f"invalid reference date for time units: {ref_date}") ref_year, everything_else = (s for s in matches_start_digits.groups()) ref_date_padded = f"{int(ref_year):04d}{everything_else}" warning_msg = ( f"Ambiguous reference date string: {ref_date}. The first value is " "assumed to be the year hence will be padded with zeros to remove " f"the ambiguity (the padded reference date string is: {ref_date_padded}). " "To remove this message, remove the ambiguity by padding your reference " "date strings with zeros." 
) warnings.warn(warning_msg, SerializationWarning, stacklevel=2) return ref_date_padded def _unpack_netcdf_time_units(units: str) -> tuple[str, str]: # CF datetime units follow the format: "UNIT since DATE" # this parses out the unit and date allowing for extraneous # whitespace. It also ensures that the year is padded with zeros # so it will be correctly understood by pandas (via dateutil). matches = re.match(r"(.+) since (.+)", units) if not matches: raise ValueError(f"invalid time units: {units}") delta_units, ref_date = (s.strip() for s in matches.groups()) ref_date = _ensure_padded_year(ref_date) return delta_units, ref_date def named(name: str, pattern: str) -> str: return "(?P<" + name + ">" + pattern + ")" def optional(x: str) -> str: return "(?:" + x + ")?" def trailing_optional(xs: list[str]) -> str: if not xs: return "" return xs[0] + optional(trailing_optional(xs[1:])) def build_pattern( date_sep: str = r"\-", datetime_sep: str = r"T", time_sep: str = r"\:", micro_sep: str = r".", ) -> str: pieces = [ (None, "year", r"[+-]?\d{4,5}"), (date_sep, "month", r"\d{2}"), (date_sep, "day", r"\d{2}"), (datetime_sep, "hour", r"\d{2}"), (time_sep, "minute", r"\d{2}"), (time_sep, "second", r"\d{2}"), (micro_sep, "microsecond", r"\d{1,6}"), ] pattern_list = [] for sep, name, sub_pattern in pieces: pattern_list.append((sep or "") + named(name, sub_pattern)) # TODO: allow timezone offsets? return "^" + trailing_optional(pattern_list) + "$" _BASIC_PATTERN = build_pattern(date_sep="", time_sep="") _EXTENDED_PATTERN = build_pattern() _CFTIME_PATTERN = build_pattern(datetime_sep=" ") _PATTERNS = [_BASIC_PATTERN, _EXTENDED_PATTERN, _CFTIME_PATTERN] def parse_iso8601_like(datetime_string: str) -> dict[str, str | None]: for pattern in _PATTERNS: match = re.match(pattern, datetime_string) if match: return match.groupdict() raise ValueError( f"no ISO-8601 or cftime-string-like match for string: {datetime_string}" ) def _parse_iso8601(date_type, timestr): default = date_type(1, 1, 1) result = parse_iso8601_like(timestr) replace = {} for attr in ["year", "month", "day", "hour", "minute", "second", "microsecond"]: value = result.get(attr, None) if value is not None: resolution = attr if attr == "microsecond": if len(value) <= 3: resolution = "millisecond" # convert match string into valid microsecond value value = 10 ** (6 - len(value)) * int(value) replace[attr] = int(value) return default.replace(**replace), resolution def _maybe_strip_tz_from_timestamp(date: pd.Timestamp) -> pd.Timestamp: # If the ref_date Timestamp is timezone-aware, convert to UTC and # make it timezone-naive (GH 2649). if date.tz is not None: return date.tz_convert("UTC").tz_convert(None) return date def _cast_timestamp_to_coarsest_resolution(timestamp: pd.Timestamp) -> pd.Timestamp: # Cast timestamp to the coarsest resolution that can be used without # changing its value. If provided a string, the pandas.Timestamp # constructor used to automatically infer this from the resolution of the # string, but this behavior was changed in pandas-dev/pandas#62801. This # function allows us to approximately restore the old behavior in a way # that is perhaps more consistent with how we infer the resolution of the # data values themselves. 
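# Illustrative sketch of the loop below (assumed inputs, not part of the
# original source):
#
#     _cast_timestamp_to_coarsest_resolution(pd.Timestamp("2000-01-01"))
#     # -> the same instant with .unit == "s"; a date-only reference
#     #    round-trips losslessly at second resolution
#     _cast_timestamp_to_coarsest_resolution(
#         pd.Timestamp("2000-01-01 00:00:00.000000001")
#     )
#     # -> unchanged with .unit == "ns"; no coarser unit preserves the value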
for unit in _ORDERED_PANDAS_TIME_RESOLUTIONS: coarsest_timestamp = timestamp.as_unit(unit) if coarsest_timestamp == timestamp: return coarsest_timestamp return timestamp def _unpack_time_unit_and_ref_date( units: str, ) -> tuple[NPDatetimeUnitOptions, pd.Timestamp]: # same us _unpack_netcdf_time_units but finalizes ref_date for # processing in encode_cf_datetime time_unit, _ref_date = _unpack_netcdf_time_units(units) time_unit = _netcdf_to_numpy_timeunit(time_unit) ref_date = pd.Timestamp(_ref_date) ref_date = _cast_timestamp_to_coarsest_resolution(ref_date) ref_date = _maybe_strip_tz_from_timestamp(ref_date) return time_unit, ref_date def _unpack_time_units_and_ref_date_cftime(units: str, calendar: str): # same as _unpack_netcdf_time_units but finalizes ref_date for # processing in encode_cf_datetime time_units, ref_date = _unpack_netcdf_time_units(units) ref_date = cftime.num2date( 0, units=f"microseconds since {ref_date}", calendar=calendar, only_use_cftime_datetimes=True, ) return time_units, ref_date def _decode_cf_datetime_dtype( data, units: str, calendar: str | None, use_cftime: bool | None, time_unit: PDDatetimeUnitOptions = "ns", ) -> np.dtype: # Verify that at least the first and last date can be decoded # successfully. Otherwise, tracebacks end up swallowed by # Dataset.__repr__ when users try to view their lazily decoded array. values = indexing.ImplicitToExplicitIndexingAdapter(indexing.as_indexable(data)) example_value = np.concatenate( [to_numpy(first_n_items(values, 1)), to_numpy(last_item(values))] ) try: result = decode_cf_datetime( example_value, units, calendar, use_cftime, time_unit ) except Exception as err: calendar_msg = ( "the default calendar" if calendar is None else f"calendar {calendar!r}" ) msg = ( f"unable to decode time units {units!r} with {calendar_msg!r}. Try " "opening your dataset with decode_times=False or installing cftime " "if it is not installed." ) raise ValueError(msg) from err else: dtype = getattr(result, "dtype", np.dtype("object")) return dtype def _decode_datetime_with_cftime( num_dates: np.ndarray, units: str, calendar: str ) -> np.ndarray: if TYPE_CHECKING: import cftime else: cftime = attempt_import("cftime") if num_dates.size > 0: return np.asarray( cftime.num2date(num_dates, units, calendar, only_use_cftime_datetimes=True) ) else: return np.array([], dtype=object) def _check_date_for_units_since_refdate( date, unit: NPDatetimeUnitOptions, ref_date: pd.Timestamp ) -> pd.Timestamp: # check for out-of-bounds floats and raise if date > np.iinfo("int64").max or date < np.iinfo("int64").min: raise OutOfBoundsTimedelta( f"Value {date} can't be represented as Datetime/Timedelta." ) delta = date * np.timedelta64(1, unit) if not np.isnan(delta): # this will raise on dtype overflow for integer dtypes if date.dtype.kind in "u" and not np.int64(delta) == date: raise OutOfBoundsTimedelta( "DType overflow in Datetime/Timedelta calculation." 
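# Illustrative example of the widening performed by the branches below
# (assumed input, not part of the original source):
#
#     flat_num_dates = np.array([0, 1, 2], dtype=np.int16)
#     flat_num_dates.astype(np.int64)   # -> dtype('int64'), values unchanged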
) # this will raise on overflow if ref_date + delta # can't be represented in the current ref_date resolution return timestamp_as_unit(ref_date + delta, ref_date.unit) else: # if date is exactly NaT (np.iinfo("int64").min) return NaT # to make follow-up checks work return pd.Timestamp("NaT") def _check_timedelta_range(value, data_unit, time_unit): if value > np.iinfo("int64").max or value < np.iinfo("int64").min: OutOfBoundsTimedelta(f"Value {value} can't be represented as Timedelta.") # on windows multiplying nan leads to RuntimeWarning with warnings.catch_warnings(): warnings.filterwarnings( "ignore", "invalid value encountered in multiply", RuntimeWarning ) delta = value * np.timedelta64(1, data_unit) if not np.isnan(delta): # this will raise on dtype overflow for integer dtypes if value.dtype.kind in "u" and not np.int64(delta) == value: raise OutOfBoundsTimedelta( "DType overflow in Datetime/Timedelta calculation." ) # this will raise on overflow if delta cannot be represented with the # resolutions supported by pandas. pd.to_timedelta(delta) def _align_reference_date_and_unit( ref_date: pd.Timestamp, unit: NPDatetimeUnitOptions ) -> pd.Timestamp: # align to the highest needed resolution of ref_date or unit if np.timedelta64(1, ref_date.unit) > np.timedelta64(1, unit): # this will raise accordingly # if data can't be represented in the higher resolution return timestamp_as_unit(ref_date, cast(PDDatetimeUnitOptions, unit)) return ref_date def _check_date_is_after_shift( date: pd.Timestamp | datetime | CFTimeDatetime, calendar: str ) -> None: # if we have gregorian/standard we need to raise # if we are outside the well-defined date range # proleptic_gregorian and standard/gregorian are only equivalent # if reference date and date range is >= 1582-10-15 if calendar != "proleptic_gregorian" and date < type(date)(1582, 10, 15): raise OutOfBoundsDatetime( f"Dates before 1582-10-15 cannot be decoded " f"with pandas using {calendar!r} calendar: {date}" ) def _check_higher_resolution( flat_num_dates: np.ndarray, time_unit: PDDatetimeUnitOptions, ) -> tuple[np.ndarray, PDDatetimeUnitOptions]: """Iterate until fitting resolution found.""" index = _ORDERED_PANDAS_TIME_RESOLUTIONS.index(time_unit) new_units = _ORDERED_PANDAS_TIME_RESOLUTIONS[index:] for new_time_unit in new_units: if not ((np.unique(flat_num_dates % 1) > 0).any() and new_time_unit != "ns"): break flat_num_dates *= 1000 return flat_num_dates, new_time_unit def _decode_datetime_with_pandas( flat_num_dates: np.ndarray, units: str, calendar: str, time_resolution: PDDatetimeUnitOptions = "ns", ) -> np.ndarray: if not _is_standard_calendar(calendar): raise OutOfBoundsDatetime( f"Cannot decode times from a non-standard calendar, {calendar!r}, using " "pandas." ) # Work around pandas.to_timedelta issue with dtypes smaller than int64 and # NumPy 2.0 by casting all int and uint data to int64 and uint64, # respectively. See https://github.com/pandas-dev/pandas/issues/56996 for # more details. 
if flat_num_dates.dtype.kind == "i": flat_num_dates = flat_num_dates.astype(np.int64) elif flat_num_dates.dtype.kind == "u": flat_num_dates = flat_num_dates.astype(np.uint64) try: time_unit, ref_date = _unpack_time_unit_and_ref_date(units) ref_date = _align_reference_date_and_unit(ref_date, time_unit) # here the highest wanted resolution is set ref_date = _align_reference_date_and_unit(ref_date, time_resolution) except ValueError as err: # ValueError is raised by pd.Timestamp for non-ISO timestamp # strings, in which case we fall back to using cftime raise OutOfBoundsDatetime from err _check_date_is_after_shift(ref_date, calendar) with warnings.catch_warnings(): warnings.filterwarnings("ignore", "invalid value encountered", RuntimeWarning) if flat_num_dates.size > 0: # avoid size 0 datetimes GH1329 _check_date_for_units_since_refdate( flat_num_dates.min(), time_unit, ref_date ) _check_date_for_units_since_refdate( flat_num_dates.max(), time_unit, ref_date ) # To avoid integer overflow when converting to nanosecond units for integer # dtypes smaller than np.int64 cast all integer and unsigned integer dtype # arrays to np.int64 (GH 2002, GH 6589). Note this is safe even in the case # of np.uint64 values, because any np.uint64 value that would lead to # overflow when converting to np.int64 would not be representable with a # timedelta64 value, and therefore would raise an error in the lines above. if flat_num_dates.dtype.kind in "iu": flat_num_dates = flat_num_dates.astype(np.int64) elif flat_num_dates.dtype.kind in "f": flat_num_dates = flat_num_dates.astype(np.float64) timedeltas = _numbers_to_timedelta( flat_num_dates, time_unit, ref_date.unit, "datetimes" ) # add timedeltas to ref_date return ref_date + timedeltas def decode_cf_datetime( num_dates, units: str, calendar: str | None = None, use_cftime: bool | None = None, time_unit: PDDatetimeUnitOptions = "ns", ) -> np.ndarray: """Given an array of numeric dates in netCDF format, convert it into a numpy array of date time objects. For standard (Gregorian) calendars, this function uses vectorized operations, which makes it much faster than cftime.num2date. In such a case, the returned array will be of type np.datetime64. Note that time unit in `units` must not be smaller than microseconds and not larger than days. See Also -------- cftime.num2date """ num_dates = to_numpy(num_dates) flat_num_dates = ravel(num_dates) if calendar is None: calendar = "standard" if use_cftime is None: try: dates = _decode_datetime_with_pandas( flat_num_dates, units, calendar, time_unit ) except (KeyError, OutOfBoundsDatetime, OutOfBoundsTimedelta, OverflowError): dates = _decode_datetime_with_cftime( flat_num_dates.astype(float), units, calendar ) # retrieve cftype dates_min = dates[np.nanargmin(num_dates)] dates_max = dates[np.nanargmax(num_dates)] cftype = type(dates_min) # create first day of gregorian calendar in current cf calendar type border = cftype(1582, 10, 15) # "ns" borders # between ['1677-09-21T00:12:43.145224193', '2262-04-11T23:47:16.854775807'] lower = cftype(1677, 9, 21, 0, 12, 43, 145224) upper = cftype(2262, 4, 11, 23, 47, 16, 854775) if dates_min < border: if _is_standard_calendar(calendar): emit_user_level_warning( "Unable to decode time axis into full " "numpy.datetime64 objects, continuing using " "cftime.datetime objects instead, reason: dates prior " "reform date (1582-10-15). 
To silence this warning specify " "'use_cftime=True'.", SerializationWarning, ) elif time_unit == "ns" and (dates_min < lower or dates_max > upper): emit_user_level_warning( "Unable to decode time axis into full " "numpy.datetime64[ns] objects, continuing using " "cftime.datetime objects instead, reason: dates out " "of range. To silence this warning use a coarser resolution " "'time_unit' or specify 'use_cftime=True'.", SerializationWarning, ) elif _is_standard_calendar(calendar): dates = cftime_to_nptime(dates, time_unit=time_unit) elif use_cftime: dates = _decode_datetime_with_cftime(flat_num_dates, units, calendar) else: dates = _decode_datetime_with_pandas(flat_num_dates, units, calendar, time_unit) return reshape(dates, num_dates.shape) def to_datetime_unboxed(value, **kwargs): result = pd.to_datetime(value, **kwargs).to_numpy() assert np.issubdtype(result.dtype, "datetime64") return result def _numbers_to_timedelta( flat_num: np.ndarray, time_unit: NPDatetimeUnitOptions, ref_unit: PDDatetimeUnitOptions, datatype: str, target_unit: PDDatetimeUnitOptions | None = None, ) -> np.ndarray: """Transform numbers to np.timedelta64.""" # keep NaT/nan mask if flat_num.dtype.kind == "f": nan = np.asarray(np.isnan(flat_num)) elif flat_num.dtype.kind == "i": nan = np.asarray(flat_num == np.iinfo(np.int64).min) elif flat_num.dtype.kind == "u": nan = np.broadcast_to(np.asarray(False), flat_num.shape) # in case we need to change the unit, we fix the numbers here # this should be safe, as errors would have been raised above ns_time_unit = _NS_PER_TIME_DELTA[time_unit] ns_ref_date_unit = _NS_PER_TIME_DELTA[ref_unit] if ns_time_unit > ns_ref_date_unit: flat_num = np.asarray(flat_num * np.int64(ns_time_unit / ns_ref_date_unit)) time_unit = ref_unit # estimate fitting resolution for floating point values # this iterates until all floats are fractionless or time_unit == "ns" if flat_num.dtype.kind == "f" and time_unit != "ns": flat_num, new_time_unit = _check_higher_resolution( flat_num, cast(PDDatetimeUnitOptions, time_unit) ) if time_unit != new_time_unit: if target_unit is None or np.timedelta64(1, target_unit) > np.timedelta64( 1, new_time_unit ): if datatype == "datetimes": kwarg = "decode_times" coder = "CFDatetimeCoder" else: kwarg = "decode_timedelta" coder = "CFTimedeltaCoder" formatted_kwarg = f"{kwarg}={coder}(time_unit={new_time_unit!r})" message = ( f"Can't decode floating point {datatype} to {time_unit!r} " f"without precision loss; decoding to {new_time_unit!r} " f"instead. To silence this warning pass {formatted_kwarg} " f"to your opening function." ) emit_user_level_warning(message, SerializationWarning) time_unit = new_time_unit # Cast input ordinals to integers and properly handle NaN/NaT # to prevent casting NaN to int with warnings.catch_warnings(): warnings.simplefilter("ignore", RuntimeWarning) flat_num = flat_num.astype(np.int64) if nan.any(): flat_num[nan] = np.iinfo(np.int64).min # cast to wanted type return flat_num.astype(f"timedelta64[{time_unit}]") def decode_cf_timedelta( num_timedeltas, units: str, time_unit: PDDatetimeUnitOptions = "ns" ) -> np.ndarray: """Given an array of numeric timedeltas in netCDF format, convert it into a numpy timedelta64 ["s", "ms", "us", "ns"] array. 
""" num_timedeltas = to_numpy(num_timedeltas) unit = _netcdf_to_numpy_timeunit(units) # special case empty arrays is_empty_array = num_timedeltas.size == 0 with warnings.catch_warnings(): warnings.filterwarnings("ignore", "All-NaN slice encountered", RuntimeWarning) if not is_empty_array: _check_timedelta_range(np.nanmin(num_timedeltas), unit, time_unit) _check_timedelta_range(np.nanmax(num_timedeltas), unit, time_unit) timedeltas = _numbers_to_timedelta( num_timedeltas, unit, "s", "timedeltas", target_unit=time_unit ) pd_timedeltas = pd.to_timedelta(ravel(timedeltas)) if not is_empty_array and np.isnat(timedeltas).all(): empirical_unit = time_unit else: empirical_unit = pd_timedeltas.unit if is_empty_array or np.timedelta64(1, time_unit) > np.timedelta64( 1, empirical_unit ): time_unit = empirical_unit if time_unit not in {"s", "ms", "us", "ns"}: raise ValueError( f"time_unit must be one of 's', 'ms', 'us', or 'ns'. Got: {time_unit}" ) result = pd_timedeltas.as_unit(time_unit).to_numpy() return reshape(result, num_timedeltas.shape) def _unit_timedelta_cftime(units: str) -> timedelta: return timedelta(microseconds=_US_PER_TIME_DELTA[units]) def _unit_timedelta_numpy(units: str) -> np.timedelta64: numpy_units = _netcdf_to_numpy_timeunit(units) return np.timedelta64(1, numpy_units) def _infer_time_units_from_diff(unique_timedeltas) -> str: # todo: check, if this function works correctly wrt np.timedelta64 unit_timedelta: Callable[[str], timedelta] | Callable[[str], np.timedelta64] zero_timedelta: timedelta | np.timedelta64 unique_timedeltas = asarray(unique_timedeltas) if unique_timedeltas.dtype == np.dtype("O"): time_units = _NETCDF_TIME_UNITS_CFTIME unit_timedelta = _unit_timedelta_cftime zero_timedelta = timedelta(microseconds=0) else: time_units = _NETCDF_TIME_UNITS_NUMPY unit_timedelta = _unit_timedelta_numpy zero_timedelta = np.timedelta64(0, "ns") for time_unit in time_units: if array_all(unique_timedeltas % unit_timedelta(time_unit) == zero_timedelta): return time_unit return "seconds" def _time_units_to_timedelta(units: str) -> timedelta: return timedelta(microseconds=_US_PER_TIME_DELTA[units]) def infer_calendar_name(dates) -> CFCalendar: """Given an array of datetimes, infer the CF calendar name""" if is_np_datetime_like(dates.dtype): return "proleptic_gregorian" elif dates.dtype == np.dtype("O") and dates.size > 0: # Logic copied from core.common.contains_cftime_datetimes. if cftime is not None: sample = np.asarray(dates).flat[0] if is_duck_dask_array(sample): sample = sample.compute() if isinstance(sample, np.ndarray): sample = sample.item() if isinstance(sample, cftime.datetime): return sample.calendar # Error raise if dtype is neither datetime or "O", if cftime is not importable, and if element of 'O' dtype is not cftime. 
raise ValueError("Array does not contain datetime objects.") def infer_datetime_units(dates) -> str: """Given an array of datetimes, returns a CF compatible time-unit string of the form "{time_unit} since {date[0]}", where `time_unit` is 'days', 'hours', 'minutes' or 'seconds' (the first one that can evenly divide all unique time deltas in `dates`) """ dates = ravel(np.asarray(dates)) if np.issubdtype(np.asarray(dates).dtype, "datetime64"): dates = to_datetime_unboxed(dates) dates = dates[pd.notnull(dates)] reference_date = dates[0] if len(dates) > 0 else "1970-01-01" reference_date = pd.Timestamp(reference_date) else: reference_date = dates[0] if len(dates) > 0 else "1970-01-01" reference_date = format_cftime_datetime(reference_date) unique_timedeltas = np.unique(np.diff(dates)) units = _infer_time_units_from_diff(unique_timedeltas) return f"{units} since {reference_date}" def format_cftime_datetime(date) -> str: """Converts a cftime.datetime object to a string with the format: YYYY-MM-DD HH:MM:SS.UUUUUU """ return f"{date.year:04d}-{date.month:02d}-{date.day:02d} {date.hour:02d}:{date.minute:02d}:{date.second:02d}.{date.microsecond:06d}" def infer_timedelta_units(deltas) -> str: """Given an array of timedeltas, returns a CF compatible time-unit from {'days', 'hours', 'minutes' 'seconds'} (the first one that can evenly divide all unique time deltas in `deltas`) """ deltas = ravel(deltas) unique_timedeltas = np.unique(deltas[pd.notnull(deltas)]) return _infer_time_units_from_diff(unique_timedeltas) def cftime_to_nptime( times, raise_on_invalid: bool = True, time_unit: PDDatetimeUnitOptions = "ns" ) -> np.ndarray: """Given an array of cftime.datetime objects, return an array of numpy.datetime64 objects of the same size If raise_on_invalid is True (default), invalid dates trigger a ValueError. Otherwise, the invalid element is replaced by np.NaT.""" times = np.asarray(times) new = [] dt: np.datetime64 for _i, t in np.ndenumerate(times): try: # We expect either "us" resolution or "s" resolution depending on # whether 'microseconds' are defined for the input or not. dt = ( pd.Timestamp(np.datetime64(t.isoformat())).as_unit(time_unit).to_numpy() ) except ValueError as e: if raise_on_invalid: raise ValueError( f"Cannot convert date {t} to a date in the " f"standard calendar. Reason: {e}." ) from e else: dt = np.datetime64("NaT") new.append(dt) return np.asarray(new).reshape(times.shape) def convert_times(times, date_type, raise_on_invalid: bool = True) -> np.ndarray: """Given an array of datetimes, return the same dates in another cftime or numpy date type. Useful to convert between calendars in numpy and cftime or between cftime calendars. If raise_on_valid is True (default), invalid dates trigger a ValueError. Otherwise, the invalid element is replaced by np.nan for cftime types and np.NaT for np.datetime64. """ if date_type in (pd.Timestamp, np.datetime64) and not is_np_datetime_like( times.dtype ): return cftime_to_nptime(times, raise_on_invalid=raise_on_invalid) if is_np_datetime_like(times.dtype): # Convert datetime64 objects to Timestamps since those have year, month, day, etc. attributes times = pd.DatetimeIndex(times) new = np.empty(times.shape, dtype="O") for i, t in enumerate(times): try: dt = date_type( t.year, t.month, t.day, t.hour, t.minute, t.second, t.microsecond ) except ValueError as e: if raise_on_invalid: raise ValueError( f"Cannot convert date {t} to a date in the " f"{date_type(2000, 1, 1).calendar} calendar. Reason: {e}." 
) from e else: dt = np.nan new[i] = dt return new def convert_time_or_go_back(date, date_type): """Convert a single date to a new date_type (cftime.datetime or pd.Timestamp). If the new date is invalid, it goes back a day and tries again. If it is still invalid, goes back a second day. This is meant to convert end-of-month dates into a new calendar. """ if date_type == pd.Timestamp: date_type = default_precision_timestamp try: return date_type( date.year, date.month, date.day, date.hour, date.minute, date.second, date.microsecond, ) except OutOfBoundsDatetime: raise except ValueError: # Day is invalid, happens at the end of months, try again the day before try: return date_type( date.year, date.month, date.day - 1, date.hour, date.minute, date.second, date.microsecond, ) except ValueError: # Still invalid, happens for 360_day to non-leap february. Try again 2 days before date. return date_type( date.year, date.month, date.day - 2, date.hour, date.minute, date.second, date.microsecond, ) def _should_cftime_be_used( source, target_calendar: str, use_cftime: bool | None ) -> bool: """Return whether conversion of the source to the target calendar should result in a cftime-backed array. Source is a 1D datetime array, target_cal a string (calendar name) and use_cftime is a boolean or None. If use_cftime is None, this returns True if the source's range and target calendar are convertible to np.datetime64 objects. """ # Arguments Checks for target if use_cftime is not True: if _is_standard_calendar(target_calendar): if _is_numpy_compatible_time_range(source): # Conversion is possible with pandas, force False if it was None return False elif use_cftime is False: raise ValueError( "Source time range is not valid for numpy datetimes. Try using `use_cftime=True`." ) elif use_cftime is False: raise ValueError( f"Calendar '{target_calendar}' is only valid with cftime. Try using `use_cftime=True`." ) return True def _cleanup_netcdf_time_units(units: str) -> str: time_units, ref_date = _unpack_netcdf_time_units(units) time_units = time_units.lower() if not time_units.endswith("s"): time_units = f"{time_units}s" # don't worry about reifying the units if they're out of bounds or # formatted badly with contextlib.suppress(OutOfBoundsDatetime, ValueError): units = f"{time_units} since {format_timestamp(ref_date)}" return units def _encode_datetime_with_cftime(dates, units: str, calendar: str) -> np.ndarray: """Fallback method for encoding dates using cftime. This method is more flexible than xarray's parsing using datetime64[ns] arrays but also slower because it loops over each element. """ if TYPE_CHECKING: import cftime else: cftime = attempt_import("cftime") dates = np.asarray(dates) original_shape = dates.shape if np.issubdtype(dates.dtype, np.datetime64): # numpy's broken datetime conversion only works for us precision dates = dates.astype("M8[us]").astype(datetime) dates = np.atleast_1d(dates) # Find all the None position none_position = dates == None # noqa: E711 filtered_dates = dates[~none_position] # Since netCDF files do not support storing float128 values, we ensure # that float64 values are used by setting longdouble=False in num2date. # This try except logic can be removed when xarray's minimum version of # cftime is at least 1.6.2. 
try: encoded_nums = cftime.date2num( filtered_dates, units, calendar, longdouble=False ) except TypeError: encoded_nums = cftime.date2num(filtered_dates, units, calendar) if filtered_dates.size == none_position.size: return encoded_nums.reshape(original_shape) # Create a full matrix of NaN # And fill the num dates in the not NaN or None position result = np.full(dates.shape, np.nan) result[np.nonzero(~none_position)] = encoded_nums return result.reshape(original_shape) def cast_to_int_if_safe(num) -> np.ndarray: int_num = np.asarray(num, dtype=np.int64) if array_all(num == int_num): num = int_num return num def _division(deltas, delta, floor): if floor: # calculate int64 floor division # to preserve integer dtype if possible (GH 4045, GH7817). num = deltas // delta.astype(np.int64) num = num.astype(np.int64, copy=False) else: num = deltas / delta return num def encode_cf_datetime( dates: T_DuckArray, # type: ignore[misc] units: str | None = None, calendar: str | None = None, dtype: np.dtype | None = None, ) -> tuple[T_DuckArray, str, str]: """Given an array of datetime objects, returns the tuple `(num, units, calendar)` suitable for a CF compliant time variable. Unlike `date2num`, this function can handle datetime64 arrays. See Also -------- cftime.date2num """ dates = asarray(dates) if is_chunked_array(dates): return _lazily_encode_cf_datetime(dates, units, calendar, dtype) else: return _eagerly_encode_cf_datetime(dates, units, calendar, dtype) def _infer_needed_units_numpy(ref_date, data_units): needed_units, data_ref_date = _unpack_time_unit_and_ref_date(data_units) needed_units = _numpy_to_netcdf_timeunit(needed_units) ref_delta = abs(data_ref_date - ref_date).to_timedelta64() data_delta = _unit_timedelta_numpy(needed_units) if (ref_delta % data_delta) > np.timedelta64(0, "ns"): needed_units = _infer_time_units_from_diff(ref_delta) return needed_units def _infer_needed_units_cftime(ref_date, data_units, calendar): needed_units, data_ref_date = _unpack_time_units_and_ref_date_cftime( data_units, calendar ) ref_delta = abs(data_ref_date - ref_date) data_delta = _time_units_to_timedelta(needed_units) if (ref_delta % data_delta) > timedelta(seconds=0): needed_units = _infer_time_units_from_diff(ref_delta) return needed_units def _eagerly_encode_cf_datetime( dates: T_DuckArray, # type: ignore[misc] units: str | None = None, calendar: str | None = None, dtype: np.dtype | None = None, allow_units_modification: bool = True, ) -> tuple[T_DuckArray, str, str]: dates = asarray(dates) data_units = infer_datetime_units(dates) if units is None: units = data_units else: units = _cleanup_netcdf_time_units(units) if calendar is None: calendar = infer_calendar_name(dates) raise_incompatible_units_error = False raise_gregorian_proleptic_gregorian_mismatch_error = False try: if not _is_standard_calendar(calendar) or dates.dtype.kind == "O": # parse with cftime instead raise OutOfBoundsDatetime assert np.issubdtype(dates.dtype, "datetime64") if ( calendar in ["standard", "gregorian"] and dates.size > 0 and np.nanmin(dates).astype("=M8[us]").astype(datetime) < datetime(1582, 10, 15) ): raise_gregorian_proleptic_gregorian_mismatch_error = True time_unit, ref_date = _unpack_time_unit_and_ref_date(units) # calendar equivalence only for days after the reform _check_date_is_after_shift(ref_date, calendar) time_delta = np.timedelta64(1, time_unit) # Wrap the dates in a DatetimeIndex to do the subtraction to ensure # an OverflowError is raised if the ref_date is too far away from # dates to be encoded (GH 2272). 
# DatetimeIndex will convert to units of ["s", "ms", "us", "ns"] dates_as_index = pd.DatetimeIndex(ravel(dates)) time_deltas = dates_as_index - ref_date # retrieve needed units to faithfully encode to int64 needed_units = _infer_needed_units_numpy(ref_date, data_units) needed_time_delta = _unit_timedelta_numpy(needed_units) floor_division = np.issubdtype(dtype, np.integer) or dtype is None if time_delta > needed_time_delta: floor_division = False if dtype is None: emit_user_level_warning( f"Times can't be serialized faithfully to int64 with requested units {units!r}. " f"Resolution of {needed_units!r} needed. Serializing times to floating point instead. " f"Set encoding['dtype'] to integer dtype to serialize to int64. " f"Set encoding['dtype'] to floating point dtype to silence this warning." ) elif np.issubdtype(dtype, np.integer) and allow_units_modification: new_units = f"{needed_units} since {format_timestamp(ref_date)}" emit_user_level_warning( f"Times can't be serialized faithfully to int64 with requested units {units!r}. " f"Serializing with units {new_units!r} instead. " f"Set encoding['dtype'] to floating point dtype to serialize with units {units!r}. " f"Set encoding['units'] to {new_units!r} to silence this warning ." ) units = new_units time_delta = needed_time_delta floor_division = True elif np.issubdtype(dtype, np.integer) and not allow_units_modification: new_units = f"{needed_units} since {format_timestamp(ref_date)}" raise_incompatible_units_error = True # get resolution of TimedeltaIndex and align time_delta # todo: check, if this works in any case num = _division( time_deltas, time_delta.astype(f"=m8[{time_deltas.unit}]"), floor_division ) num = reshape(num.values, dates.shape) except (OutOfBoundsDatetime, OverflowError, ValueError): time_units, ref_date = _unpack_time_units_and_ref_date_cftime(units, calendar) time_delta_cftime = _time_units_to_timedelta(time_units) needed_units = _infer_needed_units_cftime(ref_date, data_units, calendar) needed_time_delta_cftime = _time_units_to_timedelta(needed_units) if ( np.issubdtype(dtype, np.integer) and time_delta_cftime > needed_time_delta_cftime ): new_units = f"{needed_units} since {format_cftime_datetime(ref_date)}" if allow_units_modification: emit_user_level_warning( f"Times can't be serialized faithfully to int64 with requested units {units!r}. " f"Serializing with units {new_units!r} instead. " f"Set encoding['dtype'] to floating point dtype to serialize with units {units!r}. " f"Set encoding['units'] to {new_units!r} to silence this warning ." ) units = new_units else: raise_incompatible_units_error = True num = _encode_datetime_with_cftime(dates, units, calendar) # do it now only for cftime-based flow # we already covered for this in pandas-based flow num = cast_to_int_if_safe(num) if raise_incompatible_units_error: raise ValueError( f"Times can't be serialized faithfully to int64 with requested units {units!r}. " f"Consider setting encoding['dtype'] to a floating point dtype to serialize with " f"units {units!r}. Consider setting encoding['units'] to {new_units!r} to " f"serialize with an integer dtype." ) if raise_gregorian_proleptic_gregorian_mismatch_error: raise ValueError( f"Unable to encode np.datetime64 values with {calendar} " f"calendar, because some or all values are prior to the reform " f"date of 1582-10-15. To encode these times, set " f"encoding['calendar'] to 'proleptic_gregorian' instead, which " f"is the true calendar that np.datetime64 values use. 
The " f"'standard' or 'gregorian' calendar is only equivalent to the " f"'proleptic_gregorian' calendar after the reform date." ) return num, units, calendar def _encode_cf_datetime_within_map_blocks( dates: T_DuckArray, # type: ignore[misc] units: str, calendar: str, dtype: np.dtype, ) -> T_DuckArray: num, *_ = _eagerly_encode_cf_datetime( dates, units, calendar, dtype, allow_units_modification=False ) return num def _lazily_encode_cf_datetime( dates: T_ChunkedArray, units: str | None = None, calendar: str | None = None, dtype: np.dtype | None = None, ) -> tuple[T_ChunkedArray, str, str]: if calendar is None: # This will only trigger minor compute if dates is an object dtype array. calendar = infer_calendar_name(dates) if units is None and dtype is None: if dates.dtype == "O": units = "microseconds since 1970-01-01" dtype = np.dtype("int64") else: netcdf_unit = _numpy_dtype_to_netcdf_timeunit(dates.dtype) units = f"{netcdf_unit} since 1970-01-01" dtype = np.dtype("int64") if units is None or dtype is None: raise ValueError( f"When encoding chunked arrays of datetime values, both the units " f"and dtype must be prescribed or both must be unprescribed. " f"Prescribing only one or the other is not currently supported. " f"Got a units encoding of {units} and a dtype encoding of {dtype}." ) chunkmanager = get_chunked_array_type(dates) num = chunkmanager.map_blocks( _encode_cf_datetime_within_map_blocks, dates, units, calendar, dtype, dtype=dtype, ) return num, units, calendar def encode_cf_timedelta( timedeltas: T_DuckArray, # type: ignore[misc] units: str | None = None, dtype: np.dtype | None = None, ) -> tuple[T_DuckArray, str]: timedeltas = asarray(timedeltas) if is_chunked_array(timedeltas): return _lazily_encode_cf_timedelta(timedeltas, units, dtype) else: return _eagerly_encode_cf_timedelta(timedeltas, units, dtype) def _eagerly_encode_cf_timedelta( timedeltas: T_DuckArray, # type: ignore[misc] units: str | None = None, dtype: np.dtype | None = None, allow_units_modification: bool = True, ) -> tuple[T_DuckArray, str]: data_units = infer_timedelta_units(timedeltas) if units is None: units = data_units # units take precedence in the case of zero-size array if timedeltas.size == 0: data_units = units time_delta = _unit_timedelta_numpy(units) time_deltas = pd.TimedeltaIndex(ravel(timedeltas)) # get resolution of TimedeltaIndex and align time_delta deltas_unit = time_deltas.unit time_delta = time_delta.astype(f"=m8[{deltas_unit}]") # retrieve needed units to faithfully encode to int64 needed_units = data_units if data_units != units: needed_units = _infer_time_units_from_diff(np.unique(time_deltas.dropna())) # needed time delta to encode faithfully to int64 needed_time_delta = _unit_timedelta_numpy(needed_units) floor_division = np.issubdtype(dtype, np.integer) or dtype is None if time_delta > needed_time_delta: floor_division = False if dtype is None: emit_user_level_warning( f"Timedeltas can't be serialized faithfully to int64 with requested units {units!r}. " f"Resolution of {needed_units!r} needed. Serializing timeseries to floating point instead. " f"Set encoding['dtype'] to integer dtype to serialize to int64. " f"Set encoding['dtype'] to floating point dtype to silence this warning." ) elif np.issubdtype(dtype, np.integer) and allow_units_modification: emit_user_level_warning( f"Timedeltas can't be serialized faithfully with requested units {units!r}. " f"Serializing with units {needed_units!r} instead. 
" f"Set encoding['dtype'] to floating point dtype to serialize with units {units!r}. " f"Set encoding['units'] to {needed_units!r} to silence this warning ." ) units = needed_units time_delta = needed_time_delta time_delta = time_delta.astype(f"=m8[{deltas_unit}]") floor_division = True elif np.issubdtype(dtype, np.integer) and not allow_units_modification: raise ValueError( f"Timedeltas can't be serialized faithfully to int64 with requested units {units!r}. " f"Consider setting encoding['dtype'] to a floating point dtype to serialize with " f"units {units!r}. Consider setting encoding['units'] to {needed_units!r} to " f"serialize with an integer dtype." ) num = _division(time_deltas, time_delta, floor_division) num = reshape(num.values, timedeltas.shape) return num, units def _encode_cf_timedelta_within_map_blocks( timedeltas: T_DuckArray, # type: ignore[misc] units: str, dtype: np.dtype, ) -> T_DuckArray: num, _ = _eagerly_encode_cf_timedelta( timedeltas, units, dtype, allow_units_modification=False ) return num def _lazily_encode_cf_timedelta( timedeltas: T_ChunkedArray, units: str | None = None, dtype: np.dtype | None = None ) -> tuple[T_ChunkedArray, str]: if units is None and dtype is None: units = _numpy_dtype_to_netcdf_timeunit(timedeltas.dtype) dtype = np.dtype("int64") if units is None or dtype is None: raise ValueError( f"When encoding chunked arrays of timedelta values, both the " f"units and dtype must be prescribed or both must be " f"unprescribed. Prescribing only one or the other is not " f"currently supported. Got a units encoding of {units} and a " f"dtype encoding of {dtype}." ) chunkmanager = get_chunked_array_type(timedeltas) num = chunkmanager.map_blocks( _encode_cf_timedelta_within_map_blocks, timedeltas, units, dtype, dtype=dtype, ) return num, units class CFDatetimeCoder(VariableCoder): """Coder for CF Datetime coding. Parameters ---------- use_cftime : bool, optional Only relevant if encoded dates come from a standard calendar (e.g. "gregorian", "proleptic_gregorian", "standard", or not specified). If None (default), attempt to decode times to ``np.datetime64`` objects; if this is not possible, decode times to ``cftime.datetime`` objects. If True, always decode times to ``cftime.datetime`` objects, regardless of whether or not they can be represented using ``np.datetime64`` objects. If False, always decode times to ``np.datetime64`` objects; if this is not possible raise an error. May not be supported by all the backends. time_unit : PDDatetimeUnitOptions Target resolution when decoding dates. Defaults to "ns". 
""" def __init__( self, use_cftime: bool | None = None, time_unit: PDDatetimeUnitOptions = "ns", ) -> None: self.use_cftime = use_cftime self.time_unit = time_unit def encode(self, variable: Variable, name: T_Name = None) -> Variable: if np.issubdtype(variable.dtype, np.datetime64) or contains_cftime_datetimes( variable ): dims, data, attrs, encoding = unpack_for_encoding(variable) units = encoding.pop("units", None) calendar = encoding.pop("calendar", None) dtype = encoding.get("dtype", None) # in the case of packed data we need to encode into # float first, the correct dtype will be established # via CFScaleOffsetCoder/CFMaskCoder if "add_offset" in encoding or "scale_factor" in encoding: dtype = data.dtype if data.dtype.kind == "f" else "float64" (data, units, calendar) = encode_cf_datetime(data, units, calendar, dtype) safe_setitem(attrs, "units", units, name=name) safe_setitem(attrs, "calendar", calendar, name=name) return Variable(dims, data, attrs, encoding, fastpath=True) else: return variable def decode(self, variable: Variable, name: T_Name = None) -> Variable: units = variable.attrs.get("units", None) if isinstance(units, str) and "since" in units: dims, data, attrs, encoding = unpack_for_decoding(variable) units = pop_to(attrs, encoding, "units") calendar = pop_to(attrs, encoding, "calendar") dtype = _decode_cf_datetime_dtype( data, units, calendar, self.use_cftime, self.time_unit ) transform = partial( decode_cf_datetime, units=units, calendar=calendar, use_cftime=self.use_cftime, time_unit=self.time_unit, ) data = lazy_elemwise_func(data, transform, dtype) return Variable(dims, data, attrs, encoding, fastpath=True) else: return variable def has_timedelta64_encoding_dtype(attrs_or_encoding: dict) -> bool: dtype = attrs_or_encoding.get("dtype") return isinstance(dtype, str) and dtype.startswith("timedelta64") def resolve_time_unit_from_attrs_dtype( attrs_dtype: str, name: T_Name ) -> PDDatetimeUnitOptions: dtype = np.dtype(attrs_dtype) resolution, _ = np.datetime_data(dtype) resolution = cast(NPDatetimeUnitOptions, resolution) time_unit: PDDatetimeUnitOptions if np.timedelta64(1, resolution) > np.timedelta64(1, "s"): time_unit = "s" message = ( f"Following pandas, xarray only supports decoding to timedelta64 " f"values with a resolution of 's', 'ms', 'us', or 'ns'. Encoded " f"values for variable {name!r} have a resolution of " f"{resolution!r}. Attempting to decode to a resolution of 's'. " f"Note, depending on the encoded values, this may lead to an " f"OverflowError. Additionally, data will not be identically round " f"tripped; xarray will choose an encoding dtype of " f"'timedelta64[s]' when re-encoding." ) emit_user_level_warning(message) elif np.timedelta64(1, resolution) < np.timedelta64(1, "ns"): time_unit = "ns" message = ( f"Following pandas, xarray only supports decoding to timedelta64 " f"values with a resolution of 's', 'ms', 'us', or 'ns'. Encoded " f"values for variable {name!r} have a resolution of " f"{resolution!r}. Attempting to decode to a resolution of 'ns'. " f"Note, depending on the encoded values, this may lead to loss of " f"precision. Additionally, data will not be identically round " f"tripped; xarray will choose an encoding dtype of " f"'timedelta64[ns]' when re-encoding." ) emit_user_level_warning(message) else: time_unit = cast(PDDatetimeUnitOptions, resolution) return time_unit class CFTimedeltaCoder(VariableCoder): """Coder for CF Timedelta coding. 
Parameters ---------- time_unit : PDDatetimeUnitOptions Target resolution when decoding timedeltas via units. Defaults to "ns". When decoding via dtype, the resolution is specified in the dtype attribute, so this parameter is ignored. decode_via_units : bool Whether to decode timedeltas based on the presence of a timedelta-like units attribute, e.g. "seconds". Defaults to True, but in the future will default to False. decode_via_dtype : bool Whether to decode timedeltas based on the presence of an np.timedelta64 dtype attribute, e.g. "timedelta64[s]". Defaults to True. """ def __init__( self, time_unit: PDDatetimeUnitOptions | None = None, decode_via_units: bool = True, decode_via_dtype: bool = True, ) -> None: self.time_unit = time_unit self.decode_via_units = decode_via_units self.decode_via_dtype = decode_via_dtype self._emit_decode_timedelta_future_warning = False def encode(self, variable: Variable, name: T_Name = None) -> Variable: if np.issubdtype(variable.dtype, np.timedelta64): dims, data, attrs, encoding = unpack_for_encoding(variable) dtype = encoding.get("dtype", None) units = encoding.pop("units", None) # in the case of packed data we need to encode into # float first, the correct dtype will be established # via CFScaleOffsetCoder/CFMaskCoder if "add_offset" in encoding or "scale_factor" in encoding: dtype = data.dtype if data.dtype.kind == "f" else "float64" resolution, _ = np.datetime_data(variable.dtype) attrs_dtype = f"timedelta64[{resolution}]" safe_setitem(attrs, "dtype", attrs_dtype, name=name) data, units = encode_cf_timedelta(data, units, dtype) safe_setitem(attrs, "units", units, name=name) return Variable(dims, data, attrs, encoding, fastpath=True) else: return variable def decode(self, variable: Variable, name: T_Name = None) -> Variable: units = variable.attrs.get("units", None) has_timedelta_units = isinstance(units, str) and units in TIME_UNITS has_timedelta_dtype = has_timedelta64_encoding_dtype(variable.attrs) is_dtype_decodable = has_timedelta_units and has_timedelta_dtype is_units_decodable = has_timedelta_units if (is_dtype_decodable and self.decode_via_dtype) or ( is_units_decodable and self.decode_via_units ): dims, data, attrs, encoding = unpack_for_decoding(variable) units = pop_to(attrs, encoding, "units") if is_dtype_decodable: attrs_dtype = attrs.pop("dtype") if self.time_unit is None: time_unit = resolve_time_unit_from_attrs_dtype(attrs_dtype, name) else: time_unit = self.time_unit else: if self._emit_decode_timedelta_future_warning: var_string = f"the variable {name!r}" if name else "" emit_user_level_warning( "In a future version, xarray will not decode " f"{var_string} into a timedelta64 dtype based on the " "presence of a timedelta-like 'units' attribute by " "default. Instead it will rely on the presence of a " "timedelta64 'dtype' attribute, which is now xarray's " "default way of encoding timedelta64 values.\n" "To continue decoding into a timedelta64 dtype, either " "set `decode_timedelta=True` when opening this " "dataset, or add the attribute " "`dtype='timedelta64[ns]'` to this variable on disk.\n" "To opt-in to future behavior, set " "`decode_timedelta=False`.", FutureWarning, ) if self.time_unit is None: time_unit = "ns" else: time_unit = self.time_unit # Handle edge case that decode_via_dtype=False and # decode_via_units=True, and timedeltas were encoded with a # dtype attribute. We need to remove the dtype attribute # to prevent an error during round tripping. 
if has_timedelta_dtype: attrs.pop("dtype") dtype = np.dtype(f"timedelta64[{time_unit}]") transform = partial(decode_cf_timedelta, units=units, time_unit=time_unit) data = lazy_elemwise_func(data, transform, dtype=dtype) return Variable(dims, data, attrs, encoding, fastpath=True) else: return variable python-xarray-2026.01.0/xarray/coding/cftime_offsets.py0000664000175000017500000017265315136607163023204 0ustar alastairalastair"""Time offset classes for use with cftime.datetime objects""" # The offset classes and mechanisms for generating time ranges defined in # this module were copied/adapted from those defined in pandas. See in # particular the objects and methods defined in pandas.tseries.offsets # and pandas.core.indexes.datetimes. # For reference, here is a copy of the pandas copyright notice: # (c) 2011-2012, Lambda Foundry, Inc. and PyData Development Team # All rights reserved. # Copyright (c) 2008-2011 AQR Capital Management, LLC # All rights reserved. # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above # copyright notice, this list of conditions and the following # disclaimer in the documentation and/or other materials provided # with the distribution. # * Neither the name of the copyright holder nor the names of any # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
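# Usage sketch (illustrative comment only, not executed at import time):
# the offset objects produced by ``to_offset`` below support arithmetic
# with ``cftime.datetime`` instances, e.g.
#
#   >>> import cftime
#   >>> from xarray.coding.cftime_offsets import to_offset
#   >>> to_offset("2MS") + cftime.DatetimeNoLeap(2000, 1, 15)  # doctest: +SKIP
#   cftime.DatetimeNoLeap(2000, 3, 1, 0, 0, 0, 0, has_year_zero=True)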
from __future__ import annotations import re import warnings from collections.abc import Mapping from datetime import datetime, timedelta from functools import partial from typing import TYPE_CHECKING, ClassVar, Literal, TypeVar, get_args import numpy as np import pandas as pd from packaging.version import Version from xarray.coding.cftimeindex import CFTimeIndex from xarray.coding.times import ( _is_standard_calendar, _parse_iso8601, _should_cftime_be_used, convert_time_or_go_back, format_cftime_datetime, ) from xarray.compat.pdcompat import ( count_not_none, default_precision_timestamp, ) from xarray.core.common import _contains_datetime_like_objects, is_np_datetime_like from xarray.core.types import InclusiveOptions from xarray.core.utils import attempt_import, emit_user_level_warning if TYPE_CHECKING: from xarray.core.types import ( PDDatetimeUnitOptions, Self, TypeAlias, ) DayOption: TypeAlias = Literal["start", "end"] T_FreqStr = TypeVar("T_FreqStr", str, None) def get_date_type(calendar, use_cftime=True): """Return the cftime date type for a given calendar name.""" if TYPE_CHECKING: import cftime else: cftime = attempt_import("cftime") if _is_standard_calendar(calendar) and not use_cftime: return default_precision_timestamp calendars = { "noleap": cftime.DatetimeNoLeap, "360_day": cftime.Datetime360Day, "365_day": cftime.DatetimeNoLeap, "366_day": cftime.DatetimeAllLeap, "gregorian": cftime.DatetimeGregorian, "proleptic_gregorian": cftime.DatetimeProlepticGregorian, "julian": cftime.DatetimeJulian, "all_leap": cftime.DatetimeAllLeap, "standard": cftime.DatetimeGregorian, } return calendars[calendar] class BaseCFTimeOffset: _freq: ClassVar[str | None] = None _day_option: ClassVar[DayOption | None] = None n: int def __init__(self, n: int = 1) -> None: if not isinstance(n, int): raise TypeError( "The provided multiple 'n' must be an integer. " f"Instead a value of type {type(n)!r} was provided." 
) self.n = n def rule_code(self) -> str | None: return self._freq def __eq__(self, other: object) -> bool: if not isinstance(other, BaseCFTimeOffset): return NotImplemented return self.n == other.n and self.rule_code() == other.rule_code() def __ne__(self, other: object) -> bool: return not self == other def __add__(self, other): return self.__apply__(other) def __sub__(self, other): if TYPE_CHECKING: import cftime else: cftime = attempt_import("cftime") if isinstance(other, cftime.datetime): raise TypeError("Cannot subtract a cftime.datetime from a time offset.") elif type(other) is type(self): return type(self)(self.n - other.n) else: return NotImplemented def __mul__(self, other: int) -> Self: if not isinstance(other, int): return NotImplemented return type(self)(n=other * self.n) def __neg__(self) -> Self: return self * -1 def __rmul__(self, other): return self.__mul__(other) def __radd__(self, other): return self.__add__(other) def __rsub__(self, other): if isinstance(other, BaseCFTimeOffset) and type(self) is not type(other): raise TypeError("Cannot subtract cftime offsets of differing types") return -self + other def __apply__(self, other): return NotImplemented def onOffset(self, date) -> bool: """Check if the given date is in the set of possible dates created using a length-one version of this offset class.""" test_date = (self + date) - self return date == test_date def rollforward(self, date): if self.onOffset(date): return date else: return date + type(self)() def rollback(self, date): if self.onOffset(date): return date else: return date - type(self)() def __str__(self): return f"<{type(self).__name__}: n={self.n}>" def __repr__(self): return str(self) def _get_offset_day(self, other): # subclass must implement `_day_option`; calling from the base class # will raise NotImplementedError. return _get_day_of_month(other, self._day_option) class Tick(BaseCFTimeOffset): # analogous https://github.com/pandas-dev/pandas/blob/ccb25ab1d24c4fb9691270706a59c8d319750870/pandas/_libs/tslibs/offsets.pyx#L806 def _next_higher_resolution(self) -> Tick: self_type = type(self) if self_type is Day: return Hour(self.n * 24) if self_type is Hour: return Minute(self.n * 60) if self_type is Minute: return Second(self.n * 60) if self_type is Second: return Millisecond(self.n * 1000) if self_type is Millisecond: return Microsecond(self.n * 1000) raise ValueError("Could not convert to integer offset at any resolution") def __mul__(self, other: int | float) -> Tick: if not isinstance(other, int | float): return NotImplemented if isinstance(other, float): n = other * self.n # If the new `n` is an integer, we can represent it using the # same BaseCFTimeOffset subclass as self, otherwise we need to move up # to a higher-resolution subclass if np.isclose(n % 1, 0): return type(self)(int(n)) new_self = self._next_higher_resolution() return new_self * other return type(self)(n=other * self.n) def as_timedelta(self) -> timedelta: """All Tick subclasses must implement an as_timedelta method.""" raise NotImplementedError def _get_day_of_month(other, day_option: DayOption) -> int: """Find the day in `other`'s month that satisfies a BaseCFTimeOffset's onOffset policy, as described by the `day_option` argument. 
Parameters ---------- other : cftime.datetime day_option : 'start', 'end' 'start': returns 1 'end': returns last day of the month Returns ------- day_of_month : int """ if day_option == "start": return 1 elif day_option == "end": return other.daysinmonth elif day_option is None: # Note: unlike `_shift_month`, _get_day_of_month does not # allow day_option = None raise NotImplementedError() raise ValueError(day_option) def _adjust_n_months(other_day, n, reference_day): """Adjust the number of times a monthly offset is applied based on the day of a given date, and the reference day provided. """ if n > 0 and other_day < reference_day: n = n - 1 elif n <= 0 and other_day > reference_day: n = n + 1 return n def _adjust_n_years(other, n, month, reference_day): """Adjust the number of times an annual offset is applied based on another date, and the reference day provided""" if n > 0: if other.month < month or (other.month == month and other.day < reference_day): n -= 1 elif other.month > month or (other.month == month and other.day > reference_day): n += 1 return n def _shift_month(date, months, day_option: DayOption = "start"): """Shift the date to a month start or end a given number of months away.""" _ = attempt_import("cftime") has_year_zero = date.has_year_zero year = date.year + (date.month + months) // 12 month = (date.month + months) % 12 if month == 0: month = 12 year -= 1 if not has_year_zero: if date.year < 0 <= year: year += 1 elif year <= 0 < date.year: year -= 1 # Silence warnings associated with generating dates with years < 1. with warnings.catch_warnings(): warnings.filterwarnings("ignore", message="this date/calendar/year zero") if day_option == "start": day = 1 elif day_option == "end": reference = type(date)(year, month, 1, has_year_zero=has_year_zero) day = reference.daysinmonth else: raise ValueError(day_option) return date.replace(year=year, month=month, day=day) def roll_qtrday( other, n: int, month: int, day_option: DayOption, modby: int = 3 ) -> int: """Possibly increment or decrement the number of periods to shift based on rollforward/rollbackward conventions. Parameters ---------- other : cftime.datetime n : number of periods to increment, before adjusting for rolling month : int reference month giving the first month of the year day_option : 'start', 'end' The convention to use in finding the day in a given month against which to compare for rollforward/rollbackward decisions. modby : int 3 for quarters, 12 for years Returns ------- n : int number of periods to increment See Also -------- _get_day_of_month : Find the day in a month provided an offset. """ months_since = other.month % modby - month % modby if n > 0: if months_since < 0 or ( months_since == 0 and other.day < _get_day_of_month(other, day_option) ): # pretend to roll back if on same month but # before compare_day n -= 1 elif months_since > 0 or ( months_since == 0 and other.day > _get_day_of_month(other, day_option) ): # make sure to roll forward, so negate n += 1 return n def _validate_month(month: int | None, default_month: int) -> int: result_month = default_month if month is None else month if not isinstance(result_month, int): raise TypeError( "'self.month' must be an integer value between 1 " "and 12. Instead, it was set to a value of " f"{result_month!r}" ) elif not (1 <= result_month <= 12): raise ValueError( "'self.month' must be an integer value between 1 " "and 12. 
Instead, it was set to a value of " f"{result_month!r}" ) return result_month class MonthBegin(BaseCFTimeOffset): _freq = "MS" def __apply__(self, other): n = _adjust_n_months(other.day, self.n, 1) return _shift_month(other, n, "start") def onOffset(self, date) -> bool: """Check if the given date is in the set of possible dates created using a length-one version of this offset class.""" return date.day == 1 class MonthEnd(BaseCFTimeOffset): _freq = "ME" def __apply__(self, other): n = _adjust_n_months(other.day, self.n, other.daysinmonth) return _shift_month(other, n, "end") def onOffset(self, date) -> bool: """Check if the given date is in the set of possible dates created using a length-one version of this offset class.""" return date.day == date.daysinmonth _MONTH_ABBREVIATIONS = { 1: "JAN", 2: "FEB", 3: "MAR", 4: "APR", 5: "MAY", 6: "JUN", 7: "JUL", 8: "AUG", 9: "SEP", 10: "OCT", 11: "NOV", 12: "DEC", } class QuarterOffset(BaseCFTimeOffset): """Quarter representation copied off of pandas/tseries/offsets.py""" _default_month: ClassVar[int] month: int def __init__(self, n: int = 1, month: int | None = None) -> None: BaseCFTimeOffset.__init__(self, n) self.month = _validate_month(month, self._default_month) def __apply__(self, other): # months_since: find the calendar quarter containing other.month, # e.g. if other.month == 8, the calendar quarter is [Jul, Aug, Sep]. # Then find the month in that quarter containing an onOffset date for # self. `months_since` is the number of months to shift other.month # to get to this on-offset month. months_since = other.month % 3 - self.month % 3 qtrs = roll_qtrday( other, self.n, self.month, day_option=self._day_option, modby=3 ) months = qtrs * 3 - months_since return _shift_month(other, months, self._day_option) def onOffset(self, date) -> bool: """Check if the given date is in the set of possible dates created using a length-one version of this offset class.""" mod_month = (date.month - self.month) % 3 return mod_month == 0 and date.day == self._get_offset_day(date) def __sub__(self, other: Self) -> Self: if TYPE_CHECKING: import cftime else: cftime = attempt_import("cftime") if isinstance(other, cftime.datetime): raise TypeError("Cannot subtract cftime.datetime from offset.") if type(other) is type(self) and other.month == self.month: return type(self)(self.n - other.n, month=self.month) return NotImplemented def __mul__(self, other): if isinstance(other, float): return NotImplemented return type(self)(n=other * self.n, month=self.month) def rule_code(self) -> str: return f"{self._freq}-{_MONTH_ABBREVIATIONS[self.month]}" def __str__(self): return f"<{type(self).__name__}: n={self.n}, month={self.month}>" class QuarterBegin(QuarterOffset): # When converting a string to an offset, pandas converts # 'QS' to a QuarterBegin offset starting in the month of # January. When creating a QuarterBegin offset directly # from the constructor, however, the default month is March. # We follow that behavior here. _default_month = 3 _freq = "QS" _day_option = "start" def rollforward(self, date): """Roll date forward to nearest start of quarter""" if self.onOffset(date): return date else: return date + QuarterBegin(month=self.month) def rollback(self, date): """Roll date backward to nearest start of quarter""" if self.onOffset(date): return date else: return date - QuarterBegin(month=self.month) class QuarterEnd(QuarterOffset): # When converting a string to an offset, pandas converts # 'Q' to a QuarterEnd offset starting in the month of # December. 
When creating a QuarterEnd offset directly # from the constructor, however, the default month is March. # We follow that behavior here. _default_month = 3 _freq = "QE" _day_option = "end" def rollforward(self, date): """Roll date forward to nearest end of quarter""" if self.onOffset(date): return date else: return date + QuarterEnd(month=self.month) def rollback(self, date): """Roll date backward to nearest end of quarter""" if self.onOffset(date): return date else: return date - QuarterEnd(month=self.month) class YearOffset(BaseCFTimeOffset): _default_month: ClassVar[int] month: int def __init__(self, n: int = 1, month: int | None = None) -> None: BaseCFTimeOffset.__init__(self, n) self.month = _validate_month(month, self._default_month) def __apply__(self, other): reference_day = _get_day_of_month(other, self._day_option) years = _adjust_n_years(other, self.n, self.month, reference_day) months = years * 12 + (self.month - other.month) return _shift_month(other, months, self._day_option) def __sub__(self, other): if TYPE_CHECKING: import cftime else: cftime = attempt_import("cftime") if isinstance(other, cftime.datetime): raise TypeError("Cannot subtract cftime.datetime from offset.") elif type(other) is type(self) and other.month == self.month: return type(self)(self.n - other.n, month=self.month) else: return NotImplemented def __mul__(self, other): if isinstance(other, float): return NotImplemented return type(self)(n=other * self.n, month=self.month) def rule_code(self) -> str: return f"{self._freq}-{_MONTH_ABBREVIATIONS[self.month]}" def __str__(self) -> str: return f"<{type(self).__name__}: n={self.n}, month={self.month}>" class YearBegin(YearOffset): _freq = "YS" _day_option = "start" _default_month = 1 def onOffset(self, date) -> bool: """Check if the given date is in the set of possible dates created using a length-one version of this offset class.""" return date.day == 1 and date.month == self.month def rollforward(self, date): """Roll date forward to nearest start of year""" if self.onOffset(date): return date else: return date + YearBegin(month=self.month) def rollback(self, date): """Roll date backward to nearest start of year""" if self.onOffset(date): return date else: return date - YearBegin(month=self.month) class YearEnd(YearOffset): _freq = "YE" _day_option = "end" _default_month = 12 def onOffset(self, date) -> bool: """Check if the given date is in the set of possible dates created using a length-one version of this offset class.""" return date.day == date.daysinmonth and date.month == self.month def rollforward(self, date): """Roll date forward to nearest end of year""" if self.onOffset(date): return date else: return date + YearEnd(month=self.month) def rollback(self, date): """Roll date backward to nearest end of year""" if self.onOffset(date): return date else: return date - YearEnd(month=self.month) class Day(BaseCFTimeOffset): """Day offset following definition in pandas/_libs/tslibs/offsets.pyx""" _freq = "D" def __apply__(self, other): if isinstance(other, Day): return Day(self.n + other.n) else: return other + timedelta(days=self.n) def onOffset(self, date) -> bool: return True class Hour(Tick): _freq = "h" def as_timedelta(self) -> timedelta: return timedelta(hours=self.n) def __apply__(self, other): return other + self.as_timedelta() class Minute(Tick): _freq = "min" def as_timedelta(self) -> timedelta: return timedelta(minutes=self.n) def __apply__(self, other): return other + self.as_timedelta() class Second(Tick): _freq = "s" def as_timedelta(self) -> 
timedelta: return timedelta(seconds=self.n) def __apply__(self, other): return other + self.as_timedelta() class Millisecond(Tick): _freq = "ms" def as_timedelta(self) -> timedelta: return timedelta(milliseconds=self.n) def __apply__(self, other): return other + self.as_timedelta() class Microsecond(Tick): _freq = "us" def as_timedelta(self) -> timedelta: return timedelta(microseconds=self.n) def __apply__(self, other): return other + self.as_timedelta() def _generate_anchored_offsets( base_freq: str, offset: type[YearOffset | QuarterOffset] ) -> dict[str, type[BaseCFTimeOffset]]: offsets: dict[str, type[BaseCFTimeOffset]] = {} for month, abbreviation in _MONTH_ABBREVIATIONS.items(): anchored_freq = f"{base_freq}-{abbreviation}" offsets[anchored_freq] = partial(offset, month=month) # type: ignore[assignment] return offsets _FREQUENCIES: Mapping[str, type[BaseCFTimeOffset]] = { "A": YearEnd, "AS": YearBegin, "Y": YearEnd, "YE": YearEnd, "YS": YearBegin, "Q": partial(QuarterEnd, month=12), # type: ignore[dict-item] "QE": partial(QuarterEnd, month=12), # type: ignore[dict-item] "QS": partial(QuarterBegin, month=1), # type: ignore[dict-item] "M": MonthEnd, "ME": MonthEnd, "MS": MonthBegin, "D": Day, "H": Hour, "h": Hour, "T": Minute, "min": Minute, "S": Second, "s": Second, "L": Millisecond, "ms": Millisecond, "U": Microsecond, "us": Microsecond, **_generate_anchored_offsets("AS", YearBegin), **_generate_anchored_offsets("A", YearEnd), **_generate_anchored_offsets("YS", YearBegin), **_generate_anchored_offsets("Y", YearEnd), **_generate_anchored_offsets("YE", YearEnd), **_generate_anchored_offsets("QS", QuarterBegin), **_generate_anchored_offsets("Q", QuarterEnd), **_generate_anchored_offsets("QE", QuarterEnd), } _FREQUENCY_CONDITION = "|".join(_FREQUENCIES.keys()) _PATTERN = rf"^((?P[+-]?\d+)|())(?P({_FREQUENCY_CONDITION}))$" # pandas defines these offsets as "Tick" objects, which for instance have # distinct behavior from daily or longer frequencies in resample. CFTIME_TICKS = (Hour, Minute, Second) def _generate_anchored_deprecated_frequencies( deprecated: str, recommended: str ) -> dict[str, str]: pairs = {} for abbreviation in _MONTH_ABBREVIATIONS.values(): anchored_deprecated = f"{deprecated}-{abbreviation}" anchored_recommended = f"{recommended}-{abbreviation}" pairs[anchored_deprecated] = anchored_recommended return pairs _DEPRECATED_FREQUENCIES: dict[str, str] = { "A": "YE", "Y": "YE", "AS": "YS", "Q": "QE", "M": "ME", "H": "h", "T": "min", "S": "s", "L": "ms", "U": "us", **_generate_anchored_deprecated_frequencies("A", "YE"), **_generate_anchored_deprecated_frequencies("Y", "YE"), **_generate_anchored_deprecated_frequencies("AS", "YS"), **_generate_anchored_deprecated_frequencies("Q", "QE"), } _DEPRECATION_MESSAGE = ( "{deprecated_freq!r} is deprecated and will be removed in a future " "version. Please use {recommended_freq!r} instead of " "{deprecated_freq!r}." 
) def _emit_freq_deprecation_warning(deprecated_freq): recommended_freq = _DEPRECATED_FREQUENCIES[deprecated_freq] message = _DEPRECATION_MESSAGE.format( deprecated_freq=deprecated_freq, recommended_freq=recommended_freq ) emit_user_level_warning(message, FutureWarning) def to_offset( freq: BaseCFTimeOffset | str | timedelta | pd.Timedelta | pd.DateOffset, warn: bool = True, ) -> BaseCFTimeOffset: """Convert a frequency string to the appropriate subclass of BaseCFTimeOffset.""" if isinstance(freq, BaseCFTimeOffset): return freq if isinstance(freq, timedelta | pd.Timedelta): return delta_to_tick(freq) if isinstance(freq, pd.DateOffset): freq = _legacy_to_new_freq(freq.freqstr) match = re.match(_PATTERN, freq) if match is None: raise ValueError("Invalid frequency string provided") freq_data = match.groupdict() freq = freq_data["freq"] if warn and freq in _DEPRECATED_FREQUENCIES: _emit_freq_deprecation_warning(freq) multiples = freq_data["multiple"] multiples = 1 if multiples is None else int(multiples) return _FREQUENCIES[freq](n=multiples) def delta_to_tick(delta: timedelta | pd.Timedelta) -> Tick: """Adapted from pandas.tslib.delta_to_tick""" if isinstance(delta, pd.Timedelta) and delta.nanoseconds != 0: # pandas.Timedelta has nanoseconds, but these are not supported raise ValueError( "Unable to convert 'pandas.Timedelta' object with non-zero " "nanoseconds to 'CFTimeOffset' object" ) if delta.microseconds == 0: seconds = delta.days * 86400 + delta.seconds if seconds % 3600 == 0: return Hour(n=seconds // 3600) elif seconds % 60 == 0: return Minute(n=seconds // 60) else: return Second(n=seconds) # Regardless of the days and seconds this will always be a Millisecond # or Microsecond object elif delta.microseconds % 1_000 == 0: return Millisecond(n=delta.microseconds // 1_000) else: return Microsecond(n=delta.microseconds) def to_cftime_datetime(date_str_or_date, calendar=None): if TYPE_CHECKING: import cftime else: cftime = attempt_import("cftime") if isinstance(date_str_or_date, str): if calendar is None: raise ValueError( "If converting a string to a cftime.datetime object, " "a calendar type must be provided" ) date, _ = _parse_iso8601(get_date_type(calendar), date_str_or_date) return date elif isinstance(date_str_or_date, cftime.datetime): return date_str_or_date elif isinstance(date_str_or_date, datetime | pd.Timestamp): return cftime.DatetimeProlepticGregorian(*date_str_or_date.timetuple()) else: raise TypeError( "date_str_or_date must be a string or a " "subclass of cftime.datetime. Instead got " f"{date_str_or_date!r}." 
) def normalize_date(date): """Round datetime down to midnight.""" return date.replace(hour=0, minute=0, second=0, microsecond=0) def _get_normalized_cfdate(date, calendar, normalize): """convert to cf datetime and round down to midnight if normalize.""" if date is None: return date cf_date = to_cftime_datetime(date, calendar) return normalize_date(cf_date) if normalize else cf_date def _generate_linear_date_range(start, end, periods): """Generate an equally-spaced sequence of cftime.datetime objects between and including two dates (whose length equals the number of periods).""" if TYPE_CHECKING: import cftime else: cftime = attempt_import("cftime") total_seconds = (end - start).total_seconds() values = np.linspace(0.0, total_seconds, periods, endpoint=True) units = f"seconds since {format_cftime_datetime(start)}" calendar = start.calendar return cftime.num2date( values, units=units, calendar=calendar, only_use_cftime_datetimes=True ) def _generate_linear_date_range_with_freq(start, end, periods, freq): """Generate a regular range of cftime.datetime objects with a given frequency. Adapted from pandas.tseries.offsets.generate_range (now at pandas.core.arrays.datetimes._generate_range). Parameters ---------- start : cftime.datetime, or None Start of range end : cftime.datetime, or None End of range periods : int, or None Number of elements in the sequence freq: str Step size between cftime.datetime objects. Not None. Returns ------- A generator object of cftime.datetime objects """ offset = to_offset(freq) if start: # From pandas GH 56147 / 56832 to account for negative direction and # range bounds if offset.n >= 0: start = offset.rollforward(start) else: start = offset.rollback(start) if periods is None and end < start and offset.n >= 0: end = None periods = 0 if end is None: end = start + (periods - 1) * offset if start is None: start = end - (periods - 1) * offset current = start if offset.n >= 0: while current <= end: yield current next_date = current + offset if next_date <= current: raise ValueError(f"Offset {offset} did not increment date") current = next_date else: while current >= end: yield current next_date = current + offset if next_date >= current: raise ValueError(f"Offset {offset} did not decrement date") current = next_date def cftime_range( start=None, end=None, periods=None, freq=None, normalize=False, name=None, inclusive: InclusiveOptions = "both", calendar="standard", ) -> CFTimeIndex: """Return a fixed frequency CFTimeIndex. .. deprecated:: 2025.02.0 Use :py:func:`~xarray.date_range` with ``use_cftime=True`` instead. Parameters ---------- start : str or cftime.datetime, optional Left bound for generating dates. end : str or cftime.datetime, optional Right bound for generating dates. periods : int, optional Number of periods to generate. freq : str or None, default: "D" Frequency strings can have multiples, e.g. "5h" and negative values, e.g. "-1D". normalize : bool, default: False Normalize start/end dates to midnight before generating date range. name : str, default: None Name of the resulting index inclusive : {"both", "neither", "left", "right"}, default "both" Include boundaries; whether to set each bound as closed or open. .. versionadded:: 2023.02.0 calendar : str, default: "standard" Calendar type for the datetimes. Returns ------- CFTimeIndex Notes ----- This function is an analog of ``pandas.date_range`` for use in generating sequences of ``cftime.datetime`` objects. It supports most of the features of ``pandas.date_range`` (e.g. 
specifying how the index is ``closed`` on either side, or whether or not to ``normalize`` the start and end bounds); however, there are some notable exceptions: - You cannot specify a ``tz`` (time zone) argument. - Start or end dates specified as partial-datetime strings must use the `ISO-8601 format `_. - It supports many, but not all, frequencies supported by ``pandas.date_range``. For example it does not currently support any of the business-related or semi-monthly frequencies. - Compound sub-monthly frequencies are not supported, e.g. '1H1min', as these can easily be written in terms of the finest common resolution, e.g. '61min'. Valid simple frequency strings for use with ``cftime``-calendars include any multiples of the following. +--------+--------------------------+ | Alias | Description | +========+==========================+ | YE | Year-end frequency | +--------+--------------------------+ | YS | Year-start frequency | +--------+--------------------------+ | QE | Quarter-end frequency | +--------+--------------------------+ | QS | Quarter-start frequency | +--------+--------------------------+ | ME | Month-end frequency | +--------+--------------------------+ | MS | Month-start frequency | +--------+--------------------------+ | D | Day frequency | +--------+--------------------------+ | h | Hour frequency | +--------+--------------------------+ | min | Minute frequency | +--------+--------------------------+ | s | Second frequency | +--------+--------------------------+ | ms | Millisecond frequency | +--------+--------------------------+ | us | Microsecond frequency | +--------+--------------------------+ Any multiples of the following anchored offsets are also supported. +------------+--------------------------------------------------------------------+ | Alias | Description | +============+====================================================================+ | Y(E,S)-JAN | Annual frequency, anchored at the (end, beginning) of January | +------------+--------------------------------------------------------------------+ | Y(E,S)-FEB | Annual frequency, anchored at the (end, beginning) of February | +------------+--------------------------------------------------------------------+ | Y(E,S)-MAR | Annual frequency, anchored at the (end, beginning) of March | +------------+--------------------------------------------------------------------+ | Y(E,S)-APR | Annual frequency, anchored at the (end, beginning) of April | +------------+--------------------------------------------------------------------+ | Y(E,S)-MAY | Annual frequency, anchored at the (end, beginning) of May | +------------+--------------------------------------------------------------------+ | Y(E,S)-JUN | Annual frequency, anchored at the (end, beginning) of June | +------------+--------------------------------------------------------------------+ | Y(E,S)-JUL | Annual frequency, anchored at the (end, beginning) of July | +------------+--------------------------------------------------------------------+ | Y(E,S)-AUG | Annual frequency, anchored at the (end, beginning) of August | +------------+--------------------------------------------------------------------+ | Y(E,S)-SEP | Annual frequency, anchored at the (end, beginning) of September | +------------+--------------------------------------------------------------------+ | Y(E,S)-OCT | Annual frequency, anchored at the (end, beginning) of October | +------------+--------------------------------------------------------------------+ | Y(E,S)-NOV | Annual frequency, 
anchored at the (end, beginning) of November | +------------+--------------------------------------------------------------------+ | Y(E,S)-DEC | Annual frequency, anchored at the (end, beginning) of December | +------------+--------------------------------------------------------------------+ | Q(E,S)-JAN | Quarter frequency, anchored at the (end, beginning) of January | +------------+--------------------------------------------------------------------+ | Q(E,S)-FEB | Quarter frequency, anchored at the (end, beginning) of February | +------------+--------------------------------------------------------------------+ | Q(E,S)-MAR | Quarter frequency, anchored at the (end, beginning) of March | +------------+--------------------------------------------------------------------+ | Q(E,S)-APR | Quarter frequency, anchored at the (end, beginning) of April | +------------+--------------------------------------------------------------------+ | Q(E,S)-MAY | Quarter frequency, anchored at the (end, beginning) of May | +------------+--------------------------------------------------------------------+ | Q(E,S)-JUN | Quarter frequency, anchored at the (end, beginning) of June | +------------+--------------------------------------------------------------------+ | Q(E,S)-JUL | Quarter frequency, anchored at the (end, beginning) of July | +------------+--------------------------------------------------------------------+ | Q(E,S)-AUG | Quarter frequency, anchored at the (end, beginning) of August | +------------+--------------------------------------------------------------------+ | Q(E,S)-SEP | Quarter frequency, anchored at the (end, beginning) of September | +------------+--------------------------------------------------------------------+ | Q(E,S)-OCT | Quarter frequency, anchored at the (end, beginning) of October | +------------+--------------------------------------------------------------------+ | Q(E,S)-NOV | Quarter frequency, anchored at the (end, beginning) of November | +------------+--------------------------------------------------------------------+ | Q(E,S)-DEC | Quarter frequency, anchored at the (end, beginning) of December | +------------+--------------------------------------------------------------------+ Finally, the following calendar aliases are supported. +--------------------------------+---------------------------------------+ | Alias | Date type | +================================+=======================================+ | standard, gregorian | ``cftime.DatetimeGregorian`` | +--------------------------------+---------------------------------------+ | proleptic_gregorian | ``cftime.DatetimeProlepticGregorian`` | +--------------------------------+---------------------------------------+ | noleap, 365_day | ``cftime.DatetimeNoLeap`` | +--------------------------------+---------------------------------------+ | all_leap, 366_day | ``cftime.DatetimeAllLeap`` | +--------------------------------+---------------------------------------+ | 360_day | ``cftime.Datetime360Day`` | +--------------------------------+---------------------------------------+ | julian | ``cftime.DatetimeJulian`` | +--------------------------------+---------------------------------------+ Examples -------- This function returns a ``CFTimeIndex``, populated with ``cftime.datetime`` objects associated with the specified calendar type, e.g. >>> xr.date_range( ... start="2000", periods=6, freq="2MS", calendar="noleap", use_cftime=True ... 
) CFTimeIndex([2000-01-01 00:00:00, 2000-03-01 00:00:00, 2000-05-01 00:00:00, 2000-07-01 00:00:00, 2000-09-01 00:00:00, 2000-11-01 00:00:00], dtype='object', length=6, calendar='noleap', freq='2MS') As in the standard pandas function, three of the ``start``, ``end``, ``periods``, or ``freq`` arguments must be specified at a given time, with the other set to ``None``. See the `pandas documentation `_ for more examples of the behavior of ``date_range`` with each of the parameters. See Also -------- pandas.date_range """ emit_user_level_warning( "cftime_range() is deprecated, please use xarray.date_range(..., use_cftime=True) instead.", DeprecationWarning, ) return date_range( start=start, end=end, periods=periods, freq=freq, normalize=normalize, name=name, inclusive=inclusive, calendar=calendar, use_cftime=True, ) def _cftime_range( start=None, end=None, periods=None, freq=None, normalize=False, name=None, inclusive: InclusiveOptions = "both", calendar="standard", ) -> CFTimeIndex: """Return a fixed frequency CFTimeIndex. Parameters ---------- start : str or cftime.datetime, optional Left bound for generating dates. end : str or cftime.datetime, optional Right bound for generating dates. periods : int, optional Number of periods to generate. freq : str or None, default: "D" Frequency strings can have multiples, e.g. "5h" and negative values, e.g. "-1D". normalize : bool, default: False Normalize start/end dates to midnight before generating date range. name : str, default: None Name of the resulting index inclusive : {"both", "neither", "left", "right"}, default "both" Include boundaries; whether to set each bound as closed or open. calendar : str, default: "standard" Calendar type for the datetimes. Returns ------- CFTimeIndex Notes ----- see cftime_range """ if freq is None and any(arg is None for arg in [periods, start, end]): freq = "D" # Adapted from pandas.core.indexes.datetimes._generate_range. if count_not_none(start, end, periods, freq) != 3: raise ValueError( "Exactly three of 'start', 'end', 'periods', or 'freq' must be " "specified to generate a date range. Note that 'freq' defaults to " "'D' in the event that any of 'start', 'end', or 'periods' are " "None." ) start = _get_normalized_cfdate(start, calendar, normalize) end = _get_normalized_cfdate(end, calendar, normalize) if freq is None: dates = _generate_linear_date_range(start, end, periods) else: dates = np.array( list(_generate_linear_date_range_with_freq(start, end, periods, freq)) ) if not TYPE_CHECKING and inclusive not in get_args(InclusiveOptions): raise ValueError( f"Argument `inclusive` must be either 'both', 'neither', " f"'left', or 'right'. Got {inclusive}." ) if len(dates) and inclusive != "both": if inclusive != "left" and dates[0] == start: dates = dates[1:] if inclusive != "right" and dates[-1] == end: dates = dates[:-1] return CFTimeIndex(dates, name=name) def date_range( start=None, end=None, periods=None, freq=None, tz=None, normalize=False, name=None, inclusive: InclusiveOptions = "both", unit: PDDatetimeUnitOptions = "ns", calendar="standard", use_cftime=None, ): """Return a fixed frequency datetime index. The type (:py:class:`xarray.CFTimeIndex` or :py:class:`pandas.DatetimeIndex`) of the returned index depends on the requested calendar and on `use_cftime`. Parameters ---------- start : str or datetime-like, optional Left bound for generating dates. end : str or datetime-like, optional Right bound for generating dates. periods : int, optional Number of periods to generate. 
freq : str or None, default: "D" Frequency strings can have multiples, e.g. "5h" and negative values, e.g. "-1D". tz : str or tzinfo, optional Time zone name for returning localized DatetimeIndex, for example 'Asia/Hong_Kong'. By default, the resulting DatetimeIndex is timezone-naive. Only valid with pandas DatetimeIndex. normalize : bool, default: False Normalize start/end dates to midnight before generating date range. name : str, default: None Name of the resulting index inclusive : {"both", "neither", "left", "right"}, default: "both" Include boundaries; whether to set each bound as closed or open. .. versionadded:: 2023.02.0 unit : {"s", "ms", "us", "ns"}, default "ns" Specify the desired resolution of the result. .. versionadded:: 2024.12.0 calendar : str, default: "standard" Calendar type for the datetimes. use_cftime : boolean, optional If True, always return a CFTimeIndex. If False, return a pd.DatetimeIndex if possible or raise a ValueError. If None (default), return a pd.DatetimeIndex if possible, otherwise return a CFTimeIndex. Overridden to False if `tz` is not None. Returns ------- CFTimeIndex or pd.DatetimeIndex Notes ----- When ``use_cftime=True``, or a calendar other than "standard", "gregorian", or "proleptic_gregorian" is provided, this function is an analog of ``pandas.date_range`` for use in generating sequences of ``cftime.datetime`` objects. It supports most of the features of ``pandas.date_range`` (e.g. specifying how the index is ``closed`` on either side, or whether or not to ``normalize`` the start and end bounds); however, there are some notable exceptions: - You cannot specify a ``tz`` (time zone) argument. - Start or end dates specified as partial-datetime strings must use the `ISO-8601 format `_. - It supports many, but not all, frequencies supported by ``pandas.date_range``. For example it does not currently support any of the business-related or semi-monthly frequencies. - Compound sub-monthly frequencies are not supported, e.g. '1H1min', as these can easily be written in terms of the finest common resolution, e.g. '61min'. Valid simple frequency strings for use with ``cftime``-calendars include any multiples of the following. +--------+--------------------------+ | Alias | Description | +========+==========================+ | YE | Year-end frequency | +--------+--------------------------+ | YS | Year-start frequency | +--------+--------------------------+ | QE | Quarter-end frequency | +--------+--------------------------+ | QS | Quarter-start frequency | +--------+--------------------------+ | ME | Month-end frequency | +--------+--------------------------+ | MS | Month-start frequency | +--------+--------------------------+ | D | Day frequency | +--------+--------------------------+ | h | Hour frequency | +--------+--------------------------+ | min | Minute frequency | +--------+--------------------------+ | s | Second frequency | +--------+--------------------------+ | ms | Millisecond frequency | +--------+--------------------------+ | us | Microsecond frequency | +--------+--------------------------+ Any multiples of the following anchored offsets are also supported. 
+------------+--------------------------------------------------------------------+ | Alias | Description | +============+====================================================================+ | Y(E,S)-JAN | Annual frequency, anchored at the (end, beginning) of January | +------------+--------------------------------------------------------------------+ | Y(E,S)-FEB | Annual frequency, anchored at the (end, beginning) of February | +------------+--------------------------------------------------------------------+ | Y(E,S)-MAR | Annual frequency, anchored at the (end, beginning) of March | +------------+--------------------------------------------------------------------+ | Y(E,S)-APR | Annual frequency, anchored at the (end, beginning) of April | +------------+--------------------------------------------------------------------+ | Y(E,S)-MAY | Annual frequency, anchored at the (end, beginning) of May | +------------+--------------------------------------------------------------------+ | Y(E,S)-JUN | Annual frequency, anchored at the (end, beginning) of June | +------------+--------------------------------------------------------------------+ | Y(E,S)-JUL | Annual frequency, anchored at the (end, beginning) of July | +------------+--------------------------------------------------------------------+ | Y(E,S)-AUG | Annual frequency, anchored at the (end, beginning) of August | +------------+--------------------------------------------------------------------+ | Y(E,S)-SEP | Annual frequency, anchored at the (end, beginning) of September | +------------+--------------------------------------------------------------------+ | Y(E,S)-OCT | Annual frequency, anchored at the (end, beginning) of October | +------------+--------------------------------------------------------------------+ | Y(E,S)-NOV | Annual frequency, anchored at the (end, beginning) of November | +------------+--------------------------------------------------------------------+ | Y(E,S)-DEC | Annual frequency, anchored at the (end, beginning) of December | +------------+--------------------------------------------------------------------+ | Q(E,S)-JAN | Quarter frequency, anchored at the (end, beginning) of January | +------------+--------------------------------------------------------------------+ | Q(E,S)-FEB | Quarter frequency, anchored at the (end, beginning) of February | +------------+--------------------------------------------------------------------+ | Q(E,S)-MAR | Quarter frequency, anchored at the (end, beginning) of March | +------------+--------------------------------------------------------------------+ | Q(E,S)-APR | Quarter frequency, anchored at the (end, beginning) of April | +------------+--------------------------------------------------------------------+ | Q(E,S)-MAY | Quarter frequency, anchored at the (end, beginning) of May | +------------+--------------------------------------------------------------------+ | Q(E,S)-JUN | Quarter frequency, anchored at the (end, beginning) of June | +------------+--------------------------------------------------------------------+ | Q(E,S)-JUL | Quarter frequency, anchored at the (end, beginning) of July | +------------+--------------------------------------------------------------------+ | Q(E,S)-AUG | Quarter frequency, anchored at the (end, beginning) of August | +------------+--------------------------------------------------------------------+ | Q(E,S)-SEP | Quarter frequency, anchored at the (end, beginning) of September | 
+------------+--------------------------------------------------------------------+ | Q(E,S)-OCT | Quarter frequency, anchored at the (end, beginning) of October | +------------+--------------------------------------------------------------------+ | Q(E,S)-NOV | Quarter frequency, anchored at the (end, beginning) of November | +------------+--------------------------------------------------------------------+ | Q(E,S)-DEC | Quarter frequency, anchored at the (end, beginning) of December | +------------+--------------------------------------------------------------------+ Finally, the following calendar aliases are supported. +--------------------------------+---------------------------------------+----------------------------+ | Alias | Date type | Available use_cftime=False | +================================+=======================================+============================+ | standard, gregorian | ``cftime.DatetimeGregorian`` | True | +--------------------------------+---------------------------------------+----------------------------+ | proleptic_gregorian | ``cftime.DatetimeProlepticGregorian`` | True | +--------------------------------+---------------------------------------+----------------------------+ | noleap, 365_day | ``cftime.DatetimeNoLeap`` | False | +--------------------------------+---------------------------------------+----------------------------+ | all_leap, 366_day | ``cftime.DatetimeAllLeap`` | False | +--------------------------------+---------------------------------------+----------------------------+ | 360_day | ``cftime.Datetime360Day`` | False | +--------------------------------+---------------------------------------+----------------------------+ | julian | ``cftime.DatetimeJulian`` | False | +--------------------------------+---------------------------------------+----------------------------+ As in the standard pandas function, exactly three of ``start``, ``end``, ``periods``, or ``freq`` are required to generate a date range. Note that ``freq`` defaults to ``"D"`` in the event that any of ``start``, ``end``, or ``periods`` are set to ``None``. See :py:func:`pandas.date_range`. for more examples of the behavior of ``date_range`` with each of the parameters. Examples -------- This function returns a ``CFTimeIndex``, populated with ``cftime.datetime`` objects associated with the specified calendar type, e.g. >>> xr.date_range( ... start="2000", periods=6, freq="2MS", calendar="noleap", use_cftime=True ... ) CFTimeIndex([2000-01-01 00:00:00, 2000-03-01 00:00:00, 2000-05-01 00:00:00, 2000-07-01 00:00:00, 2000-09-01 00:00:00, 2000-11-01 00:00:00], dtype='object', length=6, calendar='noleap', freq='2MS') See also -------- pandas.date_range cftime_range date_range_like """ if tz is not None: use_cftime = False if _is_standard_calendar(calendar) and use_cftime is not True: try: return pd.date_range( # type: ignore[call-overload,unused-ignore] start=start, end=end, periods=periods, # TODO remove translation once requiring pandas >= 2.2 freq=_new_to_legacy_freq(freq), tz=tz, normalize=normalize, name=name, inclusive=inclusive, unit=unit, ) except pd.errors.OutOfBoundsDatetime as err: if use_cftime is False: raise ValueError( "Date range is invalid for pandas DatetimeIndex, try using `use_cftime=True`." ) from err elif use_cftime is False: raise ValueError( f"Invalid calendar {calendar} for pandas DatetimeIndex, try using `use_cftime=True`." 
) return _cftime_range( start=start, end=end, periods=periods, freq=freq, normalize=normalize, name=name, inclusive=inclusive, calendar=calendar, ) def _new_to_legacy_freq(freq): # xarray will now always return "ME" and "QE" for MonthEnd and QuarterEnd # frequencies, but older versions of pandas do not support these as # frequency strings. Until xarray's minimum pandas version is 2.2 or above, # we add logic to continue using the deprecated "M" and "Q" frequency # strings in these circumstances. # NOTE: other conversions ("h" -> "H", ..., "ns" -> "N") not required # TODO: remove once requiring pandas >= 2.2 if not freq or Version(pd.__version__) >= Version("2.2"): return freq try: freq_as_offset = to_offset(freq) except ValueError: # freq may be valid in pandas but not in xarray return freq if isinstance(freq_as_offset, MonthEnd) and "ME" in freq: freq = freq.replace("ME", "M") elif isinstance(freq_as_offset, QuarterEnd) and "QE" in freq: freq = freq.replace("QE", "Q") elif isinstance(freq_as_offset, YearBegin) and "YS" in freq: freq = freq.replace("YS", "AS") elif isinstance(freq_as_offset, YearEnd): # testing for "Y" is required as this was valid in xarray 2023.11 - 2024.01 if "Y-" in freq: # Check for and replace "Y-" instead of just "Y" to prevent # corrupting anchored offsets that contain "Y" in the month # abbreviation, e.g. "Y-MAY" -> "A-MAY". freq = freq.replace("Y-", "A-") elif "YE-" in freq: freq = freq.replace("YE-", "A-") elif "A-" not in freq and freq.endswith("Y"): freq = freq.replace("Y", "A") elif freq.endswith("YE"): freq = freq.replace("YE", "A") return freq def _legacy_to_new_freq(freq: T_FreqStr) -> T_FreqStr: # to avoid internal deprecation warnings when freq is determined using pandas < 2.2 # TODO: remove once requiring pandas >= 2.2 if not freq or Version(pd.__version__) >= Version("2.2"): return freq try: freq_as_offset = to_offset(freq, warn=False) except ValueError: # freq may be valid in pandas but not in xarray return freq if isinstance(freq_as_offset, MonthEnd) and "ME" not in freq: freq = freq.replace("M", "ME") elif isinstance(freq_as_offset, QuarterEnd) and "QE" not in freq: freq = freq.replace("Q", "QE") elif isinstance(freq_as_offset, YearBegin) and "YS" not in freq: freq = freq.replace("AS", "YS") elif isinstance(freq_as_offset, YearEnd): if "A-" in freq: # Check for and replace "A-" instead of just "A" to prevent # corrupting anchored offsets that contain "Y" in the month # abbreviation, e.g. "A-MAY" -> "YE-MAY". freq = freq.replace("A-", "YE-") elif "Y-" in freq: freq = freq.replace("Y-", "YE-") elif freq.endswith("A"): # the "A-MAY" case is already handled above freq = freq.replace("A", "YE") elif "YE" not in freq and freq.endswith("Y"): # the "Y-MAY" case is already handled above freq = freq.replace("Y", "YE") elif isinstance(freq_as_offset, Hour): freq = freq.replace("H", "h") elif isinstance(freq_as_offset, Minute): freq = freq.replace("T", "min") elif isinstance(freq_as_offset, Second): freq = freq.replace("S", "s") elif isinstance(freq_as_offset, Millisecond): freq = freq.replace("L", "ms") elif isinstance(freq_as_offset, Microsecond): freq = freq.replace("U", "us") return freq def date_range_like(source, calendar, use_cftime=None): """Generate a datetime array with the same frequency, start and end as another one, but in a different calendar. Parameters ---------- source : DataArray, CFTimeIndex, or pd.DatetimeIndex 1D datetime array calendar : str New calendar name. 
use_cftime : bool, optional If True, the output uses :py:class:`cftime.datetime` objects. If None (default), :py:class:`numpy.datetime64` values are used if possible. If False, :py:class:`numpy.datetime64` values are used or an error is raised. Returns ------- DataArray 1D datetime coordinate with the same start, end and frequency as the source, but in the new calendar. The start date is assumed to exist in the target calendar. If the end date doesn't exist, the code tries 1 and 2 calendar days before. There is a special case when the source time series is daily or coarser and the end of the input range is on the last day of the month. Then the output range will also end on the last day of the month in the new calendar. """ from xarray.coding.frequencies import infer_freq from xarray.core.dataarray import DataArray if not isinstance(source, pd.DatetimeIndex | CFTimeIndex) and ( (isinstance(source, DataArray) and (source.ndim != 1)) or not _contains_datetime_like_objects(source.variable) ): raise ValueError( "'source' must be a 1D array of datetime objects for inferring its range." ) freq = infer_freq(source) if freq is None: raise ValueError( "`date_range_like` was unable to generate a range as the source frequency was not inferable." ) # TODO remove once requiring pandas >= 2.2 freq = _legacy_to_new_freq(freq) use_cftime = _should_cftime_be_used(source, calendar, use_cftime) source_start = source.values.min() source_end = source.values.max() freq_as_offset = to_offset(freq) if freq_as_offset.n < 0: source_start, source_end = source_end, source_start if is_np_datetime_like(source.dtype): # We want to use datetime fields (datetime64 object don't have them) source_calendar = "standard" source_start = default_precision_timestamp(source_start) source_end = default_precision_timestamp(source_end) elif isinstance(source, CFTimeIndex): source_calendar = source.calendar else: # DataArray source_calendar = source.dt.calendar if calendar == source_calendar and is_np_datetime_like(source.dtype) ^ use_cftime: return source date_type = get_date_type(calendar, use_cftime) start = convert_time_or_go_back(source_start, date_type) end = convert_time_or_go_back(source_end, date_type) # For the cases where the source ends on the end of the month, we expect the same in the new calendar. 
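# Illustrative example (values are hypothetical, not taken from a test): a standard-calendar
# monthly series ending on 2000-02-29 is expected to end on 2000-02-30 after conversion to a
# "360_day" calendar, since that is the last day of February in the target calendar; the
# branch below implements that adjustment.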
if source_end.day == source_end.daysinmonth and isinstance( freq_as_offset, YearEnd | QuarterEnd | MonthEnd | Day ): end = end.replace(day=end.daysinmonth) return date_range( start=start.isoformat(), end=end.isoformat(), freq=freq, calendar=calendar, ) python-xarray-2026.01.0/xarray/coding/variables.py0000664000175000017500000006335415136607163022151 0ustar alastairalastair"""Coders for individual Variable objects.""" from __future__ import annotations import warnings from collections.abc import Hashable, MutableMapping from functools import partial from typing import TYPE_CHECKING, Any, Union import numpy as np import pandas as pd from xarray.coding.common import ( SerializationWarning, VariableCoder, lazy_elemwise_func, pop_to, safe_setitem, unpack_for_decoding, unpack_for_encoding, ) from xarray.coding.times import CFDatetimeCoder, CFTimedeltaCoder from xarray.core import dtypes, duck_array_ops, indexing from xarray.core.types import Self from xarray.core.variable import Variable if TYPE_CHECKING: T_VarTuple = tuple[tuple[Hashable, ...], Any, dict, dict] T_Name = Union[Hashable, None] class NativeEndiannessArray(indexing.ExplicitlyIndexedNDArrayMixin): """Decode arrays on the fly from non-native to native endianness This is useful for decoding arrays from netCDF3 files (which are all big endian) into native endianness, so they can be used with Cython functions, such as those found in bottleneck and pandas. >>> x = np.arange(5, dtype=">i2") >>> x.dtype dtype('>i2') >>> NativeEndiannessArray(x).dtype dtype('int16') >>> indexer = indexing.BasicIndexer((slice(None),)) >>> NativeEndiannessArray(x)[indexer].dtype dtype('int16') """ __slots__ = ("array",) def __init__(self, array) -> None: self.array = indexing.as_indexable(array) @property def dtype(self) -> np.dtype: return np.dtype(self.array.dtype.kind + str(self.array.dtype.itemsize)) def _oindex_get(self, key): return type(self)(self.array.oindex[key]) def _vindex_get(self, key): return type(self)(self.array.vindex[key]) def __getitem__(self, key) -> Self: return type(self)(self.array[key]) def get_duck_array(self): return duck_array_ops.astype(self.array.get_duck_array(), dtype=self.dtype) def transpose(self, order): return type(self)(self.array.transpose(order)) class BoolTypeArray(indexing.ExplicitlyIndexedNDArrayMixin): """Decode arrays on the fly from integer to boolean datatype This is useful for decoding boolean arrays from integer typed netCDF variables. 
>>> x = np.array([1, 0, 1, 1, 0], dtype="i1") >>> x.dtype dtype('int8') >>> BoolTypeArray(x).dtype dtype('bool') >>> indexer = indexing.BasicIndexer((slice(None),)) >>> BoolTypeArray(x)[indexer].dtype dtype('bool') """ __slots__ = ("array",) def __init__(self, array) -> None: self.array = indexing.as_indexable(array) @property def dtype(self) -> np.dtype: return np.dtype("bool") def _oindex_get(self, key): return type(self)(self.array.oindex[key]) def _vindex_get(self, key): return type(self)(self.array.vindex[key]) def __getitem__(self, key) -> Self: return type(self)(self.array[key]) def get_duck_array(self): return duck_array_ops.astype(self.array.get_duck_array(), dtype=self.dtype) def transpose(self, order): return type(self)(self.array.transpose(order)) def _apply_mask( data: np.ndarray, encoded_fill_values: list, decoded_fill_value: Any, dtype: np.typing.DTypeLike | None, ) -> np.ndarray: """Mask all matching values in a NumPy arrays.""" data = np.array(data, dtype=dtype, copy=True) if encoded_fill_values: condition = False for fv in encoded_fill_values: condition |= data == fv data[condition] = decoded_fill_value return data def _is_time_like(units): # test for time-like # return "datetime" for datetime-like # return "timedelta" for timedelta-like if units is None: return False time_strings = [ "days", "hours", "minutes", "seconds", "milliseconds", "microseconds", "nanoseconds", ] units = str(units) # to prevent detecting units like `days accumulated` as time-like # special casing for datetime-units and timedelta-units (GH-8269) if "since" in units: from xarray.coding.times import _unpack_netcdf_time_units try: _unpack_netcdf_time_units(units) except ValueError: return False return "datetime" else: return "timedelta" if any(tstr == units for tstr in time_strings) else False def _check_fill_values(attrs, name, dtype): """Check _FillValue and missing_value if available. Return dictionary with raw fill values and set with encoded fill values. Issue SerializationWarning if appropriate. 
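As a purely hypothetical illustration, an integer variable whose attrs are ``{"_FillValue": -999}`` would yield ``({"_FillValue": -999}, {-999})``.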
""" raw_fill_dict = {} for attr in ("missing_value", "_FillValue"): pop_to(attrs, raw_fill_dict, attr, name=name) encoded_fill_values = set() for k in list(raw_fill_dict): v = raw_fill_dict[k] kfill = {fv for fv in np.ravel(v) if not pd.isnull(fv)} if not kfill and np.issubdtype(dtype, np.integer): warnings.warn( f"variable {name!r} has non-conforming {k!r} " f"{v!r} defined, dropping {k!r} entirely.", SerializationWarning, stacklevel=3, ) del raw_fill_dict[k] else: encoded_fill_values |= kfill if len(encoded_fill_values) > 1: warnings.warn( f"variable {name!r} has multiple fill values " f"{encoded_fill_values} defined, decoding all values to NaN.", SerializationWarning, stacklevel=3, ) return raw_fill_dict, encoded_fill_values def _convert_unsigned_fill_value( name: T_Name, data: Any, unsigned: str, raw_fill_value: Any, encoded_fill_values: set, ) -> Any: if data.dtype.kind == "i": if unsigned == "true": unsigned_dtype = np.dtype(f"u{data.dtype.itemsize}") transform = partial(np.asarray, dtype=unsigned_dtype) if raw_fill_value is not None: new_fill = np.array(raw_fill_value, dtype=data.dtype) encoded_fill_values.remove(raw_fill_value) # use view here to prevent OverflowError encoded_fill_values.add(new_fill.view(unsigned_dtype).item()) data = lazy_elemwise_func(data, transform, unsigned_dtype) elif data.dtype.kind == "u": if unsigned == "false": signed_dtype = np.dtype(f"i{data.dtype.itemsize}") transform = partial(np.asarray, dtype=signed_dtype) data = lazy_elemwise_func(data, transform, signed_dtype) if raw_fill_value is not None: new_fill = signed_dtype.type(raw_fill_value) encoded_fill_values.remove(raw_fill_value) encoded_fill_values.add(new_fill) else: warnings.warn( f"variable {name!r} has _Unsigned attribute but is not " "of integer type. Ignoring attribute.", SerializationWarning, stacklevel=3, ) return data def _encode_unsigned_fill_value( name: T_Name, fill_value: Any, encoded_dtype: np.dtype, ) -> Any: try: if hasattr(fill_value, "item"): # if numpy type, convert to python native integer to determine overflow # otherwise numpy unsigned ints will silently cast to the signed counterpart fill_value = fill_value.item() # passes if provided fill value fits in encoded on-disk type new_fill = encoded_dtype.type(fill_value) except OverflowError: encoded_kind_str = "signed" if encoded_dtype.kind == "i" else "unsigned" warnings.warn( f"variable {name!r} will be stored as {encoded_kind_str} integers " f"but _FillValue attribute can't be represented as a " f"{encoded_kind_str} integer.", SerializationWarning, stacklevel=3, ) # user probably provided the fill as the in-memory dtype, # convert to on-disk type to match CF standard orig_kind = "u" if encoded_dtype.kind == "i" else "i" orig_dtype = np.dtype(f"{orig_kind}{encoded_dtype.itemsize}") # use view here to prevent OverflowError new_fill = np.array(fill_value, dtype=orig_dtype).view(encoded_dtype).item() return new_fill class CFMaskCoder(VariableCoder): """Mask or unmask fill values according to CF conventions.""" def __init__( self, decode_times: bool | CFDatetimeCoder = False, decode_timedelta: bool | CFTimedeltaCoder = False, ) -> None: self.decode_times = decode_times self.decode_timedelta = decode_timedelta def encode(self, variable: Variable, name: T_Name = None): dims, data, attrs, encoding = unpack_for_encoding(variable) dtype = np.dtype(encoding.get("dtype", data.dtype)) # from netCDF best practices # https://docs.unidata.ucar.edu/nug/current/best_practices.html#bp_Unsigned-Data # "_Unsigned = "true" to indicate that # integer 
data should be treated as unsigned" has_unsigned = encoding.get("_Unsigned") is not None fv = encoding.get("_FillValue") mv = encoding.get("missing_value") fill_value = None fv_exists = fv is not None mv_exists = mv is not None if not fv_exists and not mv_exists: return variable if fv_exists and mv_exists and not duck_array_ops.allclose_or_equiv(fv, mv): raise ValueError( f"Variable {name!r} has conflicting _FillValue ({fv}) and missing_value ({mv}). Cannot encode data." ) if fv_exists: # Ensure _FillValue is cast to same dtype as data's # but not for packed data if has_unsigned: encoding["_FillValue"] = _encode_unsigned_fill_value(name, fv, dtype) elif "add_offset" not in encoding and "scale_factor" not in encoding: encoding["_FillValue"] = dtype.type(fv) else: encoding["_FillValue"] = fv fill_value = pop_to(encoding, attrs, "_FillValue", name=name) if mv_exists: # try to use _FillValue, if it exists to align both values # or use missing_value and ensure it's cast to same dtype as data's # but not for packed data encoding["missing_value"] = attrs.get( "_FillValue", ( _encode_unsigned_fill_value(name, mv, dtype) if has_unsigned else ( dtype.type(mv) if "add_offset" not in encoding and "scale_factor" not in encoding else mv ) ), ) fill_value = pop_to(encoding, attrs, "missing_value", name=name) # apply fillna if fill_value is not None and not pd.isnull(fill_value): # special case DateTime to properly handle NaT if _is_time_like(attrs.get("units")): if data.dtype.kind in "iu": data = duck_array_ops.where( data != np.iinfo(np.int64).min, data, fill_value ) else: # if we have float data (data was packed prior masking) # we just fillna data = duck_array_ops.fillna(data, fill_value) # but if the fill_value is of integer type # we need to round and cast if np.array(fill_value).dtype.kind in "iu": data = duck_array_ops.astype( duck_array_ops.around(data), type(fill_value) ) else: data = duck_array_ops.fillna(data, fill_value) if fill_value is not None and has_unsigned: pop_to(encoding, attrs, "_Unsigned") # XXX: Is this actually needed? Doesn't the backend handle this? 
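# Illustration (assumed values, not from the netCDF spec): float64 data holding -1.0 and
# destined for an on-disk uint8 dtype is cast below as float64 -> int64 (-1) -> uint8 (255),
# sidestepping a direct float-to-unsigned cast whose behaviour is undefined.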
# two-stage casting to prevent undefined cast from float to unsigned int # first float -> int with corresponding itemsize # second int -> int/uint to final itemsize signed_dtype = np.dtype(f"i{data.itemsize}") data = duck_array_ops.astype( duck_array_ops.astype( duck_array_ops.around(data), signed_dtype, copy=False ), dtype, copy=False, ) attrs["_FillValue"] = fill_value return Variable(dims, data, attrs, encoding, fastpath=True) def decode(self, variable: Variable, name: T_Name = None): raw_fill_dict, encoded_fill_values = _check_fill_values( variable.attrs, name, variable.dtype ) if "_Unsigned" not in variable.attrs and not raw_fill_dict: return variable dims, data, attrs, encoding = unpack_for_decoding(variable) # Even if _Unsigned is used, retain on-disk _FillValue for attr, value in raw_fill_dict.items(): safe_setitem(encoding, attr, value, name=name) if "_Unsigned" in attrs: unsigned = pop_to(attrs, encoding, "_Unsigned") data = _convert_unsigned_fill_value( name, data, unsigned, raw_fill_dict.get("_FillValue"), encoded_fill_values, ) if encoded_fill_values: dtype: np.typing.DTypeLike decoded_fill_value: Any # in case of packed data we have to decode into float # in any case if "scale_factor" in attrs or "add_offset" in attrs: dtype, decoded_fill_value = ( _choose_float_dtype(data.dtype, attrs), np.nan, ) else: # in case of no-packing special case DateTime/Timedelta to properly # handle NaT, we need to check if time-like will be decoded # or not in further processing is_time_like = _is_time_like(attrs.get("units")) if ( (is_time_like == "datetime" and self.decode_times) or (is_time_like == "timedelta" and self.decode_timedelta) ) and data.dtype.kind in "iu": dtype = np.int64 decoded_fill_value = np.iinfo(np.int64).min else: dtype, decoded_fill_value = dtypes.maybe_promote(data.dtype) transform = partial( _apply_mask, encoded_fill_values=encoded_fill_values, decoded_fill_value=decoded_fill_value, dtype=dtype, ) data = lazy_elemwise_func(data, transform, dtype) return Variable(dims, data, attrs, encoding, fastpath=True) def _scale_offset_decoding( data, scale_factor, add_offset, dtype: np.typing.DTypeLike | None ): data = data.astype(dtype=dtype, copy=True) if scale_factor is not None: data *= scale_factor if add_offset is not None: data += add_offset return data def _choose_float_dtype( dtype: np.dtype, mapping: MutableMapping ) -> type[np.floating[Any]]: """Return a float dtype that can losslessly represent `dtype` values.""" # check scale/offset first to derive wanted float dtype # see https://github.com/pydata/xarray/issues/5597#issuecomment-879561954 scale_factor = mapping.get("scale_factor") add_offset = mapping.get("add_offset") if scale_factor is not None or add_offset is not None: # get the type from scale_factor/add_offset to determine # the needed floating point type if scale_factor is not None: scale_type = np.dtype(type(scale_factor)) if add_offset is not None: offset_type = np.dtype(type(add_offset)) # CF conforming, both scale_factor and add-offset are given and # of same floating point type (float32/64) if ( add_offset is not None and scale_factor is not None and offset_type == scale_type and scale_type in [np.float32, np.float64] ): # in case of int32 -> we need upcast to float64 # due to precision issues if dtype.itemsize == 4 and np.issubdtype(dtype, np.integer): return np.float64 return scale_type.type # Not CF conforming and add_offset given: # A scale factor is entirely safe (vanishing into the mantissa), # but a large integer offset could lead to loss of 
precision. # Sensitivity analysis can be tricky, so we just use a float64 # if there's any offset at all - better unoptimised than wrong! if add_offset is not None: return np.float64 # return dtype depending on given scale_factor return scale_type.type # If no scale_factor or add_offset is given, use some general rules. # Keep float32 as-is. Upcast half-precision to single-precision, # because float16 is "intended for storage but not computation" if dtype.itemsize <= 4 and np.issubdtype(dtype, np.floating): return np.float32 # float32 can exactly represent all integers up to 24 bits if dtype.itemsize <= 2 and np.issubdtype(dtype, np.integer): return np.float32 # For all other types and circumstances, we just use float64. # Todo: with nc-complex from netcdf4-python >= 1.7.0 this is available # (safe because eg. complex numbers are not supported in NetCDF) return np.float64 class CFScaleOffsetCoder(VariableCoder): """Scale and offset variables according to CF conventions. Follows the formula: decode_values = encoded_values * scale_factor + add_offset """ def __init__( self, decode_times: bool | CFDatetimeCoder = False, decode_timedelta: bool | CFTimedeltaCoder = False, ) -> None: self.decode_times = decode_times self.decode_timedelta = decode_timedelta def encode(self, variable: Variable, name: T_Name = None) -> Variable: dims, data, attrs, encoding = unpack_for_encoding(variable) if "scale_factor" in encoding or "add_offset" in encoding: # if we have a _FillValue/masked_value we do not want to cast now # but leave that to CFMaskCoder dtype = data.dtype if "_FillValue" not in encoding and "missing_value" not in encoding: dtype = _choose_float_dtype(data.dtype, encoding) # but still we need a copy prevent changing original data data = duck_array_ops.astype(data, dtype=dtype, copy=True) if "add_offset" in encoding: data -= pop_to(encoding, attrs, "add_offset", name=name) if "scale_factor" in encoding: data /= pop_to(encoding, attrs, "scale_factor", name=name) return Variable(dims, data, attrs, encoding, fastpath=True) def decode(self, variable: Variable, name: T_Name = None) -> Variable: _attrs = variable.attrs if "scale_factor" in _attrs or "add_offset" in _attrs: dims, data, attrs, encoding = unpack_for_decoding(variable) scale_factor = pop_to(attrs, encoding, "scale_factor", name=name) add_offset = pop_to(attrs, encoding, "add_offset", name=name) if duck_array_ops.ndim(scale_factor) > 0: scale_factor = np.asarray(scale_factor).item() if duck_array_ops.ndim(add_offset) > 0: add_offset = np.asarray(add_offset).item() # if we have a _FillValue/masked_value in encoding we already have the wanted # floating point dtype here (via CFMaskCoder), so no check is necessary # only check in other cases and for time-like dtype = data.dtype is_time_like = _is_time_like(attrs.get("units")) if ( ("_FillValue" not in encoding and "missing_value" not in encoding) or (is_time_like == "datetime" and self.decode_times) or (is_time_like == "timedelta" and self.decode_timedelta) ): dtype = _choose_float_dtype(dtype, encoding) transform = partial( _scale_offset_decoding, scale_factor=scale_factor, add_offset=add_offset, dtype=dtype, ) data = lazy_elemwise_func(data, transform, dtype) return Variable(dims, data, attrs, encoding, fastpath=True) else: return variable class DefaultFillvalueCoder(VariableCoder): """Encode default _FillValue if needed.""" def encode(self, variable: Variable, name: T_Name = None) -> Variable: dims, data, attrs, encoding = unpack_for_encoding(variable) # make NaN the fill value for float 
types if ( "_FillValue" not in attrs and "_FillValue" not in encoding and np.issubdtype(variable.dtype, np.floating) ): attrs["_FillValue"] = variable.dtype.type(np.nan) return Variable(dims, data, attrs, encoding, fastpath=True) else: return variable def decode(self, variable: Variable, name: T_Name = None) -> Variable: raise NotImplementedError() class BooleanCoder(VariableCoder): """Code boolean values.""" def encode(self, variable: Variable, name: T_Name = None) -> Variable: if ( (variable.dtype == bool) and ("dtype" not in variable.encoding) and ("dtype" not in variable.attrs) ): dims, data, attrs, encoding = unpack_for_encoding(variable) attrs["dtype"] = "bool" data = duck_array_ops.astype(data, dtype="i1", copy=True) return Variable(dims, data, attrs, encoding, fastpath=True) else: return variable def decode(self, variable: Variable, name: T_Name = None) -> Variable: if variable.attrs.get("dtype", False) == "bool": dims, data, attrs, encoding = unpack_for_decoding(variable) # overwrite (!) dtype in encoding, and remove from attrs # needed for correct subsequent encoding encoding["dtype"] = attrs.pop("dtype") data = BoolTypeArray(data) return Variable(dims, data, attrs, encoding, fastpath=True) else: return variable class EndianCoder(VariableCoder): """Decode Endianness to native.""" def encode(self): raise NotImplementedError() def decode(self, variable: Variable, name: T_Name = None) -> Variable: dims, data, attrs, encoding = unpack_for_decoding(variable) if not data.dtype.isnative: data = NativeEndiannessArray(data) return Variable(dims, data, attrs, encoding, fastpath=True) else: return variable class NonStringCoder(VariableCoder): """Encode NonString variables if dtypes differ.""" def encode(self, variable: Variable, name: T_Name = None) -> Variable: if "dtype" in variable.encoding and variable.encoding["dtype"] not in ( "S1", str, ): dims, data, attrs, encoding = unpack_for_encoding(variable) dtype = np.dtype(encoding.pop("dtype")) if dtype != variable.dtype: if np.issubdtype(dtype, np.integer): if ( np.issubdtype(variable.dtype, np.floating) and "_FillValue" not in variable.attrs and "missing_value" not in variable.attrs ): warnings.warn( f"saving variable {name} with floating " "point data as an integer dtype without " "any _FillValue to use for NaNs", SerializationWarning, stacklevel=10, ) data = duck_array_ops.round(data) data = duck_array_ops.astype(data, dtype=dtype) return Variable(dims, data, attrs, encoding, fastpath=True) else: return variable def decode(self): raise NotImplementedError() class ObjectVLenStringCoder(VariableCoder): def encode(self): raise NotImplementedError def decode(self, variable: Variable, name: T_Name = None) -> Variable: if variable.dtype.kind == "O" and variable.encoding.get("dtype", False) is str: variable = variable.astype(variable.encoding["dtype"]) return variable else: return variable class Numpy2StringDTypeCoder(VariableCoder): # Convert Numpy 2 StringDType arrays to object arrays for backwards compatibility # TODO: remove this if / when we decide to allow StringDType arrays in Xarray def encode(self): raise NotImplementedError def decode(self, variable: Variable, name: T_Name = None) -> Variable: if variable.dtype.kind == "T": return variable.astype(object) else: return variable class NativeEnumCoder(VariableCoder): """Encode Enum into variable dtype metadata.""" def encode(self, variable: Variable, name: T_Name = None) -> Variable: if ( "dtype" in variable.encoding and np.dtype(variable.encoding["dtype"]).metadata and "enum" in 
variable.encoding["dtype"].metadata ): dims, data, attrs, encoding = unpack_for_encoding(variable) data = data.astype(dtype=variable.encoding.pop("dtype")) return Variable(dims, data, attrs, encoding, fastpath=True) else: return variable def decode(self, variable: Variable, name: T_Name = None) -> Variable: raise NotImplementedError() python-xarray-2026.01.0/xarray/coding/common.py0000664000175000017500000001174215136607163021463 0ustar alastairalastairfrom __future__ import annotations from collections.abc import Callable, Hashable, MutableMapping from typing import TYPE_CHECKING, Any, Union import numpy as np from xarray.core import indexing from xarray.core.variable import Variable from xarray.namedarray.parallelcompat import get_chunked_array_type from xarray.namedarray.pycompat import is_chunked_array if TYPE_CHECKING: T_VarTuple = tuple[tuple[Hashable, ...], Any, dict, dict] T_Name = Union[Hashable, None] class SerializationWarning(RuntimeWarning): """Warnings about encoding/decoding issues in serialization.""" class VariableCoder: """Base class for encoding and decoding transformations on variables. We use coders for transforming variables between xarray's data model and a format suitable for serialization. For example, coders apply CF conventions for how data should be represented in netCDF files. Subclasses should implement encode() and decode(), which should satisfy the identity ``coder.decode(coder.encode(variable)) == variable``. If any options are necessary, they should be implemented as arguments to the __init__ method. The optional name argument to encode() and decode() exists solely for the sake of better error messages, and should correspond to the name of variables in the underlying store. """ def encode(self, variable: Variable, name: T_Name = None) -> Variable: """Convert an encoded variable to a decoded variable""" raise NotImplementedError() def decode(self, variable: Variable, name: T_Name = None) -> Variable: """Convert a decoded variable to an encoded variable""" raise NotImplementedError() class _ElementwiseFunctionArray(indexing.ExplicitlyIndexedNDArrayMixin): """Lazily computed array holding values of elemwise-function. Do not construct this object directly: call lazy_elemwise_func instead. Values are computed upon indexing or coercion to a NumPy array. """ def __init__(self, array, func: Callable, dtype: np.typing.DTypeLike | None): assert not is_chunked_array(array) self.array = indexing.as_indexable(array) self.func = func self._dtype = dtype @property def dtype(self) -> np.dtype: return np.dtype(self._dtype) def transpose(self, order): # For elementwise functions, we can compose transpose and function application return type(self)(self.array.transpose(order), self.func, self.dtype) def _oindex_get(self, key): return type(self)(self.array.oindex[key], self.func, self.dtype) def _vindex_get(self, key): return type(self)(self.array.vindex[key], self.func, self.dtype) def __getitem__(self, key): return type(self)(self.array[key], self.func, self.dtype) def get_duck_array(self): return self.func(self.array.get_duck_array()) async def async_get_duck_array(self): return self.func(await self.array.async_get_duck_array()) def __repr__(self) -> str: return f"{type(self).__name__}({self.array!r}, func={self.func!r}, dtype={self.dtype!r})" def lazy_elemwise_func(array, func: Callable, dtype: np.typing.DTypeLike | None): """Lazily apply an element-wise function to an array. 
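As a rough illustration (the argument values here are hypothetical), ``lazy_elemwise_func(arr, np.sqrt, np.float64)`` computes nothing up front: the function is applied only when the wrapped values are materialized (e.g. via ``get_duck_array``), or mapped over blocks when ``arr`` is a chunked array.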
Parameters ---------- array : any valid value of Variable._data func : callable Function to apply to indexed slices of an array. For use with dask, this should be a pickle-able object. dtype : coercible to np.dtype Dtype for the result of this function. Returns ------- Either a dask.array.Array or _ElementwiseFunctionArray. """ if is_chunked_array(array): chunkmanager = get_chunked_array_type(array) return chunkmanager.map_blocks(func, array, dtype=dtype) # type: ignore[arg-type] else: return _ElementwiseFunctionArray(array, func, dtype) def safe_setitem(dest, key: Hashable, value, name: T_Name = None): if key in dest: var_str = f" on variable {name!r}" if name else "" raise ValueError( f"Key '{key}' already exists in attrs{var_str}, and will not be overwritten. " "This is probably an encoding field used by xarray to describe " "how a variable is serialized. To proceed, remove this key from " "the variable's attributes manually." ) dest[key] = value def pop_to( source: MutableMapping, dest: MutableMapping, key: Hashable, name: T_Name = None ) -> Any: """ A convenience function which pops a key k from source to dest. None values are not passed on. If k already exists in dest an error is raised. """ value = source.pop(key, None) if value is not None: safe_setitem(dest, key, value, name=name) return value def unpack_for_encoding(var: Variable) -> T_VarTuple: return var.dims, var.data, var.attrs.copy(), var.encoding.copy() def unpack_for_decoding(var: Variable) -> T_VarTuple: return var.dims, var._data, var.attrs.copy(), var.encoding.copy() python-xarray-2026.01.0/xarray/coding/frequencies.py0000664000175000017500000002224415136607163022503 0ustar alastairalastair"""FrequencyInferer analog for cftime.datetime objects""" # The infer_freq method and the _CFTimeFrequencyInferer # subclass defined here were copied and adapted for # use with cftime.datetime objects based on the source code in # pandas.tseries.Frequencies._FrequencyInferer # For reference, here is a copy of the pandas copyright notice: # (c) 2011-2012, Lambda Foundry, Inc. and PyData Development Team # All rights reserved. # Copyright (c) 2008-2011 AQR Capital Management, LLC # All rights reserved. # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above # copyright notice, this list of conditions and the following # disclaimer in the documentation and/or other materials provided # with the distribution. # * Neither the name of the copyright holder nor the names of any # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. from __future__ import annotations import numpy as np import pandas as pd from xarray.coding.cftime_offsets import _MONTH_ABBREVIATIONS, _legacy_to_new_freq from xarray.coding.cftimeindex import CFTimeIndex from xarray.core.common import _contains_datetime_like_objects from xarray.core.dtypes import _is_numpy_subdtype _ONE_MICRO = 1 _ONE_MILLI = _ONE_MICRO * 1000 _ONE_SECOND = _ONE_MILLI * 1000 _ONE_MINUTE = 60 * _ONE_SECOND _ONE_HOUR = 60 * _ONE_MINUTE _ONE_DAY = 24 * _ONE_HOUR def infer_freq(index): """ Infer the most likely frequency given the input index. Parameters ---------- index : CFTimeIndex, DataArray, DatetimeIndex, TimedeltaIndex, Series If not passed a CFTimeIndex, this simply calls `pandas.infer_freq`. If passed a Series or a DataArray will use the values of the series (NOT THE INDEX). Returns ------- str or None None if no discernible frequency. Raises ------ TypeError If the index is not datetime-like. ValueError If there are fewer than three values or the index is not 1D. """ from xarray.core.dataarray import DataArray from xarray.core.variable import Variable if isinstance(index, DataArray | pd.Series): if index.ndim != 1: raise ValueError("'index' must be 1D") elif not _contains_datetime_like_objects(Variable("dim", index)): raise ValueError("'index' must contain datetime-like objects") dtype = np.asarray(index).dtype if _is_numpy_subdtype(dtype, "datetime64"): index = pd.DatetimeIndex(index.values) elif _is_numpy_subdtype(dtype, "timedelta64"): index = pd.TimedeltaIndex(index.values) else: index = CFTimeIndex(index.values) if isinstance(index, CFTimeIndex): inferer = _CFTimeFrequencyInferer(index) return inferer.get_freq() return _legacy_to_new_freq(pd.infer_freq(index)) class _CFTimeFrequencyInferer: # (pd.tseries.frequencies._FrequencyInferer): def __init__(self, index): self.index = index self.values = index.asi8 if len(index) < 3: raise ValueError("Need at least 3 dates to infer frequency") self.is_monotonic = ( self.index.is_monotonic_decreasing or self.index.is_monotonic_increasing ) self._deltas = None self._year_deltas = None self._month_deltas = None def get_freq(self): """Find the appropriate frequency string to describe the inferred frequency of self.index Adapted from `pandas.tsseries.frequencies._FrequencyInferer.get_freq` for CFTimeIndexes. 
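For illustration (not an exhaustive list of cases): an index of dates falling on the first of each consecutive month is reported as "MS", while the same anchor sampled every second month is reported as "2MS".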
Returns ------- str or None """ if not self.is_monotonic or not self.index.is_unique: return None delta = self.deltas[0] # Smallest delta if _is_multiple(delta, _ONE_DAY): return self._infer_daily_rule() # There is no possible intraday frequency with a non-unique delta # Different from pandas: we don't need to manage DST and business offsets in cftime elif len(self.deltas) != 1: return None if _is_multiple(delta, _ONE_HOUR): return _maybe_add_count("h", delta / _ONE_HOUR) elif _is_multiple(delta, _ONE_MINUTE): return _maybe_add_count("min", delta / _ONE_MINUTE) elif _is_multiple(delta, _ONE_SECOND): return _maybe_add_count("s", delta / _ONE_SECOND) elif _is_multiple(delta, _ONE_MILLI): return _maybe_add_count("ms", delta / _ONE_MILLI) else: return _maybe_add_count("us", delta / _ONE_MICRO) def _infer_daily_rule(self): annual_rule = self._get_annual_rule() if annual_rule: nyears = self.year_deltas[0] month = _MONTH_ABBREVIATIONS[self.index[0].month] alias = f"{annual_rule}-{month}" return _maybe_add_count(alias, nyears) quartely_rule = self._get_quartely_rule() if quartely_rule: nquarters = self.month_deltas[0] / 3 mod_dict = {0: 12, 2: 11, 1: 10} month = _MONTH_ABBREVIATIONS[mod_dict[self.index[0].month % 3]] alias = f"{quartely_rule}-{month}" return _maybe_add_count(alias, nquarters) monthly_rule = self._get_monthly_rule() if monthly_rule: return _maybe_add_count(monthly_rule, self.month_deltas[0]) if len(self.deltas) == 1: # Daily as there is no "Weekly" offsets with CFTime days = self.deltas[0] / _ONE_DAY return _maybe_add_count("D", days) # CFTime has no business freq and no "week of month" (WOM) return None def _get_annual_rule(self): if len(self.year_deltas) > 1: return None if len(np.unique(self.index.month)) > 1: return None return {"cs": "YS", "ce": "YE"}.get(month_anchor_check(self.index)) def _get_quartely_rule(self): if len(self.month_deltas) > 1: return None if self.month_deltas[0] % 3 != 0: return None return {"cs": "QS", "ce": "QE"}.get(month_anchor_check(self.index)) def _get_monthly_rule(self): if len(self.month_deltas) > 1: return None return {"cs": "MS", "ce": "ME"}.get(month_anchor_check(self.index)) @property def deltas(self): """Sorted unique timedeltas as microseconds.""" if self._deltas is None: self._deltas = _unique_deltas(self.values) return self._deltas @property def year_deltas(self): """Sorted unique year deltas.""" if self._year_deltas is None: self._year_deltas = _unique_deltas(self.index.year) return self._year_deltas @property def month_deltas(self): """Sorted unique month deltas.""" if self._month_deltas is None: self._month_deltas = _unique_deltas(self.index.year * 12 + self.index.month) return self._month_deltas def _unique_deltas(arr): """Sorted unique deltas of numpy array""" return np.sort(np.unique(np.diff(arr))) def _is_multiple(us, mult: int): """Whether us is a multiple of mult""" return us % mult == 0 def _maybe_add_count(base: str, count: float): """If count is greater than 1, add it to the base offset string""" if count != 1: assert count == int(count) count = int(count) return f"{count}{base}" else: return base def month_anchor_check(dates): """Return the monthly offset string. Return "cs" if all dates are the first days of the month, "ce" if all dates are the last day of the month, None otherwise. Replicated pandas._libs.tslibs.resolution.month_position_check but without business offset handling. 
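For example (illustrative dates only): [2000-01-01, 2000-02-01, 2000-03-01] gives "cs", the corresponding month-end dates give "ce", and a mixture of the two gives None.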
""" calendar_end = True calendar_start = True for date in dates: if calendar_start: calendar_start &= date.day == 1 if calendar_end: cal = date.day == date.daysinmonth calendar_end &= cal elif not calendar_start: break if calendar_end: return "ce" elif calendar_start: return "cs" else: return None python-xarray-2026.01.0/xarray/coding/calendar_ops.py0000664000175000017500000003742415136607163022632 0ustar alastairalastairfrom __future__ import annotations import numpy as np import pandas as pd from xarray.coding.cftime_offsets import date_range_like, get_date_type from xarray.coding.cftimeindex import CFTimeIndex from xarray.coding.times import ( _should_cftime_be_used, convert_times, ) from xarray.core.common import ( _contains_datetime_like_objects, full_like, is_np_datetime_like, ) try: import cftime except ImportError: cftime = None _CALENDARS_WITHOUT_YEAR_ZERO = [ "gregorian", "proleptic_gregorian", "julian", "standard", ] def convert_calendar( obj, calendar, dim="time", align_on=None, missing=None, use_cftime=None, ): """Transform a time-indexed Dataset or DataArray to one that uses another calendar. This function only converts the individual timestamps; it does not modify any data except in dropping invalid/surplus dates, or inserting values for missing dates. If the source and target calendars are both from a standard type, only the type of the time array is modified. When converting to a calendar with a leap year from to a calendar without a leap year, the 29th of February will be removed from the array. In the other direction the 29th of February will be missing in the output, unless `missing` is specified, in which case that value is inserted. For conversions involving the `360_day` calendar, see Notes. This method is safe to use with sub-daily data as it doesn't touch the time part of the timestamps. Parameters ---------- obj : DataArray or Dataset Input DataArray or Dataset with a time coordinate of a valid dtype (:py:class:`numpy.datetime64` or :py:class:`cftime.datetime`). calendar : str The target calendar name. dim : str Name of the time coordinate in the input DataArray or Dataset. align_on : {None, 'date', 'year', 'random'} Must be specified when either the source or target is a `"360_day"` calendar; ignored otherwise. See Notes. missing : any, optional By default, i.e. if the value is None, this method will simply attempt to convert the dates in the source calendar to the same dates in the target calendar, and drop any of those that are not possible to represent. If a value is provided, a new time coordinate will be created in the target calendar with the same frequency as the original time coordinate; for any dates that are not present in the source, the data will be filled with this value. Note that using this mode requires that the source data have an inferable frequency; for more information see :py:func:`xarray.infer_freq`. For certain frequency, source, and target calendar combinations, this could result in many missing values, see notes. use_cftime : bool, optional Whether to use cftime objects in the output, only used if `calendar` is one of {"proleptic_gregorian", "gregorian" or "standard"}. If True, the new time axis uses cftime objects. If None (default), it uses :py:class:`numpy.datetime64` values if the date range permits it, and :py:class:`cftime.datetime` objects if not. If False, it uses :py:class:`numpy.datetime64` or fails. Returns ------- Copy of source with the time coordinate converted to the target calendar. 
If `missing` was None (default), invalid dates in the new calendar are dropped, but missing dates are not inserted. If `missing` was given, the new data is reindexed to have a time axis with the same frequency as the source, but in the new calendar; any missing datapoints are filled with `missing`. Notes ----- Passing a value to `missing` is only usable if the source's time coordinate has an inferable frequency (see :py:func:`~xarray.infer_freq`) and is only appropriate if the target coordinate, generated from this frequency, has dates equivalent to the source. It is usually **not** appropriate to use this mode with: - Period-end frequencies: 'A', 'Y', 'Q' or 'M', as opposed to 'AS', 'YS', 'QS' and 'MS' - Sub-monthly frequencies that do not divide a day evenly: 'W', 'nD' where `n != 1` or 'mH' where `24 % m != 0`. If one of the source or target calendars is `"360_day"`, `align_on` must be specified and three options are offered. "year" The dates are translated according to their relative position in the year, ignoring their original month and day information, meaning that the missing/surplus days are added/removed at regular intervals. From a `360_day` to a standard calendar, the output will be missing the following dates (day of year in parentheses): To a leap year: January 31st (31), March 31st (91), June 1st (153), July 31st (213), September 31st (275) and November 30th (335). To a non-leap year: February 6th (36), April 19th (109), July 2nd (183), September 12th (255), November 25th (329). From a standard calendar to a `"360_day"`, the following dates in the source array will be dropped: From a leap year: January 31st (31), April 1st (92), June 1st (153), August 1st (214), September 31st (275), December 1st (336) From a non-leap year: February 6th (37), April 20th (110), July 2nd (183), September 13th (256), November 25th (329) This option is best used on daily and subdaily data. "date" The month/day information is conserved and invalid dates are dropped from the output. This means that when converting from a `"360_day"` to a standard calendar, all 31sts (Jan, March, May, July, August, October and December) will be missing as there are no equivalent dates in the `"360_day"` calendar, and the 29th (on non-leap years) and 30th of February will be dropped as there are no equivalent dates in a standard calendar. This option is best used with data on a frequency coarser than daily. "random" Similar to "year", each day of year of the source is mapped to another day of year of the target. However, instead of always having the same missing days according to the source and target years, here 5 days are chosen randomly, one for each fifth of the year. February 29th is always missing when converting to a leap year, or its value is dropped when converting from a leap year. This is similar to the method used in the LOCA dataset (see Pierce, Cayan, and Thrasher (2014). doi:10.1175/JHM-D-14-0082.1). This option is best used on daily data.
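Examples
--------
A minimal illustrative sketch (``ds`` stands in for any Dataset or DataArray with a daily ``time`` coordinate; it is not defined here and outputs are omitted):

>>> ds_noleap = convert_calendar(ds, "noleap")  # doctest: +SKIP
>>> ds_std = convert_calendar(ds_noleap, "standard", missing=np.nan)  # doctest: +SKIP

The first call drops any February 29th present in ``ds``; the second reinserts those dates in the standard calendar and fills them with ``np.nan``.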
""" from xarray.core.dataarray import DataArray time = obj[dim] if not _contains_datetime_like_objects(time.variable): raise ValueError(f"Coordinate {dim} must contain datetime objects.") use_cftime = _should_cftime_be_used(time, calendar, use_cftime) source_calendar = time.dt.calendar # Do nothing if request calendar is the same as the source # AND source is np XOR use_cftime if source_calendar == calendar and is_np_datetime_like(time.dtype) ^ use_cftime: return obj if (time.dt.year == 0).any() and calendar in _CALENDARS_WITHOUT_YEAR_ZERO: raise ValueError( f"Source time coordinate contains dates with year 0, which is not supported by target calendar {calendar}." ) if (source_calendar == "360_day" or calendar == "360_day") and align_on is None: raise ValueError( "Argument `align_on` must be specified with either 'date' or " "'year' when converting to or from a '360_day' calendar." ) if source_calendar != "360_day" and calendar != "360_day": align_on = "date" out = obj.copy() if align_on in ["year", "random"]: # Special case for conversion involving 360_day calendar if align_on == "year": # Instead of translating dates directly, this tries to keep the position within a year similar. new_doy = _interpolate_day_of_year(time, target_calendar=calendar) elif align_on == "random": # The 5 days to remove are randomly chosen, one for each of the five 72-days periods of the year. new_doy = time.groupby(f"{dim}.year").map( _random_day_of_year, target_calendar=calendar, use_cftime=use_cftime ) # Convert the source datetimes, but override the day of year with our new day of years. out[dim] = DataArray( [ _convert_to_new_calendar_with_new_day_of_year( date, newdoy, calendar, use_cftime ) for date, newdoy in zip(time.variable._data.array, new_doy, strict=True) ], dims=(dim,), name=dim, ) # Remove duplicate timestamps, happens when reducing the number of days out = out.isel({dim: np.unique(out[dim], return_index=True)[1]}) elif align_on == "date": new_times = convert_times( time.data, get_date_type(calendar, use_cftime=use_cftime), raise_on_invalid=False, ) out[dim] = new_times # Remove NaN that where put on invalid dates in target calendar out = out.sel({dim: out[dim].notnull()}) if use_cftime: # Reassign times to ensure time index of output is a CFTimeIndex # (previously it was an Index due to the presence of NaN values). # Note this is not needed in the case that the output time index is # a DatetimeIndex, since DatetimeIndexes can handle NaN values. out[dim] = CFTimeIndex(out[dim].data) if missing is not None: time_target = date_range_like(time, calendar=calendar, use_cftime=use_cftime) out = out.reindex({dim: time_target}, fill_value=missing) # Copy attrs but remove `calendar` if still present. 
out[dim].attrs.update(time.attrs) out[dim].attrs.pop("calendar", None) return out def _is_leap_year(years, calendar): func = np.vectorize(cftime.is_leap_year) return func(years, calendar=calendar) def _days_in_year(years, calendar): """The number of days in the year according to given calendar.""" if calendar == "360_day": return full_like(years, 360) return _is_leap_year(years, calendar).astype(int) + 365 def _interpolate_day_of_year(times, target_calendar): """Returns the nearest day in the target calendar of the corresponding "decimal year" in the source calendar.""" source_calendar = times.dt.calendar return np.round( _days_in_year(times.dt.year, target_calendar) * times.dt.dayofyear / _days_in_year(times.dt.year, source_calendar) ).astype(int) def _random_day_of_year(time, target_calendar, use_cftime): """Return a day of year in the new calendar. Removes Feb 29th and five other days chosen randomly within five sections of 72 days. """ year = time.dt.year[0] source_calendar = time.dt.calendar new_doy = np.arange(360) + 1 rm_idx = np.random.default_rng().integers(0, 72, 5) + 72 * np.arange(5) if source_calendar == "360_day": for idx in rm_idx: new_doy[idx + 1 :] = new_doy[idx + 1 :] + 1 if _days_in_year(year, target_calendar) == 366: new_doy[new_doy >= 60] = new_doy[new_doy >= 60] + 1 elif target_calendar == "360_day": new_doy = np.insert(new_doy, rm_idx - np.arange(5), -1) if _days_in_year(year, source_calendar) == 366: new_doy = np.insert(new_doy, 60, -1) return new_doy[time.dt.dayofyear - 1] def _convert_to_new_calendar_with_new_day_of_year( date, day_of_year, calendar, use_cftime ): """Convert a datetime object to another calendar with a new day of year. Redefines the day of year (and thus ignores the month and day information from the source datetime). Nanosecond information is lost as cftime.datetime doesn't support it. """ new_date = cftime.num2date( day_of_year - 1, f"days since {date.year}-01-01", calendar=calendar if use_cftime else "standard", ) try: return get_date_type(calendar, use_cftime)( date.year, new_date.month, new_date.day, date.hour, date.minute, date.second, date.microsecond, ) except ValueError: return np.nan def _decimal_year_cftime(time, year, days_in_year, *, date_class): year_start = date_class(year, 1, 1) delta = np.timedelta64(time - year_start, "ns") days_in_year = np.timedelta64(days_in_year, "D") return year + delta / days_in_year def _decimal_year_numpy(time, year, days_in_year, *, dtype): time = np.asarray(time).astype(dtype) year_start = np.datetime64(int(year) - 1970, "Y").astype(dtype) delta = time - year_start days_in_year = np.timedelta64(days_in_year, "D") return year + delta / days_in_year def _decimal_year(times): """Convert a datetime DataArray to decimal years according to its calendar. The decimal year of a timestamp is its year plus its sub-year component converted to the fraction of its year. Ex: '2000-03-01 12:00' is 2000.1653 in a standard calendar, 2000.16301 in a "noleap" or 2000.16806 in a "360_day". 
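That is, 2000-03-01 12:00 falls 60.5 days after 2000-01-01 00:00 in a 366-day standard (leap) year, giving 2000 + 60.5 / 366 ≈ 2000.1653; in a "noleap" calendar the offset is 59.5 days out of 365 (≈ 2000.16301), and in a "360_day" calendar it is 60.5 days out of 360 (≈ 2000.16806).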
""" if times.dtype == "O": function = _decimal_year_cftime kwargs = {"date_class": get_date_type(times.dt.calendar, True)} else: function = _decimal_year_numpy kwargs = {"dtype": times.dtype} from xarray.computation.apply_ufunc import apply_ufunc return apply_ufunc( function, times, times.dt.year, times.dt.days_in_year, kwargs=kwargs, vectorize=True, dask="parallelized", output_dtypes=[np.float64], ) def interp_calendar(source, target, dim="time"): """Interpolates a DataArray or Dataset indexed by a time coordinate to another calendar based on decimal year measure. Each timestamp in `source` and `target` are first converted to their decimal year equivalent then `source` is interpolated on the target coordinate. The decimal year of a timestamp is its year plus its sub-year component converted to the fraction of its year. For example "2000-03-01 12:00" is 2000.1653 in a standard calendar or 2000.16301 in a `"noleap"` calendar. This method should only be used when the time (HH:MM:SS) information of time coordinate is not important. Parameters ---------- source: DataArray or Dataset The source data to interpolate; must have a time coordinate of a valid dtype (:py:class:`numpy.datetime64` or :py:class:`cftime.datetime` objects) target: DataArray, DatetimeIndex, or CFTimeIndex The target time coordinate of a valid dtype (np.datetime64 or cftime objects) dim : str The time coordinate name. Return ------ DataArray or Dataset The source interpolated on the decimal years of target, """ from xarray.core.dataarray import DataArray if isinstance(target, pd.DatetimeIndex | CFTimeIndex): target = DataArray(target, dims=(dim,), name=dim) if not _contains_datetime_like_objects( source[dim].variable ) or not _contains_datetime_like_objects(target.variable): raise ValueError( f"Both 'source.{dim}' and 'target' must contain datetime objects." ) target_calendar = target.dt.calendar if ( source[dim].time.dt.year == 0 ).any() and target_calendar in _CALENDARS_WITHOUT_YEAR_ZERO: raise ValueError( f"Source time coordinate contains dates with year 0, which is not supported by target calendar {target_calendar}." 
) out = source.copy() out[dim] = _decimal_year(source[dim]) target_idx = _decimal_year(target) out = out.interp(**{dim: target_idx}) out[dim] = target return out python-xarray-2026.01.0/xarray/coding/__init__.py0000664000175000017500000000000015136607163021713 0ustar alastairalastairpython-xarray-2026.01.0/xarray/plot/0000775000175000017500000000000015136607163017327 5ustar alastairalastairpython-xarray-2026.01.0/xarray/plot/facetgrid.py0000664000175000017500000011213515136607163021634 0ustar alastairalastairfrom __future__ import annotations import functools import itertools import warnings from collections.abc import Callable, Hashable, Iterable, MutableMapping from typing import TYPE_CHECKING, Any, Generic, Literal, TypeVar, cast import numpy as np from xarray.core.formatting import format_item from xarray.core.types import HueStyleOptions, T_DataArrayOrSet from xarray.plot.utils import ( _LINEWIDTH_RANGE, _MARKERSIZE_RANGE, _add_legend, _determine_guide, _get_nice_quiver_magnitude, _guess_coords_to_plot, _infer_xy_labels, _Normalize, _parse_size, _process_cmap_cbar_kwargs, label_from_attrs, ) if TYPE_CHECKING: from matplotlib.axes import Axes from matplotlib.cm import ScalarMappable from matplotlib.colorbar import Colorbar from matplotlib.figure import Figure from matplotlib.legend import Legend from matplotlib.quiver import QuiverKey from matplotlib.text import Annotation from xarray.core.dataarray import DataArray # Overrides axes.labelsize, xtick.major.size, ytick.major.size # from mpl.rcParams _FONTSIZE = "small" # For major ticks on x, y axes _NTICKS = 5 def _nicetitle(coord, value, maxchar, template): """ Put coord, value in template and truncate at maxchar """ prettyvalue = format_item(value, quote_strings=False) title = template.format(coord=coord, value=prettyvalue) if len(title) > maxchar: title = title[: (maxchar - 3)] + "..." return title T_FacetGrid = TypeVar("T_FacetGrid", bound="FacetGrid") class FacetGrid(Generic[T_DataArrayOrSet]): """ Initialize the Matplotlib figure and FacetGrid object. The :class:`FacetGrid` is an object that links an xarray DataArray to a Matplotlib figure with a particular structure. In particular, :class:`FacetGrid` is used to draw plots with multiple axes, where each axes shows the same relationship conditioned on different levels of some dimension. It's possible to condition on up to two variables by assigning variables to the rows and columns of the grid. The general approach to plotting here is called "small multiples", where the same kind of plot is repeated multiple times, and the specific use of small multiples to display the same relationship conditioned on one or more other variables is often called a "trellis plot". The basic workflow is to initialize the :class:`FacetGrid` object with the DataArray and the variable names that are used to structure the grid. Then plotting functions can be applied to each subset by calling :meth:`FacetGrid.map_dataarray` or :meth:`FacetGrid.map`. Attributes ---------- axs : ndarray of matplotlib.axes.Axes Array containing axes in corresponding position, as returned from :py:func:`matplotlib.pyplot.subplots`. col_labels : list of matplotlib.text.Annotation Column titles. row_labels : list of matplotlib.text.Annotation Row titles. fig : matplotlib.figure.Figure The figure containing all the axes. name_dicts : ndarray of dict Array containing dictionaries mapping coordinate names to values. 
``None`` is used as a sentinel value for axes that should remain empty, i.e., sometimes the rightmost grid positions in the bottom row. """ data: T_DataArrayOrSet name_dicts: np.ndarray fig: Figure axs: np.ndarray row_names: list[np.ndarray] col_names: list[np.ndarray] figlegend: Legend | None quiverkey: QuiverKey | None cbar: Colorbar | None _single_group: bool | Hashable _nrow: int _row_var: Hashable | None _ncol: int _col_var: Hashable | None _col_wrap: int | None row_labels: list[Annotation | None] col_labels: list[Annotation | None] _x_var: None _y_var: None _hue_var: DataArray | None _cmap_extend: Any | None _mappables: list[ScalarMappable] _finalized: bool def __init__( self, data: T_DataArrayOrSet, col: Hashable | None = None, row: Hashable | None = None, col_wrap: int | None = None, sharex: bool = True, sharey: bool = True, figsize: Iterable[float] | None = None, aspect: float = 1, size: float = 3, subplot_kws: dict[str, Any] | None = None, ) -> None: """ Parameters ---------- data : DataArray or Dataset DataArray or Dataset to be plotted. row, col : str Dimension names that define subsets of the data, which will be drawn on separate facets in the grid. col_wrap : int, optional "Wrap" the grid the for the column variable after this number of columns, adding rows if ``col_wrap`` is less than the number of facets. sharex : bool, optional If true, the facets will share *x* axes. sharey : bool, optional If true, the facets will share *y* axes. figsize : Iterable of float or None, optional A tuple (width, height) of the figure in inches. If set, overrides ``size`` and ``aspect``. aspect : scalar, default: 1 Aspect ratio of each facet, so that ``aspect * size`` gives the width of each facet in inches. size : scalar, default: 3 Height (in inches) of each facet. See also: ``aspect``. subplot_kws : dict, optional Dictionary of keyword arguments for Matplotlib subplots (:py:func:`matplotlib.pyplot.subplots`). """ import matplotlib.pyplot as plt # Handle corner case of nonunique coordinates rep_col = col is not None and not data[col].to_index().is_unique rep_row = row is not None and not data[row].to_index().is_unique if rep_col or rep_row: raise ValueError( "Coordinates used for faceting cannot " "contain repeated (nonunique) values." 
) # single_group is the grouping variable, if there is exactly one single_group: bool | Hashable if col and row: single_group = False nrow = len(data[row]) ncol = len(data[col]) nfacet = nrow * ncol if col_wrap is not None: warnings.warn( "Ignoring col_wrap since both col and row were passed", stacklevel=2 ) elif row and not col: single_group = row elif not row and col: single_group = col else: raise ValueError("Pass a coordinate name as an argument for row or col") # Compute grid shape if single_group: nfacet = len(data[single_group]) if col: # idea - could add heuristic for nice shapes like 3x4 ncol = nfacet if row: ncol = 1 if col_wrap is not None: # Overrides previous settings ncol = col_wrap nrow = int(np.ceil(nfacet / ncol)) # Set the subplot kwargs subplot_kws = {} if subplot_kws is None else subplot_kws if figsize is None: # Calculate the base figure size with extra horizontal space for a # colorbar cbar_space = 1 figsize = (ncol * size * aspect + cbar_space, nrow * size) fig, axs = plt.subplots( nrow, ncol, sharex=sharex, sharey=sharey, squeeze=False, figsize=figsize, subplot_kw=subplot_kws, ) # Set up the lists of names for the row and column facet variables col_names = list(data[col].to_numpy()) if col else [] row_names = list(data[row].to_numpy()) if row else [] if single_group: full: list[dict[Hashable, Any] | None] = [ {single_group: x} for x in data[single_group].to_numpy() ] empty: list[dict[Hashable, Any] | None] = [ None for x in range(nrow * ncol - len(full)) ] name_dict_list = full + empty else: rowcols = itertools.product(row_names, col_names) name_dict_list = [{row: r, col: c} for r, c in rowcols] name_dicts = np.array(name_dict_list).reshape(nrow, ncol) # Set up the class attributes # --------------------------- # First the public API self.data = data self.name_dicts = name_dicts self.fig = fig self.axs = axs self.row_names = row_names self.col_names = col_names # guides self.figlegend = None self.quiverkey = None self.cbar = None # Next the private variables self._single_group = single_group self._nrow = nrow self._row_var = row self._ncol = ncol self._col_var = col self._col_wrap = col_wrap self.row_labels = [None] * nrow self.col_labels = [None] * ncol self._x_var = None self._y_var = None self._hue_var = None self._cmap_extend = None self._mappables = [] self._finalized = False @property def axes(self) -> np.ndarray: warnings.warn( ( "self.axes is deprecated since 2022.11 in order to align with " "matplotlibs plt.subplots, use self.axs instead." ), DeprecationWarning, stacklevel=2, ) return self.axs @axes.setter def axes(self, axs: np.ndarray) -> None: warnings.warn( ( "self.axes is deprecated since 2022.11 in order to align with " "matplotlibs plt.subplots, use self.axs instead." ), DeprecationWarning, stacklevel=2, ) self.axs = axs @property def _left_axes(self) -> np.ndarray: return self.axs[:, 0] @property def _bottom_axes(self) -> np.ndarray: return self.axs[-1, :] def map_dataarray( self: T_FacetGrid, func: Callable, x: Hashable | None, y: Hashable | None, **kwargs: Any, ) -> T_FacetGrid: """ Apply a plotting function to a 2d facet's subset of the data. 
This is more convenient and less general than ``FacetGrid.map`` Parameters ---------- func : callable A plotting function with the same signature as a 2d xarray plotting method such as `xarray.plot.imshow` x, y : string Names of the coordinates to plot on x, y axes **kwargs additional keyword arguments to func Returns ------- self : FacetGrid object """ if kwargs.get("cbar_ax") is not None: raise ValueError("cbar_ax not supported by FacetGrid.") cmap_params, cbar_kwargs = _process_cmap_cbar_kwargs( func, self.data.to_numpy(), **kwargs ) self._cmap_extend = cmap_params.get("extend") # Order is important func_kwargs = { k: v for k, v in kwargs.items() if k not in {"cmap", "colors", "cbar_kwargs", "levels"} } func_kwargs.update(cmap_params) # to avoid redundant calling, colorbar and labelling is instead handled # by `_finalize_grid` at the end func_kwargs["add_colorbar"] = False if func.__name__ != "surface": func_kwargs["add_labels"] = False # Get x, y labels for the first subplot x, y = _infer_xy_labels( darray=self.data.loc[self.name_dicts.flat[0]], x=x, y=y, imshow=func.__name__ == "imshow", rgb=kwargs.get("rgb"), ) for d, ax in zip(self.name_dicts.flat, self.axs.flat, strict=True): # None is the sentinel value if d is not None: subset = self.data.loc[d] mappable = func( subset, x=x, y=y, ax=ax, **func_kwargs, _is_facetgrid=True ) self._mappables.append(mappable) xlabel = label_from_attrs(self.data[x]) ylabel = label_from_attrs(self.data[y]) self._finalize_grid(xlabel, ylabel) if kwargs.get("add_colorbar", True): self.add_colorbar(**cbar_kwargs) return self def map_plot1d( self: T_FacetGrid, func: Callable, x: Hashable | None, y: Hashable | None, *, z: Hashable | None = None, hue: Hashable | None = None, markersize: Hashable | None = None, linewidth: Hashable | None = None, **kwargs: Any, ) -> T_FacetGrid: """ Apply a plotting function to a 1d facet's subset of the data. This is more convenient and less general than ``FacetGrid.map`` Parameters ---------- func : A plotting function with the same signature as a 1d xarray plotting method such as `xarray.plot.scatter` x, y : Names of the coordinates to plot on x, y axes **kwargs additional keyword arguments to func Returns ------- self : FacetGrid object """ # Copy data to allow converting categoricals to integers and storing # them in self.data. It is not possible to copy in the init # unfortunately as there are tests that relies on self.data being # mutable (test_names_appear_somewhere()). Maybe something to deprecate # not sure how much that is used outside these tests. self.data = self.data.copy() if kwargs.get("cbar_ax") is not None: raise ValueError("cbar_ax not supported by FacetGrid.") if func.__name__ == "scatter": size_ = kwargs.pop("_size", markersize) size_r = _MARKERSIZE_RANGE else: size_ = kwargs.pop("_size", linewidth) size_r = _LINEWIDTH_RANGE # Guess what coords to use if some of the values in coords_to_plot are None: coords_to_plot: MutableMapping[str, Hashable | None] = dict( x=x, z=z, hue=hue, size=size_ ) coords_to_plot = _guess_coords_to_plot(self.data, coords_to_plot, kwargs) # Handle hues: hue = coords_to_plot["hue"] hueplt = self.data.coords[hue] if hue else None # TODO: _infer_line_data2 ? hueplt_norm = _Normalize(hueplt) self._hue_var = hueplt cbar_kwargs = kwargs.pop("cbar_kwargs", {}) if hueplt_norm.data is not None: if not hueplt_norm.data_is_numeric: # TODO: Ticks seems a little too hardcoded, since it will always # show all the values. 
But maybe it's ok, since plotting hundreds # of categorical data isn't that meaningful anyway. cbar_kwargs.update(format=hueplt_norm.format, ticks=hueplt_norm.ticks) kwargs.update(levels=hueplt_norm.levels) cmap_params, cbar_kwargs = _process_cmap_cbar_kwargs( func, cast("DataArray", hueplt_norm.values).data, cbar_kwargs=cbar_kwargs, **kwargs, ) self._cmap_extend = cmap_params.get("extend") else: cmap_params = {} # Handle sizes: size_ = coords_to_plot["size"] sizeplt = self.data.coords[size_] if size_ else None sizeplt_norm = _Normalize(data=sizeplt, width=size_r) if sizeplt_norm.data is not None: self.data[size_] = sizeplt_norm.values # Add kwargs that are sent to the plotting function, # order is important ??? func_kwargs = { k: v for k, v in kwargs.items() if k not in {"cmap", "colors", "cbar_kwargs", "levels"} } func_kwargs.update(cmap_params) # Annotations will be handled later, skip those parts in the plotfunc: func_kwargs["add_colorbar"] = False func_kwargs["add_legend"] = False func_kwargs["add_title"] = False add_labels_ = np.zeros(self.axs.shape + (3,), dtype=bool) if kwargs.get("z") is not None: # 3d plots looks better with all labels. 3d plots can't sharex either so it # is easy to get lost while rotating the plots: add_labels_[:] = True else: # Subplots should have labels on the left and bottom edges only: add_labels_[-1, :, 0] = True # x add_labels_[:, 0, 1] = True # y # add_labels_[:, :, 2] = True # z # Set up the lists of names for the row and column facet variables: if self._single_group: full = tuple( {self._single_group: x} for x in range(self.data[self._single_group].size) ) empty = tuple(None for x in range(self._nrow * self._ncol - len(full))) name_d = full + empty else: rowcols = itertools.product( range(self.data[self._row_var].size), range(self.data[self._col_var].size), ) name_d = tuple({self._row_var: r, self._col_var: c} for r, c in rowcols) name_dicts = np.array(name_d).reshape(self._nrow, self._ncol) # Plot the data for each subplot: for add_lbls, d, ax in zip( add_labels_.reshape((self.axs.size, -1)), name_dicts.flat, self.axs.flat, strict=True, ): func_kwargs["add_labels"] = add_lbls # None is the sentinel value if d is not None: subset = self.data.isel(d) mappable = func( subset, x=x, y=y, ax=ax, hue=hue, _size=size_, **func_kwargs, _is_facetgrid=True, ) self._mappables.append(mappable) # Add titles and some touch ups: self._finalize_grid() self._set_lims() add_colorbar, add_legend = _determine_guide( hueplt_norm, sizeplt_norm, kwargs.get("add_colorbar"), kwargs.get("add_legend"), # kwargs.get("add_guide", None), # kwargs.get("hue_style", None), ) if add_legend: use_legend_elements = func.__name__ != "hist" if use_legend_elements: self.add_legend( use_legend_elements=use_legend_elements, hueplt_norm=hueplt_norm if not add_colorbar else _Normalize(None), sizeplt_norm=sizeplt_norm, primitive=self._mappables, legend_ax=self.fig, plotfunc=func.__name__, ) else: self.add_legend(use_legend_elements=use_legend_elements) if add_colorbar: # Colorbar is after legend so it correctly fits the plot: if "label" not in cbar_kwargs: cbar_kwargs["label"] = label_from_attrs(hueplt_norm.data) self.add_colorbar(**cbar_kwargs) return self def map_dataarray_line( self: T_FacetGrid, func: Callable, x: Hashable | None, y: Hashable | None, hue: Hashable | None, add_legend: bool = True, _labels=None, **kwargs: Any, ) -> T_FacetGrid: from xarray.plot.dataarray_plot import _infer_line_data for d, ax in zip(self.name_dicts.flat, self.axs.flat, strict=True): # None is the sentinel value 
if d is not None: subset = self.data.loc[d] mappable = func( subset, x=x, y=y, ax=ax, hue=hue, add_legend=False, _labels=False, **kwargs, ) self._mappables.append(mappable) xplt, yplt, hueplt, huelabel = _infer_line_data( darray=self.data.loc[self.name_dicts.flat[0]], x=x, y=y, hue=hue ) xlabel = label_from_attrs(xplt) ylabel = label_from_attrs(yplt) self._hue_var = hueplt self._finalize_grid(xlabel, ylabel) if add_legend and hueplt is not None and huelabel is not None: self.add_legend(label=huelabel) return self def map_dataset( self: T_FacetGrid, func: Callable, x: Hashable | None = None, y: Hashable | None = None, hue: Hashable | None = None, hue_style: HueStyleOptions = None, add_guide: bool | None = None, **kwargs: Any, ) -> T_FacetGrid: from xarray.plot.dataset_plot import _infer_meta_data kwargs["add_guide"] = False if kwargs.get("markersize"): kwargs["size_mapping"] = _parse_size( self.data[kwargs["markersize"]], kwargs.pop("size_norm", None) ) meta_data = _infer_meta_data( self.data, x, y, hue, hue_style, add_guide, funcname=func.__name__ ) kwargs["meta_data"] = meta_data if hue and meta_data["hue_style"] == "continuous": cmap_params, cbar_kwargs = _process_cmap_cbar_kwargs( func, self.data[hue].to_numpy(), **kwargs ) kwargs["meta_data"]["cmap_params"] = cmap_params kwargs["meta_data"]["cbar_kwargs"] = cbar_kwargs kwargs["_is_facetgrid"] = True if func.__name__ == "quiver" and "scale" not in kwargs: raise ValueError("Please provide scale.") # TODO: come up with an algorithm for reasonable scale choice for d, ax in zip(self.name_dicts.flat, self.axs.flat, strict=True): # None is the sentinel value if d is not None: subset = self.data.loc[d] maybe_mappable = func( ds=subset, x=x, y=y, hue=hue, hue_style=hue_style, ax=ax, **kwargs ) # TODO: this is needed to get legends to work. 
# but maybe_mappable is a list in that case :/ self._mappables.append(maybe_mappable) self._finalize_grid(meta_data["xlabel"], meta_data["ylabel"]) if hue: hue_label = meta_data.pop("hue_label", None) self._hue_label = hue_label if meta_data["add_legend"]: self._hue_var = meta_data["hue"] self.add_legend(label=hue_label) elif meta_data["add_colorbar"]: self.add_colorbar(label=hue_label, **cbar_kwargs) if meta_data["add_quiverkey"]: self.add_quiverkey(kwargs["u"], kwargs["v"]) return self def _finalize_grid(self, *axlabels: Hashable) -> None: """Finalize the annotations and layout.""" if not self._finalized: self.set_axis_labels(*axlabels) self.set_titles() self.fig.tight_layout() for ax, namedict in zip(self.axs.flat, self.name_dicts.flat, strict=True): if namedict is None: ax.set_visible(False) self._finalized = True def _adjust_fig_for_guide(self, guide) -> None: # Draw the plot to set the bounding boxes correctly if hasattr(self.fig.canvas, "get_renderer"): renderer = self.fig.canvas.get_renderer() else: raise RuntimeError("MPL backend has no renderer") self.fig.draw(renderer) # Calculate and set the new width of the figure so the legend fits guide_width = guide.get_window_extent(renderer).width / self.fig.dpi figure_width = self.fig.get_figwidth() total_width = figure_width + guide_width self.fig.set_figwidth(total_width) # Draw the plot again to get the new transformations self.fig.draw(renderer) # Now calculate how much space we need on the right side guide_width = guide.get_window_extent(renderer).width / self.fig.dpi space_needed = guide_width / total_width + 0.02 # margin = .01 # _space_needed = margin + space_needed right = 1 - space_needed # Place the subplot axes to give space for the legend self.fig.subplots_adjust(right=right) def add_legend( self, *, label: str | None = None, use_legend_elements: bool = False, **kwargs: Any, ) -> None: if use_legend_elements: self.figlegend = _add_legend(**kwargs) else: assert self._hue_var is not None self.figlegend = self.fig.legend( handles=self._mappables[-1], labels=list(self._hue_var.to_numpy()), title=label if label is not None else label_from_attrs(self._hue_var), loc=kwargs.pop("loc", "center right"), **kwargs, ) self._adjust_fig_for_guide(self.figlegend) def add_colorbar(self, **kwargs: Any) -> None: """Draw a colorbar.""" kwargs = kwargs.copy() if self._cmap_extend is not None: kwargs.setdefault("extend", self._cmap_extend) # dont pass extend as kwarg if it is in the mappable if hasattr(self._mappables[-1], "extend"): kwargs.pop("extend", None) if "label" not in kwargs: from xarray import DataArray assert isinstance(self.data, DataArray) kwargs.setdefault("label", label_from_attrs(self.data)) self.cbar = self.fig.colorbar( self._mappables[-1], ax=list(self.axs.flat), **kwargs ) def add_quiverkey(self, u: Hashable, v: Hashable, **kwargs: Any) -> None: kwargs = kwargs.copy() magnitude = _get_nice_quiver_magnitude(self.data[u], self.data[v]) units = self.data[u].attrs.get("units", "") self.quiverkey = self.axs.flat[-1].quiverkey( self._mappables[-1], X=0.8, Y=0.9, U=magnitude, label=f"{magnitude}\n{units}", labelpos="E", coordinates="figure", ) # TODO: does not work because self.quiverkey.get_window_extent(renderer) = 0 # https://github.com/matplotlib/matplotlib/issues/18530 # self._adjust_fig_for_guide(self.quiverkey.text) def _get_largest_lims(self) -> dict[str, tuple[float, float]]: """ Get largest limits in the facetgrid. 
Returns ------- lims_largest : dict[str, tuple[float, float]] Dictionary with the largest limits along each axis. Examples -------- >>> ds = xr.tutorial.scatter_example_dataset(seed=42) >>> fg = ds.plot.scatter(x="A", y="B", hue="y", row="x", col="w") >>> round(fg._get_largest_lims()["x"][0], 3) np.float64(-0.334) """ lims_largest: dict[str, tuple[float, float]] = dict( x=(np.inf, -np.inf), y=(np.inf, -np.inf), z=(np.inf, -np.inf) ) for axis in ("x", "y", "z"): # Find the plot with the largest xlim values: lower, upper = lims_largest[axis] for ax in self.axs.flat: get_lim: Callable[[], tuple[float, float]] | None = getattr( ax, f"get_{axis}lim", None ) if get_lim: lower_new, upper_new = get_lim() lower, upper = (min(lower, lower_new), max(upper, upper_new)) lims_largest[axis] = (lower, upper) return lims_largest def _set_lims( self, x: tuple[float, float] | None = None, y: tuple[float, float] | None = None, z: tuple[float, float] | None = None, ) -> None: """ Set the same limits for all the subplots in the facetgrid. Parameters ---------- x : tuple[float, float] or None, optional x axis limits. y : tuple[float, float] or None, optional y axis limits. z : tuple[float, float] or None, optional z axis limits. Examples -------- >>> ds = xr.tutorial.scatter_example_dataset(seed=42) >>> fg = ds.plot.scatter(x="A", y="B", hue="y", row="x", col="w") >>> fg._set_lims(x=(-0.3, 0.3), y=(0, 2), z=(0, 4)) >>> fg.axs[0, 0].get_xlim(), fg.axs[0, 0].get_ylim() ((np.float64(-0.3), np.float64(0.3)), (np.float64(0.0), np.float64(2.0))) """ lims_largest = self._get_largest_lims() # Set limits: for ax in self.axs.flat: for (axis, data_limit), parameter_limit in zip( lims_largest.items(), (x, y, z), strict=True ): set_lim = getattr(ax, f"set_{axis}lim", None) if set_lim: set_lim(data_limit if parameter_limit is None else parameter_limit) def set_axis_labels(self, *axlabels: Hashable) -> None: """Set axis labels on the left column and bottom row of the grid.""" from xarray.core.dataarray import DataArray for var, axis in zip(axlabels, ["x", "y", "z"], strict=False): if var is not None: if isinstance(var, DataArray): getattr(self, f"set_{axis}labels")(label_from_attrs(var)) else: getattr(self, f"set_{axis}labels")(str(var)) def _set_labels( self, axis: str, axes: Iterable, label: str | None = None, **kwargs ) -> None: if label is None: label = label_from_attrs(self.data[getattr(self, f"_{axis}_var")]) for ax in axes: getattr(ax, f"set_{axis}label")(label, **kwargs) def set_xlabels(self, label: str | None = None, **kwargs: Any) -> None: """Label the x axis on the bottom row of the grid.""" self._set_labels("x", self._bottom_axes, label, **kwargs) def set_ylabels(self, label: str | None = None, **kwargs: Any) -> None: """Label the y axis on the left column of the grid.""" self._set_labels("y", self._left_axes, label, **kwargs) def set_zlabels(self, label: str | None = None, **kwargs: Any) -> None: """Label the z axis.""" self._set_labels("z", self._left_axes, label, **kwargs) def set_titles( self, template: str = "{coord} = {value}", maxchar: int = 30, size=None, **kwargs, ) -> None: """ Draw titles either above each facet or on the grid margins. 
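For example, with the default template a facet selected along a coordinate named ``time`` gets a title like ``"time = 2000-01-01"``; titles longer than ``maxchar`` characters are truncated with ``"..."``.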
Parameters ---------- template : str, default: "{coord} = {value}" Template for plot titles containing {coord} and {value} maxchar : int, default: 30 Truncate titles at maxchar **kwargs : keyword args additional arguments to matplotlib.text Returns ------- self: FacetGrid object """ import matplotlib as mpl if size is None: size = mpl.rcParams["axes.labelsize"] nicetitle = functools.partial(_nicetitle, maxchar=maxchar, template=template) if self._single_group: for d, ax in zip(self.name_dicts.flat, self.axs.flat, strict=True): # Only label the ones with data if d is not None: coord, value = list(d.items()).pop() title = nicetitle(coord, value) ax.set_title(title, size=size, **kwargs) else: # The row titles on the right edge of the grid for index, (ax, row_name, handle) in enumerate( zip(self.axs[:, -1], self.row_names, self.row_labels, strict=True) ): title = nicetitle(coord=self._row_var, value=row_name) if not handle: self.row_labels[index] = ax.annotate( title, xy=(1.02, 0.5), xycoords="axes fraction", rotation=270, ha="left", va="center", **kwargs, ) else: handle.set_text(title) handle.update(kwargs) # The column titles on the top row for index, (ax, col_name, handle) in enumerate( zip(self.axs[0, :], self.col_names, self.col_labels, strict=True) ): title = nicetitle(coord=self._col_var, value=col_name) if not handle: self.col_labels[index] = ax.set_title(title, size=size, **kwargs) else: handle.set_text(title) handle.update(kwargs) def set_ticks( self, max_xticks: int = _NTICKS, max_yticks: int = _NTICKS, fontsize: str | int = _FONTSIZE, ) -> None: """ Set and control tick behavior. Parameters ---------- max_xticks, max_yticks : int, optional Maximum number of labeled ticks to plot on x, y axes fontsize : string or int Font size as used by matplotlib text Returns ------- self : FacetGrid object """ from matplotlib.ticker import MaxNLocator # Both are necessary x_major_locator = MaxNLocator(nbins=max_xticks) y_major_locator = MaxNLocator(nbins=max_yticks) for ax in self.axs.flat: ax.xaxis.set_major_locator(x_major_locator) ax.yaxis.set_major_locator(y_major_locator) for tick in itertools.chain( ax.xaxis.get_major_ticks(), ax.yaxis.get_major_ticks() ): tick.label1.set_fontsize(fontsize) def map( self: T_FacetGrid, func: Callable, *args: Hashable, **kwargs: Any ) -> T_FacetGrid: """ Apply a plotting function to each facet's subset of the data. Parameters ---------- func : callable A plotting function that takes data and keyword arguments. It must plot to the currently active matplotlib Axes and take a `color` keyword argument. If faceting on the `hue` dimension, it must also take a `label` keyword argument. *args : Hashable Column names in self.data that identify variables with data to plot. The data for each variable is passed to `func` in the order the variables are specified in the call. **kwargs : keyword arguments All keyword arguments are passed to the plotting function. Returns ------- self : FacetGrid object """ import matplotlib.pyplot as plt for ax, namedict in zip(self.axs.flat, self.name_dicts.flat, strict=True): if namedict is not None: data = self.data.loc[namedict] plt.sca(ax) innerargs = [data[a].to_numpy() for a in args] maybe_mappable = func(*innerargs, **kwargs) # TODO: better way to verify that an artist is mappable? 
# https://stackoverflow.com/questions/33023036/is-it-possible-to-detect-if-a-matplotlib-artist-is-a-mappable-suitable-for-use-w#33023522 if maybe_mappable and hasattr(maybe_mappable, "autoscale_None"): self._mappables.append(maybe_mappable) self._finalize_grid(*args[:2]) return self def _easy_facetgrid( data: T_DataArrayOrSet, plotfunc: Callable, kind: Literal["line", "dataarray", "dataset", "plot1d"], x: Hashable | None = None, y: Hashable | None = None, row: Hashable | None = None, col: Hashable | None = None, col_wrap: int | None = None, sharex: bool = True, sharey: bool = True, aspect: float | None = None, size: float | None = None, subplot_kws: dict[str, Any] | None = None, ax: Axes | None = None, figsize: Iterable[float] | None = None, **kwargs: Any, ) -> FacetGrid[T_DataArrayOrSet]: """ Convenience method to call xarray.plot.FacetGrid from 2d plotting methods kwargs are the arguments to 2d plotting method """ if ax is not None: raise ValueError("Can't use axes when making faceted plots.") if aspect is None: aspect = 1 if size is None: size = 3 elif figsize is not None: raise ValueError("cannot provide both `figsize` and `size` arguments") if kwargs.get("z") is not None: # 3d plots doesn't support sharex, sharey, reset to mpl defaults: sharex = False sharey = False g = FacetGrid( data=data, col=col, row=row, col_wrap=col_wrap, sharex=sharex, sharey=sharey, figsize=figsize, aspect=aspect, size=size, subplot_kws=subplot_kws, ) if kind == "line": return g.map_dataarray_line(plotfunc, x, y, **kwargs) if kind == "dataarray": return g.map_dataarray(plotfunc, x, y, **kwargs) if kind == "plot1d": return g.map_plot1d(plotfunc, x, y, **kwargs) if kind == "dataset": return g.map_dataset(plotfunc, x, y, **kwargs) raise ValueError( f"kind must be one of `line`, `dataarray`, `dataset` or `plot1d`, got {kind}" ) python-xarray-2026.01.0/xarray/plot/accessor.py0000664000175000017500000012431515136607163021511 0ustar alastairalastairfrom __future__ import annotations import functools from collections.abc import Hashable, Iterable from typing import TYPE_CHECKING, Any, Literal, NoReturn, overload import numpy as np # Accessor methods have the same name as plotting methods, so we need a different namespace from xarray.plot import dataarray_plot, dataset_plot if TYPE_CHECKING: from matplotlib.axes import Axes from matplotlib.collections import LineCollection, PathCollection, QuadMesh from matplotlib.colors import Normalize from matplotlib.container import BarContainer from matplotlib.contour import QuadContourSet from matplotlib.image import AxesImage from matplotlib.patches import Polygon from matplotlib.quiver import Quiver from mpl_toolkits.mplot3d.art3d import Line3D, Poly3DCollection from numpy.typing import ArrayLike from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset from xarray.core.types import AspectOptions, HueStyleOptions, ScaleOptions from xarray.plot.facetgrid import FacetGrid class DataArrayPlotAccessor: """ Enables use of xarray.plot functions as attributes on a DataArray. 
For example, DataArray.plot.imshow """ _da: DataArray __slots__ = ("_da",) __doc__ = dataarray_plot.plot.__doc__ def __init__(self, darray: DataArray) -> None: self._da = darray # Should return Any such that the user does not run into problems # with the many possible return values @functools.wraps(dataarray_plot.plot, assigned=("__doc__", "__annotations__")) def __call__(self, **kwargs) -> Any: return dataarray_plot.plot(self._da, **kwargs) @functools.wraps(dataarray_plot.hist) def hist( self, *args, **kwargs ) -> tuple[np.ndarray, np.ndarray, BarContainer | Polygon]: return dataarray_plot.hist(self._da, *args, **kwargs) @overload def line( # type: ignore[misc,unused-ignore] # None is hashable :( self, *args: Any, row: None = None, # no wrap -> primitive col: None = None, # no wrap -> primitive figsize: Iterable[float] | None = None, aspect: AspectOptions = None, size: float | None = None, ax: Axes | None = None, hue: Hashable | None = None, x: Hashable | None = None, y: Hashable | None = None, xincrease: bool | None = None, yincrease: bool | None = None, xscale: ScaleOptions = None, yscale: ScaleOptions = None, xticks: ArrayLike | None = None, yticks: ArrayLike | None = None, xlim: tuple[float, float] | None = None, ylim: tuple[float, float] | None = None, add_legend: bool = True, _labels: bool = True, **kwargs: Any, ) -> list[Line3D]: ... @overload def line( self, *args: Any, row: Hashable, # wrap -> FacetGrid col: Hashable | None = None, figsize: Iterable[float] | None = None, aspect: AspectOptions = None, size: float | None = None, ax: Axes | None = None, hue: Hashable | None = None, x: Hashable | None = None, y: Hashable | None = None, xincrease: bool | None = None, yincrease: bool | None = None, xscale: ScaleOptions = None, yscale: ScaleOptions = None, xticks: ArrayLike | None = None, yticks: ArrayLike | None = None, xlim: tuple[float, float] | None = None, ylim: tuple[float, float] | None = None, add_legend: bool = True, _labels: bool = True, **kwargs: Any, ) -> FacetGrid[DataArray]: ... @overload def line( self, *args: Any, row: Hashable | None = None, col: Hashable, # wrap -> FacetGrid figsize: Iterable[float] | None = None, aspect: AspectOptions = None, size: float | None = None, ax: Axes | None = None, hue: Hashable | None = None, x: Hashable | None = None, y: Hashable | None = None, xincrease: bool | None = None, yincrease: bool | None = None, xscale: ScaleOptions = None, yscale: ScaleOptions = None, xticks: ArrayLike | None = None, yticks: ArrayLike | None = None, xlim: tuple[float, float] | None = None, ylim: tuple[float, float] | None = None, add_legend: bool = True, _labels: bool = True, **kwargs: Any, ) -> FacetGrid[DataArray]: ... @functools.wraps(dataarray_plot.line, assigned=("__doc__",)) def line(self, *args, **kwargs) -> list[Line3D] | FacetGrid[DataArray]: return dataarray_plot.line(self._da, *args, **kwargs) @overload def step( # type: ignore[misc,unused-ignore] # None is hashable :( self, *args: Any, where: Literal["pre", "post", "mid"] = "pre", drawstyle: str | None = None, ds: str | None = None, row: None = None, # no wrap -> primitive col: None = None, # no wrap -> primitive **kwargs: Any, ) -> list[Line3D]: ... @overload def step( self, *args: Any, where: Literal["pre", "post", "mid"] = "pre", drawstyle: str | None = None, ds: str | None = None, row: Hashable, # wrap -> FacetGrid col: Hashable | None = None, **kwargs: Any, ) -> FacetGrid[DataArray]: ... 
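    # Minimal usage sketch of this accessor (illustrative only; ``da`` is assumed
    # to be a 1-D DataArray with a "time" dimension and is not defined in this
    # module):
    #
    #     da.plot.line(x="time")     # dispatches to dataarray_plot.line
    #     da.plot.step(where="mid")  # dispatches to dataarray_plot.step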
@overload def step( self, *args: Any, where: Literal["pre", "post", "mid"] = "pre", drawstyle: str | None = None, ds: str | None = None, row: Hashable | None = None, col: Hashable, # wrap -> FacetGrid **kwargs: Any, ) -> FacetGrid[DataArray]: ... @functools.wraps(dataarray_plot.step, assigned=("__doc__",)) def step(self, *args, **kwargs) -> list[Line3D] | FacetGrid[DataArray]: return dataarray_plot.step(self._da, *args, **kwargs) @overload def scatter( # type: ignore[misc,unused-ignore] # None is hashable :( self, *args: Any, x: Hashable | None = None, y: Hashable | None = None, z: Hashable | None = None, hue: Hashable | None = None, hue_style: HueStyleOptions = None, markersize: Hashable | None = None, linewidth: Hashable | None = None, figsize: Iterable[float] | None = None, size: float | None = None, aspect: float | None = None, ax: Axes | None = None, row: None = None, # no wrap -> primitive col: None = None, # no wrap -> primitive col_wrap: int | None = None, xincrease: bool | None = True, yincrease: bool | None = True, add_legend: bool | None = None, add_colorbar: bool | None = None, add_labels: bool | Iterable[bool] = True, add_title: bool = True, subplot_kws: dict[str, Any] | None = None, xscale: ScaleOptions = None, yscale: ScaleOptions = None, xticks: ArrayLike | None = None, yticks: ArrayLike | None = None, xlim: tuple[float, float] | None = None, ylim: tuple[float, float] | None = None, cmap=None, vmin: float | None = None, vmax: float | None = None, norm: Normalize | None = None, extend=None, levels=None, **kwargs, ) -> PathCollection: ... @overload def scatter( self, *args: Any, x: Hashable | None = None, y: Hashable | None = None, z: Hashable | None = None, hue: Hashable | None = None, hue_style: HueStyleOptions = None, markersize: Hashable | None = None, linewidth: Hashable | None = None, figsize: Iterable[float] | None = None, size: float | None = None, aspect: float | None = None, ax: Axes | None = None, row: Hashable | None = None, col: Hashable, # wrap -> FacetGrid col_wrap: int | None = None, xincrease: bool | None = True, yincrease: bool | None = True, add_legend: bool | None = None, add_colorbar: bool | None = None, add_labels: bool | Iterable[bool] = True, add_title: bool = True, subplot_kws: dict[str, Any] | None = None, xscale: ScaleOptions = None, yscale: ScaleOptions = None, xticks: ArrayLike | None = None, yticks: ArrayLike | None = None, xlim: tuple[float, float] | None = None, ylim: tuple[float, float] | None = None, cmap=None, vmin: float | None = None, vmax: float | None = None, norm: Normalize | None = None, extend=None, levels=None, **kwargs, ) -> FacetGrid[DataArray]: ... 
@overload def scatter( self, *args: Any, x: Hashable | None = None, y: Hashable | None = None, z: Hashable | None = None, hue: Hashable | None = None, hue_style: HueStyleOptions = None, markersize: Hashable | None = None, linewidth: Hashable | None = None, figsize: Iterable[float] | None = None, size: float | None = None, aspect: float | None = None, ax: Axes | None = None, row: Hashable, # wrap -> FacetGrid col: Hashable | None = None, col_wrap: int | None = None, xincrease: bool | None = True, yincrease: bool | None = True, add_legend: bool | None = None, add_colorbar: bool | None = None, add_labels: bool | Iterable[bool] = True, add_title: bool = True, subplot_kws: dict[str, Any] | None = None, xscale: ScaleOptions = None, yscale: ScaleOptions = None, xticks: ArrayLike | None = None, yticks: ArrayLike | None = None, xlim: tuple[float, float] | None = None, ylim: tuple[float, float] | None = None, cmap=None, vmin: float | None = None, vmax: float | None = None, norm: Normalize | None = None, extend=None, levels=None, **kwargs, ) -> FacetGrid[DataArray]: ... @functools.wraps(dataarray_plot.scatter, assigned=("__doc__",)) def scatter(self, *args, **kwargs) -> PathCollection | FacetGrid[DataArray]: return dataarray_plot.scatter(self._da, *args, **kwargs) @overload def imshow( # type: ignore[misc,unused-ignore] # None is hashable :( self, *args: Any, x: Hashable | None = None, y: Hashable | None = None, figsize: Iterable[float] | None = None, size: float | None = None, aspect: AspectOptions = None, ax: Axes | None = None, row: None = None, # no wrap -> primitive col: None = None, # no wrap -> primitive col_wrap: int | None = None, xincrease: bool | None = True, yincrease: bool | None = True, add_colorbar: bool | None = None, add_labels: bool = True, vmin: float | None = None, vmax: float | None = None, cmap=None, center=None, robust: bool = False, extend=None, levels=None, infer_intervals=None, colors=None, subplot_kws: dict[str, Any] | None = None, cbar_ax: Axes | None = None, cbar_kwargs: dict[str, Any] | None = None, xscale: ScaleOptions = None, yscale: ScaleOptions = None, xticks: ArrayLike | None = None, yticks: ArrayLike | None = None, xlim: tuple[float, float] | None = None, ylim: tuple[float, float] | None = None, norm: Normalize | None = None, **kwargs: Any, ) -> AxesImage: ... @overload def imshow( self, *args: Any, x: Hashable | None = None, y: Hashable | None = None, figsize: Iterable[float] | None = None, size: float | None = None, aspect: AspectOptions = None, ax: Axes | None = None, row: Hashable | None = None, col: Hashable, # wrap -> FacetGrid col_wrap: int | None = None, xincrease: bool | None = True, yincrease: bool | None = True, add_colorbar: bool | None = None, add_labels: bool = True, vmin: float | None = None, vmax: float | None = None, cmap=None, center=None, robust: bool = False, extend=None, levels=None, infer_intervals=None, colors=None, subplot_kws: dict[str, Any] | None = None, cbar_ax: Axes | None = None, cbar_kwargs: dict[str, Any] | None = None, xscale: ScaleOptions = None, yscale: ScaleOptions = None, xticks: ArrayLike | None = None, yticks: ArrayLike | None = None, xlim: tuple[float, float] | None = None, ylim: tuple[float, float] | None = None, norm: Normalize | None = None, **kwargs: Any, ) -> FacetGrid[DataArray]: ... 
@overload def imshow( self, *args: Any, x: Hashable | None = None, y: Hashable | None = None, figsize: Iterable[float] | None = None, size: float | None = None, aspect: AspectOptions = None, ax: Axes | None = None, row: Hashable, # wrap -> FacetGrid col: Hashable | None = None, col_wrap: int | None = None, xincrease: bool | None = True, yincrease: bool | None = True, add_colorbar: bool | None = None, add_labels: bool = True, vmin: float | None = None, vmax: float | None = None, cmap=None, center=None, robust: bool = False, extend=None, levels=None, infer_intervals=None, colors=None, subplot_kws: dict[str, Any] | None = None, cbar_ax: Axes | None = None, cbar_kwargs: dict[str, Any] | None = None, xscale: ScaleOptions = None, yscale: ScaleOptions = None, xticks: ArrayLike | None = None, yticks: ArrayLike | None = None, xlim: tuple[float, float] | None = None, ylim: tuple[float, float] | None = None, norm: Normalize | None = None, **kwargs: Any, ) -> FacetGrid[DataArray]: ... @functools.wraps(dataarray_plot.imshow, assigned=("__doc__",)) def imshow(self, *args, **kwargs) -> AxesImage | FacetGrid[DataArray]: return dataarray_plot.imshow(self._da, *args, **kwargs) @overload def contour( # type: ignore[misc,unused-ignore] # None is hashable :( self, *args: Any, x: Hashable | None = None, y: Hashable | None = None, figsize: Iterable[float] | None = None, size: float | None = None, aspect: AspectOptions = None, ax: Axes | None = None, row: None = None, # no wrap -> primitive col: None = None, # no wrap -> primitive col_wrap: int | None = None, xincrease: bool | None = True, yincrease: bool | None = True, add_colorbar: bool | None = None, add_labels: bool = True, vmin: float | None = None, vmax: float | None = None, cmap=None, center=None, robust: bool = False, extend=None, levels=None, infer_intervals=None, colors=None, subplot_kws: dict[str, Any] | None = None, cbar_ax: Axes | None = None, cbar_kwargs: dict[str, Any] | None = None, xscale: ScaleOptions = None, yscale: ScaleOptions = None, xticks: ArrayLike | None = None, yticks: ArrayLike | None = None, xlim: tuple[float, float] | None = None, ylim: tuple[float, float] | None = None, norm: Normalize | None = None, **kwargs: Any, ) -> QuadContourSet: ... @overload def contour( self, *args: Any, x: Hashable | None = None, y: Hashable | None = None, figsize: Iterable[float] | None = None, size: float | None = None, aspect: AspectOptions = None, ax: Axes | None = None, row: Hashable | None = None, col: Hashable, # wrap -> FacetGrid col_wrap: int | None = None, xincrease: bool | None = True, yincrease: bool | None = True, add_colorbar: bool | None = None, add_labels: bool = True, vmin: float | None = None, vmax: float | None = None, cmap=None, center=None, robust: bool = False, extend=None, levels=None, infer_intervals=None, colors=None, subplot_kws: dict[str, Any] | None = None, cbar_ax: Axes | None = None, cbar_kwargs: dict[str, Any] | None = None, xscale: ScaleOptions = None, yscale: ScaleOptions = None, xticks: ArrayLike | None = None, yticks: ArrayLike | None = None, xlim: tuple[float, float] | None = None, ylim: tuple[float, float] | None = None, norm: Normalize | None = None, **kwargs: Any, ) -> FacetGrid[DataArray]: ... 
@overload def contour( self, *args: Any, x: Hashable | None = None, y: Hashable | None = None, figsize: Iterable[float] | None = None, size: float | None = None, aspect: AspectOptions = None, ax: Axes | None = None, row: Hashable, # wrap -> FacetGrid col: Hashable | None = None, col_wrap: int | None = None, xincrease: bool | None = True, yincrease: bool | None = True, add_colorbar: bool | None = None, add_labels: bool = True, vmin: float | None = None, vmax: float | None = None, cmap=None, center=None, robust: bool = False, extend=None, levels=None, infer_intervals=None, colors=None, subplot_kws: dict[str, Any] | None = None, cbar_ax: Axes | None = None, cbar_kwargs: dict[str, Any] | None = None, xscale: ScaleOptions = None, yscale: ScaleOptions = None, xticks: ArrayLike | None = None, yticks: ArrayLike | None = None, xlim: tuple[float, float] | None = None, ylim: tuple[float, float] | None = None, norm: Normalize | None = None, **kwargs: Any, ) -> FacetGrid[DataArray]: ... @functools.wraps(dataarray_plot.contour, assigned=("__doc__",)) def contour(self, *args, **kwargs) -> QuadContourSet | FacetGrid[DataArray]: return dataarray_plot.contour(self._da, *args, **kwargs) @overload def contourf( # type: ignore[misc,unused-ignore] # None is hashable :( self, *args: Any, x: Hashable | None = None, y: Hashable | None = None, figsize: Iterable[float] | None = None, size: float | None = None, aspect: AspectOptions = None, ax: Axes | None = None, row: None = None, # no wrap -> primitive col: None = None, # no wrap -> primitive col_wrap: int | None = None, xincrease: bool | None = True, yincrease: bool | None = True, add_colorbar: bool | None = None, add_labels: bool = True, vmin: float | None = None, vmax: float | None = None, cmap=None, center=None, robust: bool = False, extend=None, levels=None, infer_intervals=None, colors=None, subplot_kws: dict[str, Any] | None = None, cbar_ax: Axes | None = None, cbar_kwargs: dict[str, Any] | None = None, xscale: ScaleOptions = None, yscale: ScaleOptions = None, xticks: ArrayLike | None = None, yticks: ArrayLike | None = None, xlim: tuple[float, float] | None = None, ylim: tuple[float, float] | None = None, norm: Normalize | None = None, **kwargs: Any, ) -> QuadContourSet: ... @overload def contourf( self, *args: Any, x: Hashable | None = None, y: Hashable | None = None, figsize: Iterable[float] | None = None, size: float | None = None, aspect: AspectOptions = None, ax: Axes | None = None, row: Hashable | None = None, col: Hashable, # wrap -> FacetGrid col_wrap: int | None = None, xincrease: bool | None = True, yincrease: bool | None = True, add_colorbar: bool | None = None, add_labels: bool = True, vmin: float | None = None, vmax: float | None = None, cmap=None, center=None, robust: bool = False, extend=None, levels=None, infer_intervals=None, colors=None, subplot_kws: dict[str, Any] | None = None, cbar_ax: Axes | None = None, cbar_kwargs: dict[str, Any] | None = None, xscale: ScaleOptions = None, yscale: ScaleOptions = None, xticks: ArrayLike | None = None, yticks: ArrayLike | None = None, xlim: tuple[float, float] | None = None, ylim: tuple[float, float] | None = None, norm: Normalize | None = None, **kwargs: Any, ) -> FacetGrid[DataArray]: ... 
@overload def contourf( self, *args: Any, x: Hashable | None = None, y: Hashable | None = None, figsize: Iterable[float] | None = None, size: float | None = None, aspect: AspectOptions = None, ax: Axes | None = None, row: Hashable, # wrap -> FacetGrid col: Hashable | None = None, col_wrap: int | None = None, xincrease: bool | None = True, yincrease: bool | None = True, add_colorbar: bool | None = None, add_labels: bool = True, vmin: float | None = None, vmax: float | None = None, cmap=None, center=None, robust: bool = False, extend=None, levels=None, infer_intervals=None, colors=None, subplot_kws: dict[str, Any] | None = None, cbar_ax: Axes | None = None, cbar_kwargs: dict[str, Any] | None = None, xscale: ScaleOptions = None, yscale: ScaleOptions = None, xticks: ArrayLike | None = None, yticks: ArrayLike | None = None, xlim: tuple[float, float] | None = None, ylim: tuple[float, float] | None = None, norm: Normalize | None = None, **kwargs: Any, ) -> FacetGrid: ... @functools.wraps(dataarray_plot.contourf, assigned=("__doc__",)) def contourf(self, *args, **kwargs) -> QuadContourSet | FacetGrid[DataArray]: return dataarray_plot.contourf(self._da, *args, **kwargs) @overload def pcolormesh( # type: ignore[misc,unused-ignore] # None is hashable :( self, *args: Any, x: Hashable | None = None, y: Hashable | None = None, figsize: Iterable[float] | None = None, size: float | None = None, aspect: AspectOptions = None, ax: Axes | None = None, row: None = None, # no wrap -> primitive col: None = None, # no wrap -> primitive col_wrap: int | None = None, xincrease: bool | None = True, yincrease: bool | None = True, add_colorbar: bool | None = None, add_labels: bool = True, vmin: float | None = None, vmax: float | None = None, cmap=None, center=None, robust: bool = False, extend=None, levels=None, infer_intervals=None, colors=None, subplot_kws: dict[str, Any] | None = None, cbar_ax: Axes | None = None, cbar_kwargs: dict[str, Any] | None = None, xscale: ScaleOptions = None, yscale: ScaleOptions = None, xticks: ArrayLike | None = None, yticks: ArrayLike | None = None, xlim: tuple[float, float] | None = None, ylim: tuple[float, float] | None = None, norm: Normalize | None = None, **kwargs: Any, ) -> QuadMesh: ... @overload def pcolormesh( self, *args: Any, x: Hashable | None = None, y: Hashable | None = None, figsize: Iterable[float] | None = None, size: float | None = None, aspect: AspectOptions = None, ax: Axes | None = None, row: Hashable | None = None, col: Hashable, # wrap -> FacetGrid col_wrap: int | None = None, xincrease: bool | None = True, yincrease: bool | None = True, add_colorbar: bool | None = None, add_labels: bool = True, vmin: float | None = None, vmax: float | None = None, cmap=None, center=None, robust: bool = False, extend=None, levels=None, infer_intervals=None, colors=None, subplot_kws: dict[str, Any] | None = None, cbar_ax: Axes | None = None, cbar_kwargs: dict[str, Any] | None = None, xscale: ScaleOptions = None, yscale: ScaleOptions = None, xticks: ArrayLike | None = None, yticks: ArrayLike | None = None, xlim: tuple[float, float] | None = None, ylim: tuple[float, float] | None = None, norm: Normalize | None = None, **kwargs: Any, ) -> FacetGrid[DataArray]: ... 
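    # Usage sketch (illustrative only; same hypothetical ``da``).
    # ``pcolormesh`` is also what ``DataArray.plot()`` dispatches to for 2-D
    # data (see the dispatch table in ``dataarray_plot.plot`` below), so the
    # first two calls draw the same kind of plot.
    #
    #     qm = da.isel(time=0).plot.pcolormesh(x="x", y="y")   # QuadMesh
    #     qm = da.isel(time=0).plot()                          # same, via plot()
    #     fg = da.plot.pcolormesh(col="time", col_wrap=4)      # FacetGrid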
@overload def pcolormesh( self, *args: Any, x: Hashable | None = None, y: Hashable | None = None, figsize: Iterable[float] | None = None, size: float | None = None, aspect: AspectOptions = None, ax: Axes | None = None, row: Hashable, # wrap -> FacetGrid col: Hashable | None = None, col_wrap: int | None = None, xincrease: bool | None = True, yincrease: bool | None = True, add_colorbar: bool | None = None, add_labels: bool = True, vmin: float | None = None, vmax: float | None = None, cmap=None, center=None, robust: bool = False, extend=None, levels=None, infer_intervals=None, colors=None, subplot_kws: dict[str, Any] | None = None, cbar_ax: Axes | None = None, cbar_kwargs: dict[str, Any] | None = None, xscale: ScaleOptions = None, yscale: ScaleOptions = None, xticks: ArrayLike | None = None, yticks: ArrayLike | None = None, xlim: tuple[float, float] | None = None, ylim: tuple[float, float] | None = None, norm: Normalize | None = None, **kwargs: Any, ) -> FacetGrid[DataArray]: ... @functools.wraps(dataarray_plot.pcolormesh, assigned=("__doc__",)) def pcolormesh(self, *args, **kwargs) -> QuadMesh | FacetGrid[DataArray]: return dataarray_plot.pcolormesh(self._da, *args, **kwargs) @overload def surface( self, *args: Any, x: Hashable | None = None, y: Hashable | None = None, figsize: Iterable[float] | None = None, size: float | None = None, aspect: AspectOptions = None, ax: Axes | None = None, row: None = None, # no wrap -> primitive col: None = None, # no wrap -> primitive col_wrap: int | None = None, xincrease: bool | None = True, yincrease: bool | None = True, add_colorbar: bool | None = None, add_labels: bool = True, vmin: float | None = None, vmax: float | None = None, cmap=None, center=None, robust: bool = False, extend=None, levels=None, infer_intervals=None, colors=None, subplot_kws: dict[str, Any] | None = None, cbar_ax: Axes | None = None, cbar_kwargs: dict[str, Any] | None = None, xscale: ScaleOptions = None, yscale: ScaleOptions = None, xticks: ArrayLike | None = None, yticks: ArrayLike | None = None, xlim: tuple[float, float] | None = None, ylim: tuple[float, float] | None = None, norm: Normalize | None = None, **kwargs: Any, ) -> Poly3DCollection: ... @overload def surface( self, *args: Any, x: Hashable | None = None, y: Hashable | None = None, figsize: Iterable[float] | None = None, size: float | None = None, aspect: AspectOptions = None, ax: Axes | None = None, row: Hashable | None = None, col: Hashable, # wrap -> FacetGrid col_wrap: int | None = None, xincrease: bool | None = True, yincrease: bool | None = True, add_colorbar: bool | None = None, add_labels: bool = True, vmin: float | None = None, vmax: float | None = None, cmap=None, center=None, robust: bool = False, extend=None, levels=None, infer_intervals=None, colors=None, subplot_kws: dict[str, Any] | None = None, cbar_ax: Axes | None = None, cbar_kwargs: dict[str, Any] | None = None, xscale: ScaleOptions = None, yscale: ScaleOptions = None, xticks: ArrayLike | None = None, yticks: ArrayLike | None = None, xlim: tuple[float, float] | None = None, ylim: tuple[float, float] | None = None, norm: Normalize | None = None, **kwargs: Any, ) -> FacetGrid: ... 
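    # Usage sketch (illustrative only; same hypothetical ``da``).
    # ``surface`` draws on a 3-D projection; when no ``ax`` is passed the
    # wrapped plotting function sets up the 3-D axes itself, so a plain call
    # is enough.
    #
    #     poly = da.isel(time=0).plot.surface()    # Poly3DCollection
    #     fg = da.plot.surface(col="time")         # faceted variant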
@overload def surface( self, *args: Any, x: Hashable | None = None, y: Hashable | None = None, figsize: Iterable[float] | None = None, size: float | None = None, aspect: AspectOptions = None, ax: Axes | None = None, row: Hashable, # wrap -> FacetGrid col: Hashable | None = None, col_wrap: int | None = None, xincrease: bool | None = True, yincrease: bool | None = True, add_colorbar: bool | None = None, add_labels: bool = True, vmin: float | None = None, vmax: float | None = None, cmap=None, center=None, robust: bool = False, extend=None, levels=None, infer_intervals=None, colors=None, subplot_kws: dict[str, Any] | None = None, cbar_ax: Axes | None = None, cbar_kwargs: dict[str, Any] | None = None, xscale: ScaleOptions = None, yscale: ScaleOptions = None, xticks: ArrayLike | None = None, yticks: ArrayLike | None = None, xlim: tuple[float, float] | None = None, ylim: tuple[float, float] | None = None, norm: Normalize | None = None, **kwargs: Any, ) -> FacetGrid: ... @functools.wraps(dataarray_plot.surface, assigned=("__doc__",)) def surface(self, *args, **kwargs) -> Poly3DCollection: return dataarray_plot.surface(self._da, *args, **kwargs) class DatasetPlotAccessor: """ Enables use of xarray.plot functions as attributes on a Dataset. For example, Dataset.plot.scatter """ _ds: Dataset __slots__ = ("_ds",) def __init__(self, dataset: Dataset) -> None: self._ds = dataset def __call__(self, *args, **kwargs) -> NoReturn: raise ValueError( "Dataset.plot cannot be called directly. Use " "an explicit plot method, e.g. ds.plot.scatter(...)" ) @overload def scatter( # type: ignore[misc,unused-ignore] # None is hashable :( self, *args: Any, x: Hashable | None = None, y: Hashable | None = None, z: Hashable | None = None, hue: Hashable | None = None, hue_style: HueStyleOptions = None, markersize: Hashable | None = None, linewidth: Hashable | None = None, figsize: Iterable[float] | None = None, size: float | None = None, aspect: float | None = None, ax: Axes | None = None, row: None = None, # no wrap -> primitive col: None = None, # no wrap -> primitive col_wrap: int | None = None, xincrease: bool | None = True, yincrease: bool | None = True, add_legend: bool | None = None, add_colorbar: bool | None = None, add_labels: bool | Iterable[bool] = True, add_title: bool = True, subplot_kws: dict[str, Any] | None = None, xscale: ScaleOptions = None, yscale: ScaleOptions = None, xticks: ArrayLike | None = None, yticks: ArrayLike | None = None, xlim: tuple[float, float] | None = None, ylim: tuple[float, float] | None = None, cmap=None, vmin: float | None = None, vmax: float | None = None, norm: Normalize | None = None, extend=None, levels=None, **kwargs: Any, ) -> PathCollection: ... 
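    # Usage sketch (illustrative only; assumes a hypothetical Dataset ``ds``
    # with data variables "A" and "B" and a dimension coordinate "w").
    # ``Dataset.plot.scatter`` plots one data variable against another, with
    # ``hue`` and ``markersize`` optionally taken from further variables or
    # coordinates.
    #
    #     pc = ds.plot.scatter(x="A", y="B", hue="w")     # PathCollection
    #     fg = ds.plot.scatter(x="A", y="B", col="w")     # FacetGrid[Dataset]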
@overload def scatter( self, *args: Any, x: Hashable | None = None, y: Hashable | None = None, z: Hashable | None = None, hue: Hashable | None = None, hue_style: HueStyleOptions = None, markersize: Hashable | None = None, linewidth: Hashable | None = None, figsize: Iterable[float] | None = None, size: float | None = None, aspect: float | None = None, ax: Axes | None = None, row: Hashable | None = None, col: Hashable, # wrap -> FacetGrid col_wrap: int | None = None, xincrease: bool | None = True, yincrease: bool | None = True, add_legend: bool | None = None, add_colorbar: bool | None = None, add_labels: bool | Iterable[bool] = True, add_title: bool = True, subplot_kws: dict[str, Any] | None = None, xscale: ScaleOptions = None, yscale: ScaleOptions = None, xticks: ArrayLike | None = None, yticks: ArrayLike | None = None, xlim: tuple[float, float] | None = None, ylim: tuple[float, float] | None = None, cmap=None, vmin: float | None = None, vmax: float | None = None, norm: Normalize | None = None, extend=None, levels=None, **kwargs: Any, ) -> FacetGrid[Dataset]: ... @overload def scatter( self, *args: Any, x: Hashable | None = None, y: Hashable | None = None, z: Hashable | None = None, hue: Hashable | None = None, hue_style: HueStyleOptions = None, markersize: Hashable | None = None, linewidth: Hashable | None = None, figsize: Iterable[float] | None = None, size: float | None = None, aspect: float | None = None, ax: Axes | None = None, row: Hashable, # wrap -> FacetGrid col: Hashable | None = None, col_wrap: int | None = None, xincrease: bool | None = True, yincrease: bool | None = True, add_legend: bool | None = None, add_colorbar: bool | None = None, add_labels: bool | Iterable[bool] = True, add_title: bool = True, subplot_kws: dict[str, Any] | None = None, xscale: ScaleOptions = None, yscale: ScaleOptions = None, xticks: ArrayLike | None = None, yticks: ArrayLike | None = None, xlim: tuple[float, float] | None = None, ylim: tuple[float, float] | None = None, cmap=None, vmin: float | None = None, vmax: float | None = None, norm: Normalize | None = None, extend=None, levels=None, **kwargs: Any, ) -> FacetGrid[Dataset]: ... @functools.wraps(dataset_plot.scatter, assigned=("__doc__",)) def scatter(self, *args, **kwargs) -> PathCollection | FacetGrid[Dataset]: return dataset_plot.scatter(self._ds, *args, **kwargs) @overload def quiver( # type: ignore[misc,unused-ignore] # None is hashable :( self, *args: Any, x: Hashable | None = None, y: Hashable | None = None, u: Hashable | None = None, v: Hashable | None = None, hue: Hashable | None = None, hue_style: HueStyleOptions = None, col: None = None, # no wrap -> primitive row: None = None, # no wrap -> primitive ax: Axes | None = None, figsize: Iterable[float] | None = None, size: float | None = None, col_wrap: int | None = None, sharex: bool = True, sharey: bool = True, aspect: AspectOptions = None, subplot_kws: dict[str, Any] | None = None, add_guide: bool | None = None, cbar_kwargs: dict[str, Any] | None = None, cbar_ax: Axes | None = None, vmin: float | None = None, vmax: float | None = None, norm: Normalize | None = None, infer_intervals=None, center=None, levels=None, robust: bool | None = None, colors=None, extend=None, cmap=None, **kwargs: Any, ) -> Quiver: ... 
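    # Usage sketch (illustrative only; assumes a hypothetical Dataset ``ds``
    # with data variables "u" and "v" on dims ("time", "y", "x")).
    # ``quiver`` needs the vector components passed as data-variable names
    # through ``u`` and ``v`` in addition to the ``x``/``y`` coordinates.
    #
    #     q = ds.isel(time=0).plot.quiver(x="x", y="y", u="u", v="v")
    #     fg = ds.plot.quiver(x="x", y="y", u="u", v="v", col="time")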
@overload def quiver( self, *args: Any, x: Hashable | None = None, y: Hashable | None = None, u: Hashable | None = None, v: Hashable | None = None, hue: Hashable | None = None, hue_style: HueStyleOptions = None, col: Hashable, # wrap -> FacetGrid row: Hashable | None = None, ax: Axes | None = None, figsize: Iterable[float] | None = None, size: float | None = None, col_wrap: int | None = None, sharex: bool = True, sharey: bool = True, aspect: AspectOptions = None, subplot_kws: dict[str, Any] | None = None, add_guide: bool | None = None, cbar_kwargs: dict[str, Any] | None = None, cbar_ax: Axes | None = None, vmin: float | None = None, vmax: float | None = None, norm: Normalize | None = None, infer_intervals=None, center=None, levels=None, robust: bool | None = None, colors=None, extend=None, cmap=None, **kwargs: Any, ) -> FacetGrid[Dataset]: ... @overload def quiver( self, *args: Any, x: Hashable | None = None, y: Hashable | None = None, u: Hashable | None = None, v: Hashable | None = None, hue: Hashable | None = None, hue_style: HueStyleOptions = None, col: Hashable | None = None, row: Hashable, # wrap -> FacetGrid ax: Axes | None = None, figsize: Iterable[float] | None = None, size: float | None = None, col_wrap: int | None = None, sharex: bool = True, sharey: bool = True, aspect: AspectOptions = None, subplot_kws: dict[str, Any] | None = None, add_guide: bool | None = None, cbar_kwargs: dict[str, Any] | None = None, cbar_ax: Axes | None = None, vmin: float | None = None, vmax: float | None = None, norm: Normalize | None = None, infer_intervals=None, center=None, levels=None, robust: bool | None = None, colors=None, extend=None, cmap=None, **kwargs: Any, ) -> FacetGrid[Dataset]: ... @functools.wraps(dataset_plot.quiver, assigned=("__doc__",)) def quiver(self, *args, **kwargs) -> Quiver | FacetGrid[Dataset]: return dataset_plot.quiver(self._ds, *args, **kwargs) @overload def streamplot( # type: ignore[misc,unused-ignore] # None is hashable :( self, *args: Any, x: Hashable | None = None, y: Hashable | None = None, u: Hashable | None = None, v: Hashable | None = None, hue: Hashable | None = None, hue_style: HueStyleOptions = None, col: None = None, # no wrap -> primitive row: None = None, # no wrap -> primitive ax: Axes | None = None, figsize: Iterable[float] | None = None, size: float | None = None, col_wrap: int | None = None, sharex: bool = True, sharey: bool = True, aspect: AspectOptions = None, subplot_kws: dict[str, Any] | None = None, add_guide: bool | None = None, cbar_kwargs: dict[str, Any] | None = None, cbar_ax: Axes | None = None, vmin: float | None = None, vmax: float | None = None, norm: Normalize | None = None, infer_intervals=None, center=None, levels=None, robust: bool | None = None, colors=None, extend=None, cmap=None, **kwargs: Any, ) -> LineCollection: ... 
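    # Usage sketch (illustrative only; reuses the hypothetical ``ds`` from the
    # quiver note above). ``streamplot`` takes the same u/v layout as
    # ``quiver`` but draws streamlines (a LineCollection) instead of arrows.
    #
    #     lc = ds.isel(time=0).plot.streamplot(x="x", y="y", u="u", v="v")
    #     fg = ds.plot.streamplot(x="x", y="y", u="u", v="v", row="time")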
@overload def streamplot( self, *args: Any, x: Hashable | None = None, y: Hashable | None = None, u: Hashable | None = None, v: Hashable | None = None, hue: Hashable | None = None, hue_style: HueStyleOptions = None, col: Hashable, # wrap -> FacetGrid row: Hashable | None = None, ax: Axes | None = None, figsize: Iterable[float] | None = None, size: float | None = None, col_wrap: int | None = None, sharex: bool = True, sharey: bool = True, aspect: AspectOptions = None, subplot_kws: dict[str, Any] | None = None, add_guide: bool | None = None, cbar_kwargs: dict[str, Any] | None = None, cbar_ax: Axes | None = None, vmin: float | None = None, vmax: float | None = None, norm: Normalize | None = None, infer_intervals=None, center=None, levels=None, robust: bool | None = None, colors=None, extend=None, cmap=None, **kwargs: Any, ) -> FacetGrid[Dataset]: ... @overload def streamplot( self, *args: Any, x: Hashable | None = None, y: Hashable | None = None, u: Hashable | None = None, v: Hashable | None = None, hue: Hashable | None = None, hue_style: HueStyleOptions = None, col: Hashable | None = None, row: Hashable, # wrap -> FacetGrid ax: Axes | None = None, figsize: Iterable[float] | None = None, size: float | None = None, col_wrap: int | None = None, sharex: bool = True, sharey: bool = True, aspect: AspectOptions = None, subplot_kws: dict[str, Any] | None = None, add_guide: bool | None = None, cbar_kwargs: dict[str, Any] | None = None, cbar_ax: Axes | None = None, vmin: float | None = None, vmax: float | None = None, norm: Normalize | None = None, infer_intervals=None, center=None, levels=None, robust: bool | None = None, colors=None, extend=None, cmap=None, **kwargs: Any, ) -> FacetGrid[Dataset]: ... @functools.wraps(dataset_plot.streamplot, assigned=("__doc__",)) def streamplot(self, *args, **kwargs) -> LineCollection | FacetGrid[Dataset]: return dataset_plot.streamplot(self._ds, *args, **kwargs) python-xarray-2026.01.0/xarray/plot/dataarray_plot.py0000664000175000017500000025114215136607163022714 0ustar alastairalastairfrom __future__ import annotations import functools import warnings from collections.abc import Callable, Hashable, Iterable, MutableMapping from typing import TYPE_CHECKING, Any, Literal, Union, cast, overload import numpy as np import pandas as pd from xarray.core.utils import attempt_import from xarray.plot.facetgrid import _easy_facetgrid from xarray.plot.utils import ( _LINEWIDTH_RANGE, _MARKERSIZE_RANGE, _add_colorbar, _add_legend, _assert_valid_xy, _determine_guide, _ensure_plottable, _guess_coords_to_plot, _infer_interval_breaks, _infer_xy_labels, _Normalize, _process_cmap_cbar_kwargs, _rescale_imshow_rgb, _resolve_intervals_1dplot, _resolve_intervals_2dplot, _set_concise_date, _update_axes, get_axis, label_from_attrs, ) from xarray.structure.alignment import broadcast from xarray.structure.concat import concat if TYPE_CHECKING: from matplotlib.axes import Axes from matplotlib.collections import PathCollection, QuadMesh from matplotlib.colors import Colormap, Normalize from matplotlib.container import BarContainer from matplotlib.contour import QuadContourSet from matplotlib.image import AxesImage from matplotlib.patches import Polygon from mpl_toolkits.mplot3d.art3d import Line3D, Poly3DCollection from numpy.typing import ArrayLike from xarray.core.dataarray import DataArray from xarray.core.types import ( AspectOptions, ExtendOptions, HueStyleOptions, ScaleOptions, T_DataArray, ) from xarray.plot.facetgrid import FacetGrid _styles: dict[str, Any] = { # Add a white border 
to make it easier seeing overlapping markers: "scatter.edgecolors": "w", } def _infer_line_data( darray: DataArray, x: Hashable | None, y: Hashable | None, hue: Hashable | None ) -> tuple[DataArray, DataArray, DataArray | None, str]: ndims = len(darray.dims) if x is not None and y is not None: raise ValueError("Cannot specify both x and y kwargs for line plots.") if x is not None: _assert_valid_xy(darray, x, "x") if y is not None: _assert_valid_xy(darray, y, "y") if ndims == 1: huename = None hueplt = None huelabel = "" if x is not None: xplt = darray[x] yplt = darray elif y is not None: xplt = darray yplt = darray[y] else: # Both x & y are None dim = darray.dims[0] xplt = darray[dim] yplt = darray else: if x is None and y is None and hue is None: raise ValueError("For 2D inputs, please specify either hue, x or y.") if y is None: if hue is not None: _assert_valid_xy(darray, hue, "hue") xname, huename = _infer_xy_labels(darray=darray, x=x, y=hue) xplt = darray[xname] if xplt.ndim > 1: if huename in darray.dims: otherindex = 1 if darray.dims.index(huename) == 0 else 0 otherdim = darray.dims[otherindex] yplt = darray.transpose(otherdim, huename, transpose_coords=False) xplt = xplt.transpose(otherdim, huename, transpose_coords=False) else: raise ValueError( "For 2D inputs, hue must be a dimension" " i.e. one of " + repr(darray.dims) ) else: (xdim,) = darray[xname].dims (huedim,) = darray[huename].dims yplt = darray.transpose(xdim, huedim) else: yname, huename = _infer_xy_labels(darray=darray, x=y, y=hue) yplt = darray[yname] if yplt.ndim > 1: if huename in darray.dims: otherindex = 1 if darray.dims.index(huename) == 0 else 0 otherdim = darray.dims[otherindex] xplt = darray.transpose(otherdim, huename, transpose_coords=False) yplt = yplt.transpose(otherdim, huename, transpose_coords=False) else: raise ValueError( "For 2D inputs, hue must be a dimension" " i.e. one of " + repr(darray.dims) ) else: (ydim,) = darray[yname].dims (huedim,) = darray[huename].dims xplt = darray.transpose(ydim, huedim) huelabel = label_from_attrs(darray[huename]) hueplt = darray[huename] return xplt, yplt, hueplt, huelabel def _prepare_plot1d_data( darray: T_DataArray, coords_to_plot: MutableMapping[str, Hashable], plotfunc_name: str | None = None, _is_facetgrid: bool = False, ) -> dict[str, T_DataArray]: """ Prepare data for usage with plt.scatter. Parameters ---------- darray : T_DataArray Base DataArray. coords_to_plot : MutableMapping[str, Hashable] Coords that will be plotted. plotfunc_name : str | None Name of the plotting function that will be used. Returns ------- plts : dict[str, T_DataArray] Dict of DataArrays that will be sent to matplotlib. Examples -------- >>> # Make sure int coords are plotted: >>> a = xr.DataArray( ... data=[1, 2], ... coords={1: ("x", [0, 1], {"units": "s"})}, ... dims=("x",), ... name="a", ... ) >>> plts = xr.plot.dataarray_plot._prepare_plot1d_data( ... a, coords_to_plot={"x": 1, "z": None, "hue": None, "size": None} ... ) >>> # Check which coords to plot: >>> print({k: v.name for k, v in plts.items()}) {'y': 'a', 'x': 1} """ # If there are more than 1 dimension in the array than stack all the # dimensions so the plotter can plot anything: if darray.ndim > 1: # When stacking dims the lines will continue connecting. 
For floats # this can be solved by adding a nan element in between the flattening # points: dims_T = [] if np.issubdtype(darray.dtype, np.floating): for v in ["z", "x"]: dim = coords_to_plot.get(v, None) if (dim is not None) and (dim in darray.dims): darray_nan = np.nan * darray.isel({dim: -1}) darray = concat( [darray, darray_nan], dim=dim, coords="minimal", compat="override", join="exact", ) dims_T.append(coords_to_plot[v]) # Lines should never connect to the same coordinate when stacked, # transpose to avoid this as much as possible: darray = darray.transpose(..., *dims_T) # Array is now ready to be stacked: darray = darray.stack(_stacked_dim=darray.dims) # Broadcast together all the chosen variables: plts = dict(y=darray) plts.update( {k: darray.coords[v] for k, v in coords_to_plot.items() if v is not None} ) plts = dict(zip(plts.keys(), broadcast(*(plts.values())), strict=True)) return plts # return type is Any due to the many different possibilities def plot( darray: DataArray, *, row: Hashable | None = None, col: Hashable | None = None, col_wrap: int | None = None, ax: Axes | None = None, hue: Hashable | None = None, subplot_kws: dict[str, Any] | None = None, **kwargs: Any, ) -> Any: """ Default plot of DataArray using :py:mod:`matplotlib:matplotlib.pyplot`. Calls xarray plotting function based on the dimensions of the squeezed DataArray. =============== =========================== Dimensions Plotting function =============== =========================== 1 :py:func:`xarray.plot.line` 2 :py:func:`xarray.plot.pcolormesh` Anything else :py:func:`xarray.plot.hist` =============== =========================== Parameters ---------- darray : DataArray row : Hashable or None, optional If passed, make row faceted plots on this dimension name. col : Hashable or None, optional If passed, make column faceted plots on this dimension name. col_wrap : int or None, optional Use together with ``col`` to wrap faceted plots. ax : matplotlib axes object, optional Axes on which to plot. By default, use the current axes. Mutually exclusive with ``size``, ``figsize`` and facets. hue : Hashable or None, optional If passed, make faceted line plots with hue on this dimension name. subplot_kws : dict, optional Dictionary of keyword arguments for Matplotlib subplots (see :py:meth:`matplotlib:matplotlib.figure.Figure.add_subplot`). **kwargs : optional Additional keyword arguments for Matplotlib. See Also -------- xarray.DataArray.squeeze """ darray = darray.squeeze( d for d, s in darray.sizes.items() if s == 1 and d not in (row, col, hue) ).compute() plot_dims = set(darray.dims) plot_dims.discard(row) plot_dims.discard(col) plot_dims.discard(hue) ndims = len(plot_dims) plotfunc: Callable if ndims == 0 or darray.size == 0: raise TypeError("No numeric data to plot.") if ndims in (1, 2): if row or col: kwargs["subplot_kws"] = subplot_kws kwargs["row"] = row kwargs["col"] = col kwargs["col_wrap"] = col_wrap if ndims == 1: plotfunc = line kwargs["hue"] = hue elif ndims == 2: if hue: plotfunc = line kwargs["hue"] = hue else: plotfunc = pcolormesh kwargs["subplot_kws"] = subplot_kws else: if row or col or hue: raise ValueError( "Only 1d and 2d plots are supported for facets in xarray. " "See the package `Seaborn` for more options." 
) plotfunc = hist kwargs["ax"] = ax return plotfunc(darray, **kwargs) @overload def line( # type: ignore[misc,unused-ignore] # None is hashable :( darray: DataArray, *args: Any, row: None = None, # no wrap -> primitive col: None = None, # no wrap -> primitive figsize: Iterable[float] | None = None, aspect: AspectOptions = None, size: float | None = None, ax: Axes | None = None, hue: Hashable | None = None, x: Hashable | None = None, y: Hashable | None = None, xincrease: bool | None = None, yincrease: bool | None = None, xscale: ScaleOptions = None, yscale: ScaleOptions = None, xticks: ArrayLike | None = None, yticks: ArrayLike | None = None, xlim: tuple[float, float] | None = None, ylim: tuple[float, float] | None = None, add_legend: bool = True, _labels: bool = True, **kwargs: Any, ) -> list[Line3D]: ... @overload def line( darray: T_DataArray, *args: Any, row: Hashable, # wrap -> FacetGrid col: Hashable | None = None, figsize: Iterable[float] | None = None, aspect: AspectOptions = None, size: float | None = None, ax: Axes | None = None, hue: Hashable | None = None, x: Hashable | None = None, y: Hashable | None = None, xincrease: bool | None = None, yincrease: bool | None = None, xscale: ScaleOptions = None, yscale: ScaleOptions = None, xticks: ArrayLike | None = None, yticks: ArrayLike | None = None, xlim: tuple[float, float] | None = None, ylim: tuple[float, float] | None = None, add_legend: bool = True, _labels: bool = True, **kwargs: Any, ) -> FacetGrid[T_DataArray]: ... @overload def line( darray: T_DataArray, *args: Any, row: Hashable | None = None, col: Hashable, # wrap -> FacetGrid figsize: Iterable[float] | None = None, aspect: AspectOptions = None, size: float | None = None, ax: Axes | None = None, hue: Hashable | None = None, x: Hashable | None = None, y: Hashable | None = None, xincrease: bool | None = None, yincrease: bool | None = None, xscale: ScaleOptions = None, yscale: ScaleOptions = None, xticks: ArrayLike | None = None, yticks: ArrayLike | None = None, xlim: tuple[float, float] | None = None, ylim: tuple[float, float] | None = None, add_legend: bool = True, _labels: bool = True, **kwargs: Any, ) -> FacetGrid[T_DataArray]: ... # This function signature should not change so that it can use # matplotlib format strings def line( darray: T_DataArray, *args: Any, row: Hashable | None = None, col: Hashable | None = None, figsize: Iterable[float] | None = None, aspect: AspectOptions = None, size: float | None = None, ax: Axes | None = None, hue: Hashable | None = None, x: Hashable | None = None, y: Hashable | None = None, xincrease: bool | None = None, yincrease: bool | None = None, xscale: ScaleOptions = None, yscale: ScaleOptions = None, xticks: ArrayLike | None = None, yticks: ArrayLike | None = None, xlim: tuple[float, float] | None = None, ylim: tuple[float, float] | None = None, add_legend: bool = True, _labels: bool = True, **kwargs: Any, ) -> list[Line3D] | FacetGrid[T_DataArray]: """ Line plot of DataArray values. Wraps :py:func:`matplotlib:matplotlib.pyplot.plot`. Parameters ---------- darray : DataArray Either 1D or 2D. If 2D, one of ``hue``, ``x`` or ``y`` must be provided. row : Hashable, optional If passed, make row faceted plots on this dimension name. col : Hashable, optional If passed, make column faceted plots on this dimension name. figsize : tuple, optional A tuple (width, height) of the figure in inches. Mutually exclusive with ``size`` and ``ax``. 
aspect : "auto", "equal", scalar or None, optional Aspect ratio of plot, so that ``aspect * size`` gives the *width* in inches. Only used if a ``size`` is provided. size : scalar, optional If provided, create a new figure for the plot with the given size: *height* (in inches) of each plot. See also: ``aspect``. ax : matplotlib axes object, optional Axes on which to plot. By default, the current is used. Mutually exclusive with ``size`` and ``figsize``. hue : Hashable, optional Dimension or coordinate for which you want multiple lines plotted. If plotting against a 2D coordinate, ``hue`` must be a dimension. x, y : Hashable, optional Dimension, coordinate or multi-index level for *x*, *y* axis. Only one of these may be specified. The other will be used for values from the DataArray on which this plot method is called. xincrease : bool or None, optional Should the values on the *x* axis be increasing from left to right? if ``None``, use the default for the Matplotlib function. yincrease : bool or None, optional Should the values on the *y* axis be increasing from top to bottom? if ``None``, use the default for the Matplotlib function. xscale, yscale : {'linear', 'symlog', 'log', 'logit'}, optional Specifies scaling for the *x*- and *y*-axis, respectively. xticks, yticks : array-like, optional Specify tick locations for *x*- and *y*-axis. xlim, ylim : tuple[float, float], optional Specify *x*- and *y*-axis limits. add_legend : bool, default: True Add legend with *y* axis coordinates (2D inputs only). *args, **kwargs : optional Additional arguments to :py:func:`matplotlib:matplotlib.pyplot.plot`. Returns ------- primitive : list of Line3D or FacetGrid When either col or row is given, returns a FacetGrid, otherwise a list of matplotlib Line3D objects. """ # Handle facetgrids first if row or col: allargs = locals().copy() allargs.update(allargs.pop("kwargs")) allargs.pop("darray") return _easy_facetgrid(darray, line, kind="line", **allargs) ndims = len(darray.dims) if ndims == 0 or darray.size == 0: # TypeError to be consistent with pandas raise TypeError("No numeric data to plot.") if ndims > 2: raise ValueError( "Line plots are for 1- or 2-dimensional DataArrays. " f"Passed DataArray has {ndims} " "dimensions" ) # The allargs dict passed to _easy_facetgrid above contains args if args == (): args = kwargs.pop("args", ()) else: assert "args" not in kwargs ax = get_axis(figsize, size, aspect, ax) xplt, yplt, hueplt, hue_label = _infer_line_data(darray, x, y, hue) # Remove pd.Intervals if contained in xplt.values and/or yplt.values. 
xplt_val, yplt_val, x_suffix, y_suffix, kwargs = _resolve_intervals_1dplot( xplt.to_numpy(), yplt.to_numpy(), kwargs ) xlabel = label_from_attrs(xplt, extra=x_suffix) ylabel = label_from_attrs(yplt, extra=y_suffix) _ensure_plottable(xplt_val, yplt_val) primitive = ax.plot(xplt_val, yplt_val, *args, **kwargs) if _labels: if xlabel is not None: ax.set_xlabel(xlabel) if ylabel is not None: ax.set_ylabel(ylabel) ax.set_title(darray._title_for_slice()) if darray.ndim == 2 and add_legend: assert hueplt is not None ax.legend(handles=primitive, labels=list(hueplt.to_numpy()), title=hue_label) if isinstance(xplt.dtype, np.dtype) and np.issubdtype(xplt.dtype, np.datetime64): # type: ignore[redundant-expr] _set_concise_date(ax, axis="x") _update_axes(ax, xincrease, yincrease, xscale, yscale, xticks, yticks, xlim, ylim) return primitive @overload def step( # type: ignore[misc,unused-ignore] # None is hashable :( darray: DataArray, *args: Any, where: Literal["pre", "post", "mid"] = "pre", drawstyle: str | None = None, ds: str | None = None, row: None = None, # no wrap -> primitive col: None = None, # no wrap -> primitive **kwargs: Any, ) -> list[Line3D]: ... @overload def step( darray: DataArray, *args: Any, where: Literal["pre", "post", "mid"] = "pre", drawstyle: str | None = None, ds: str | None = None, row: Hashable, # wrap -> FacetGrid col: Hashable | None = None, **kwargs: Any, ) -> FacetGrid[DataArray]: ... @overload def step( darray: DataArray, *args: Any, where: Literal["pre", "post", "mid"] = "pre", drawstyle: str | None = None, ds: str | None = None, row: Hashable | None = None, col: Hashable, # wrap -> FacetGrid **kwargs: Any, ) -> FacetGrid[DataArray]: ... def step( darray: DataArray, *args: Any, where: Literal["pre", "post", "mid"] = "pre", drawstyle: str | None = None, ds: str | None = None, row: Hashable | None = None, col: Hashable | None = None, **kwargs: Any, ) -> list[Line3D] | FacetGrid[DataArray]: """ Step plot of DataArray values. Similar to :py:func:`matplotlib:matplotlib.pyplot.step`. Parameters ---------- where : {'pre', 'post', 'mid'}, default: 'pre' Define where the steps should be placed: - ``'pre'``: The y value is continued constantly to the left from every *x* position, i.e. the interval ``(x[i-1], x[i]]`` has the value ``y[i]``. - ``'post'``: The y value is continued constantly to the right from every *x* position, i.e. the interval ``[x[i], x[i+1])`` has the value ``y[i]``. - ``'mid'``: Steps occur half-way between the *x* positions. Note that this parameter is ignored if one coordinate consists of :py:class:`pandas.Interval` values, e.g. as a result of :py:func:`xarray.Dataset.groupby_bins`. In this case, the actual boundaries of the interval are used. drawstyle, ds : str or None, optional Additional drawstyle. Only use one of drawstyle and ds. row : Hashable, optional If passed, make row faceted plots on this dimension name. col : Hashable, optional If passed, make column faceted plots on this dimension name. *args, **kwargs : optional Additional arguments for :py:func:`xarray.plot.line`. Returns ------- primitive : list of Line3D or FacetGrid When either col or row is given, returns a FacetGrid, otherwise a list of matplotlib Line3D objects. 
""" if where not in {"pre", "post", "mid"}: raise ValueError("'where' argument to step must be 'pre', 'post' or 'mid'") if ds is not None: if drawstyle is None: drawstyle = ds else: raise TypeError("ds and drawstyle are mutually exclusive") if drawstyle is None: drawstyle = "" drawstyle = "steps-" + where + drawstyle return line(darray, *args, drawstyle=drawstyle, col=col, row=row, **kwargs) def hist( darray: DataArray, *args: Any, figsize: Iterable[float] | None = None, size: float | None = None, aspect: AspectOptions = None, ax: Axes | None = None, xincrease: bool | None = None, yincrease: bool | None = None, xscale: ScaleOptions = None, yscale: ScaleOptions = None, xticks: ArrayLike | None = None, yticks: ArrayLike | None = None, xlim: tuple[float, float] | None = None, ylim: tuple[float, float] | None = None, **kwargs: Any, ) -> tuple[np.ndarray, np.ndarray, BarContainer | Polygon]: """ Histogram of DataArray. Wraps :py:func:`matplotlib:matplotlib.pyplot.hist`. Plots *N*-dimensional arrays by first flattening the array. Parameters ---------- darray : DataArray Can have any number of dimensions. figsize : Iterable of float, optional A tuple (width, height) of the figure in inches. Mutually exclusive with ``size`` and ``ax``. aspect : "auto", "equal", scalar or None, optional Aspect ratio of plot, so that ``aspect * size`` gives the *width* in inches. Only used if a ``size`` is provided. size : scalar, optional If provided, create a new figure for the plot with the given size: *height* (in inches) of each plot. See also: ``aspect``. ax : matplotlib axes object, optional Axes on which to plot. By default, use the current axes. Mutually exclusive with ``size`` and ``figsize``. xincrease : bool or None, optional Should the values on the *x* axis be increasing from left to right? if ``None``, use the default for the Matplotlib function. yincrease : bool or None, optional Should the values on the *y* axis be increasing from top to bottom? if ``None``, use the default for the Matplotlib function. xscale, yscale : {'linear', 'symlog', 'log', 'logit'}, optional Specifies scaling for the *x*- and *y*-axis, respectively. xticks, yticks : array-like, optional Specify tick locations for *x*- and *y*-axis. xlim, ylim : tuple[float, float], optional Specify *x*- and *y*-axis limits. **kwargs : optional Additional keyword arguments to :py:func:`matplotlib:matplotlib.pyplot.hist`. """ assert len(args) == 0 if darray.ndim == 0 or darray.size == 0: # TypeError to be consistent with pandas raise TypeError("No numeric data to plot.") ax = get_axis(figsize, size, aspect, ax) no_nan_arr = np.ravel(darray.to_numpy()) no_nan = no_nan_arr[pd.notnull(no_nan_arr)] n, bins, patches = cast( tuple[np.ndarray, np.ndarray, Union["BarContainer", "Polygon"]], ax.hist(no_nan, **kwargs), ) ax.set_title(darray._title_for_slice()) ax.set_xlabel(label_from_attrs(darray)) _update_axes(ax, xincrease, yincrease, xscale, yscale, xticks, yticks, xlim, ylim) return n, bins, patches def _plot1d(plotfunc): """Decorator for common 1d plotting logic.""" commondoc = """ Parameters ---------- darray : DataArray Must be 2 dimensional, unless creating faceted plots. x : Hashable or None, optional Coordinate for x axis. If None use darray.dims[1]. y : Hashable or None, optional Coordinate for y axis. If None use darray.dims[0]. z : Hashable or None, optional If specified plot 3D and use this coordinate for *z* axis. hue : Hashable or None, optional Dimension or coordinate for which you want multiple lines plotted. 
markersize: Hashable or None, optional scatter only. Variable by which to vary size of scattered points. linewidth: Hashable or None, optional Variable by which to vary linewidth. row : Hashable, optional If passed, make row faceted plots on this dimension name. col : Hashable, optional If passed, make column faceted plots on this dimension name. col_wrap : int, optional Use together with ``col`` to wrap faceted plots ax : matplotlib axes object, optional If None, uses the current axis. Not applicable when using facets. figsize : Iterable[float] or None, optional A tuple (width, height) of the figure in inches. Mutually exclusive with ``size`` and ``ax``. size : scalar, optional If provided, create a new figure for the plot with the given size. Height (in inches) of each plot. See also: ``aspect``. aspect : "auto", "equal", scalar or None, optional Aspect ratio of plot, so that ``aspect * size`` gives the width in inches. Only used if a ``size`` is provided. xincrease : bool or None, default: True Should the values on the x axes be increasing from left to right? if None, use the default for the matplotlib function. yincrease : bool or None, default: True Should the values on the y axes be increasing from top to bottom? if None, use the default for the matplotlib function. add_legend : bool or None, optional If True use xarray metadata to add a legend. add_colorbar : bool or None, optional If True add a colorbar. add_labels : bool or None, optional If True use xarray metadata to label axes add_title : bool or None, optional If True use xarray metadata to add a title subplot_kws : dict, optional Dictionary of keyword arguments for matplotlib subplots. Only applies to FacetGrid plotting. xscale : {'linear', 'symlog', 'log', 'logit'} or None, optional Specifies scaling for the x-axes. yscale : {'linear', 'symlog', 'log', 'logit'} or None, optional Specifies scaling for the y-axes. xticks : ArrayLike or None, optional Specify tick locations for x-axes. yticks : ArrayLike or None, optional Specify tick locations for y-axes. xlim : tuple[float, float] or None, optional Specify x-axes limits. ylim : tuple[float, float] or None, optional Specify y-axes limits. cmap : matplotlib colormap name or colormap, optional The mapping from data values to color space. Either a Matplotlib colormap name or object. If not provided, this will be either ``'viridis'`` (if the function infers a sequential dataset) or ``'RdBu_r'`` (if the function infers a diverging dataset). See :doc:`Choosing Colormaps in Matplotlib ` for more information. If *seaborn* is installed, ``cmap`` may also be a `seaborn color palette `_. Note: if ``cmap`` is a seaborn color palette, ``levels`` must also be specified. vmin : float or None, optional Lower value to anchor the colormap, otherwise it is inferred from the data and other keyword arguments. When a diverging dataset is inferred, setting `vmin` or `vmax` will fix the other by symmetry around ``center``. Setting both values prevents use of a diverging colormap. If discrete levels are provided as an explicit list, both of these values are ignored. vmax : float or None, optional Upper value to anchor the colormap, otherwise it is inferred from the data and other keyword arguments. When a diverging dataset is inferred, setting `vmin` or `vmax` will fix the other by symmetry around ``center``. Setting both values prevents use of a diverging colormap. If discrete levels are provided as an explicit list, both of these values are ignored. 
norm : matplotlib.colors.Normalize, optional If ``norm`` has ``vmin`` or ``vmax`` specified, the corresponding kwarg must be ``None``. extend : {'neither', 'both', 'min', 'max'}, optional How to draw arrows extending the colorbar beyond its limits. If not provided, ``extend`` is inferred from ``vmin``, ``vmax`` and the data limits. levels : int or array-like, optional Split the colormap (``cmap``) into discrete color intervals. If an integer is provided, "nice" levels are chosen based on the data range: this can imply that the final number of levels is not exactly the expected one. Setting ``vmin`` and/or ``vmax`` with ``levels=N`` is equivalent to setting ``levels=np.linspace(vmin, vmax, N)``. **kwargs : optional Additional arguments to wrapped matplotlib function Returns ------- artist : The same type of primitive artist that the wrapped matplotlib function returns """ # Build on the original docstring plotfunc.__doc__ = f"{plotfunc.__doc__}\n{commondoc}" @functools.wraps( plotfunc, assigned=("__module__", "__name__", "__qualname__", "__doc__") ) def newplotfunc( darray: DataArray, *args: Any, x: Hashable | None = None, y: Hashable | None = None, z: Hashable | None = None, hue: Hashable | None = None, hue_style: HueStyleOptions = None, markersize: Hashable | None = None, linewidth: Hashable | None = None, row: Hashable | None = None, col: Hashable | None = None, col_wrap: int | None = None, ax: Axes | None = None, figsize: Iterable[float] | None = None, size: float | None = None, aspect: float | None = None, xincrease: bool | None = True, yincrease: bool | None = True, add_legend: bool | None = None, add_colorbar: bool | None = None, add_labels: bool | Iterable[bool] = True, add_title: bool = True, subplot_kws: dict[str, Any] | None = None, xscale: ScaleOptions = None, yscale: ScaleOptions = None, xticks: ArrayLike | None = None, yticks: ArrayLike | None = None, xlim: tuple[float, float] | None = None, ylim: tuple[float, float] | None = None, cmap: str | Colormap | None = None, vmin: float | None = None, vmax: float | None = None, norm: Normalize | None = None, extend: ExtendOptions = None, levels: ArrayLike | None = None, **kwargs, ) -> Any: # All 1d plots in xarray share this function signature. # Method signature below should be consistent. if TYPE_CHECKING: import matplotlib.pyplot as plt else: plt = attempt_import("matplotlib.pyplot") if subplot_kws is None: subplot_kws = dict() # Handle facetgrids first if row or col: if z is not None: subplot_kws.update(projection="3d") allargs = locals().copy() allargs.update(allargs.pop("kwargs")) allargs.pop("darray") allargs.pop("plt") allargs["plotfunc"] = globals()[plotfunc.__name__] return _easy_facetgrid(darray, kind="plot1d", **allargs) if darray.ndim == 0 or darray.size == 0: # TypeError to be consistent with pandas raise TypeError("No numeric data to plot.") # The allargs dict passed to _easy_facetgrid above contains args if args == (): args = kwargs.pop("args", ()) if args: assert "args" not in kwargs # TODO: Deprecated since 2022.10: msg = "Using positional arguments is deprecated for plot methods, use keyword arguments instead." assert x is None x = args[0] if len(args) > 1: assert y is None y = args[1] if len(args) > 2: assert z is None z = args[2] if len(args) > 3: assert hue is None hue = args[3] if len(args) > 4: raise ValueError(msg) else: warnings.warn(msg, DeprecationWarning, stacklevel=2) del args if hue_style is not None: # TODO: Not used since 2022.10. Deprecated since 2023.07. 
warnings.warn( ( "hue_style is no longer used for plot1d plots " "and the argument will eventually be removed. " "Convert numbers to string for a discrete hue " "and use add_legend or add_colorbar to control which guide to display." ), DeprecationWarning, stacklevel=2, ) _is_facetgrid = kwargs.pop("_is_facetgrid", False) if plotfunc.__name__ == "scatter": size_ = kwargs.pop("_size", markersize) size_r = _MARKERSIZE_RANGE # Remove any nulls, .where(m, drop=True) doesn't work when m is # a dask array, so load the array to memory. # It will have to be loaded to memory at some point anyway: darray = darray.compute() darray = darray.where(darray.notnull(), drop=True) else: size_ = kwargs.pop("_size", linewidth) size_r = _LINEWIDTH_RANGE # Get data to plot: coords_to_plot: MutableMapping[str, Hashable | None] = dict( x=x, z=z, hue=hue, size=size_ ) if not _is_facetgrid: # Guess what coords to use if some of the values in coords_to_plot are None: coords_to_plot = _guess_coords_to_plot(darray, coords_to_plot, kwargs) plts = _prepare_plot1d_data(darray, coords_to_plot, plotfunc.__name__) xplt = plts.pop("x", None) yplt = plts.pop("y", None) zplt = plts.pop("z", None) kwargs.update(zplt=zplt) hueplt = plts.pop("hue", None) sizeplt = plts.pop("size", None) # Handle size and hue: hueplt_norm = _Normalize(data=hueplt) kwargs.update(hueplt=hueplt_norm.values) sizeplt_norm = _Normalize( data=sizeplt, width=size_r, _is_facetgrid=_is_facetgrid ) kwargs.update(sizeplt=sizeplt_norm.values) cmap_params_subset = kwargs.pop("cmap_params_subset", {}) cbar_kwargs = kwargs.pop("cbar_kwargs", {}) if hueplt_norm.data is not None: if not hueplt_norm.data_is_numeric: # Map hue values back to its original value: cbar_kwargs.update(format=hueplt_norm.format, ticks=hueplt_norm.ticks) levels = kwargs.get("levels", hueplt_norm.levels) cmap_params, cbar_kwargs = _process_cmap_cbar_kwargs( plotfunc, cast("DataArray", hueplt_norm.values).data, **locals(), ) # subset that can be passed to scatter, hist2d if not cmap_params_subset: ckw = {vv: cmap_params[vv] for vv in ("vmin", "vmax", "norm", "cmap")} cmap_params_subset.update(**ckw) with plt.rc_context(_styles): # type: ignore[arg-type, unused-ignore] if z is not None: import mpl_toolkits if ax is None: subplot_kws.update(projection="3d") ax = get_axis(figsize, size, aspect, ax, **subplot_kws) assert isinstance(ax, mpl_toolkits.mplot3d.axes3d.Axes3D) # Using 30, 30 minimizes rotation of the plot. 
Making it easier to # build on your intuition from 2D plots: ax.view_init(azim=30, elev=30, vertical_axis="y") else: ax = get_axis(figsize, size, aspect, ax, **subplot_kws) primitive = plotfunc( xplt, yplt, ax=ax, add_labels=add_labels, **cmap_params_subset, **kwargs, ) if np.any(np.asarray(add_labels)) and add_title: ax.set_title(darray._title_for_slice()) add_colorbar_, add_legend_ = _determine_guide( hueplt_norm, sizeplt_norm, add_colorbar, add_legend, plotfunc_name=plotfunc.__name__, ) if add_colorbar_: if "label" not in cbar_kwargs: cbar_kwargs["label"] = label_from_attrs(hueplt_norm.data) _add_colorbar( primitive, ax, kwargs.get("cbar_ax"), cbar_kwargs, cmap_params ) if add_legend_: if plotfunc.__name__ in ["scatter", "line"]: _add_legend( ( hueplt_norm if add_legend or not add_colorbar_ else _Normalize(None) ), sizeplt_norm, primitive, legend_ax=ax, plotfunc=plotfunc.__name__, ) else: hueplt_norm_values: list[np.ndarray | None] if hueplt_norm.data is not None: hueplt_norm_values = list(hueplt_norm.data.to_numpy()) else: hueplt_norm_values = [hueplt_norm.data] if plotfunc.__name__ == "hist": ax.legend( handles=primitive[-1], labels=hueplt_norm_values, title=label_from_attrs(hueplt_norm.data), ) else: ax.legend( handles=primitive, labels=hueplt_norm_values, title=label_from_attrs(hueplt_norm.data), ) _update_axes( ax, xincrease, yincrease, xscale, yscale, xticks, yticks, xlim, ylim ) return primitive # we want to actually expose the signature of newplotfunc # and not the copied **kwargs from the plotfunc which # functools.wraps adds, so delete the wrapped attr del newplotfunc.__wrapped__ return newplotfunc def _add_labels( add_labels: bool | Iterable[bool], darrays: Iterable[DataArray | None], suffixes: Iterable[str], ax: Axes, ) -> None: """Set x, y, z labels.""" add_labels = [add_labels] * 3 if isinstance(add_labels, bool) else add_labels axes: tuple[Literal["x", "y", "z"], ...] = ("x", "y", "z") for axis, add_label, darray, suffix in zip( axes, add_labels, darrays, suffixes, strict=True ): if darray is None: continue if add_label: label = label_from_attrs(darray, extra=suffix) if label is not None: getattr(ax, f"set_{axis}label")(label) if np.issubdtype(darray.dtype, np.datetime64): _set_concise_date(ax, axis=axis) @overload def scatter( # type: ignore[misc,unused-ignore] # None is hashable :( darray: DataArray, *args: Any, x: Hashable | None = None, y: Hashable | None = None, z: Hashable | None = None, hue: Hashable | None = None, hue_style: HueStyleOptions = None, markersize: Hashable | None = None, linewidth: Hashable | None = None, figsize: Iterable[float] | None = None, size: float | None = None, aspect: float | None = None, ax: Axes | None = None, row: None = None, # no wrap -> primitive col: None = None, # no wrap -> primitive col_wrap: int | None = None, xincrease: bool | None = True, yincrease: bool | None = True, add_legend: bool | None = None, add_colorbar: bool | None = None, add_labels: bool | Iterable[bool] = True, add_title: bool = True, subplot_kws: dict[str, Any] | None = None, xscale: ScaleOptions = None, yscale: ScaleOptions = None, xticks: ArrayLike | None = None, yticks: ArrayLike | None = None, xlim: ArrayLike | None = None, ylim: ArrayLike | None = None, cmap: str | Colormap | None = None, vmin: float | None = None, vmax: float | None = None, norm: Normalize | None = None, extend: ExtendOptions = None, levels: ArrayLike | None = None, **kwargs, ) -> PathCollection: ... 
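# Usage sketch (illustrative only; assumes a hypothetical DataArray ``da``
# with dims ("x", "z") and a coordinate "y"). The overloads here describe the
# public signature of the ``scatter`` function defined below; the heavy
# lifting (facetting, hue/size normalisation, colorbars and legends) happens
# in the ``_plot1d`` decorator, so the decorated body only calls
# ``Axes.scatter``.
#
#     pc = da.plot.scatter(x="x", hue="y")            # PathCollection
#     fg = da.plot.scatter(x="x", hue="y", col="z")   # FacetGrid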
@overload def scatter( darray: T_DataArray, *args: Any, x: Hashable | None = None, y: Hashable | None = None, z: Hashable | None = None, hue: Hashable | None = None, hue_style: HueStyleOptions = None, markersize: Hashable | None = None, linewidth: Hashable | None = None, figsize: Iterable[float] | None = None, size: float | None = None, aspect: float | None = None, ax: Axes | None = None, row: Hashable | None = None, col: Hashable, # wrap -> FacetGrid col_wrap: int | None = None, xincrease: bool | None = True, yincrease: bool | None = True, add_legend: bool | None = None, add_colorbar: bool | None = None, add_labels: bool | Iterable[bool] = True, add_title: bool = True, subplot_kws: dict[str, Any] | None = None, xscale: ScaleOptions = None, yscale: ScaleOptions = None, xticks: ArrayLike | None = None, yticks: ArrayLike | None = None, xlim: ArrayLike | None = None, ylim: ArrayLike | None = None, cmap: str | Colormap | None = None, vmin: float | None = None, vmax: float | None = None, norm: Normalize | None = None, extend: ExtendOptions = None, levels: ArrayLike | None = None, **kwargs, ) -> FacetGrid[T_DataArray]: ... @overload def scatter( darray: T_DataArray, *args: Any, x: Hashable | None = None, y: Hashable | None = None, z: Hashable | None = None, hue: Hashable | None = None, hue_style: HueStyleOptions = None, markersize: Hashable | None = None, linewidth: Hashable | None = None, figsize: Iterable[float] | None = None, size: float | None = None, aspect: float | None = None, ax: Axes | None = None, row: Hashable, # wrap -> FacetGrid col: Hashable | None = None, col_wrap: int | None = None, xincrease: bool | None = True, yincrease: bool | None = True, add_legend: bool | None = None, add_colorbar: bool | None = None, add_labels: bool | Iterable[bool] = True, add_title: bool = True, subplot_kws: dict[str, Any] | None = None, xscale: ScaleOptions = None, yscale: ScaleOptions = None, xticks: ArrayLike | None = None, yticks: ArrayLike | None = None, xlim: ArrayLike | None = None, ylim: ArrayLike | None = None, cmap: str | Colormap | None = None, vmin: float | None = None, vmax: float | None = None, norm: Normalize | None = None, extend: ExtendOptions = None, levels: ArrayLike | None = None, **kwargs, ) -> FacetGrid[T_DataArray]: ... @_plot1d def scatter( xplt: DataArray | None, yplt: DataArray | None, ax: Axes, add_labels: bool | Iterable[bool] = True, **kwargs, ) -> PathCollection: """Scatter variables against each other. Wraps :py:func:`matplotlib:matplotlib.pyplot.scatter`. 
""" if "u" in kwargs or "v" in kwargs: raise ValueError("u, v are not allowed in scatter plots.") zplt: DataArray | None = kwargs.pop("zplt", None) hueplt: DataArray | None = kwargs.pop("hueplt", None) sizeplt: DataArray | None = kwargs.pop("sizeplt", None) if hueplt is not None: kwargs.update(c=hueplt.to_numpy().ravel()) if sizeplt is not None: kwargs.update(s=sizeplt.to_numpy().ravel()) plts_or_none = (xplt, yplt, zplt) _add_labels(add_labels, plts_or_none, ("", "", ""), ax) xplt_np = None if xplt is None else xplt.to_numpy().ravel() yplt_np = None if yplt is None else yplt.to_numpy().ravel() zplt_np = None if zplt is None else zplt.to_numpy().ravel() plts_np = tuple(p for p in (xplt_np, yplt_np, zplt_np) if p is not None) if len(plts_np) == 3: import mpl_toolkits assert isinstance(ax, mpl_toolkits.mplot3d.axes3d.Axes3D) return ax.scatter(xplt_np, yplt_np, zplt_np, **kwargs) if len(plts_np) == 2: return ax.scatter(plts_np[0], plts_np[1], **kwargs) raise ValueError("At least two variables required for a scatter plot.") def _plot2d(plotfunc): """Decorator for common 2d plotting logic.""" commondoc = """ Parameters ---------- darray : DataArray Must be two-dimensional, unless creating faceted plots. x : Hashable or None, optional Coordinate for *x* axis. If ``None``, use ``darray.dims[1]``. y : Hashable or None, optional Coordinate for *y* axis. If ``None``, use ``darray.dims[0]``. figsize : Iterable or float or None, optional A tuple (width, height) of the figure in inches. Mutually exclusive with ``size`` and ``ax``. size : scalar, optional If provided, create a new figure for the plot with the given size: *height* (in inches) of each plot. See also: ``aspect``. aspect : "auto", "equal", scalar or None, optional Aspect ratio of plot, so that ``aspect * size`` gives the *width* in inches. Only used if a ``size`` is provided. ax : matplotlib axes object, optional Axes on which to plot. By default, use the current axes. Mutually exclusive with ``size`` and ``figsize``. row : Hashable or None, optional If passed, make row faceted plots on this dimension name. col : Hashable or None, optional If passed, make column faceted plots on this dimension name. col_wrap : int, optional Use together with ``col`` to wrap faceted plots. xincrease : None, True, or False, optional Should the values on the *x* axis be increasing from left to right? If ``None``, use the default for the Matplotlib function. yincrease : None, True, or False, optional Should the values on the *y* axis be increasing from top to bottom? If ``None``, use the default for the Matplotlib function. add_colorbar : bool, optional Add colorbar to axes. add_labels : bool, optional Use xarray metadata to label axes. vmin : float or None, optional Lower value to anchor the colormap, otherwise it is inferred from the data and other keyword arguments. When a diverging dataset is inferred, setting `vmin` or `vmax` will fix the other by symmetry around ``center``. Setting both values prevents use of a diverging colormap. If discrete levels are provided as an explicit list, both of these values are ignored. vmax : float or None, optional Upper value to anchor the colormap, otherwise it is inferred from the data and other keyword arguments. When a diverging dataset is inferred, setting `vmin` or `vmax` will fix the other by symmetry around ``center``. Setting both values prevents use of a diverging colormap. If discrete levels are provided as an explicit list, both of these values are ignored. 
cmap : matplotlib colormap name or colormap, optional The mapping from data values to color space. If not provided, this will be either be ``'viridis'`` (if the function infers a sequential dataset) or ``'RdBu_r'`` (if the function infers a diverging dataset). See :doc:`Choosing Colormaps in Matplotlib ` for more information. If *seaborn* is installed, ``cmap`` may also be a `seaborn color palette `_. Note: if ``cmap`` is a seaborn color palette and the plot type is not ``'contour'`` or ``'contourf'``, ``levels`` must also be specified. center : float or False, optional The value at which to center the colormap. Passing this value implies use of a diverging colormap. Setting it to ``False`` prevents use of a diverging colormap. robust : bool, optional If ``True`` and ``vmin`` or ``vmax`` are absent, the colormap range is computed with 2nd and 98th percentiles instead of the extreme values. extend : {'neither', 'both', 'min', 'max'}, optional How to draw arrows extending the colorbar beyond its limits. If not provided, ``extend`` is inferred from ``vmin``, ``vmax`` and the data limits. levels : int or array-like, optional Split the colormap (``cmap``) into discrete color intervals. If an integer is provided, "nice" levels are chosen based on the data range: this can imply that the final number of levels is not exactly the expected one. Setting ``vmin`` and/or ``vmax`` with ``levels=N`` is equivalent to setting ``levels=np.linspace(vmin, vmax, N)``. infer_intervals : bool, optional Only applies to pcolormesh. If ``True``, the coordinate intervals are passed to pcolormesh. If ``False``, the original coordinates are used (this can be useful for certain map projections). The default is to always infer intervals, unless the mesh is irregular and plotted on a map projection. colors : str or array-like of color-like, optional A single color or a sequence of colors. If the plot type is not ``'contour'`` or ``'contourf'``, the ``levels`` argument is required. subplot_kws : dict, optional Dictionary of keyword arguments for Matplotlib subplots. Only used for 2D and faceted plots. (see :py:meth:`matplotlib:matplotlib.figure.Figure.add_subplot`). cbar_ax : matplotlib axes object, optional Axes in which to draw the colorbar. cbar_kwargs : dict, optional Dictionary of keyword arguments to pass to the colorbar (see :meth:`matplotlib:matplotlib.figure.Figure.colorbar`). xscale : {'linear', 'symlog', 'log', 'logit'} or None, optional Specifies scaling for the x-axes. yscale : {'linear', 'symlog', 'log', 'logit'} or None, optional Specifies scaling for the y-axes. xticks : ArrayLike or None, optional Specify tick locations for x-axes. yticks : ArrayLike or None, optional Specify tick locations for y-axes. xlim : tuple[float, float] or None, optional Specify x-axes limits. ylim : tuple[float, float] or None, optional Specify y-axes limits. norm : matplotlib.colors.Normalize, optional If ``norm`` has ``vmin`` or ``vmax`` specified, the corresponding kwarg must be ``None``. **kwargs : optional Additional keyword arguments to wrapped Matplotlib function. Returns ------- artist : The same type of primitive artist that the wrapped Matplotlib function returns. 
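    For example, every wrapped 2D method accepts these keywords in the same
    way (a minimal sketch; the toy three-dimensional DataArray below is purely
    illustrative):

    >>> import numpy as np
    >>> import xarray as xr
    >>> da = xr.DataArray(np.random.rand(4, 5, 6), dims=("time", "y", "x"))
    >>> da.isel(time=0).plot.pcolormesh(robust=True, levels=7)  # doctest: +SKIP
    >>> da.plot.pcolormesh(col="time", col_wrap=2)  # doctest: +SKIP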
""" # Build on the original docstring plotfunc.__doc__ = f"{plotfunc.__doc__}\n{commondoc}" @functools.wraps( plotfunc, assigned=("__module__", "__name__", "__qualname__", "__doc__") ) def newplotfunc( darray: DataArray, *args: Any, x: Hashable | None = None, y: Hashable | None = None, figsize: Iterable[float] | None = None, size: float | None = None, aspect: float | None = None, ax: Axes | None = None, row: Hashable | None = None, col: Hashable | None = None, col_wrap: int | None = None, xincrease: bool | None = True, yincrease: bool | None = True, add_colorbar: bool | None = None, add_labels: bool = True, vmin: float | None = None, vmax: float | None = None, cmap: str | Colormap | None = None, center: float | Literal[False] | None = None, robust: bool = False, extend: ExtendOptions = None, levels: ArrayLike | None = None, infer_intervals=None, colors: str | ArrayLike | None = None, subplot_kws: dict[str, Any] | None = None, cbar_ax: Axes | None = None, cbar_kwargs: dict[str, Any] | None = None, xscale: ScaleOptions = None, yscale: ScaleOptions = None, xticks: ArrayLike | None = None, yticks: ArrayLike | None = None, xlim: tuple[float, float] | None = None, ylim: tuple[float, float] | None = None, norm: Normalize | None = None, **kwargs: Any, ) -> Any: # All 2d plots in xarray share this function signature. if args: # TODO: Deprecated since 2022.10: msg = "Using positional arguments is deprecated for plot methods, use keyword arguments instead." assert x is None x = args[0] if len(args) > 1: assert y is None y = args[1] if len(args) > 2: raise ValueError(msg) else: warnings.warn(msg, DeprecationWarning, stacklevel=2) del args # Decide on a default for the colorbar before facetgrids if add_colorbar is None: add_colorbar = True if plotfunc.__name__ == "contour" or ( plotfunc.__name__ == "surface" and cmap is None ): add_colorbar = False imshow_rgb = plotfunc.__name__ == "imshow" and darray.ndim == ( 3 + (row is not None) + (col is not None) ) if imshow_rgb: # Don't add a colorbar when showing an image with explicit colors add_colorbar = False # Matplotlib does not support normalising RGB data, so do it here. # See eg. https://github.com/matplotlib/matplotlib/pull/10220 if robust or vmax is not None or vmin is not None: darray = _rescale_imshow_rgb(darray.as_numpy(), vmin, vmax, robust) vmin, vmax, robust = None, None, False if subplot_kws is None: subplot_kws = dict() if plotfunc.__name__ == "surface" and not kwargs.get("_is_facetgrid"): if ax is None: # TODO: Importing Axes3D is no longer necessary in matplotlib >= 3.2. 
# Remove when minimum requirement of matplotlib is 3.2: from mpl_toolkits.mplot3d import Axes3D # delete so it does not end up in locals() del Axes3D # Need to create a "3d" Axes instance for surface plots subplot_kws["projection"] = "3d" # In facet grids, shared axis labels don't make sense for surface plots sharex = False sharey = False # Handle facetgrids first if row or col: allargs = locals().copy() del allargs["darray"] del allargs["imshow_rgb"] allargs.update(allargs.pop("kwargs")) # Need the decorated plotting function allargs["plotfunc"] = globals()[plotfunc.__name__] return _easy_facetgrid(darray, kind="dataarray", **allargs) if darray.ndim == 0 or darray.size == 0: # TypeError to be consistent with pandas raise TypeError("No numeric data to plot.") if ( plotfunc.__name__ == "surface" and not kwargs.get("_is_facetgrid") and ax is not None ): import mpl_toolkits if not isinstance(ax, mpl_toolkits.mplot3d.Axes3D): raise ValueError( "If ax is passed to surface(), it must be created with " 'projection="3d"' ) rgb = kwargs.pop("rgb", None) if rgb is not None and plotfunc.__name__ != "imshow": raise ValueError('The "rgb" keyword is only valid for imshow()') elif rgb is not None and not imshow_rgb: raise ValueError( 'The "rgb" keyword is only valid for imshow()' "with a three-dimensional array (per facet)" ) xlab, ylab = _infer_xy_labels( darray=darray, x=x, y=y, imshow=imshow_rgb, rgb=rgb ) xval = darray[xlab] yval = darray[ylab] if xval.ndim > 1 or yval.ndim > 1 or plotfunc.__name__ == "surface": # Passing 2d coordinate values, need to ensure they are transposed the same # way as darray. # Also surface plots always need 2d coordinates xval = xval.broadcast_like(darray) yval = yval.broadcast_like(darray) dims = darray.dims else: dims = (yval.dims[0], xval.dims[0]) # May need to transpose for correct x, y labels # xlab may be the name of a coord, we have to check for dim names if imshow_rgb: # For RGB[A] images, matplotlib requires the color dimension # to be last. In Xarray the order should be unimportant, so # we transpose to (y, x, color) to make this work. yx_dims = (ylab, xlab) dims = yx_dims + tuple(d for d in darray.dims if d not in yx_dims) if dims != darray.dims: darray = darray.transpose(*dims, transpose_coords=True) # better to pass the ndarrays directly to plotting functions xvalnp = xval.to_numpy() yvalnp = yval.to_numpy() # Pass the data as a masked ndarray too zval = darray.to_masked_array(copy=False) # Replace pd.Intervals if contained in xval or yval. xplt, xlab_extra = _resolve_intervals_2dplot(xvalnp, plotfunc.__name__) yplt, ylab_extra = _resolve_intervals_2dplot(yvalnp, plotfunc.__name__) _ensure_plottable(xplt, yplt, zval) cmap_params, cbar_kwargs = _process_cmap_cbar_kwargs( plotfunc, zval.data, **locals(), _is_facetgrid=kwargs.pop("_is_facetgrid", False), ) if "contour" in plotfunc.__name__: # extend is a keyword argument only for contour and contourf, but # passing it to the colorbar is sufficient for imshow and # pcolormesh kwargs["extend"] = cmap_params["extend"] kwargs["levels"] = cmap_params["levels"] # if colors == a single color, matplotlib draws dashed negative # contours. 
we lose this feature if we pass cmap and not colors if colors is not None: cmap_params["cmap"] = None kwargs["colors"] = colors if "pcolormesh" == plotfunc.__name__: kwargs["infer_intervals"] = infer_intervals kwargs["xscale"] = xscale kwargs["yscale"] = yscale if "imshow" == plotfunc.__name__ and isinstance(aspect, str): # forbid usage of mpl strings raise ValueError("plt.imshow's `aspect` kwarg is not available in xarray") ax = get_axis(figsize, size, aspect, ax, **subplot_kws) primitive = plotfunc( xplt, yplt, zval, ax=ax, cmap=cmap_params["cmap"], vmin=cmap_params["vmin"], vmax=cmap_params["vmax"], norm=cmap_params["norm"], **kwargs, ) # Label the plot with metadata if add_labels: ax.set_xlabel(label_from_attrs(darray[xlab], xlab_extra)) ax.set_ylabel(label_from_attrs(darray[ylab], ylab_extra)) ax.set_title(darray._title_for_slice()) if plotfunc.__name__ == "surface": import mpl_toolkits assert isinstance(ax, mpl_toolkits.mplot3d.axes3d.Axes3D) ax.set_zlabel(label_from_attrs(darray)) if add_colorbar: if add_labels and "label" not in cbar_kwargs: cbar_kwargs["label"] = label_from_attrs(darray) cbar = _add_colorbar(primitive, ax, cbar_ax, cbar_kwargs, cmap_params) elif cbar_ax is not None or cbar_kwargs: # inform the user about keywords which aren't used raise ValueError( "cbar_ax and cbar_kwargs can't be used with add_colorbar=False." ) # origin kwarg overrides yincrease if "origin" in kwargs: yincrease = None _update_axes( ax, xincrease, yincrease, xscale, yscale, xticks, yticks, xlim, ylim ) if np.issubdtype(xplt.dtype, np.datetime64): _set_concise_date(ax, "x") return primitive # we want to actually expose the signature of newplotfunc # and not the copied **kwargs from the plotfunc which # functools.wraps adds, so delete the wrapped attr del newplotfunc.__wrapped__ return newplotfunc @overload def imshow( # type: ignore[misc,unused-ignore] # None is hashable :( darray: DataArray, x: Hashable | None = None, y: Hashable | None = None, *, figsize: Iterable[float] | None = None, size: float | None = None, aspect: float | None = None, ax: Axes | None = None, row: None = None, # no wrap -> primitive col: None = None, # no wrap -> primitive col_wrap: int | None = None, xincrease: bool | None = True, yincrease: bool | None = True, add_colorbar: bool | None = None, add_labels: bool = True, vmin: float | None = None, vmax: float | None = None, cmap: str | Colormap | None = None, center: float | Literal[False] | None = None, robust: bool = False, extend: ExtendOptions = None, levels: ArrayLike | None = None, infer_intervals=None, colors: str | ArrayLike | None = None, subplot_kws: dict[str, Any] | None = None, cbar_ax: Axes | None = None, cbar_kwargs: dict[str, Any] | None = None, xscale: ScaleOptions = None, yscale: ScaleOptions = None, xticks: ArrayLike | None = None, yticks: ArrayLike | None = None, xlim: ArrayLike | None = None, ylim: ArrayLike | None = None, norm: Normalize | None = None, **kwargs: Any, ) -> AxesImage: ... 
@overload def imshow( darray: T_DataArray, x: Hashable | None = None, y: Hashable | None = None, *, figsize: Iterable[float] | None = None, size: float | None = None, aspect: AspectOptions = None, ax: Axes | None = None, row: Hashable | None = None, col: Hashable, # wrap -> FacetGrid col_wrap: int | None = None, xincrease: bool | None = True, yincrease: bool | None = True, add_colorbar: bool | None = None, add_labels: bool = True, vmin: float | None = None, vmax: float | None = None, cmap: str | Colormap | None = None, center: float | Literal[False] | None = None, robust: bool = False, extend: ExtendOptions = None, levels: ArrayLike | None = None, infer_intervals=None, colors: str | ArrayLike | None = None, subplot_kws: dict[str, Any] | None = None, cbar_ax: Axes | None = None, cbar_kwargs: dict[str, Any] | None = None, xscale: ScaleOptions = None, yscale: ScaleOptions = None, xticks: ArrayLike | None = None, yticks: ArrayLike | None = None, xlim: ArrayLike | None = None, ylim: ArrayLike | None = None, norm: Normalize | None = None, **kwargs: Any, ) -> FacetGrid[T_DataArray]: ... @overload def imshow( darray: T_DataArray, x: Hashable | None = None, y: Hashable | None = None, *, figsize: Iterable[float] | None = None, size: float | None = None, aspect: AspectOptions = None, ax: Axes | None = None, row: Hashable, # wrap -> FacetGrid col: Hashable | None = None, col_wrap: int | None = None, xincrease: bool | None = True, yincrease: bool | None = True, add_colorbar: bool | None = None, add_labels: bool = True, vmin: float | None = None, vmax: float | None = None, cmap: str | Colormap | None = None, center: float | Literal[False] | None = None, robust: bool = False, extend: ExtendOptions = None, levels: ArrayLike | None = None, infer_intervals=None, colors: str | ArrayLike | None = None, subplot_kws: dict[str, Any] | None = None, cbar_ax: Axes | None = None, cbar_kwargs: dict[str, Any] | None = None, xscale: ScaleOptions = None, yscale: ScaleOptions = None, xticks: ArrayLike | None = None, yticks: ArrayLike | None = None, xlim: ArrayLike | None = None, ylim: ArrayLike | None = None, norm: Normalize | None = None, **kwargs: Any, ) -> FacetGrid[T_DataArray]: ... @_plot2d def imshow( x: np.ndarray, y: np.ndarray, z: np.ma.core.MaskedArray, ax: Axes, **kwargs: Any ) -> AxesImage: """ Image plot of 2D DataArray. Wraps :py:func:`matplotlib:matplotlib.pyplot.imshow`. While other plot methods require the DataArray to be strictly two-dimensional, ``imshow`` also accepts a 3D array where some dimension can be interpreted as RGB or RGBA color channels and allows this dimension to be specified via the kwarg ``rgb=``. Unlike :py:func:`matplotlib:matplotlib.pyplot.imshow`, which ignores ``vmin``/``vmax`` for RGB(A) data, xarray *will* use ``vmin`` and ``vmax`` for RGB(A) data by applying a single scaling factor and offset to all bands. Passing ``robust=True`` infers ``vmin`` and ``vmax`` :ref:`in the usual way `. Additionally the y-axis is not inverted by default, you can restore the matplotlib behavior by setting `yincrease=False`. .. note:: This function needs uniformly spaced coordinates to properly label the axes. Call :py:meth:`DataArray.plot` to check. The pixels are centered on the coordinates. For example, if the coordinate value is 3.2, then the pixels for those coordinates will be centered on 3.2. 
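    For example (a minimal sketch; the toy RGB array below is purely
    illustrative):

    >>> import numpy as np
    >>> import xarray as xr
    >>> img = xr.DataArray(np.random.rand(10, 20, 3), dims=("y", "x", "band"))
    >>> img.plot.imshow(rgb="band", robust=True)  # doctest: +SKIP

    Because the ``band`` dimension has size 3 it is interpreted as RGB, and
    ``robust=True`` rescales all bands with a single factor as described above.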
""" if x.ndim != 1 or y.ndim != 1: raise ValueError( "imshow requires 1D coordinates, try using pcolormesh or contour(f)" ) def _center_pixels(x): """Center the pixels on the coordinates.""" if np.issubdtype(x.dtype, str): # When using strings as inputs imshow converts it to # integers. Choose extent values which puts the indices in # in the center of the pixels: return 0 - 0.5, len(x) - 0.5 try: # Center the pixels assuming uniform spacing: xstep = 0.5 * (x[1] - x[0]) except IndexError: # Arbitrary default value, similar to matplotlib behaviour: xstep = 0.1 return x[0] - xstep, x[-1] + xstep # Center the pixels: left, right = _center_pixels(x) top, bottom = _center_pixels(y) defaults: dict[str, Any] = {"origin": "upper", "interpolation": "nearest"} if not hasattr(ax, "projection"): # not for cartopy geoaxes defaults["aspect"] = "auto" # Allow user to override these defaults defaults.update(kwargs) if defaults["origin"] == "upper": defaults["extent"] = [left, right, bottom, top] else: defaults["extent"] = [left, right, top, bottom] if z.ndim == 3: # matplotlib imshow uses black for missing data, but Xarray makes # missing data transparent. We therefore add an alpha channel if # there isn't one, and set it to transparent where data is masked. if z.shape[-1] == 3: safe_dtype = np.promote_types(z.dtype, np.uint8) alpha = np.ma.ones(z.shape[:2] + (1,), dtype=safe_dtype) if np.issubdtype(z.dtype, np.integer): alpha[:] = 255 z = np.ma.concatenate((z, alpha), axis=2) else: z = z.copy() z[np.any(z.mask, axis=-1), -1] = 0 primitive = ax.imshow(z, **defaults) # If x or y are strings the ticklabels have been replaced with # integer indices. Replace them back to strings: for axis, v in [("x", x), ("y", y)]: if np.issubdtype(v.dtype, str): getattr(ax, f"set_{axis}ticks")(np.arange(len(v))) getattr(ax, f"set_{axis}ticklabels")(v) return primitive @overload def contour( # type: ignore[misc,unused-ignore] # None is hashable :( darray: DataArray, x: Hashable | None = None, y: Hashable | None = None, *, figsize: Iterable[float] | None = None, size: float | None = None, aspect: float | None = None, ax: Axes | None = None, row: None = None, # no wrap -> primitive col: None = None, # no wrap -> primitive col_wrap: int | None = None, xincrease: bool | None = True, yincrease: bool | None = True, add_colorbar: bool | None = None, add_labels: bool = True, vmin: float | None = None, vmax: float | None = None, cmap: str | Colormap | None = None, center: float | Literal[False] | None = None, robust: bool = False, extend: ExtendOptions = None, levels: ArrayLike | None = None, infer_intervals=None, colors: str | ArrayLike | None = None, subplot_kws: dict[str, Any] | None = None, cbar_ax: Axes | None = None, cbar_kwargs: dict[str, Any] | None = None, xscale: ScaleOptions = None, yscale: ScaleOptions = None, xticks: ArrayLike | None = None, yticks: ArrayLike | None = None, xlim: ArrayLike | None = None, ylim: ArrayLike | None = None, norm: Normalize | None = None, **kwargs: Any, ) -> QuadContourSet: ... 
@overload def contour( darray: T_DataArray, x: Hashable | None = None, y: Hashable | None = None, *, figsize: Iterable[float] | None = None, size: float | None = None, aspect: AspectOptions = None, ax: Axes | None = None, row: Hashable | None = None, col: Hashable, # wrap -> FacetGrid col_wrap: int | None = None, xincrease: bool | None = True, yincrease: bool | None = True, add_colorbar: bool | None = None, add_labels: bool = True, vmin: float | None = None, vmax: float | None = None, cmap: str | Colormap | None = None, center: float | Literal[False] | None = None, robust: bool = False, extend: ExtendOptions = None, levels: ArrayLike | None = None, infer_intervals=None, colors: str | ArrayLike | None = None, subplot_kws: dict[str, Any] | None = None, cbar_ax: Axes | None = None, cbar_kwargs: dict[str, Any] | None = None, xscale: ScaleOptions = None, yscale: ScaleOptions = None, xticks: ArrayLike | None = None, yticks: ArrayLike | None = None, xlim: ArrayLike | None = None, ylim: ArrayLike | None = None, norm: Normalize | None = None, **kwargs: Any, ) -> FacetGrid[T_DataArray]: ... @overload def contour( darray: T_DataArray, x: Hashable | None = None, y: Hashable | None = None, *, figsize: Iterable[float] | None = None, size: float | None = None, aspect: AspectOptions = None, ax: Axes | None = None, row: Hashable, # wrap -> FacetGrid col: Hashable | None = None, col_wrap: int | None = None, xincrease: bool | None = True, yincrease: bool | None = True, add_colorbar: bool | None = None, add_labels: bool = True, vmin: float | None = None, vmax: float | None = None, cmap: str | Colormap | None = None, center: float | Literal[False] | None = None, robust: bool = False, extend: ExtendOptions = None, levels: ArrayLike | None = None, infer_intervals=None, colors: str | ArrayLike | None = None, subplot_kws: dict[str, Any] | None = None, cbar_ax: Axes | None = None, cbar_kwargs: dict[str, Any] | None = None, xscale: ScaleOptions = None, yscale: ScaleOptions = None, xticks: ArrayLike | None = None, yticks: ArrayLike | None = None, xlim: ArrayLike | None = None, ylim: ArrayLike | None = None, norm: Normalize | None = None, **kwargs: Any, ) -> FacetGrid[T_DataArray]: ... @_plot2d def contour( x: np.ndarray, y: np.ndarray, z: np.ndarray, ax: Axes, **kwargs: Any ) -> QuadContourSet: """ Contour plot of 2D DataArray. Wraps :py:func:`matplotlib:matplotlib.pyplot.contour`. 
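    For example (a minimal sketch; the toy two-dimensional DataArray below is
    purely illustrative):

    >>> import numpy as np
    >>> import xarray as xr
    >>> da = xr.DataArray(np.random.rand(10, 20), dims=("y", "x"))
    >>> da.plot.contour(levels=10, colors="k")  # doctest: +SKIP
    >>> da.plot.contour(levels=10, cmap="RdBu_r", center=0)  # doctest: +SKIP

    ``colors`` and ``cmap`` are mutually exclusive; pass one or the other.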
""" primitive = ax.contour(x, y, z, **kwargs) return primitive @overload def contourf( # type: ignore[misc,unused-ignore] # None is hashable :( darray: DataArray, x: Hashable | None = None, y: Hashable | None = None, *, figsize: Iterable[float] | None = None, size: float | None = None, aspect: AspectOptions = None, ax: Axes | None = None, row: None = None, # no wrap -> primitive col: None = None, # no wrap -> primitive col_wrap: int | None = None, xincrease: bool | None = True, yincrease: bool | None = True, add_colorbar: bool | None = None, add_labels: bool = True, vmin: float | None = None, vmax: float | None = None, cmap: str | Colormap | None = None, center: float | Literal[False] | None = None, robust: bool = False, extend: ExtendOptions = None, levels: ArrayLike | None = None, infer_intervals=None, colors: str | ArrayLike | None = None, subplot_kws: dict[str, Any] | None = None, cbar_ax: Axes | None = None, cbar_kwargs: dict[str, Any] | None = None, xscale: ScaleOptions = None, yscale: ScaleOptions = None, xticks: ArrayLike | None = None, yticks: ArrayLike | None = None, xlim: ArrayLike | None = None, ylim: ArrayLike | None = None, norm: Normalize | None = None, **kwargs: Any, ) -> QuadContourSet: ... @overload def contourf( darray: T_DataArray, x: Hashable | None = None, y: Hashable | None = None, *, figsize: Iterable[float] | None = None, size: float | None = None, aspect: AspectOptions = None, ax: Axes | None = None, row: Hashable | None = None, col: Hashable, # wrap -> FacetGrid col_wrap: int | None = None, xincrease: bool | None = True, yincrease: bool | None = True, add_colorbar: bool | None = None, add_labels: bool = True, vmin: float | None = None, vmax: float | None = None, cmap: str | Colormap | None = None, center: float | Literal[False] | None = None, robust: bool = False, extend: ExtendOptions = None, levels: ArrayLike | None = None, infer_intervals=None, colors: str | ArrayLike | None = None, subplot_kws: dict[str, Any] | None = None, cbar_ax: Axes | None = None, cbar_kwargs: dict[str, Any] | None = None, xscale: ScaleOptions = None, yscale: ScaleOptions = None, xticks: ArrayLike | None = None, yticks: ArrayLike | None = None, xlim: ArrayLike | None = None, ylim: ArrayLike | None = None, norm: Normalize | None = None, **kwargs: Any, ) -> FacetGrid[T_DataArray]: ... @overload def contourf( darray: T_DataArray, x: Hashable | None = None, y: Hashable | None = None, *, figsize: Iterable[float] | None = None, size: float | None = None, aspect: AspectOptions = None, ax: Axes | None = None, row: Hashable, # wrap -> FacetGrid col: Hashable | None = None, col_wrap: int | None = None, xincrease: bool | None = True, yincrease: bool | None = True, add_colorbar: bool | None = None, add_labels: bool = True, vmin: float | None = None, vmax: float | None = None, cmap: str | Colormap | None = None, center: float | Literal[False] | None = None, robust: bool = False, extend: ExtendOptions = None, levels: ArrayLike | None = None, infer_intervals=None, colors: str | ArrayLike | None = None, subplot_kws: dict[str, Any] | None = None, cbar_ax: Axes | None = None, cbar_kwargs: dict[str, Any] | None = None, xscale: ScaleOptions = None, yscale: ScaleOptions = None, xticks: ArrayLike | None = None, yticks: ArrayLike | None = None, xlim: ArrayLike | None = None, ylim: ArrayLike | None = None, norm: Normalize | None = None, **kwargs: Any, ) -> FacetGrid[T_DataArray]: ... 
@_plot2d def contourf( x: np.ndarray, y: np.ndarray, z: np.ndarray, ax: Axes, **kwargs: Any ) -> QuadContourSet: """ Filled contour plot of 2D DataArray. Wraps :py:func:`matplotlib:matplotlib.pyplot.contourf`. """ primitive = ax.contourf(x, y, z, **kwargs) return primitive @overload def pcolormesh( # type: ignore[misc,unused-ignore] # None is hashable :( darray: DataArray, x: Hashable | None = None, y: Hashable | None = None, *, figsize: Iterable[float] | None = None, size: float | None = None, aspect: AspectOptions = None, ax: Axes | None = None, row: None = None, # no wrap -> primitive col: None = None, # no wrap -> primitive col_wrap: int | None = None, xincrease: bool | None = True, yincrease: bool | None = True, add_colorbar: bool | None = None, add_labels: bool = True, vmin: float | None = None, vmax: float | None = None, cmap: str | Colormap | None = None, center: float | Literal[False] | None = None, robust: bool = False, extend: ExtendOptions = None, levels: ArrayLike | None = None, infer_intervals=None, colors: str | ArrayLike | None = None, subplot_kws: dict[str, Any] | None = None, cbar_ax: Axes | None = None, cbar_kwargs: dict[str, Any] | None = None, xscale: ScaleOptions = None, yscale: ScaleOptions = None, xticks: ArrayLike | None = None, yticks: ArrayLike | None = None, xlim: ArrayLike | None = None, ylim: ArrayLike | None = None, norm: Normalize | None = None, **kwargs: Any, ) -> QuadMesh: ... @overload def pcolormesh( darray: T_DataArray, x: Hashable | None = None, y: Hashable | None = None, *, figsize: Iterable[float] | None = None, size: float | None = None, aspect: AspectOptions = None, ax: Axes | None = None, row: Hashable | None = None, col: Hashable, # wrap -> FacetGrid col_wrap: int | None = None, xincrease: bool | None = True, yincrease: bool | None = True, add_colorbar: bool | None = None, add_labels: bool = True, vmin: float | None = None, vmax: float | None = None, cmap: str | Colormap | None = None, center: float | Literal[False] | None = None, robust: bool = False, extend: ExtendOptions = None, levels: ArrayLike | None = None, infer_intervals=None, colors: str | ArrayLike | None = None, subplot_kws: dict[str, Any] | None = None, cbar_ax: Axes | None = None, cbar_kwargs: dict[str, Any] | None = None, xscale: ScaleOptions = None, yscale: ScaleOptions = None, xticks: ArrayLike | None = None, yticks: ArrayLike | None = None, xlim: ArrayLike | None = None, ylim: ArrayLike | None = None, norm: Normalize | None = None, **kwargs: Any, ) -> FacetGrid[T_DataArray]: ... 
@overload def pcolormesh( darray: T_DataArray, x: Hashable | None = None, y: Hashable | None = None, *, figsize: Iterable[float] | None = None, size: float | None = None, aspect: AspectOptions = None, ax: Axes | None = None, row: Hashable, # wrap -> FacetGrid col: Hashable | None = None, col_wrap: int | None = None, xincrease: bool | None = True, yincrease: bool | None = True, add_colorbar: bool | None = None, add_labels: bool = True, vmin: float | None = None, vmax: float | None = None, cmap: str | Colormap | None = None, center: float | Literal[False] | None = None, robust: bool = False, extend: ExtendOptions = None, levels: ArrayLike | None = None, infer_intervals=None, colors: str | ArrayLike | None = None, subplot_kws: dict[str, Any] | None = None, cbar_ax: Axes | None = None, cbar_kwargs: dict[str, Any] | None = None, xscale: ScaleOptions = None, yscale: ScaleOptions = None, xticks: ArrayLike | None = None, yticks: ArrayLike | None = None, xlim: ArrayLike | None = None, ylim: ArrayLike | None = None, norm: Normalize | None = None, **kwargs: Any, ) -> FacetGrid[T_DataArray]: ... @_plot2d def pcolormesh( x: np.ndarray, y: np.ndarray, z: np.ndarray, ax: Axes, xscale: ScaleOptions | None = None, yscale: ScaleOptions | None = None, infer_intervals=None, **kwargs: Any, ) -> QuadMesh: """ Pseudocolor plot of 2D DataArray. Wraps :py:func:`matplotlib:matplotlib.pyplot.pcolormesh`. """ # decide on a default for infer_intervals (GH781) x = np.asarray(x) if infer_intervals is None: if hasattr(ax, "projection"): if len(x.shape) == 1: infer_intervals = True else: infer_intervals = False else: infer_intervals = True if any(np.issubdtype(k.dtype, str) for k in (x, y)): # do not infer intervals if any axis contains str ticks, see #6775 infer_intervals = False if infer_intervals and ( (np.shape(x)[0] == np.shape(z)[1]) or ((x.ndim > 1) and (np.shape(x)[1] == np.shape(z)[1])) ): if x.ndim == 1: x = _infer_interval_breaks(x, check_monotonic=True, scale=xscale) else: # we have to infer the intervals on both axes x = _infer_interval_breaks(x, axis=1, scale=xscale) x = _infer_interval_breaks(x, axis=0, scale=xscale) if infer_intervals and (np.shape(y)[0] == np.shape(z)[0]): if y.ndim == 1: y = _infer_interval_breaks(y, check_monotonic=True, scale=yscale) else: # we have to infer the intervals on both axes y = _infer_interval_breaks(y, axis=1, scale=yscale) y = _infer_interval_breaks(y, axis=0, scale=yscale) ax.grid(False) primitive = ax.pcolormesh(x, y, z, **kwargs) # by default, pcolormesh picks "round" values for bounds # this results in ugly looking plots with lots of surrounding whitespace if not hasattr(ax, "projection") and x.ndim == 1 and y.ndim == 1: # not a cartopy geoaxis ax.set_xlim(x[0], x[-1]) ax.set_ylim(y[0], y[-1]) return primitive @overload def surface( darray: DataArray, x: Hashable | None = None, y: Hashable | None = None, *, figsize: Iterable[float] | None = None, size: float | None = None, aspect: AspectOptions = None, ax: Axes | None = None, row: None = None, # no wrap -> primitive col: None = None, # no wrap -> primitive col_wrap: int | None = None, xincrease: bool | None = True, yincrease: bool | None = True, add_colorbar: bool | None = None, add_labels: bool = True, vmin: float | None = None, vmax: float | None = None, cmap: str | Colormap | None = None, center: float | Literal[False] | None = None, robust: bool = False, extend: ExtendOptions = None, levels: ArrayLike | None = None, infer_intervals=None, colors: str | ArrayLike | None = None, subplot_kws: dict[str, Any] | None = 
None, cbar_ax: Axes | None = None, cbar_kwargs: dict[str, Any] | None = None, xscale: ScaleOptions = None, yscale: ScaleOptions = None, xticks: ArrayLike | None = None, yticks: ArrayLike | None = None, xlim: ArrayLike | None = None, ylim: ArrayLike | None = None, norm: Normalize | None = None, **kwargs: Any, ) -> Poly3DCollection: ... @overload def surface( darray: T_DataArray, x: Hashable | None = None, y: Hashable | None = None, *, figsize: Iterable[float] | None = None, size: float | None = None, aspect: AspectOptions = None, ax: Axes | None = None, row: Hashable | None = None, col: Hashable, # wrap -> FacetGrid col_wrap: int | None = None, xincrease: bool | None = True, yincrease: bool | None = True, add_colorbar: bool | None = None, add_labels: bool = True, vmin: float | None = None, vmax: float | None = None, cmap: str | Colormap | None = None, center: float | Literal[False] | None = None, robust: bool = False, extend: ExtendOptions = None, levels: ArrayLike | None = None, infer_intervals=None, colors: str | ArrayLike | None = None, subplot_kws: dict[str, Any] | None = None, cbar_ax: Axes | None = None, cbar_kwargs: dict[str, Any] | None = None, xscale: ScaleOptions = None, yscale: ScaleOptions = None, xticks: ArrayLike | None = None, yticks: ArrayLike | None = None, xlim: ArrayLike | None = None, ylim: ArrayLike | None = None, norm: Normalize | None = None, **kwargs: Any, ) -> FacetGrid[T_DataArray]: ... @overload def surface( darray: T_DataArray, x: Hashable | None = None, y: Hashable | None = None, *, figsize: Iterable[float] | None = None, size: float | None = None, aspect: AspectOptions = None, ax: Axes | None = None, row: Hashable, # wrap -> FacetGrid col: Hashable | None = None, col_wrap: int | None = None, xincrease: bool | None = True, yincrease: bool | None = True, add_colorbar: bool | None = None, add_labels: bool = True, vmin: float | None = None, vmax: float | None = None, cmap: str | Colormap | None = None, center: float | Literal[False] | None = None, robust: bool = False, extend: ExtendOptions = None, levels: ArrayLike | None = None, infer_intervals=None, colors: str | ArrayLike | None = None, subplot_kws: dict[str, Any] | None = None, cbar_ax: Axes | None = None, cbar_kwargs: dict[str, Any] | None = None, xscale: ScaleOptions = None, yscale: ScaleOptions = None, xticks: ArrayLike | None = None, yticks: ArrayLike | None = None, xlim: ArrayLike | None = None, ylim: ArrayLike | None = None, norm: Normalize | None = None, **kwargs: Any, ) -> FacetGrid[T_DataArray]: ... @_plot2d def surface( x: np.ndarray, y: np.ndarray, z: np.ndarray, ax: Axes, **kwargs: Any ) -> Poly3DCollection: """ Surface plot of 2D DataArray. Wraps :py:meth:`matplotlib:mpl_toolkits.mplot3d.axes3d.Axes3D.plot_surface`. 
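    For example (a minimal sketch; the toy two-dimensional DataArray below is
    purely illustrative):

    >>> import numpy as np
    >>> import xarray as xr
    >>> da = xr.DataArray(np.random.rand(10, 20), dims=("y", "x"))
    >>> da.plot.surface()  # doctest: +SKIP

    If no ``ax`` is passed, a 3D axes is created automatically; a
    user-supplied ``ax`` must have been created with ``projection="3d"``.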
""" import mpl_toolkits assert isinstance(ax, mpl_toolkits.mplot3d.axes3d.Axes3D) primitive = ax.plot_surface(x, y, z, **kwargs) return primitive python-xarray-2026.01.0/xarray/plot/utils.py0000664000175000017500000016610015136607163021045 0ustar alastairalastairfrom __future__ import annotations import itertools import textwrap import warnings from collections.abc import ( Callable, Hashable, Iterable, Mapping, MutableMapping, Sequence, ) from datetime import date, datetime from inspect import getfullargspec from typing import TYPE_CHECKING, Any, Literal, cast, overload import numpy as np import pandas as pd from xarray.core.indexes import PandasMultiIndex from xarray.core.options import OPTIONS from xarray.core.utils import ( attempt_import, is_scalar, module_available, ) from xarray.namedarray.pycompat import DuckArrayModule nc_time_axis_available = module_available("nc_time_axis") try: import cftime except ImportError: cftime = None if TYPE_CHECKING: from matplotlib.axes import Axes from matplotlib.colors import Normalize from matplotlib.ticker import FuncFormatter from numpy.typing import ArrayLike from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset from xarray.core.types import AspectOptions, ScaleOptions try: import matplotlib.pyplot as plt except ImportError: plt: Any = None # type: ignore[no-redef] ROBUST_PERCENTILE = 2.0 # copied from seaborn _MARKERSIZE_RANGE = (18.0, 36.0, 72.0) _LINEWIDTH_RANGE = (1.5, 1.5, 6.0) def _determine_extend(calc_data, vmin, vmax): extend_min = calc_data.min() < vmin extend_max = calc_data.max() > vmax if extend_min and extend_max: return "both" elif extend_min: return "min" elif extend_max: return "max" else: return "neither" def _build_discrete_cmap(cmap, levels, extend, filled): """ Build a discrete colormap and normalization of the data. """ import matplotlib as mpl if len(levels) == 1: levels = [levels[0], levels[0]] if not filled: # non-filled contour plots extend = "max" if extend == "both": ext_n = 2 elif extend in ["min", "max"]: ext_n = 1 else: ext_n = 0 n_colors = len(levels) + ext_n - 1 pal = _color_palette(cmap, n_colors) new_cmap, cnorm = mpl.colors.from_levels_and_colors(levels, pal, extend=extend) # copy the old cmap name, for easier testing new_cmap.name = getattr(cmap, "name", cmap) # copy colors to use for bad, under, and over values in case they have been # set to non-default values if isinstance(cmap, mpl.colors.Colormap): bad = cmap(np.nan) # Only update under and over if they were explicitly changed by the user # (i.e. are different from the lowest or highest values in cmap). Otherwise # leave unchanged so new_cmap uses its default values (its own lowest and # highest values). under = cmap(-np.inf) if under == cmap(0): under = None over = cmap(np.inf) if over == cmap(cmap.N - 1): over = None new_cmap = new_cmap.with_extremes(bad=bad, under=under, over=over) return new_cmap, cnorm def _color_palette(cmap, n_colors): import matplotlib.pyplot as plt from matplotlib.colors import ListedColormap colors_i = np.linspace(0, 1.0, n_colors) if isinstance(cmap, list | tuple): # expand or truncate the list of colors to n_colors cmap = list(itertools.islice(itertools.cycle(cmap), n_colors)) cmap = ListedColormap(cmap) pal = cmap(colors_i) elif isinstance(cmap, str): # we have some sort of named palette try: # is this a matplotlib cmap? 
cmap = plt.get_cmap(cmap) pal = cmap(colors_i) except ValueError: # ValueError happens when mpl doesn't like a colormap, try seaborn try: from seaborn import color_palette pal = color_palette(cmap, n_colors=n_colors) except (ValueError, ImportError): # or maybe we just got a single color as a string cmap = ListedColormap([cmap] * n_colors) pal = cmap(colors_i) else: # cmap better be a LinearSegmentedColormap (e.g. viridis) pal = cmap(colors_i) return pal # _determine_cmap_params is adapted from Seaborn: # https://github.com/mwaskom/seaborn/blob/v0.6/seaborn/matrix.py#L158 # Used under the terms of Seaborn's license, see licenses/SEABORN_LICENSE. def _determine_cmap_params( plot_data, vmin=None, vmax=None, cmap=None, center=None, robust=False, extend=None, levels=None, filled=True, norm=None, _is_facetgrid=False, ): """ Use some heuristics to set good defaults for colorbar and range. Parameters ---------- plot_data : Numpy array Doesn't handle xarray objects Returns ------- cmap_params : dict Use depends on the type of the plotting function """ if TYPE_CHECKING: import matplotlib as mpl else: mpl = attempt_import("matplotlib") if isinstance(levels, Iterable): levels = sorted(levels) calc_data = np.ravel(plot_data[np.isfinite(plot_data)]) # Handle all-NaN input data gracefully if calc_data.size == 0: # Arbitrary default for when all values are NaN calc_data = np.array(0.0) # Setting center=False prevents a divergent cmap possibly_divergent = center is not False # Set center to 0 so math below makes sense but remember its state center_is_none = False if center is None: center = 0 center_is_none = True # Setting both vmin and vmax prevents a divergent cmap if (vmin is not None) and (vmax is not None): possibly_divergent = False # Setting vmin or vmax implies linspaced levels user_minmax = (vmin is not None) or (vmax is not None) # vlim might be computed below vlim = None # save state; needed later vmin_was_none = vmin is None vmax_was_none = vmax is None if vmin is None: if robust: vmin = np.percentile(calc_data, ROBUST_PERCENTILE) else: vmin = calc_data.min() elif possibly_divergent: vlim = abs(vmin - center) if vmax is None: if robust: vmax = np.percentile(calc_data, 100 - ROBUST_PERCENTILE) else: vmax = calc_data.max() elif possibly_divergent: vlim = abs(vmax - center) if possibly_divergent: levels_are_divergent = ( isinstance(levels, Iterable) and levels[0] * levels[-1] < 0 ) # kwargs not specific about divergent or not: infer defaults from data divergent = (vmin < 0 < vmax) or not center_is_none or levels_are_divergent else: divergent = False # A divergent map should be symmetric around the center value if divergent: if vlim is None: vlim = max(abs(vmin - center), abs(vmax - center)) vmin, vmax = -vlim, vlim # Now add in the centering value and set the limits vmin += center vmax += center # now check norm and harmonize with vmin, vmax if norm is not None: if norm.vmin is None: norm.vmin = vmin else: if not vmin_was_none and vmin != norm.vmin: raise ValueError("Cannot supply vmin and a norm with a different vmin.") vmin = norm.vmin if norm.vmax is None: norm.vmax = vmax else: if not vmax_was_none and vmax != norm.vmax: raise ValueError("Cannot supply vmax and a norm with a different vmax.") vmax = norm.vmax # if BoundaryNorm, then set levels if isinstance(norm, mpl.colors.BoundaryNorm): levels = norm.boundaries # Choose default colormaps if not provided if cmap is None: if divergent: cmap = OPTIONS["cmap_divergent"] else: cmap = OPTIONS["cmap_sequential"] # Handle discrete levels if levels 
is not None: if is_scalar(levels): if user_minmax: levels = np.linspace(vmin, vmax, levels) elif levels == 1: levels = np.asarray([(vmin + vmax) / 2]) else: # N in MaxNLocator refers to bins, not ticks ticker = mpl.ticker.MaxNLocator(levels - 1) levels = ticker.tick_values(vmin, vmax) vmin, vmax = levels[0], levels[-1] # GH3734 if vmin == vmax: vmin, vmax = mpl.ticker.LinearLocator(2).tick_values(vmin, vmax) if extend is None: extend = _determine_extend(calc_data, vmin, vmax) if (levels is not None) and (not isinstance(norm, mpl.colors.BoundaryNorm)): cmap, newnorm = _build_discrete_cmap(cmap, levels, extend, filled) norm = newnorm if norm is None else norm # vmin & vmax needs to be None if norm is passed # TODO: always return a norm with vmin and vmax if norm is not None: vmin = None vmax = None return dict( vmin=vmin, vmax=vmax, cmap=cmap, extend=extend, levels=levels, norm=norm ) def _infer_xy_labels_3d( darray: DataArray | Dataset, x: Hashable | None, y: Hashable | None, rgb: Hashable | None, ) -> tuple[Hashable, Hashable]: """ Determine x and y labels for showing RGB images. Attempts to infer which dimension is RGB/RGBA by size and order of dims. """ assert rgb is None or rgb != x assert rgb is None or rgb != y # Start by detecting and reporting invalid combinations of arguments assert darray.ndim == 3 not_none = [a for a in (x, y, rgb) if a is not None] if len(set(not_none)) < len(not_none): raise ValueError( "Dimension names must be None or unique strings, but imshow was " f"passed x={x!r}, y={y!r}, and rgb={rgb!r}." ) for label in not_none: if label not in darray.dims: raise ValueError(f"{label!r} is not a dimension") # Then calculate rgb dimension if certain and check validity could_be_color = [ label for label in darray.dims if darray[label].size in (3, 4) and label not in (x, y) ] if rgb is None and not could_be_color: raise ValueError( "A 3-dimensional array was passed to imshow(), but there is no " "dimension that could be color. At least one dimension must be " "of size 3 (RGB) or 4 (RGBA), and not given as x or y." ) if rgb is None and len(could_be_color) == 1: rgb = could_be_color[0] if rgb is not None and darray[rgb].size not in (3, 4): raise ValueError( f"Cannot interpret dim {rgb!r} of size {darray[rgb].size} as RGB or RGBA." ) # If rgb dimension is still unknown, there must be two or three dimensions # in could_be_color. We therefore warn, and use a heuristic to break ties. if rgb is None: assert len(could_be_color) in (2, 3) rgb = could_be_color[-1] warnings.warn( "Several dimensions of this array could be colors. Xarray " f"will use the last possible dimension ({rgb!r}) to match " "matplotlib.pyplot.imshow. You can pass names of x, y, " "and/or rgb dimensions to override this guess.", stacklevel=2, ) assert rgb is not None # Finally, we pick out the red slice and delegate to the 2D version: return _infer_xy_labels(darray.isel({rgb: 0}), x, y) def _infer_xy_labels( darray: DataArray | Dataset, x: Hashable | None, y: Hashable | None, imshow: bool = False, rgb: Hashable | None = None, ) -> tuple[Hashable, Hashable]: """ Determine x and y labels. For use in _plot2d darray must be a 2 dimensional data array, or 3d for imshow only. 
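    For example, a bare 2D array falls back to its dimension names, with the
    second dimension used for *x* (a minimal illustration with a toy array):

    >>> import numpy as np
    >>> import xarray as xr
    >>> _infer_xy_labels(xr.DataArray(np.zeros((2, 3)), dims=("y", "x")), None, None)
    ('x', 'y')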
""" if (x is not None) and (x == y): raise ValueError("x and y cannot be equal.") if imshow and darray.ndim == 3: return _infer_xy_labels_3d(darray, x, y, rgb) if x is None and y is None: if darray.ndim != 2: raise ValueError("DataArray must be 2d") y, x = darray.dims elif x is None: _assert_valid_xy(darray, y, "y") x = darray.dims[0] if y == darray.dims[1] else darray.dims[1] elif y is None: _assert_valid_xy(darray, x, "x") y = darray.dims[0] if x == darray.dims[1] else darray.dims[1] else: _assert_valid_xy(darray, x, "x") _assert_valid_xy(darray, y, "y") if darray._indexes.get(x, 1) is darray._indexes.get(y, 2) and isinstance( darray._indexes[x], PandasMultiIndex ): raise ValueError("x and y cannot be levels of the same MultiIndex") return x, y # TODO: Can by used to more than x or y, rename? def _assert_valid_xy( darray: DataArray | Dataset, xy: Hashable | None, name: str ) -> None: """ make sure x and y passed to plotting functions are valid """ # MultiIndex cannot be plotted; no point in allowing them here multiindex_dims = { idx.dim for idx in darray.xindexes.get_unique() if isinstance(idx, PandasMultiIndex) } valid_xy = (set(darray.dims) | set(darray.coords)) - multiindex_dims if (xy is not None) and (xy not in valid_xy): valid_xy_str = "', '".join(sorted(str(v) for v in valid_xy)) raise ValueError( f"{name} must be one of None, '{valid_xy_str}'. Received '{xy}' instead." ) def get_axis( figsize: Iterable[float] | None = None, size: float | None = None, aspect: AspectOptions = None, ax: Axes | None = None, **subplot_kws: Any, ) -> Axes: if TYPE_CHECKING: import matplotlib as mpl import matplotlib.pyplot as plt else: mpl = attempt_import("matplotlib") plt = attempt_import("matplotlib.pyplot") if figsize is not None: if ax is not None: raise ValueError("cannot provide both `figsize` and `ax` arguments") if size is not None: raise ValueError("cannot provide both `figsize` and `size` arguments") _, ax = plt.subplots(figsize=figsize, subplot_kw=subplot_kws) return ax if size is not None: if ax is not None: raise ValueError("cannot provide both `size` and `ax` arguments") if aspect is None or aspect == "auto": width, height = mpl.rcParams["figure.figsize"] faspect = width / height elif aspect == "equal": faspect = 1 else: faspect = aspect figsize = (size * faspect, size) _, ax = plt.subplots(figsize=figsize, subplot_kw=subplot_kws) return ax if aspect is not None: raise ValueError("cannot provide `aspect` argument without `size`") if subplot_kws and ax is not None: raise ValueError("cannot use subplot_kws with existing ax") if ax is None: ax = _maybe_gca(**subplot_kws) return ax def _maybe_gca(**subplot_kws: Any) -> Axes: import matplotlib.pyplot as plt # can call gcf unconditionally: either it exists or would be created by plt.axes f = plt.gcf() # only call gca if an active axes exists if f.axes: # can not pass kwargs to active axes return plt.gca() return plt.axes(**subplot_kws) def _get_units_from_attrs(da: DataArray) -> str: """Extracts and formats the unit/units from their attributes.""" pint_array_type = DuckArrayModule("pint").type units = " [{}]" if isinstance(da.data, pint_array_type): return units.format(str(da.data.units)) if "units" in da.attrs: return units.format(da.attrs["units"]) if "unit" in da.attrs: return units.format(da.attrs["unit"]) return "" def label_from_attrs(da: DataArray | None, extra: str = "") -> str: """Makes informative labels if variable metadata (attrs) follows CF conventions.""" if da is None: return "" name: str = "{}" if "long_name" in da.attrs: name 
= name.format(da.attrs["long_name"]) elif "standard_name" in da.attrs: name = name.format(da.attrs["standard_name"]) elif da.name is not None: name = name.format(da.name) else: name = "" units = _get_units_from_attrs(da) # Treat `name` differently if it's a latex sequence if name.startswith("$") and (name.count("$") % 2 == 0): return "$\n$".join( textwrap.wrap(name + extra + units, 60, break_long_words=False) ) else: return "\n".join(textwrap.wrap(name + extra + units, 30)) def _interval_to_mid_points(array: Iterable[pd.Interval]) -> np.ndarray: """ Helper function which returns an array with the Intervals' mid points. """ return np.array([x.mid for x in array]) def _interval_to_bound_points(array: Sequence[pd.Interval]) -> np.ndarray: """ Helper function which returns an array with the Intervals' boundaries. """ array_boundaries = np.array([x.left for x in array]) array_boundaries = np.concatenate((array_boundaries, np.array([array[-1].right]))) return array_boundaries def _interval_to_double_bound_points( xarray: Iterable[pd.Interval], yarray: Iterable ) -> tuple[np.ndarray, np.ndarray]: """ Helper function to deal with an xarray consisting of pd.Intervals. Each interval is replaced with both boundaries. I.e. the length of xarray doubles. yarray is modified so it matches the new shape of xarray. """ xarray1 = np.array([x.left for x in xarray]) xarray2 = np.array([x.right for x in xarray]) xarray_out = np.array( list(itertools.chain.from_iterable(zip(xarray1, xarray2, strict=True))) ) yarray_out = np.array( list(itertools.chain.from_iterable(zip(yarray, yarray, strict=True))) ) return xarray_out, yarray_out def _resolve_intervals_1dplot( xval: np.ndarray, yval: np.ndarray, kwargs: dict ) -> tuple[np.ndarray, np.ndarray, str, str, dict]: """ Helper function to replace the values of x and/or y coordinate arrays containing pd.Interval with their mid-points or - for step plots - double points which double the length. """ x_suffix = "" y_suffix = "" # Is it a step plot? (see matplotlib.Axes.step) if kwargs.get("drawstyle", "").startswith("steps-"): remove_drawstyle = False # Convert intervals to double points x_is_interval = _valid_other_type(xval, pd.Interval) y_is_interval = _valid_other_type(yval, pd.Interval) if x_is_interval and y_is_interval: raise TypeError("Can't step plot intervals against intervals.") elif x_is_interval: xval, yval = _interval_to_double_bound_points(xval, yval) remove_drawstyle = True elif y_is_interval: yval, xval = _interval_to_double_bound_points(yval, xval) remove_drawstyle = True # Remove steps-* to be sure that matplotlib is not confused if remove_drawstyle: del kwargs["drawstyle"] # Is it another kind of plot? else: # Convert intervals to mid points and adjust labels if _valid_other_type(xval, pd.Interval): xval = _interval_to_mid_points(xval) x_suffix = "_center" if _valid_other_type(yval, pd.Interval): yval = _interval_to_mid_points(yval) y_suffix = "_center" # return converted arguments return xval, yval, x_suffix, y_suffix, kwargs def _resolve_intervals_2dplot(val, func_name): """ Helper function to replace the values of a coordinate array containing pd.Interval with their mid-points or - for pcolormesh - boundaries which increases length by 1. """ label_extra = "" if _valid_other_type(val, pd.Interval): if func_name == "pcolormesh": val = _interval_to_bound_points(val) else: val = _interval_to_mid_points(val) label_extra = "_center" return val, label_extra def _valid_other_type( x: ArrayLike, types: type[object] | tuple[type[object], ...] 
) -> bool: """ Do all elements of x have a type from types? """ return all(isinstance(el, types) for el in np.ravel(x)) def _valid_numpy_subdtype(x, numpy_types): """ Is any dtype from numpy_types superior to the dtype of x? """ # If any of the types given in numpy_types is understood as numpy.generic, # all possible x will be considered valid. This is probably unwanted. for t in numpy_types: assert not np.issubdtype(np.generic, t) return any(np.issubdtype(x.dtype, t) for t in numpy_types) def _ensure_plottable(*args) -> None: """ Raise exception if there is anything in args that can't be plotted on an axis by matplotlib. """ numpy_types: tuple[type[object], ...] = ( np.floating, np.integer, np.timedelta64, np.datetime64, np.bool_, np.str_, ) other_types: tuple[type[object], ...] = (datetime, date) cftime_datetime_types: tuple[type[object], ...] = ( () if cftime is None else (cftime.datetime,) ) other_types += cftime_datetime_types for x in args: if not ( _valid_numpy_subdtype(np.asarray(x), numpy_types) or _valid_other_type(np.asarray(x), other_types) ): raise TypeError( "Plotting requires coordinates to be numeric, boolean, " "or dates of type numpy.datetime64, " "datetime.datetime, cftime.datetime or " f"pandas.Interval. Received data of type {np.asarray(x).dtype} instead." ) if _valid_other_type(np.asarray(x), cftime_datetime_types): if nc_time_axis_available: # Register cftime datetypes to matplotlib.units.registry, # otherwise matplotlib will raise an error: import nc_time_axis # noqa: F401 else: raise ImportError( "Plotting of arrays of cftime.datetime " "objects or arrays indexed by " "cftime.datetime objects requires the " "optional `nc-time-axis` (v1.2.0 or later) " "package." ) def _is_numeric(arr): numpy_types = [np.floating, np.integer] return _valid_numpy_subdtype(arr, numpy_types) def _add_colorbar(primitive, ax, cbar_ax, cbar_kwargs, cmap_params): cbar_kwargs.setdefault("extend", cmap_params["extend"]) if cbar_ax is None: cbar_kwargs.setdefault("ax", ax) else: cbar_kwargs.setdefault("cax", cbar_ax) # dont pass extend as kwarg if it is in the mappable if hasattr(primitive, "extend"): cbar_kwargs.pop("extend") fig = ax.get_figure() cbar = fig.colorbar(primitive, **cbar_kwargs) return cbar def _rescale_imshow_rgb(darray, vmin, vmax, robust): assert robust or vmin is not None or vmax is not None # Calculate vmin and vmax automatically for `robust=True` if robust: if vmax is None: vmax = np.nanpercentile(darray, 100 - ROBUST_PERCENTILE) if vmin is None: vmin = np.nanpercentile(darray, ROBUST_PERCENTILE) # If not robust and one bound is None, calculate the default other bound # and check that an interval between them exists. elif vmax is None: vmax = 255 if np.issubdtype(darray.dtype, np.integer) else 1 if vmax < vmin: raise ValueError( f"vmin={vmin!r} is less than the default vmax ({vmax!r}) - you must supply " "a vmax > vmin in this case." ) elif vmin is None: vmin = 0 if vmin > vmax: raise ValueError( f"vmax={vmax!r} is less than the default vmin (0) - you must supply " "a vmin < vmax in this case." ) # Scale interval [vmin .. vmax] to [0 .. 1], with darray as 64-bit float # to avoid precision loss, integer over/underflow, etc with extreme inputs. # After scaling, downcast to 32-bit float. This substantially reduces # memory usage after we hand `darray` off to matplotlib. 
darray = ((darray.astype("f8") - vmin) / (vmax - vmin)).astype("f4") return np.minimum(np.maximum(darray, 0), 1) def _update_axes( ax: Axes, xincrease: bool | None, yincrease: bool | None, xscale: ScaleOptions = None, yscale: ScaleOptions = None, xticks: ArrayLike | None = None, yticks: ArrayLike | None = None, xlim: tuple[float, float] | None = None, ylim: tuple[float, float] | None = None, ) -> None: """ Update axes with provided parameters """ if xincrease is None: pass elif (xincrease and ax.xaxis_inverted()) or ( not xincrease and not ax.xaxis_inverted() ): ax.invert_xaxis() if yincrease is None: pass elif (yincrease and ax.yaxis_inverted()) or ( not yincrease and not ax.yaxis_inverted() ): ax.invert_yaxis() # The default xscale, yscale needs to be None. # If we set a scale it resets the axes formatters, # This means that set_xscale('linear') on a datetime axis # will remove the date labels. So only set the scale when explicitly # asked to. https://github.com/matplotlib/matplotlib/issues/8740 if xscale is not None: ax.set_xscale(xscale) if yscale is not None: ax.set_yscale(yscale) if xticks is not None: ax.set_xticks(xticks) if yticks is not None: ax.set_yticks(yticks) if xlim is not None: ax.set_xlim(xlim) if ylim is not None: ax.set_ylim(ylim) def _is_monotonic(coord, axis=0): """ >>> _is_monotonic(np.array([0, 1, 2])) np.True_ >>> _is_monotonic(np.array([2, 1, 0])) np.True_ >>> _is_monotonic(np.array([0, 2, 1])) np.False_ """ if coord.shape[axis] < 3: return True else: n = coord.shape[axis] delta_pos = coord.take(np.arange(1, n), axis=axis) >= coord.take( np.arange(0, n - 1), axis=axis ) delta_neg = coord.take(np.arange(1, n), axis=axis) <= coord.take( np.arange(0, n - 1), axis=axis ) return np.all(delta_pos) or np.all(delta_neg) def _infer_interval_breaks(coord, axis=0, scale=None, check_monotonic=False): """ >>> _infer_interval_breaks(np.arange(5)) array([-0.5, 0.5, 1.5, 2.5, 3.5, 4.5]) >>> _infer_interval_breaks([[0, 1], [3, 4]], axis=1) array([[-0.5, 0.5, 1.5], [ 2.5, 3.5, 4.5]]) >>> _infer_interval_breaks(np.logspace(-2, 2, 5), scale="log") array([3.16227766e-03, 3.16227766e-02, 3.16227766e-01, 3.16227766e+00, 3.16227766e+01, 3.16227766e+02]) """ coord = np.asarray(coord) if check_monotonic and not _is_monotonic(coord, axis=axis): raise ValueError( "The input coordinate is not sorted in increasing " f"order along axis {axis}. This can lead to unexpected " "results. Consider calling the `sortby` method on " "the input DataArray. To plot data with categorical " "axes, consider using the `heatmap` function from " "the `seaborn` statistical plotting library." ) # If logscale, compute the intervals in the logarithmic space if scale == "log": if (coord <= 0).any(): raise ValueError( "Found negative or zero value in coordinates. " "Coordinates must be positive on logscale plots." 
) coord = np.log10(coord) deltas = 0.5 * np.diff(coord, axis=axis) if deltas.size == 0: deltas = np.array(0.0) first = np.take(coord, [0], axis=axis) - np.take(deltas, [0], axis=axis) last = np.take(coord, [-1], axis=axis) + np.take(deltas, [-1], axis=axis) trim_last = tuple( slice(None, -1) if n == axis else slice(None) for n in range(coord.ndim) ) interval_breaks = np.concatenate( [first, coord[trim_last] + deltas, last], axis=axis ) if scale == "log": # Recovert the intervals into the linear space return np.power(10, interval_breaks) return interval_breaks def _process_cmap_cbar_kwargs( func, data, cmap=None, colors=None, cbar_kwargs: Iterable[tuple[str, Any]] | Mapping[str, Any] | None = None, levels=None, _is_facetgrid=False, **kwargs, ) -> tuple[dict[str, Any], dict[str, Any]]: """ Parameters ---------- func : plotting function data : ndarray, Data values Returns ------- cmap_params : dict cbar_kwargs : dict """ if func.__name__ == "surface": # Leave user to specify cmap settings for surface plots kwargs["cmap"] = cmap return { k: kwargs.get(k) for k in ["vmin", "vmax", "cmap", "extend", "levels", "norm"] }, {} cbar_kwargs = {} if cbar_kwargs is None else dict(cbar_kwargs) # colors is mutually exclusive with cmap if cmap and colors: raise ValueError("Can't specify both cmap and colors.") # colors is only valid when levels is supplied or the plot is of type # contour or contourf if colors and (("contour" not in func.__name__) and (levels is None)): raise ValueError("Can only specify colors with contour or levels") # we should not be getting a list of colors in cmap anymore # is there a better way to do this test? if isinstance(cmap, list | tuple): raise ValueError( "Specifying a list of colors in cmap is deprecated. " "Use colors keyword instead." ) cmap_kwargs = { "plot_data": data, "levels": levels, "cmap": colors or cmap, "filled": func.__name__ != "contour", } cmap_args = getfullargspec(_determine_cmap_params).args cmap_kwargs.update((a, kwargs[a]) for a in cmap_args if a in kwargs) if not _is_facetgrid: cmap_params = _determine_cmap_params(**cmap_kwargs) else: cmap_params = { k: cmap_kwargs[k] for k in ["vmin", "vmax", "cmap", "extend", "levels", "norm"] } return cmap_params, cbar_kwargs def _get_nice_quiver_magnitude(u, v): import matplotlib as mpl ticker = mpl.ticker.MaxNLocator(3) mean = np.mean(np.hypot(u.to_numpy(), v.to_numpy())) magnitude = ticker.tick_values(0, mean)[-2] return magnitude # Copied from matplotlib, tweaked so func can return strings. # https://github.com/matplotlib/matplotlib/issues/19555 def legend_elements( self, prop="colors", num="auto", fmt=None, func=lambda x: x, **kwargs ): """ Create legend handles and labels for a PathCollection. Each legend handle is a `.Line2D` representing the Path that was drawn, and each label is a string what each Path represents. This is useful for obtaining a legend for a `~.Axes.scatter` plot; e.g.:: scatter = plt.scatter([1, 2, 3], [4, 5, 6], c=[7, 2, 3]) plt.legend(*scatter.legend_elements()) creates three legend elements, one for each color with the numerical values passed to *c* as the labels. Also see the :ref:`automatedlegendcreation` example. Parameters ---------- prop : {"colors", "sizes"}, default: "colors" If "colors", the legend handles will show the different colors of the collection. If "sizes", the legend will show the different sizes. To set both, use *kwargs* to directly edit the `.Line2D` properties. num : int, None, "auto" (default), array-like, or `~.ticker.Locator` Target number of elements to create. 
If None, use all unique elements of the mappable array. If an integer, target to use *num* elements in the normed range. If *"auto"*, try to determine which option better suits the nature of the data. The number of created elements may slightly deviate from *num* due to a `~.ticker.Locator` being used to find useful locations. If a list or array, use exactly those elements for the legend. Finally, a `~.ticker.Locator` can be provided. fmt : str, `~matplotlib.ticker.Formatter`, or None (default) The format or formatter to use for the labels. If a string must be a valid input for a `~.StrMethodFormatter`. If None (the default), use a `~.ScalarFormatter`. func : function, default: ``lambda x: x`` Function to calculate the labels. Often the size (or color) argument to `~.Axes.scatter` will have been pre-processed by the user using a function ``s = f(x)`` to make the markers visible; e.g. ``size = np.log10(x)``. Providing the inverse of this function here allows that pre-processing to be inverted, so that the legend labels have the correct values; e.g. ``func = lambda x: 10**x``. **kwargs Allowed keyword arguments are *color* and *size*. E.g. it may be useful to set the color of the markers if *prop="sizes"* is used; similarly to set the size of the markers if *prop="colors"* is used. Any further parameters are passed onto the `.Line2D` instance. This may be useful to e.g. specify a different *markeredgecolor* or *alpha* for the legend handles. Returns ------- handles : list of `.Line2D` Visual representation of each element of the legend. labels : list of str The string labels for elements of the legend. """ import matplotlib as mpl mlines = mpl.lines handles = [] labels = [] if prop == "colors": arr = self.get_array() if arr is None: warnings.warn( "Collection without array used. Make sure to " "specify the values to be colormapped via the " "`c` argument.", stacklevel=2, ) return handles, labels _size = kwargs.pop("size", mpl.rcParams["lines.markersize"]) def _get_color_and_size(value): return self.cmap(self.norm(value)), _size elif prop == "sizes": if isinstance(self, mpl.collections.LineCollection): arr = self.get_linewidths() else: arr = self.get_sizes() _color = kwargs.pop("color", "k") def _get_color_and_size(value): return _color, np.sqrt(value) else: raise ValueError( "Valid values for `prop` are 'colors' or " f"'sizes'. You supplied '{prop}' instead." 
) # Get the unique values and their labels: values = np.unique(arr) label_values = np.asarray(func(values)) label_values_are_numeric = np.issubdtype(label_values.dtype, np.number) # Handle the label format: if fmt is None and label_values_are_numeric: fmt = mpl.ticker.ScalarFormatter(useOffset=False, useMathText=True) elif fmt is None and not label_values_are_numeric: fmt = mpl.ticker.StrMethodFormatter("{x}") elif isinstance(fmt, str): fmt = mpl.ticker.StrMethodFormatter(fmt) fmt.create_dummy_axis() if num == "auto": num = 9 if len(values) <= num: num = None if label_values_are_numeric: label_values_min = label_values.min() label_values_max = label_values.max() fmt.axis.set_view_interval(label_values_min, label_values_max) fmt.axis.set_data_interval(label_values_min, label_values_max) if num is not None: # Labels are numerical but larger than the target # number of elements, reduce to target using matplotlibs # ticker classes: if isinstance(num, mpl.ticker.Locator): loc = num elif np.iterable(num): loc = mpl.ticker.FixedLocator(num) else: num = int(num) loc = mpl.ticker.MaxNLocator( nbins=num, min_n_ticks=num - 1, steps=[1, 2, 2.5, 3, 5, 6, 8, 10] ) # Get nicely spaced label_values: label_values = loc.tick_values(label_values_min, label_values_max) # Remove extrapolated label_values: cond = (label_values >= label_values_min) & ( label_values <= label_values_max ) label_values = label_values[cond] # Get the corresponding values by creating a linear interpolant # with small step size: values_interp = np.linspace(values.min(), values.max(), 256) label_values_interp = func(values_interp) ix = np.argsort(label_values_interp) values = np.interp(label_values, label_values_interp[ix], values_interp[ix]) elif num is not None and not label_values_are_numeric: # Labels are not numerical so modifying label_values is not # possible, instead filter the array with nicely distributed # indexes: if type(num) is int: loc = mpl.ticker.LinearLocator(num) else: raise ValueError("`num` only supports integers for non-numeric labels.") ind = loc.tick_values(0, len(label_values) - 1).astype(int) label_values = label_values[ind] values = values[ind] # Some formatters requires set_locs: if hasattr(fmt, "set_locs"): fmt.set_locs(label_values) # Default settings for handles, add or override with kwargs: kw = dict(markeredgewidth=self.get_linewidths()[0], alpha=self.get_alpha()) kw.update(kwargs) for val, lab in zip(values, label_values, strict=True): color, size = _get_color_and_size(val) if isinstance(self, mpl.collections.PathCollection): kw.update(linestyle="", marker=self.get_paths()[0], markersize=size) elif isinstance(self, mpl.collections.LineCollection): kw.update(linestyle=self.get_linestyle()[0], linewidth=size) h = mlines.Line2D([0], [0], color=color, **kw) handles.append(h) labels.append(fmt(lab)) return handles, labels def _legend_add_subtitle(handles, labels, text): """Add a subtitle to legend handles.""" import matplotlib.pyplot as plt if text and len(handles) > 1: # Create a blank handle that's not visible, the # invisibility will be used to discern which are subtitles # or not: blank_handle = plt.Line2D([], [], label=text) blank_handle.set_visible(False) # Subtitles are shown first: handles = [blank_handle] + handles labels = [text] + labels return handles, labels def _adjust_legend_subtitles(legend): """Make invisible-handle "subtitles" entries look more like titles.""" import matplotlib.pyplot as plt # Legend title not in rcParams until 3.0 font_size = plt.rcParams.get("legend.title_fontsize", 
None) hpackers = legend.findobj(plt.matplotlib.offsetbox.VPacker)[0].get_children() hpackers = [v for v in hpackers if isinstance(v, plt.matplotlib.offsetbox.HPacker)] for hpack in hpackers: areas = hpack.get_children() if len(areas) < 2: continue draw_area, text_area = areas handles = draw_area.get_children() # Assume that all artists that are not visible are # subtitles: if not all(artist.get_visible() for artist in handles): # Remove the dummy marker which will bring the text # more to the center: draw_area.set_width(0) for text in text_area.get_children(): if font_size is not None: # The sutbtitles should have the same font size # as normal legend titles: text.set_size(font_size) def _infer_meta_data(ds, x, y, hue, hue_style, add_guide, funcname): dvars = set(ds.variables.keys()) error_msg = f" must be one of ({', '.join(sorted(str(v) for v in dvars))})" if x not in dvars: raise ValueError(f"Expected 'x' {error_msg}. Received {x} instead.") if y not in dvars: raise ValueError(f"Expected 'y' {error_msg}. Received {y} instead.") if hue is not None and hue not in dvars: raise ValueError(f"Expected 'hue' {error_msg}. Received {hue} instead.") if hue: hue_is_numeric = _is_numeric(ds[hue].values) if hue_style is None: hue_style = "continuous" if hue_is_numeric else "discrete" if not hue_is_numeric and (hue_style == "continuous"): raise ValueError( f"Cannot create a colorbar for a non numeric coordinate: {hue}" ) if add_guide is None or add_guide is True: add_colorbar = hue_style == "continuous" add_legend = hue_style == "discrete" else: add_colorbar = False add_legend = False else: if add_guide is True and funcname not in ("quiver", "streamplot"): raise ValueError("Cannot set add_guide when hue is None.") add_legend = False add_colorbar = False if (add_guide or add_guide is None) and funcname == "quiver": add_quiverkey = True if hue: add_colorbar = True if not hue_style: hue_style = "continuous" elif hue_style != "continuous": raise ValueError( "hue_style must be 'continuous' or None for .plot.quiver or " ".plot.streamplot" ) else: add_quiverkey = False if (add_guide or add_guide is None) and funcname == "streamplot" and hue: add_colorbar = True if not hue_style: hue_style = "continuous" elif hue_style != "continuous": raise ValueError( "hue_style must be 'continuous' or None for .plot.quiver or " ".plot.streamplot" ) if hue_style is not None and hue_style not in ["discrete", "continuous"]: raise ValueError("hue_style must be either None, 'discrete' or 'continuous'.") if hue: hue_label = label_from_attrs(ds[hue]) hue = ds[hue] else: hue_label = None hue = None return { "add_colorbar": add_colorbar, "add_legend": add_legend, "add_quiverkey": add_quiverkey, "hue_label": hue_label, "hue_style": hue_style, "xlabel": label_from_attrs(ds[x]), "ylabel": label_from_attrs(ds[y]), "hue": hue, } @overload def _parse_size( data: None, norm: tuple[float | None, float | None, bool] | Normalize | None, ) -> None: ... @overload def _parse_size( data: DataArray, norm: tuple[float | None, float | None, bool] | Normalize | None, ) -> pd.Series: ... 
# copied from seaborn def _parse_size( data: DataArray | None, norm: tuple[float | None, float | None, bool] | Normalize | None, ) -> pd.Series | None: import matplotlib as mpl if data is None: return None flatdata = data.values.flatten() if not _is_numeric(flatdata): levels = np.unique(flatdata) numbers = np.arange(1, 1 + len(levels))[::-1] else: levels = numbers = np.sort(np.unique(flatdata)) min_width, _default_width, max_width = _MARKERSIZE_RANGE # width_range = min_width, max_width if norm is None: norm = mpl.colors.Normalize() elif isinstance(norm, tuple): norm = mpl.colors.Normalize(*norm) elif not isinstance(norm, mpl.colors.Normalize): err = "``size_norm`` must be None, tuple, or Normalize object." raise ValueError(err) assert isinstance(norm, mpl.colors.Normalize) norm.clip = True if not norm.scaled(): norm(np.asarray(numbers)) # limits = norm.vmin, norm.vmax scl = norm(numbers) widths = np.asarray(min_width + scl * (max_width - min_width)) if scl.mask.any(): widths[scl.mask] = 0 sizes = dict(zip(levels, widths, strict=True)) return pd.Series(sizes) class _Normalize(Sequence): """ Normalize numerical or categorical values to numerical values. The class includes helper methods that simplifies transforming to and from normalized values. Parameters ---------- data : DataArray DataArray to normalize. width : Sequence of three numbers, optional Normalize the data to these (min, default, max) values. The default is None. """ _data: DataArray | None _data_unique: np.ndarray _data_unique_index: np.ndarray _data_unique_inverse: np.ndarray _data_is_numeric: bool _width: tuple[float, float, float] | None __slots__ = ( "_data", "_data_is_numeric", "_data_unique", "_data_unique_index", "_data_unique_inverse", "_width", ) def __init__( self, data: DataArray | None, width: tuple[float, float, float] | None = None, _is_facetgrid: bool = False, ) -> None: self._data = data self._width = width if not _is_facetgrid else None pint_array_type = DuckArrayModule("pint").type to_unique = ( data.to_numpy() # type: ignore[union-attr] if isinstance(data if data is None else data.data, pint_array_type) else data ) data_unique, data_unique_inverse = np.unique(to_unique, return_inverse=True) # type: ignore[call-overload] self._data_unique = data_unique self._data_unique_index = np.arange(0, data_unique.size) self._data_unique_inverse = data_unique_inverse self._data_is_numeric = False if data is None else _is_numeric(data) def __repr__(self) -> str: with np.printoptions(precision=4, suppress=True, threshold=5): return ( f"<_Normalize(data, width={self._width})>\n" f"{self._data_unique} -> {self._values_unique}" ) def __len__(self) -> int: return len(self._data_unique) def __getitem__(self, key): return self._data_unique[key] @property def data(self) -> DataArray | None: return self._data @property def data_is_numeric(self) -> bool: """ Check if data is numeric. Examples -------- >>> a = xr.DataArray(["b", "a", "a", "b", "c"]) >>> _Normalize(a).data_is_numeric False >>> a = xr.DataArray([0.5, 0, 0, 0.5, 2, 3]) >>> _Normalize(a).data_is_numeric True >>> # TODO: Datetime should be numeric right? >>> a = xr.DataArray(pd.date_range("2000-1-1", periods=4)) >>> _Normalize(a).data_is_numeric False # TODO: Timedelta should be numeric right? >>> a = xr.DataArray(pd.timedelta_range("-1D", periods=4, freq="D")) >>> _Normalize(a).data_is_numeric True """ return self._data_is_numeric @overload def _calc_widths(self, y: np.ndarray) -> np.ndarray: ... @overload def _calc_widths(self, y: DataArray) -> DataArray: ... 
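    # Worked example (illustrative note only; values taken from the doctests on
    # ``_Normalize.values`` further below) of the rescaling implemented in
    # ``_calc_widths``: with ``width=(18, 36, 72)`` and data ``[0.5, 0, 0, 0.5, 2, 3]``
    # the data span is 3 - 0 = 3, so k = (y - 0) / 3 = [1/6, 0, 0, 1/6, 2/3, 1] and
    # widths = 18 + k * (72 - 18) = [27, 18, 18, 27, 54, 72].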
def _calc_widths(self, y: np.ndarray | DataArray) -> np.ndarray | DataArray: """ Normalize the values so they're in between self._width. """ if self._width is None: return y xmin, xdefault, xmax = self._width diff_maxy_miny = np.max(y) - np.min(y) if diff_maxy_miny == 0: # Use default with if y is constant: widths = xdefault + 0 * y else: # Normalize in between xmin and xmax: k = (y - np.min(y)) / diff_maxy_miny widths = xmin + k * (xmax - xmin) return widths @overload def _indexes_centered(self, x: np.ndarray) -> np.ndarray: ... @overload def _indexes_centered(self, x: DataArray) -> DataArray: ... def _indexes_centered(self, x: np.ndarray | DataArray) -> np.ndarray | DataArray: """ Offset indexes to make sure being in the center of self.levels. ["a", "b", "c"] -> [1, 3, 5] """ return x * 2 + 1 @property def values(self) -> DataArray | None: """ Return a normalized number array for the unique levels. Examples -------- >>> a = xr.DataArray(["b", "a", "a", "b", "c"]) >>> _Normalize(a).values Size: 40B array([3, 1, 1, 3, 5]) Dimensions without coordinates: dim_0 >>> _Normalize(a, width=(18, 36, 72)).values Size: 40B array([45., 18., 18., 45., 72.]) Dimensions without coordinates: dim_0 >>> a = xr.DataArray([0.5, 0, 0, 0.5, 2, 3]) >>> _Normalize(a).values Size: 48B array([0.5, 0. , 0. , 0.5, 2. , 3. ]) Dimensions without coordinates: dim_0 >>> _Normalize(a, width=(18, 36, 72)).values Size: 48B array([27., 18., 18., 27., 54., 72.]) Dimensions without coordinates: dim_0 >>> _Normalize(a * 0, width=(18, 36, 72)).values Size: 48B array([36., 36., 36., 36., 36., 36.]) Dimensions without coordinates: dim_0 """ if self.data is None: return None val: DataArray if self.data_is_numeric: val = self.data else: arr = self._indexes_centered(self._data_unique_inverse) val = self.data.copy(data=arr.reshape(self.data.shape)) return self._calc_widths(val) @property def _values_unique(self) -> np.ndarray | None: """ Return unique values. Examples -------- >>> a = xr.DataArray(["b", "a", "a", "b", "c"]) >>> _Normalize(a)._values_unique array([1, 3, 5]) >>> _Normalize(a, width=(18, 36, 72))._values_unique array([18., 45., 72.]) >>> a = xr.DataArray([0.5, 0, 0, 0.5, 2, 3]) >>> _Normalize(a)._values_unique array([0. , 0.5, 2. , 3. ]) >>> _Normalize(a, width=(18, 36, 72))._values_unique array([18., 27., 54., 72.]) """ if self.data is None: return None val: np.ndarray if self.data_is_numeric: val = self._data_unique else: val = self._indexes_centered(self._data_unique_index) return self._calc_widths(val) @property def ticks(self) -> np.ndarray | None: """ Return ticks for plt.colorbar if the data is not numeric. Examples -------- >>> a = xr.DataArray(["b", "a", "a", "b", "c"]) >>> _Normalize(a).ticks array([1, 3, 5]) """ val: np.ndarray | None if self.data_is_numeric: val = None else: val = self._indexes_centered(self._data_unique_index) return val @property def levels(self) -> np.ndarray: """ Return discrete levels that will evenly bound self.values. ["a", "b", "c"] -> [0, 2, 4, 6] Examples -------- >>> a = xr.DataArray(["b", "a", "a", "b", "c"]) >>> _Normalize(a).levels array([0, 2, 4, 6]) """ return ( np.append(self._data_unique_index, np.max(self._data_unique_index) + 1) * 2 ) @property def _lookup(self) -> pd.Series: if self._values_unique is None: raise ValueError("self.data can't be None.") return pd.Series(dict(zip(self._values_unique, self._data_unique, strict=True))) def _lookup_arr(self, x) -> np.ndarray: # Use reindex to be less sensitive to float errors. reindex only # works with sorted index. 
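        # Illustrative note: with the lookup shown in the ``format``/``func``
        # doctests below, i.e. index [0.0, 0.166667, 0.666667, 1.0] mapping to
        # [0.0, 0.5, 2.0, 3.0], reindexing [0.16, 1] with method="nearest"
        # snaps each query to the closest sorted index value and returns
        # [0.5, 3.0].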
# Return as numpy array since legend_elements # seems to require that: return self._lookup.sort_index().reindex(x, method="nearest").to_numpy() @property def format(self) -> FuncFormatter: """ Return a FuncFormatter that maps self.values elements back to the original value as a string. Useful with plt.colorbar. Examples -------- >>> a = xr.DataArray([0.5, 0, 0, 0.5, 2, 3]) >>> aa = _Normalize(a, width=(0, 0.5, 1)) >>> aa._lookup 0.000000 0.0 0.166667 0.5 0.666667 2.0 1.000000 3.0 dtype: float64 >>> aa.format(1) '3.0' """ import matplotlib.pyplot as plt def _func(x: Any, pos: Any | None = None): return f"{self._lookup_arr([x])[0]}" return plt.FuncFormatter(_func) @property def func(self) -> Callable[[Any, Any | None], Any]: """ Return a lambda function that maps self.values elements back to the original value as a numpy array. Useful with ax.legend_elements. Examples -------- >>> a = xr.DataArray([0.5, 0, 0, 0.5, 2, 3]) >>> aa = _Normalize(a, width=(0, 0.5, 1)) >>> aa._lookup 0.000000 0.0 0.166667 0.5 0.666667 2.0 1.000000 3.0 dtype: float64 >>> aa.func([0.16, 1]) array([0.5, 3. ]) """ def _func(x: Any, pos: Any | None = None): return self._lookup_arr(x) return _func def _determine_guide( hueplt_norm: _Normalize, sizeplt_norm: _Normalize, add_colorbar: bool | None = None, add_legend: bool | None = None, plotfunc_name: str | None = None, ) -> tuple[bool, bool]: if plotfunc_name == "hist": return False, False if (add_colorbar) and hueplt_norm.data is None: raise KeyError("Cannot create a colorbar when hue is None.") if add_colorbar is None: if hueplt_norm.data is not None: add_colorbar = True else: add_colorbar = False if add_legend and hueplt_norm.data is None and sizeplt_norm.data is None: raise KeyError("Cannot create a legend when hue and markersize is None.") if add_legend is None: if ( not add_colorbar and (hueplt_norm.data is not None and hueplt_norm.data_is_numeric is False) ) or sizeplt_norm.data is not None: add_legend = True else: add_legend = False return add_colorbar, add_legend def _add_legend( hueplt_norm: _Normalize, sizeplt_norm: _Normalize, primitive, legend_ax, plotfunc: str, ): primitive = primitive if isinstance(primitive, list) else [primitive] handles, labels = [], [] for huesizeplt, prop in [ (hueplt_norm, "colors"), (sizeplt_norm, "sizes"), ]: if huesizeplt.data is not None: # Get legend handles and labels that displays the # values correctly. Order might be different because # legend_elements uses np.unique instead of pd.unique, # FacetGrid.add_legend might have troubles with this: hdl, lbl = [], [] for p in primitive: hdl_, lbl_ = legend_elements(p, prop, num="auto", func=huesizeplt.func) hdl += hdl_ lbl += lbl_ # Only save unique values: u, ind = np.unique(lbl, return_index=True) ind = np.argsort(ind) lbl = cast(list, u[ind].tolist()) hdl = cast(list, np.array(hdl)[ind].tolist()) # Add a subtitle: hdl, lbl = _legend_add_subtitle(hdl, lbl, label_from_attrs(huesizeplt.data)) handles += hdl labels += lbl legend = legend_ax.legend(handles, labels, framealpha=0.5) _adjust_legend_subtitles(legend) return legend def _guess_coords_to_plot( darray: DataArray, coords_to_plot: MutableMapping[str, Hashable | None], kwargs: dict, default_guess: tuple[str, ...] = ("x",), # TODO: Can this be normalized, plt.cbook.normalize_kwargs? ignore_guess_kwargs: tuple[tuple[str, ...], ...] 
= ((),), ) -> MutableMapping[str, Hashable]: """ Guess what coords to plot if some of the values in coords_to_plot are None which happens when the user has not defined all available ways of visualizing the data. Parameters ---------- darray : DataArray The DataArray to check for available coords. coords_to_plot : MutableMapping[str, Hashable] Coords defined by the user to plot. kwargs : dict Extra kwargs that will be sent to matplotlib. default_guess : Iterable[str], optional Default values and order to retrieve dims if values in dims_plot is missing, default: ("x", "hue", "size"). ignore_guess_kwargs : tuple[tuple[str, ...], ...] Matplotlib arguments to ignore. Examples -------- >>> ds = xr.tutorial.scatter_example_dataset(seed=42) >>> # Only guess x by default: >>> xr.plot.utils._guess_coords_to_plot( ... ds.A, ... coords_to_plot={"x": None, "z": None, "hue": None, "size": None}, ... kwargs={}, ... ) {'x': 'x', 'z': None, 'hue': None, 'size': None} >>> # Guess all plot dims with other default values: >>> xr.plot.utils._guess_coords_to_plot( ... ds.A, ... coords_to_plot={"x": None, "z": None, "hue": None, "size": None}, ... kwargs={}, ... default_guess=("x", "hue", "size"), ... ignore_guess_kwargs=((), ("c", "color"), ("s",)), ... ) {'x': 'x', 'z': None, 'hue': 'y', 'size': 'z'} >>> # Don't guess ´size´, since the matplotlib kwarg ´s´ has been defined: >>> xr.plot.utils._guess_coords_to_plot( ... ds.A, ... coords_to_plot={"x": None, "z": None, "hue": None, "size": None}, ... kwargs={"s": 5}, ... default_guess=("x", "hue", "size"), ... ignore_guess_kwargs=((), ("c", "color"), ("s",)), ... ) {'x': 'x', 'z': None, 'hue': 'y', 'size': None} >>> # Prioritize ´size´ over ´s´: >>> xr.plot.utils._guess_coords_to_plot( ... ds.A, ... coords_to_plot={"x": None, "z": None, "hue": None, "size": "x"}, ... kwargs={"s": 5}, ... default_guess=("x", "hue", "size"), ... ignore_guess_kwargs=((), ("c", "color"), ("s",)), ... ) {'x': 'y', 'z': None, 'hue': 'z', 'size': 'x'} """ coords_to_plot_exist = {k: v for k, v in coords_to_plot.items() if v is not None} available_coords = tuple( k for k in darray.coords.keys() if k not in coords_to_plot_exist.values() ) # If dims_plot[k] isn't defined then fill with one of the available dims, unless # one of related mpl kwargs has been used. This should have similar behaviour as # * plt.plot(x, y) -> Multiple lines with different colors if y is 2d. # * plt.plot(x, y, color="red") -> Multiple red lines if y is 2d. for k, dim, ign_kws in zip( default_guess, available_coords, ignore_guess_kwargs, strict=False ): if coords_to_plot.get(k, None) is None and all( kwargs.get(ign_kw) is None for ign_kw in ign_kws ): coords_to_plot[k] = dim for k, dim in coords_to_plot.items(): _assert_valid_xy(darray, dim, k) return coords_to_plot def _set_concise_date(ax: Axes, axis: Literal["x", "y", "z"] = "x") -> None: """ Use ConciseDateFormatter which is meant to improve the strings chosen for the ticklabels, and to minimize the strings used in those tick labels as much as possible. https://matplotlib.org/stable/gallery/ticks/date_concise_formatter.html Parameters ---------- ax : Axes Figure axes. axis : Literal["x", "y", "z"], optional Which axis to make concise. The default is "x". 
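    Examples
    --------
    Minimal illustrative sketch (assumes Matplotlib is available; not executed
    as a doctest):

    >>> import matplotlib.pyplot as plt  # doctest: +SKIP
    >>> fig, ax = plt.subplots()  # doctest: +SKIP
    >>> _ = ax.plot(pd.date_range("2000-01-01", periods=10), range(10))  # doctest: +SKIP
    >>> _set_concise_date(ax, axis="x")  # doctest: +SKIP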
""" import matplotlib.dates as mdates locator = mdates.AutoDateLocator() formatter = mdates.ConciseDateFormatter(locator) _axis = getattr(ax, f"{axis}axis") _axis.set_major_locator(locator) _axis.set_major_formatter(formatter) python-xarray-2026.01.0/xarray/plot/dataset_plot.py0000664000175000017500000007422615136607163022377 0ustar alastairalastairfrom __future__ import annotations import functools import inspect import warnings from collections.abc import Callable, Hashable, Iterable from typing import TYPE_CHECKING, Any, TypeVar, overload from xarray.plot import dataarray_plot from xarray.plot.facetgrid import _easy_facetgrid from xarray.plot.utils import ( _add_colorbar, _get_nice_quiver_magnitude, _infer_meta_data, _process_cmap_cbar_kwargs, get_axis, ) from xarray.structure.alignment import broadcast if TYPE_CHECKING: from matplotlib.axes import Axes from matplotlib.collections import LineCollection, PathCollection from matplotlib.colors import Colormap, Normalize from matplotlib.quiver import Quiver from numpy.typing import ArrayLike from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset from xarray.core.types import ( AspectOptions, ExtendOptions, HueStyleOptions, ScaleOptions, ) from xarray.plot.facetgrid import FacetGrid def _dsplot(plotfunc): commondoc = """ Parameters ---------- ds : Dataset x : Hashable or None, optional Variable name for x-axis. y : Hashable or None, optional Variable name for y-axis. u : Hashable or None, optional Variable name for the *u* velocity (in *x* direction). quiver/streamplot plots only. v : Hashable or None, optional Variable name for the *v* velocity (in *y* direction). quiver/streamplot plots only. hue: Hashable or None, optional Variable by which to color scatter points or arrows. hue_style: {'continuous', 'discrete'} or None, optional How to use the ``hue`` variable: - ``'continuous'`` -- continuous color scale (default for numeric ``hue`` variables) - ``'discrete'`` -- a color for each unique value, using the default color cycle (default for non-numeric ``hue`` variables) row : Hashable or None, optional If passed, make row faceted plots on this dimension name. col : Hashable or None, optional If passed, make column faceted plots on this dimension name. col_wrap : int, optional Use together with ``col`` to wrap faceted plots. ax : matplotlib axes object or None, optional If ``None``, use the current axes. Not applicable when using facets. figsize : Iterable[float] or None, optional A tuple (width, height) of the figure in inches. Mutually exclusive with ``size`` and ``ax``. size : scalar, optional If provided, create a new figure for the plot with the given size. Height (in inches) of each plot. See also: ``aspect``. aspect : "auto", "equal", scalar or None, optional Aspect ratio of plot, so that ``aspect * size`` gives the width in inches. Only used if a ``size`` is provided. sharex : bool or None, optional If True all subplots share the same x-axis. sharey : bool or None, optional If True all subplots share the same y-axis. add_guide: bool or None, optional Add a guide that depends on ``hue_style``: - ``'continuous'`` -- build a colorbar - ``'discrete'`` -- build a legend subplot_kws : dict or None, optional Dictionary of keyword arguments for Matplotlib subplots (see :py:meth:`matplotlib:matplotlib.figure.Figure.add_subplot`). Only applies to FacetGrid plotting. cbar_kwargs : dict, optional Dictionary of keyword arguments to pass to the colorbar (see :meth:`matplotlib:matplotlib.figure.Figure.colorbar`). 
cbar_ax : matplotlib axes object, optional Axes in which to draw the colorbar. cmap : matplotlib colormap name or colormap, optional The mapping from data values to color space. Either a Matplotlib colormap name or object. If not provided, this will be either ``'viridis'`` (if the function infers a sequential dataset) or ``'RdBu_r'`` (if the function infers a diverging dataset). See :doc:`Choosing Colormaps in Matplotlib ` for more information. If *seaborn* is installed, ``cmap`` may also be a `seaborn color palette `_. Note: if ``cmap`` is a seaborn color palette, ``levels`` must also be specified. vmin : float or None, optional Lower value to anchor the colormap, otherwise it is inferred from the data and other keyword arguments. When a diverging dataset is inferred, setting `vmin` or `vmax` will fix the other by symmetry around ``center``. Setting both values prevents use of a diverging colormap. If discrete levels are provided as an explicit list, both of these values are ignored. vmax : float or None, optional Upper value to anchor the colormap, otherwise it is inferred from the data and other keyword arguments. When a diverging dataset is inferred, setting `vmin` or `vmax` will fix the other by symmetry around ``center``. Setting both values prevents use of a diverging colormap. If discrete levels are provided as an explicit list, both of these values are ignored. norm : matplotlib.colors.Normalize, optional If ``norm`` has ``vmin`` or ``vmax`` specified, the corresponding kwarg must be ``None``. infer_intervals: bool | None If True the intervals are inferred. center : float, optional The value at which to center the colormap. Passing this value implies use of a diverging colormap. Setting it to ``False`` prevents use of a diverging colormap. robust : bool, optional If ``True`` and ``vmin`` or ``vmax`` are absent, the colormap range is computed with 2nd and 98th percentiles instead of the extreme values. colors : str or array-like of color-like, optional A single color or a list of colors. The ``levels`` argument is required. extend : {'neither', 'both', 'min', 'max'}, optional How to draw arrows extending the colorbar beyond its limits. If not provided, ``extend`` is inferred from ``vmin``, ``vmax`` and the data limits. levels : int or array-like, optional Split the colormap (``cmap``) into discrete color intervals. If an integer is provided, "nice" levels are chosen based on the data range: this can imply that the final number of levels is not exactly the expected one. Setting ``vmin`` and/or ``vmax`` with ``levels=N`` is equivalent to setting ``levels=np.linspace(vmin, vmax, N)``. **kwargs : optional Additional keyword arguments to wrapped Matplotlib function. 
""" # Build on the original docstring plotfunc.__doc__ = f"{plotfunc.__doc__}\n{commondoc}" @functools.wraps( plotfunc, assigned=("__module__", "__name__", "__qualname__", "__doc__") ) def newplotfunc( ds: Dataset, *args: Any, x: Hashable | None = None, y: Hashable | None = None, u: Hashable | None = None, v: Hashable | None = None, hue: Hashable | None = None, hue_style: HueStyleOptions = None, row: Hashable | None = None, col: Hashable | None = None, col_wrap: int | None = None, ax: Axes | None = None, figsize: Iterable[float] | None = None, size: float | None = None, aspect: AspectOptions = None, sharex: bool = True, sharey: bool = True, add_guide: bool | None = None, subplot_kws: dict[str, Any] | None = None, cbar_kwargs: dict[str, Any] | None = None, cbar_ax: Axes | None = None, cmap: str | Colormap | None = None, vmin: float | None = None, vmax: float | None = None, norm: Normalize | None = None, infer_intervals: bool | None = None, center: float | None = None, robust: bool | None = None, colors: str | ArrayLike | None = None, extend: ExtendOptions = None, levels: ArrayLike | None = None, **kwargs: Any, ) -> Any: if args: # TODO: Deprecated since 2022.10: msg = "Using positional arguments is deprecated for plot methods, use keyword arguments instead." assert x is None x = args[0] if len(args) > 1: assert y is None y = args[1] if len(args) > 2: assert u is None u = args[2] if len(args) > 3: assert v is None v = args[3] if len(args) > 4: assert hue is None hue = args[4] if len(args) > 5: raise ValueError(msg) else: warnings.warn(msg, DeprecationWarning, stacklevel=2) del args _is_facetgrid = kwargs.pop("_is_facetgrid", False) if _is_facetgrid: # facetgrid call meta_data = kwargs.pop("meta_data") else: meta_data = _infer_meta_data( ds, x, y, hue, hue_style, add_guide, funcname=plotfunc.__name__ ) hue_style = meta_data["hue_style"] # handle facetgrids first if col or row: allargs = locals().copy() allargs["plotfunc"] = globals()[plotfunc.__name__] allargs["data"] = ds # remove kwargs to avoid passing the information twice for arg in ["meta_data", "kwargs", "ds"]: del allargs[arg] return _easy_facetgrid(kind="dataset", **allargs, **kwargs) figsize = kwargs.pop("figsize", None) ax = get_axis(figsize, size, aspect, ax) if hue_style == "continuous" and hue is not None: if _is_facetgrid: cbar_kwargs = meta_data["cbar_kwargs"] cmap_params = meta_data["cmap_params"] else: cmap_params, cbar_kwargs = _process_cmap_cbar_kwargs( plotfunc, ds[hue].values, **locals() ) # subset that can be passed to scatter, hist2d cmap_params_subset = { vv: cmap_params[vv] for vv in ["vmin", "vmax", "norm", "cmap"] } else: cmap_params_subset = {} if (u is not None or v is not None) and plotfunc.__name__ not in ( "quiver", "streamplot", ): raise ValueError("u, v are only allowed for quiver or streamplot plots.") primitive = plotfunc( ds=ds, x=x, y=y, ax=ax, u=u, v=v, hue=hue, hue_style=hue_style, cmap_params=cmap_params_subset, **kwargs, ) if _is_facetgrid: # if this was called from Facetgrid.map_dataset, return primitive # finish here. 
Else, make labels if meta_data.get("xlabel", None): ax.set_xlabel(meta_data.get("xlabel")) if meta_data.get("ylabel", None): ax.set_ylabel(meta_data.get("ylabel")) if meta_data["add_legend"]: ax.legend(handles=primitive, title=meta_data.get("hue_label", None)) if meta_data["add_colorbar"]: cbar_kwargs = {} if cbar_kwargs is None else cbar_kwargs if "label" not in cbar_kwargs: cbar_kwargs["label"] = meta_data.get("hue_label", None) _add_colorbar(primitive, ax, cbar_ax, cbar_kwargs, cmap_params) if meta_data["add_quiverkey"]: magnitude = _get_nice_quiver_magnitude(ds[u], ds[v]) units = ds[u].attrs.get("units", "") ax.quiverkey( primitive, X=0.85, Y=0.9, U=magnitude, label=f"{magnitude}\n{units}", labelpos="E", coordinates="figure", ) if plotfunc.__name__ in ("quiver", "streamplot"): title = ds[u]._title_for_slice() else: title = ds[x]._title_for_slice() ax.set_title(title) return primitive # we want to actually expose the signature of newplotfunc # and not the copied **kwargs from the plotfunc which # functools.wraps adds, so delete the wrapped attr del newplotfunc.__wrapped__ return newplotfunc @overload def quiver( # type: ignore[misc,unused-ignore] # None is hashable :( ds: Dataset, *args: Any, x: Hashable | None = None, y: Hashable | None = None, u: Hashable | None = None, v: Hashable | None = None, hue: Hashable | None = None, hue_style: HueStyleOptions = None, col: None = None, # no wrap -> primitive row: None = None, # no wrap -> primitive ax: Axes | None = None, figsize: Iterable[float] | None = None, size: float | None = None, col_wrap: int | None = None, sharex: bool = True, sharey: bool = True, aspect: AspectOptions = None, subplot_kws: dict[str, Any] | None = None, add_guide: bool | None = None, cbar_kwargs: dict[str, Any] | None = None, cbar_ax: Axes | None = None, vmin: float | None = None, vmax: float | None = None, norm: Normalize | None = None, infer_intervals: bool | None = None, center: float | None = None, levels: ArrayLike | None = None, robust: bool | None = None, colors: str | ArrayLike | None = None, extend: ExtendOptions = None, cmap: str | Colormap | None = None, **kwargs: Any, ) -> Quiver: ... @overload def quiver( ds: Dataset, *args: Any, x: Hashable | None = None, y: Hashable | None = None, u: Hashable | None = None, v: Hashable | None = None, hue: Hashable | None = None, hue_style: HueStyleOptions = None, col: Hashable, # wrap -> FacetGrid row: Hashable | None = None, ax: Axes | None = None, figsize: Iterable[float] | None = None, size: float | None = None, col_wrap: int | None = None, sharex: bool = True, sharey: bool = True, aspect: AspectOptions = None, subplot_kws: dict[str, Any] | None = None, add_guide: bool | None = None, cbar_kwargs: dict[str, Any] | None = None, cbar_ax: Axes | None = None, vmin: float | None = None, vmax: float | None = None, norm: Normalize | None = None, infer_intervals: bool | None = None, center: float | None = None, levels: ArrayLike | None = None, robust: bool | None = None, colors: str | ArrayLike | None = None, extend: ExtendOptions = None, cmap: str | Colormap | None = None, **kwargs: Any, ) -> FacetGrid[Dataset]: ... 
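# Illustrative (not executed) summary of how the overloads above and below
# resolve, assuming a Dataset ``ds`` with variables "x", "y", "u", "v" and a
# "time" dimension (hypothetical names):
#   quiver(ds, x="x", y="y", u="u", v="v")              -> Quiver
#   quiver(ds, x="x", y="y", u="u", v="v", col="time")  -> FacetGrid
# i.e. faceting with ``col``/``row`` wraps the primitive in a FacetGrid.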
@overload def quiver( ds: Dataset, *args: Any, x: Hashable | None = None, y: Hashable | None = None, u: Hashable | None = None, v: Hashable | None = None, hue: Hashable | None = None, hue_style: HueStyleOptions = None, col: Hashable | None = None, row: Hashable, # wrap -> FacetGrid ax: Axes | None = None, figsize: Iterable[float] | None = None, size: float | None = None, col_wrap: int | None = None, sharex: bool = True, sharey: bool = True, aspect: AspectOptions = None, subplot_kws: dict[str, Any] | None = None, add_guide: bool | None = None, cbar_kwargs: dict[str, Any] | None = None, cbar_ax: Axes | None = None, vmin: float | None = None, vmax: float | None = None, norm: Normalize | None = None, infer_intervals: bool | None = None, center: float | None = None, levels: ArrayLike | None = None, robust: bool | None = None, colors: str | ArrayLike | None = None, extend: ExtendOptions = None, cmap: str | Colormap | None = None, **kwargs: Any, ) -> FacetGrid[Dataset]: ... @_dsplot def quiver( ds: Dataset, x: Hashable, y: Hashable, ax: Axes, u: Hashable, v: Hashable, **kwargs: Any, ) -> Quiver: """Quiver plot of Dataset variables. Wraps :py:func:`matplotlib:matplotlib.pyplot.quiver`. """ import matplotlib as mpl if x is None or y is None or u is None or v is None: raise ValueError("Must specify x, y, u, v for quiver plots.") dx, dy, du, dv = broadcast(ds[x], ds[y], ds[u], ds[v]) args = [dx.values, dy.values, du.values, dv.values] hue = kwargs.pop("hue") cmap_params = kwargs.pop("cmap_params") if hue: args.append(ds[hue].values) # TODO: Fix this by always returning a norm with vmin, vmax in cmap_params if not cmap_params["norm"]: cmap_params["norm"] = mpl.colors.Normalize( cmap_params.pop("vmin"), cmap_params.pop("vmax") ) kwargs.pop("hue_style") kwargs.setdefault("pivot", "middle") hdl = ax.quiver(*args, **kwargs, **cmap_params) return hdl @overload def streamplot( # type: ignore[misc,unused-ignore] # None is hashable :( ds: Dataset, *args: Any, x: Hashable | None = None, y: Hashable | None = None, u: Hashable | None = None, v: Hashable | None = None, hue: Hashable | None = None, hue_style: HueStyleOptions = None, col: None = None, # no wrap -> primitive row: None = None, # no wrap -> primitive ax: Axes | None = None, figsize: Iterable[float] | None = None, size: float | None = None, col_wrap: int | None = None, sharex: bool = True, sharey: bool = True, aspect: AspectOptions = None, subplot_kws: dict[str, Any] | None = None, add_guide: bool | None = None, cbar_kwargs: dict[str, Any] | None = None, cbar_ax: Axes | None = None, vmin: float | None = None, vmax: float | None = None, norm: Normalize | None = None, infer_intervals: bool | None = None, center: float | None = None, levels: ArrayLike | None = None, robust: bool | None = None, colors: str | ArrayLike | None = None, extend: ExtendOptions = None, cmap: str | Colormap | None = None, **kwargs: Any, ) -> LineCollection: ... 
@overload def streamplot( ds: Dataset, *args: Any, x: Hashable | None = None, y: Hashable | None = None, u: Hashable | None = None, v: Hashable | None = None, hue: Hashable | None = None, hue_style: HueStyleOptions = None, col: Hashable, # wrap -> FacetGrid row: Hashable | None = None, ax: Axes | None = None, figsize: Iterable[float] | None = None, size: float | None = None, col_wrap: int | None = None, sharex: bool = True, sharey: bool = True, aspect: AspectOptions = None, subplot_kws: dict[str, Any] | None = None, add_guide: bool | None = None, cbar_kwargs: dict[str, Any] | None = None, cbar_ax: Axes | None = None, vmin: float | None = None, vmax: float | None = None, norm: Normalize | None = None, infer_intervals: bool | None = None, center: float | None = None, levels: ArrayLike | None = None, robust: bool | None = None, colors: str | ArrayLike | None = None, extend: ExtendOptions = None, cmap: str | Colormap | None = None, **kwargs: Any, ) -> FacetGrid[Dataset]: ... @overload def streamplot( ds: Dataset, *args: Any, x: Hashable | None = None, y: Hashable | None = None, u: Hashable | None = None, v: Hashable | None = None, hue: Hashable | None = None, hue_style: HueStyleOptions = None, col: Hashable | None = None, row: Hashable, # wrap -> FacetGrid ax: Axes | None = None, figsize: Iterable[float] | None = None, size: float | None = None, col_wrap: int | None = None, sharex: bool = True, sharey: bool = True, aspect: AspectOptions = None, subplot_kws: dict[str, Any] | None = None, add_guide: bool | None = None, cbar_kwargs: dict[str, Any] | None = None, cbar_ax: Axes | None = None, vmin: float | None = None, vmax: float | None = None, norm: Normalize | None = None, infer_intervals: bool | None = None, center: float | None = None, levels: ArrayLike | None = None, robust: bool | None = None, colors: str | ArrayLike | None = None, extend: ExtendOptions = None, cmap: str | Colormap | None = None, **kwargs: Any, ) -> FacetGrid[Dataset]: ... @_dsplot def streamplot( ds: Dataset, x: Hashable, y: Hashable, ax: Axes, u: Hashable, v: Hashable, **kwargs: Any, ) -> LineCollection: """Plot streamlines of Dataset variables. Wraps :py:func:`matplotlib:matplotlib.pyplot.streamplot`. """ import matplotlib as mpl if x is None or y is None or u is None or v is None: raise ValueError("Must specify x, y, u, v for streamplot plots.") # Matplotlib's streamplot has strong restrictions on what x and y can be, so need to # get arrays transposed the 'right' way around. 'x' cannot vary within 'rows', so # the dimension of x must be the second dimension. 'y' cannot vary with 'columns' so # the dimension of y must be the first dimension. If x and y are both 2d, assume the # user has got them right already. 
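    # Illustrative sketch of the rule above (comment only): if ds[x] has dims
    # ("lon",) and ds[y] has dims ("lat",), then xdim = "lon" and ydim = "lat",
    # and the broadcast arrays below are transposed to ("lat", "lon") -- rows
    # vary along y, columns along x -- which is the layout that
    # matplotlib.pyplot.streamplot expects.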
xdim = ds[x].dims[0] if len(ds[x].dims) == 1 else None ydim = ds[y].dims[0] if len(ds[y].dims) == 1 else None if xdim is not None and ydim is None: ydims = set(ds[y].dims) - {xdim} if len(ydims) == 1: ydim = next(iter(ydims)) if ydim is not None and xdim is None: xdims = set(ds[x].dims) - {ydim} if len(xdims) == 1: xdim = next(iter(xdims)) dx, dy, du, dv = broadcast(ds[x], ds[y], ds[u], ds[v]) if xdim is not None and ydim is not None: # Need to ensure the arrays are transposed correctly dx = dx.transpose(ydim, xdim) dy = dy.transpose(ydim, xdim) du = du.transpose(ydim, xdim) dv = dv.transpose(ydim, xdim) hue = kwargs.pop("hue") cmap_params = kwargs.pop("cmap_params") if hue: if xdim is not None and ydim is not None: ds[hue] = ds[hue].transpose(ydim, xdim) kwargs["color"] = ds[hue].values # TODO: Fix this by always returning a norm with vmin, vmax in cmap_params if not cmap_params["norm"]: cmap_params["norm"] = mpl.colors.Normalize( cmap_params.pop("vmin"), cmap_params.pop("vmax") ) kwargs.pop("hue_style") hdl = ax.streamplot( dx.values, dy.values, du.values, dv.values, **kwargs, **cmap_params ) # Return .lines so colorbar creation works properly return hdl.lines F = TypeVar("F", bound=Callable) def _update_doc_to_dataset(dataarray_plotfunc: Callable) -> Callable[[F], F]: """ Add a common docstring by reusing the DataArray one. TODO: Reduce code duplication. * The goal is to reduce code duplication by moving all Dataset specific plots to the DataArray side and use this thin wrapper to handle the conversion between Dataset and DataArray. * Improve docstring handling, maybe reword the DataArray versions to explain Datasets better. Parameters ---------- dataarray_plotfunc : Callable Function that returns a finished plot primitive. """ # Build on the original docstring da_doc = dataarray_plotfunc.__doc__ if da_doc is None: raise NotImplementedError("DataArray plot method requires a docstring") da_str = """ Parameters ---------- darray : DataArray """ ds_str = """ The `y` DataArray will be used as base, any other variables are added as coords. Parameters ---------- ds : Dataset """ # TODO: improve this? 
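    # Illustrative note (comment only): after the replacement below, a DataArray
    # docstring whose parameter section begins with "darray : DataArray" is
    # rewritten so that section documents "ds : Dataset", and any remaining
    # occurrence of "darray" in the docstring becomes "ds".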
if da_str in da_doc: ds_doc = da_doc.replace(da_str, ds_str).replace("darray", "ds") else: ds_doc = da_doc @functools.wraps(dataarray_plotfunc) def wrapper(dataset_plotfunc: F) -> F: dataset_plotfunc.__doc__ = ds_doc return dataset_plotfunc return wrapper # type: ignore[return-value] def _normalize_args( plotmethod: str, args: tuple[Any, ...], kwargs: dict[str, Any] ) -> dict[str, Any]: from xarray.core.dataarray import DataArray # Determine positional arguments keyword by inspecting the # signature of the plotmethod: locals_ = dict( inspect.signature(getattr(DataArray().plot, plotmethod)) .bind(*args, **kwargs) .arguments.items() ) locals_.update(locals_.pop("kwargs", {})) return locals_ def _temp_dataarray(ds: Dataset, y: Hashable, locals_: dict[str, Any]) -> DataArray: """Create a temporary datarray with extra coords.""" from xarray.core.dataarray import DataArray coords = dict(ds[y].coords) dims = set(ds[y].dims) # Add extra coords to the DataArray from valid kwargs, if using all # kwargs there is a risk that we add unnecessary dataarrays as # coords straining RAM further for example: # ds.both and extend="both" would add ds.both to the coords: valid_coord_kwargs = {"x", "z", "markersize", "hue", "row", "col", "u", "v"} coord_kwargs = locals_.keys() & valid_coord_kwargs for k in coord_kwargs: key = locals_[k] darray = ds.get(key) if darray is not None: coords[key] = darray dims.update(darray.dims) # Trim dataset from unnecessary dims: ds_trimmed = ds.drop_dims(ds.sizes.keys() - dims) # TODO: Use ds.dims in the future # The dataarray has to include all the dims. Broadcast to that shape # and add the additional coords: _y = ds[y].broadcast_like(ds_trimmed) return DataArray(_y, coords=coords) @overload def scatter( # type: ignore[misc,unused-ignore] # None is hashable :( ds: Dataset, *args: Any, x: Hashable | None = None, y: Hashable | None = None, z: Hashable | None = None, hue: Hashable | None = None, hue_style: HueStyleOptions = None, markersize: Hashable | None = None, linewidth: Hashable | None = None, figsize: Iterable[float] | None = None, size: float | None = None, aspect: float | None = None, ax: Axes | None = None, row: None = None, # no wrap -> primitive col: None = None, # no wrap -> primitive col_wrap: int | None = None, xincrease: bool | None = True, yincrease: bool | None = True, add_legend: bool | None = None, add_colorbar: bool | None = None, add_labels: bool | Iterable[bool] = True, add_title: bool = True, subplot_kws: dict[str, Any] | None = None, xscale: ScaleOptions = None, yscale: ScaleOptions = None, xticks: ArrayLike | None = None, yticks: ArrayLike | None = None, xlim: ArrayLike | None = None, ylim: ArrayLike | None = None, cmap: str | Colormap | None = None, vmin: float | None = None, vmax: float | None = None, norm: Normalize | None = None, extend: ExtendOptions = None, levels: ArrayLike | None = None, **kwargs: Any, ) -> PathCollection: ... 
@overload def scatter( ds: Dataset, *args: Any, x: Hashable | None = None, y: Hashable | None = None, z: Hashable | None = None, hue: Hashable | None = None, hue_style: HueStyleOptions = None, markersize: Hashable | None = None, linewidth: Hashable | None = None, figsize: Iterable[float] | None = None, size: float | None = None, aspect: float | None = None, ax: Axes | None = None, row: Hashable | None = None, col: Hashable, # wrap -> FacetGrid col_wrap: int | None = None, xincrease: bool | None = True, yincrease: bool | None = True, add_legend: bool | None = None, add_colorbar: bool | None = None, add_labels: bool | Iterable[bool] = True, add_title: bool = True, subplot_kws: dict[str, Any] | None = None, xscale: ScaleOptions = None, yscale: ScaleOptions = None, xticks: ArrayLike | None = None, yticks: ArrayLike | None = None, xlim: ArrayLike | None = None, ylim: ArrayLike | None = None, cmap: str | Colormap | None = None, vmin: float | None = None, vmax: float | None = None, norm: Normalize | None = None, extend: ExtendOptions = None, levels: ArrayLike | None = None, **kwargs: Any, ) -> FacetGrid[DataArray]: ... @overload def scatter( ds: Dataset, *args: Any, x: Hashable | None = None, y: Hashable | None = None, z: Hashable | None = None, hue: Hashable | None = None, hue_style: HueStyleOptions = None, markersize: Hashable | None = None, linewidth: Hashable | None = None, figsize: Iterable[float] | None = None, size: float | None = None, aspect: float | None = None, ax: Axes | None = None, row: Hashable, # wrap -> FacetGrid col: Hashable | None = None, col_wrap: int | None = None, xincrease: bool | None = True, yincrease: bool | None = True, add_legend: bool | None = None, add_colorbar: bool | None = None, add_labels: bool | Iterable[bool] = True, add_title: bool = True, subplot_kws: dict[str, Any] | None = None, xscale: ScaleOptions = None, yscale: ScaleOptions = None, xticks: ArrayLike | None = None, yticks: ArrayLike | None = None, xlim: ArrayLike | None = None, ylim: ArrayLike | None = None, cmap: str | Colormap | None = None, vmin: float | None = None, vmax: float | None = None, norm: Normalize | None = None, extend: ExtendOptions = None, levels: ArrayLike | None = None, **kwargs: Any, ) -> FacetGrid[DataArray]: ... 
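# Illustrative (not executed), assuming a Dataset ``ds`` with data variables
# "A" and "B" (hypothetical names):
#   scatter(ds, x="A", y="B", hue="A")
# builds a temporary DataArray from ds["B"] via ``_temp_dataarray`` above
# (attaching "A" as a coordinate) and forwards the call to
# ``DataArray.plot.scatter`` in the implementation below.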
@_update_doc_to_dataset(dataarray_plot.scatter) def scatter( ds: Dataset, *args: Any, x: Hashable | None = None, y: Hashable | None = None, z: Hashable | None = None, hue: Hashable | None = None, hue_style: HueStyleOptions = None, markersize: Hashable | None = None, linewidth: Hashable | None = None, figsize: Iterable[float] | None = None, size: float | None = None, aspect: float | None = None, ax: Axes | None = None, row: Hashable | None = None, col: Hashable | None = None, col_wrap: int | None = None, xincrease: bool | None = True, yincrease: bool | None = True, add_legend: bool | None = None, add_colorbar: bool | None = None, add_labels: bool | Iterable[bool] = True, add_title: bool = True, subplot_kws: dict[str, Any] | None = None, xscale: ScaleOptions = None, yscale: ScaleOptions = None, xticks: ArrayLike | None = None, yticks: ArrayLike | None = None, xlim: ArrayLike | None = None, ylim: ArrayLike | None = None, cmap: str | Colormap | None = None, vmin: float | None = None, vmax: float | None = None, norm: Normalize | None = None, extend: ExtendOptions = None, levels: ArrayLike | None = None, **kwargs: Any, ) -> PathCollection | FacetGrid[DataArray]: """Scatter plot Dataset data variables against each other.""" locals_ = locals() del locals_["ds"] locals_.update(locals_.pop("kwargs", {})) da = _temp_dataarray(ds, y, locals_) return da.plot.scatter(*locals_.pop("args", ()), **locals_) python-xarray-2026.01.0/xarray/plot/__init__.py0000664000175000017500000000107615136607163021444 0ustar alastairalastair""" Use this module directly: import xarray.plot as xplt Or use the methods on a DataArray or Dataset: DataArray.plot._____ Dataset.plot._____ """ from xarray.plot.dataarray_plot import ( contour, contourf, hist, imshow, line, pcolormesh, plot, step, surface, ) from xarray.plot.dataset_plot import scatter from xarray.plot.facetgrid import FacetGrid __all__ = [ "FacetGrid", "contour", "contourf", "hist", "imshow", "line", "pcolormesh", "plot", "scatter", "step", "surface", ] python-xarray-2026.01.0/xarray/__init__.py0000664000175000017500000000736015136607163020470 0ustar alastairalastairfrom importlib.metadata import version as _version from xarray import coders, groupers, indexes, testing, tutorial, ufuncs from xarray.backends.api import ( load_dataarray, load_dataset, load_datatree, open_dataarray, open_dataset, open_datatree, open_groups, open_mfdataset, ) from xarray.backends.writers import save_mfdataset from xarray.backends.zarr import open_zarr from xarray.coding.cftime_offsets import cftime_range, date_range, date_range_like from xarray.coding.cftimeindex import CFTimeIndex from xarray.coding.frequencies import infer_freq from xarray.computation.apply_ufunc import ( apply_ufunc, ) from xarray.computation.computation import ( corr, cov, cross, dot, polyval, where, ) from xarray.conventions import SerializationWarning, decode_cf from xarray.core.common import ALL_DIMS, full_like, ones_like, zeros_like from xarray.core.coordinates import Coordinates, CoordinateValidationError from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset from xarray.core.datatree import DataTree from xarray.core.datatree_mapping import map_over_datasets from xarray.core.extensions import ( register_dataarray_accessor, register_dataset_accessor, register_datatree_accessor, ) from xarray.core.indexes import Index from xarray.core.indexing import IndexSelResult from xarray.core.options import get_options, set_options from xarray.core.parallel import map_blocks from 
xarray.core.treenode import ( InvalidTreeError, NotFoundInTreeError, TreeIsomorphismError, group_subtrees, ) from xarray.core.variable import IndexVariable, Variable, as_variable from xarray.namedarray.core import NamedArray from xarray.structure.alignment import AlignmentError, align, broadcast from xarray.structure.chunks import unify_chunks from xarray.structure.combine import combine_by_coords, combine_nested from xarray.structure.concat import concat from xarray.structure.merge import Context, MergeError, merge from xarray.util.print_versions import show_versions try: __version__ = _version("xarray") except Exception: # Local copy or not installed with setuptools. # Disable minimum version checks on downstream libraries. __version__ = "9999" # A hardcoded __all__ variable is necessary to appease # `mypy --strict` running in projects that import xarray. __all__ = ( # noqa: RUF022 # Sub-packages "coders", "groupers", "indexes", "testing", "tutorial", "ufuncs", # Top-level functions "align", "apply_ufunc", "as_variable", "broadcast", "cftime_range", "combine_by_coords", "combine_nested", "concat", "corr", "cov", "cross", "date_range", "date_range_like", "decode_cf", "dot", "full_like", "get_options", "group_subtrees", "infer_freq", "load_dataarray", "load_dataset", "load_datatree", "map_blocks", "map_over_datasets", "merge", "ones_like", "open_dataarray", "open_dataset", "open_datatree", "open_groups", "open_mfdataset", "open_zarr", "polyval", "register_dataarray_accessor", "register_dataset_accessor", "register_datatree_accessor", "save_mfdataset", "set_options", "show_versions", "unify_chunks", "where", "zeros_like", # Classes "CFTimeIndex", "Context", "Coordinates", "DataArray", "DataTree", "Dataset", "Index", "IndexSelResult", "IndexVariable", "NamedArray", "Variable", # Exceptions "AlignmentError", "CoordinateValidationError", "InvalidTreeError", "MergeError", "NotFoundInTreeError", "SerializationWarning", "TreeIsomorphismError", # Constants "ALL_DIMS", "__version__", ) python-xarray-2026.01.0/CLAUDE.md0000664000175000017500000000243515136607163016326 0ustar alastairalastair# xarray development setup ## Setup ```bash uv sync ``` ## Run tests ```bash uv run pytest xarray -n auto # All tests in parallel uv run pytest xarray/tests/test_dataarray.py # Specific file ``` ## Linting & type checking ```bash pre-commit run --all-files # Includes ruff and other checks uv run dmypy run # Type checking with mypy ``` ## Code Style Guidelines ### Import Organization - **Always place imports at the top of the file** in the standard import section - Never add imports inside functions or nested scopes unless there's a specific reason (e.g., circular import avoidance, optional dependencies in TYPE_CHECKING) - Group imports following PEP 8 conventions: 1. Standard library imports 2. Related third-party imports 3. 
Local application/library specific imports ## GitHub Interaction Guidelines - **NEVER impersonate the user on GitHub**, always sign off with something like "[This is Claude Code on behalf of Jane Doe]" - Never create issues nor pull requests on the xarray GitHub repository unless explicitly instructed - Never post "update" messages, progress reports, or explanatory comments on GitHub issues/PRs unless specifically instructed - When creating commits, always include a co-authorship trailer: `Co-authored-by: Claude ` python-xarray-2026.01.0/conftest.py0000664000175000017500000000506615136607163017251 0ustar alastairalastair"""Configuration for pytest.""" import pytest def pytest_addoption(parser: pytest.Parser): """Add command-line flags for pytest.""" parser.addoption("--run-flaky", action="store_true", help="runs flaky tests") parser.addoption( "--run-network-tests", action="store_true", help="runs tests requiring a network connection", ) parser.addoption("--run-mypy", action="store_true", help="runs mypy tests") def pytest_runtest_setup(item): # based on https://stackoverflow.com/questions/47559524 if "flaky" in item.keywords and not item.config.getoption("--run-flaky"): pytest.skip("set --run-flaky option to run flaky tests") if "network" in item.keywords and not item.config.getoption("--run-network-tests"): pytest.skip( "set --run-network-tests to run test requiring an internet connection" ) if any("mypy" in m.name for m in item.own_markers) and not item.config.getoption( "--run-mypy" ): pytest.skip("set --run-mypy option to run mypy tests") # See https://docs.pytest.org/en/stable/example/markers.html#automatically-adding-markers-based-on-test-names def pytest_collection_modifyitems(items): for item in items: if "mypy" in item.nodeid: # IMPORTANT: mypy type annotation tests leverage the pytest-mypy-plugins # plugin, and are thus written in test_*.yml files. As such, there are # no explicit test functions on which we can apply a pytest.mark.mypy # decorator. Therefore, we mark them via this name-based, automatic # marking approach, meaning that each test case must contain "mypy" in the # name. item.add_marker(pytest.mark.mypy) @pytest.fixture(autouse=True) def set_zarr_v3_api(monkeypatch): """Set ZARR_V3_EXPERIMENTAL_API environment variable for all tests.""" monkeypatch.setenv("ZARR_V3_EXPERIMENTAL_API", "1") @pytest.fixture(autouse=True) def add_standard_imports(doctest_namespace, tmpdir): import numpy as np import pandas as pd import xarray as xr doctest_namespace["np"] = np doctest_namespace["pd"] = pd doctest_namespace["xr"] = xr # always seed numpy.random to make the examples deterministic np.random.seed(0) # always switch to the temporary directory, so files get written there tmpdir.chdir() # Avoid the dask deprecation warning, can remove if CI passes without this. 
try: import dask except ImportError: pass else: dask.config.set({"dataframe.query-planning": True}) python-xarray-2026.01.0/asv_bench/0000775000175000017500000000000015136607163016773 5ustar alastairalastairpython-xarray-2026.01.0/asv_bench/benchmarks/0000775000175000017500000000000015136607163021110 5ustar alastairalastairpython-xarray-2026.01.0/asv_bench/benchmarks/import.py0000664000175000017500000000076415136607163023003 0ustar alastairalastairclass Import: """Benchmark importing xarray""" def timeraw_import_xarray(self): return "import xarray" def timeraw_import_xarray_plot(self): return "import xarray.plot" def timeraw_import_xarray_backends(self): return """ from xarray.backends import list_engines list_engines() """ def timeraw_import_xarray_only(self): # import numpy and pandas in the setup stage return "import xarray", "import numpy, pandas" python-xarray-2026.01.0/asv_bench/benchmarks/interp.py0000664000175000017500000000411215136607163022761 0ustar alastairalastairimport numpy as np import pandas as pd import xarray as xr from . import parameterized, randn, requires_dask nx = 1500 ny = 1000 nt = 500 randn_xy = randn((nx, ny), frac_nan=0.1) randn_xt = randn((nx, nt)) randn_t = randn((nt,)) new_x_short = np.linspace(0.3 * nx, 0.7 * nx, 100) new_x_long = np.linspace(0.3 * nx, 0.7 * nx, 500) new_y_long = np.linspace(0.1, 0.9, 500) class Interpolation: def setup(self, *args, **kwargs): self.ds = xr.Dataset( { "var1": (("x", "y"), randn_xy), "var2": (("x", "t"), randn_xt), "var3": (("t",), randn_t), "var4": (("z",), np.array(["text"])), "var5": (("k",), np.array(["a", "b", "c"])), }, coords={ "x": np.arange(nx), "y": np.linspace(0, 1, ny), "t": pd.date_range("1970-01-01", periods=nt, freq="D"), "x_coords": ("x", np.linspace(1.1, 2.1, nx)), "z": np.array([1]), "k": np.linspace(0, nx, 3), }, ) @parameterized(["method", "is_short"], (["linear", "cubic"], [True, False])) def time_interpolation_numeric_1d(self, method, is_short): new_x = new_x_short if is_short else new_x_long self.ds.interp(x=new_x, method=method).compute() @parameterized(["method"], (["linear", "nearest"])) def time_interpolation_numeric_2d(self, method): self.ds.interp(x=new_x_long, y=new_y_long, method=method).compute() @parameterized(["is_short"], ([True, False])) def time_interpolation_string_scalar(self, is_short): new_z = new_x_short if is_short else new_x_long self.ds.interp(z=new_z).compute() @parameterized(["is_short"], ([True, False])) def time_interpolation_string_1d(self, is_short): new_k = new_x_short if is_short else new_x_long self.ds.interp(k=new_k).compute() class InterpolationDask(Interpolation): def setup(self, *args, **kwargs): requires_dask() super().setup(**kwargs) self.ds = self.ds.chunk({"t": 50}) python-xarray-2026.01.0/asv_bench/benchmarks/unstacking.py0000664000175000017500000000350715136607163023635 0ustar alastairalastairimport numpy as np import pandas as pd import xarray as xr from . 
import requires_dask, requires_sparse class Unstacking: def setup(self): data = np.random.default_rng(0).random((250, 500)) self.da_full = xr.DataArray(data, dims=list("ab")).stack(flat_dim=[...]) self.da_missing = self.da_full[:-1] self.df_missing = self.da_missing.to_pandas() def time_unstack_fast(self): self.da_full.unstack("flat_dim") def time_unstack_slow(self): self.da_missing.unstack("flat_dim") def time_unstack_pandas_slow(self): self.df_missing.unstack() class UnstackingDask(Unstacking): def setup(self, *args, **kwargs): requires_dask() super().setup(**kwargs) self.da_full = self.da_full.chunk({"flat_dim": 25}) class UnstackingSparse(Unstacking): def setup(self, *args, **kwargs): requires_sparse() import sparse data = sparse.random((500, 1000), random_state=0, fill_value=0) self.da_full = xr.DataArray(data, dims=list("ab")).stack(flat_dim=[...]) self.da_missing = self.da_full[:-1] mindex = pd.MultiIndex.from_arrays([np.arange(100), np.arange(100)]) self.da_eye_2d = xr.DataArray(np.ones((100,)), dims="z", coords={"z": mindex}) self.da_eye_3d = xr.DataArray( np.ones((100, 50)), dims=("z", "foo"), coords={"z": mindex, "foo": np.arange(50)}, ) def time_unstack_to_sparse_2d(self): self.da_eye_2d.unstack(sparse=True) def time_unstack_to_sparse_3d(self): self.da_eye_3d.unstack(sparse=True) def peakmem_unstack_to_sparse_2d(self): self.da_eye_2d.unstack(sparse=True) def peakmem_unstack_to_sparse_3d(self): self.da_eye_3d.unstack(sparse=True) def time_unstack_pandas_slow(self): pass python-xarray-2026.01.0/asv_bench/benchmarks/rolling.py0000664000175000017500000001201515136607163023127 0ustar alastairalastairimport numpy as np import pandas as pd import xarray as xr from . import _skip_slow, parameterized, randn, requires_dask nx = 3000 long_nx = 30000 ny = 200 nt = 1000 window = 20 randn_xy = randn((nx, ny), frac_nan=0.1) randn_xt = randn((nx, nt)) randn_t = randn((nt,)) randn_long = randn((long_nx,), frac_nan=0.1) class Rolling: def setup(self, *args, **kwargs): self.ds = xr.Dataset( { "var1": (("x", "y"), randn_xy), "var2": (("x", "t"), randn_xt), "var3": (("t",), randn_t), }, coords={ "x": np.arange(nx), "y": np.linspace(0, 1, ny), "t": pd.date_range("1970-01-01", periods=nt, freq="D"), "x_coords": ("x", np.linspace(1.1, 2.1, nx)), }, ) self.da_long = xr.DataArray( randn_long, dims="x", coords={"x": np.arange(long_nx) * 0.1} ) @parameterized( ["func", "center", "use_bottleneck"], (["mean", "count"], [True, False], [True, False]), ) def time_rolling(self, func, center, use_bottleneck): with xr.set_options(use_bottleneck=use_bottleneck): getattr(self.ds.rolling(x=window, center=center), func)().load() @parameterized( ["func", "pandas", "use_bottleneck"], (["mean", "count"], [True, False], [True, False]), ) def time_rolling_long(self, func, pandas, use_bottleneck): if pandas: se = self.da_long.to_series() getattr(se.rolling(window=window, min_periods=window), func)() else: with xr.set_options(use_bottleneck=use_bottleneck): getattr( self.da_long.rolling(x=window, min_periods=window), func )().load() @parameterized( ["window_", "min_periods", "use_bottleneck"], ([20, 40], [5, 5], [True, False]) ) def time_rolling_np(self, window_, min_periods, use_bottleneck): with xr.set_options(use_bottleneck=use_bottleneck): self.ds.rolling(x=window_, center=False, min_periods=min_periods).reduce( np.nansum ).load() @parameterized( ["center", "stride", "use_bottleneck"], ([True, False], [1, 1], [True, False]) ) def time_rolling_construct(self, center, stride, use_bottleneck): with 
xr.set_options(use_bottleneck=use_bottleneck): self.ds.rolling(x=window, center=center).construct( "window_dim", stride=stride ).sum(dim="window_dim").load() class RollingDask(Rolling): def setup(self, *args, **kwargs): requires_dask() # TODO: Lazily skipped in CI as it is very demanding and slow. # Improve times and remove errors. _skip_slow() super().setup(**kwargs) self.ds = self.ds.chunk({"x": 100, "y": 50, "t": 50}) self.da_long = self.da_long.chunk({"x": 10000}) class RollingMemory: def setup(self, *args, **kwargs): self.ds = xr.Dataset( { "var1": (("x", "y"), randn_xy), "var2": (("x", "t"), randn_xt), "var3": (("t",), randn_t), }, coords={ "x": np.arange(nx), "y": np.linspace(0, 1, ny), "t": pd.date_range("1970-01-01", periods=nt, freq="D"), "x_coords": ("x", np.linspace(1.1, 2.1, nx)), }, ) class DataArrayRollingMemory(RollingMemory): @parameterized(["func", "use_bottleneck"], (["sum", "max", "mean"], [True, False])) def peakmem_ndrolling_reduce(self, func, use_bottleneck): with xr.set_options(use_bottleneck=use_bottleneck): roll = self.ds.var1.rolling(x=10, y=4) getattr(roll, func)() @parameterized(["func", "use_bottleneck"], (["sum", "max", "mean"], [True, False])) def peakmem_1drolling_reduce(self, func, use_bottleneck): with xr.set_options(use_bottleneck=use_bottleneck): roll = self.ds.var3.rolling(t=100) getattr(roll, func)() @parameterized(["stride"], ([None, 5, 50])) def peakmem_1drolling_construct(self, stride): self.ds.var2.rolling(t=100).construct("w", stride=stride) self.ds.var3.rolling(t=100).construct("w", stride=stride) class DatasetRollingMemory(RollingMemory): @parameterized(["func", "use_bottleneck"], (["sum", "max", "mean"], [True, False])) def peakmem_ndrolling_reduce(self, func, use_bottleneck): with xr.set_options(use_bottleneck=use_bottleneck): roll = self.ds.rolling(x=10, y=4) getattr(roll, func)() @parameterized(["func", "use_bottleneck"], (["sum", "max", "mean"], [True, False])) def peakmem_1drolling_reduce(self, func, use_bottleneck): with xr.set_options(use_bottleneck=use_bottleneck): roll = self.ds.rolling(t=100) getattr(roll, func)() @parameterized(["stride"], ([None, 5, 50])) def peakmem_1drolling_construct(self, stride): self.ds.rolling(t=100).construct("w", stride=stride) python-xarray-2026.01.0/asv_bench/benchmarks/groupby.py0000664000175000017500000001442315136607163023155 0ustar alastairalastair# import flox to avoid the cost of first import import cftime import flox.xarray # noqa: F401 import numpy as np import pandas as pd import xarray as xr from . 
import _skip_slow, parameterized, requires_dask class GroupBy: def setup(self, *args, **kwargs): self.n = 100 self.ds1d = xr.Dataset( { "a": xr.DataArray(np.r_[np.repeat(1, self.n), np.repeat(2, self.n)]), "b": xr.DataArray(np.arange(2 * self.n)), "c": xr.DataArray(np.arange(2 * self.n)), } ) self.ds2d = self.ds1d.expand_dims(z=10).copy() self.ds1d_mean = self.ds1d.groupby("b").mean() self.ds2d_mean = self.ds2d.groupby("b").mean() @parameterized(["ndim"], [(1, 2)]) def time_init(self, ndim): getattr(self, f"ds{ndim}d").groupby("b") @parameterized( ["method", "ndim", "use_flox"], [("sum", "mean"), (1, 2), (True, False)] ) def time_agg_small_num_groups(self, method, ndim, use_flox): ds = getattr(self, f"ds{ndim}d") with xr.set_options(use_flox=use_flox): getattr(ds.groupby("a"), method)().compute() @parameterized( ["method", "ndim", "use_flox"], [("sum", "mean"), (1, 2), (True, False)] ) def time_agg_large_num_groups(self, method, ndim, use_flox): ds = getattr(self, f"ds{ndim}d") with xr.set_options(use_flox=use_flox): getattr(ds.groupby("b"), method)().compute() def time_binary_op_1d(self): (self.ds1d.groupby("b") - self.ds1d_mean).compute() def time_binary_op_2d(self): (self.ds2d.groupby("b") - self.ds2d_mean).compute() def peakmem_binary_op_1d(self): (self.ds1d.groupby("b") - self.ds1d_mean).compute() def peakmem_binary_op_2d(self): (self.ds2d.groupby("b") - self.ds2d_mean).compute() class GroupByDask(GroupBy): def setup(self, *args, **kwargs): requires_dask() super().setup(**kwargs) self.ds1d = self.ds1d.sel(dim_0=slice(None, None, 2)) self.ds1d["c"] = self.ds1d["c"].chunk({"dim_0": 50}) self.ds2d = self.ds2d.sel(dim_0=slice(None, None, 2)) self.ds2d["c"] = self.ds2d["c"].chunk({"dim_0": 50, "z": 5}) self.ds1d_mean = self.ds1d.groupby("b").mean().compute() self.ds2d_mean = self.ds2d.groupby("b").mean().compute() # TODO: These don't work now because we are calling `.compute` explicitly. 
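# (The DataFrame-based subclasses below reuse these benchmarks on pandas and
# dask objects; the inherited aggregation benchmarks call `.compute()`, which a
# plain pandas groupby result does not provide, hence the TODO above.)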
class GroupByPandasDataFrame(GroupBy): """Run groupby tests using pandas DataFrame.""" def setup(self, *args, **kwargs): # Skip testing in CI as it won't ever change in a commit: _skip_slow() super().setup(**kwargs) self.ds1d = self.ds1d.to_dataframe() self.ds1d_mean = self.ds1d.groupby("b").mean() def time_binary_op_2d(self): raise NotImplementedError def peakmem_binary_op_2d(self): raise NotImplementedError class GroupByDaskDataFrame(GroupBy): """Run groupby tests using dask DataFrame.""" def setup(self, *args, **kwargs): # Skip testing in CI as it won't ever change in a commit: _skip_slow() requires_dask() super().setup(**kwargs) self.ds1d = self.ds1d.chunk({"dim_0": 50}).to_dask_dataframe() self.ds1d_mean = self.ds1d.groupby("b").mean().compute() def time_binary_op_2d(self): raise NotImplementedError def peakmem_binary_op_2d(self): raise NotImplementedError class Resample: def setup(self, *args, **kwargs): self.ds1d = xr.Dataset( { "b": ("time", np.arange(365.0 * 24)), }, coords={"time": pd.date_range("2001-01-01", freq="h", periods=365 * 24)}, ) self.ds2d = self.ds1d.expand_dims(z=10) self.ds1d_mean = self.ds1d.resample(time="48h").mean() self.ds2d_mean = self.ds2d.resample(time="48h").mean() @parameterized(["ndim"], [(1, 2)]) def time_init(self, ndim): getattr(self, f"ds{ndim}d").resample(time="D") @parameterized( ["method", "ndim", "use_flox"], [("sum", "mean"), (1, 2), (True, False)] ) def time_agg_small_num_groups(self, method, ndim, use_flox): ds = getattr(self, f"ds{ndim}d") with xr.set_options(use_flox=use_flox): getattr(ds.resample(time="3ME"), method)().compute() @parameterized( ["method", "ndim", "use_flox"], [("sum", "mean"), (1, 2), (True, False)] ) def time_agg_large_num_groups(self, method, ndim, use_flox): ds = getattr(self, f"ds{ndim}d") with xr.set_options(use_flox=use_flox): getattr(ds.resample(time="48h"), method)().compute() class ResampleDask(Resample): def setup(self, *args, **kwargs): requires_dask() super().setup(**kwargs) self.ds1d = self.ds1d.chunk({"time": 50}) self.ds2d = self.ds2d.chunk({"time": 50, "z": 4}) class ResampleCFTime(Resample): def setup(self, *args, **kwargs): self.ds1d = xr.Dataset( { "b": ("time", np.arange(365.0 * 24)), }, coords={ "time": xr.date_range( "2001-01-01", freq="h", periods=365 * 24, calendar="noleap" ) }, ) self.ds2d = self.ds1d.expand_dims(z=10) self.ds1d_mean = self.ds1d.resample(time="48h").mean() self.ds2d_mean = self.ds2d.resample(time="48h").mean() @parameterized(["use_cftime", "use_flox"], [[True, False], [True, False]]) class GroupByLongTime: def setup(self, use_cftime, use_flox): arr = np.random.randn(10, 10, 365 * 30) time = xr.date_range("2000", periods=30 * 365, use_cftime=use_cftime) # GH9426 - deep-copying CFTime object arrays is weirdly slow asda = xr.DataArray(time) labeled_time = [] for year, month in zip(asda.dt.year, asda.dt.month, strict=True): labeled_time.append(cftime.datetime(year, month, 1)) self.da = xr.DataArray( arr, dims=("y", "x", "time"), coords={"time": time, "time2": ("time", labeled_time)}, ) def time_setup(self, use_cftime, use_flox): self.da.groupby("time.month") def time_mean(self, use_cftime, use_flox): with xr.set_options(use_flox=use_flox): self.da.groupby("time.year").mean() python-xarray-2026.01.0/asv_bench/benchmarks/coarsen.py0000664000175000017500000000313715136607163023120 0ustar alastairalastairimport numpy as np import xarray as xr from . 
import randn # Sizes chosen to test padding optimization nx_padded = 4003 # Not divisible by 10 - requires padding ny_padded = 4007 # Not divisible by 10 - requires padding nx_exact = 4000 # Divisible by 10 - no padding needed ny_exact = 4000 # Divisible by 10 - no padding needed window = 10 class Coarsen: def setup(self, *args, **kwargs): # Case 1: Requires padding on both dimensions self.da_padded = xr.DataArray( randn((nx_padded, ny_padded)), dims=("x", "y"), coords={"x": np.arange(nx_padded), "y": np.arange(ny_padded)}, ) # Case 2: No padding required self.da_exact = xr.DataArray( randn((nx_exact, ny_exact)), dims=("x", "y"), coords={"x": np.arange(nx_exact), "y": np.arange(ny_exact)}, ) def time_coarsen_with_padding(self): """Coarsen 2D array where both dimensions require padding.""" self.da_padded.coarsen(x=window, y=window, boundary="pad").mean() def time_coarsen_no_padding(self): """Coarsen 2D array where dimensions are exact multiples (no padding).""" self.da_exact.coarsen(x=window, y=window, boundary="pad").mean() def peakmem_coarsen_with_padding(self): """Peak memory for coarsening with padding on both dimensions.""" self.da_padded.coarsen(x=window, y=window, boundary="pad").mean() def peakmem_coarsen_no_padding(self): """Peak memory for coarsening without padding.""" self.da_exact.coarsen(x=window, y=window, boundary="pad").mean() python-xarray-2026.01.0/asv_bench/benchmarks/polyfit.py0000664000175000017500000000177515136607163023162 0ustar alastairalastairimport numpy as np import xarray as xr from . import parameterized, randn, requires_dask NDEGS = (2, 5, 20) NX = (10**2, 10**6) class Polyval: def setup(self, *args, **kwargs): self.xs = {nx: xr.DataArray(randn((nx,)), dims="x", name="x") for nx in NX} self.coeffs = { ndeg: xr.DataArray( randn((ndeg,)), dims="degree", coords={"degree": np.arange(ndeg)} ) for ndeg in NDEGS } @parameterized(["nx", "ndeg"], [NX, NDEGS]) def time_polyval(self, nx, ndeg): x = self.xs[nx] c = self.coeffs[ndeg] xr.polyval(x, c).compute() @parameterized(["nx", "ndeg"], [NX, NDEGS]) def peakmem_polyval(self, nx, ndeg): x = self.xs[nx] c = self.coeffs[ndeg] xr.polyval(x, c).compute() class PolyvalDask(Polyval): def setup(self, *args, **kwargs): requires_dask() super().setup(*args, **kwargs) self.xs = {k: v.chunk({"x": 10000}) for k, v in self.xs.items()} python-xarray-2026.01.0/asv_bench/benchmarks/dataset.py0000664000175000017500000000127715136607163023116 0ustar alastairalastairimport numpy as np from xarray import Dataset from . import requires_dask class DatasetBinaryOp: def setup(self): self.ds = Dataset( { "a": (("x", "y"), np.ones((300, 400))), "b": (("x", "y"), np.ones((300, 400))), } ) self.mean = self.ds.mean() self.std = self.ds.std() def time_normalize(self): (self.ds - self.mean) / self.std class DatasetChunk: def setup(self): requires_dask() self.ds = Dataset() array = np.ones(1000) for i in range(250): self.ds[f"var{i}"] = ("x", array) def time_chunk(self): self.ds.chunk(x=(1,) * 1000) python-xarray-2026.01.0/asv_bench/benchmarks/dataset_io.py0000664000175000017500000006100215136607163023575 0ustar alastairalastairfrom __future__ import annotations import os from dataclasses import dataclass import numpy as np import pandas as pd import xarray as xr from . 
import _skip_slow, parameterized, randint, randn, requires_dask try: import dask import dask.multiprocessing except ImportError: pass os.environ["HDF5_USE_FILE_LOCKING"] = "FALSE" _ENGINES = tuple(xr.backends.list_engines().keys() - {"store"}) class IOSingleNetCDF: """ A few examples that benchmark reading/writing a single netCDF file with xarray """ timeout = 300.0 repeat = 1 number = 5 def make_ds(self): # single Dataset self.ds = xr.Dataset() self.nt = 1000 self.nx = 90 self.ny = 45 self.block_chunks = { "time": self.nt / 4, "lon": self.nx / 3, "lat": self.ny / 3, } self.time_chunks = {"time": int(self.nt / 36)} times = pd.date_range("1970-01-01", periods=self.nt, freq="D") lons = xr.DataArray( np.linspace(0, 360, self.nx), dims=("lon",), attrs={"units": "degrees east", "long_name": "longitude"}, ) lats = xr.DataArray( np.linspace(-90, 90, self.ny), dims=("lat",), attrs={"units": "degrees north", "long_name": "latitude"}, ) self.ds["foo"] = xr.DataArray( randn((self.nt, self.nx, self.ny), frac_nan=0.2), coords={"lon": lons, "lat": lats, "time": times}, dims=("time", "lon", "lat"), name="foo", attrs={"units": "foo units", "description": "a description"}, ) self.ds["bar"] = xr.DataArray( randn((self.nt, self.nx, self.ny), frac_nan=0.2), coords={"lon": lons, "lat": lats, "time": times}, dims=("time", "lon", "lat"), name="bar", attrs={"units": "bar units", "description": "a description"}, ) self.ds["baz"] = xr.DataArray( randn((self.nx, self.ny), frac_nan=0.2).astype(np.float32), coords={"lon": lons, "lat": lats}, dims=("lon", "lat"), name="baz", attrs={"units": "baz units", "description": "a description"}, ) self.ds.attrs = {"history": "created for xarray benchmarking"} self.oinds = { "time": randint(0, self.nt, 120), "lon": randint(0, self.nx, 20), "lat": randint(0, self.ny, 10), } self.vinds = { "time": xr.DataArray(randint(0, self.nt, 120), dims="x"), "lon": xr.DataArray(randint(0, self.nx, 120), dims="x"), "lat": slice(3, 20), } class IOWriteSingleNetCDF3(IOSingleNetCDF): def setup(self): # TODO: Lazily skipped in CI as it is very demanding and slow. # Improve times and remove errors. _skip_slow() self.format = "NETCDF3_64BIT" self.make_ds() def time_write_dataset_netcdf4(self): self.ds.to_netcdf("test_netcdf4_write.nc", engine="netcdf4", format=self.format) def time_write_dataset_scipy(self): self.ds.to_netcdf("test_scipy_write.nc", engine="scipy", format=self.format) class IOReadSingleNetCDF4(IOSingleNetCDF): def setup(self): # TODO: Lazily skipped in CI as it is very demanding and slow. # Improve times and remove errors. _skip_slow() self.make_ds() self.filepath = "test_single_file.nc4.nc" self.format = "NETCDF4" self.ds.to_netcdf(self.filepath, format=self.format) def time_load_dataset_netcdf4(self): xr.open_dataset(self.filepath, engine="netcdf4").load() def time_orthogonal_indexing(self): ds = xr.open_dataset(self.filepath, engine="netcdf4") ds = ds.isel(**self.oinds).load() def time_vectorized_indexing(self): ds = xr.open_dataset(self.filepath, engine="netcdf4") ds = ds.isel(**self.vinds).load() class IOReadSingleNetCDF3(IOReadSingleNetCDF4): def setup(self): # TODO: Lazily skipped in CI as it is very demanding and slow. # Improve times and remove errors. 
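# (_skip_slow raises NotImplementedError when the ASV_SKIP_SLOW environment
# variable is set to "1"; asv reports such setup failures as skipped
# benchmarks. See asv_bench/benchmarks/__init__.py.)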
_skip_slow() self.make_ds() self.filepath = "test_single_file.nc3.nc" self.format = "NETCDF3_64BIT" self.ds.to_netcdf(self.filepath, format=self.format) def time_load_dataset_scipy(self): xr.open_dataset(self.filepath, engine="scipy").load() def time_orthogonal_indexing(self): ds = xr.open_dataset(self.filepath, engine="scipy") ds = ds.isel(**self.oinds).load() def time_vectorized_indexing(self): ds = xr.open_dataset(self.filepath, engine="scipy") ds = ds.isel(**self.vinds).load() class IOReadSingleNetCDF4Dask(IOSingleNetCDF): def setup(self): # TODO: Lazily skipped in CI as it is very demanding and slow. # Improve times and remove errors. _skip_slow() requires_dask() self.make_ds() self.filepath = "test_single_file.nc4.nc" self.format = "NETCDF4" self.ds.to_netcdf(self.filepath, format=self.format) def time_load_dataset_netcdf4_with_block_chunks(self): xr.open_dataset( self.filepath, engine="netcdf4", chunks=self.block_chunks ).load() def time_load_dataset_netcdf4_with_block_chunks_oindexing(self): ds = xr.open_dataset(self.filepath, engine="netcdf4", chunks=self.block_chunks) ds = ds.isel(**self.oinds).load() def time_load_dataset_netcdf4_with_block_chunks_vindexing(self): ds = xr.open_dataset(self.filepath, engine="netcdf4", chunks=self.block_chunks) ds = ds.isel(**self.vinds).load() def time_load_dataset_netcdf4_with_block_chunks_multiprocessing(self): with dask.config.set(scheduler="multiprocessing"): xr.open_dataset( self.filepath, engine="netcdf4", chunks=self.block_chunks ).load() def time_load_dataset_netcdf4_with_time_chunks(self): xr.open_dataset(self.filepath, engine="netcdf4", chunks=self.time_chunks).load() def time_load_dataset_netcdf4_with_time_chunks_multiprocessing(self): with dask.config.set(scheduler="multiprocessing"): xr.open_dataset( self.filepath, engine="netcdf4", chunks=self.time_chunks ).load() class IOReadSingleNetCDF3Dask(IOReadSingleNetCDF4Dask): def setup(self): # TODO: Lazily skipped in CI as it is very demanding and slow. # Improve times and remove errors. 
_skip_slow() requires_dask() self.make_ds() self.filepath = "test_single_file.nc3.nc" self.format = "NETCDF3_64BIT" self.ds.to_netcdf(self.filepath, format=self.format) def time_load_dataset_scipy_with_block_chunks(self): with dask.config.set(scheduler="multiprocessing"): xr.open_dataset( self.filepath, engine="scipy", chunks=self.block_chunks ).load() def time_load_dataset_scipy_with_block_chunks_oindexing(self): ds = xr.open_dataset(self.filepath, engine="scipy", chunks=self.block_chunks) ds = ds.isel(**self.oinds).load() def time_load_dataset_scipy_with_block_chunks_vindexing(self): ds = xr.open_dataset(self.filepath, engine="scipy", chunks=self.block_chunks) ds = ds.isel(**self.vinds).load() def time_load_dataset_scipy_with_time_chunks(self): with dask.config.set(scheduler="multiprocessing"): xr.open_dataset( self.filepath, engine="scipy", chunks=self.time_chunks ).load() class IOMultipleNetCDF: """ A few examples that benchmark reading/writing multiple netCDF files with xarray """ timeout = 300.0 repeat = 1 number = 5 def make_ds(self, nfiles=10): # multiple Dataset self.ds = xr.Dataset() self.nt = 1000 self.nx = 90 self.ny = 45 self.nfiles = nfiles self.block_chunks = { "time": self.nt / 4, "lon": self.nx / 3, "lat": self.ny / 3, } self.time_chunks = {"time": int(self.nt / 36)} self.time_vars = np.split( pd.date_range("1970-01-01", periods=self.nt, freq="D"), self.nfiles ) self.ds_list = [] self.filenames_list = [] for i, times in enumerate(self.time_vars): ds = xr.Dataset() nt = len(times) lons = xr.DataArray( np.linspace(0, 360, self.nx), dims=("lon",), attrs={"units": "degrees east", "long_name": "longitude"}, ) lats = xr.DataArray( np.linspace(-90, 90, self.ny), dims=("lat",), attrs={"units": "degrees north", "long_name": "latitude"}, ) ds["foo"] = xr.DataArray( randn((nt, self.nx, self.ny), frac_nan=0.2), coords={"lon": lons, "lat": lats, "time": times}, dims=("time", "lon", "lat"), name="foo", attrs={"units": "foo units", "description": "a description"}, ) ds["bar"] = xr.DataArray( randn((nt, self.nx, self.ny), frac_nan=0.2), coords={"lon": lons, "lat": lats, "time": times}, dims=("time", "lon", "lat"), name="bar", attrs={"units": "bar units", "description": "a description"}, ) ds["baz"] = xr.DataArray( randn((self.nx, self.ny), frac_nan=0.2).astype(np.float32), coords={"lon": lons, "lat": lats}, dims=("lon", "lat"), name="baz", attrs={"units": "baz units", "description": "a description"}, ) ds.attrs = {"history": "created for xarray benchmarking"} self.ds_list.append(ds) self.filenames_list.append(f"test_netcdf_{i}.nc") class IOWriteMultipleNetCDF3(IOMultipleNetCDF): def setup(self): # TODO: Lazily skipped in CI as it is very demanding and slow. # Improve times and remove errors. _skip_slow() self.make_ds() self.format = "NETCDF3_64BIT" def time_write_dataset_netcdf4(self): xr.save_mfdataset( self.ds_list, self.filenames_list, engine="netcdf4", format=self.format ) def time_write_dataset_scipy(self): xr.save_mfdataset( self.ds_list, self.filenames_list, engine="scipy", format=self.format ) class IOReadMultipleNetCDF4(IOMultipleNetCDF): def setup(self): # TODO: Lazily skipped in CI as it is very demanding and slow. # Improve times and remove errors. 
_skip_slow() requires_dask() self.make_ds() self.format = "NETCDF4" xr.save_mfdataset(self.ds_list, self.filenames_list, format=self.format) def time_load_dataset_netcdf4(self): xr.open_mfdataset(self.filenames_list, engine="netcdf4").load() def time_open_dataset_netcdf4(self): xr.open_mfdataset(self.filenames_list, engine="netcdf4") class IOReadMultipleNetCDF3(IOReadMultipleNetCDF4): def setup(self): # TODO: Lazily skipped in CI as it is very demanding and slow. # Improve times and remove errors. _skip_slow() requires_dask() self.make_ds() self.format = "NETCDF3_64BIT" xr.save_mfdataset(self.ds_list, self.filenames_list, format=self.format) def time_load_dataset_scipy(self): xr.open_mfdataset(self.filenames_list, engine="scipy").load() def time_open_dataset_scipy(self): xr.open_mfdataset(self.filenames_list, engine="scipy") class IOReadMultipleNetCDF4Dask(IOMultipleNetCDF): def setup(self): # TODO: Lazily skipped in CI as it is very demanding and slow. # Improve times and remove errors. _skip_slow() requires_dask() self.make_ds() self.format = "NETCDF4" xr.save_mfdataset(self.ds_list, self.filenames_list, format=self.format) def time_load_dataset_netcdf4_with_block_chunks(self): xr.open_mfdataset( self.filenames_list, engine="netcdf4", chunks=self.block_chunks ).load() def time_load_dataset_netcdf4_with_block_chunks_multiprocessing(self): with dask.config.set(scheduler="multiprocessing"): xr.open_mfdataset( self.filenames_list, engine="netcdf4", chunks=self.block_chunks ).load() def time_load_dataset_netcdf4_with_time_chunks(self): xr.open_mfdataset( self.filenames_list, engine="netcdf4", chunks=self.time_chunks ).load() def time_load_dataset_netcdf4_with_time_chunks_multiprocessing(self): with dask.config.set(scheduler="multiprocessing"): xr.open_mfdataset( self.filenames_list, engine="netcdf4", chunks=self.time_chunks ).load() def time_open_dataset_netcdf4_with_block_chunks(self): xr.open_mfdataset( self.filenames_list, engine="netcdf4", chunks=self.block_chunks ) def time_open_dataset_netcdf4_with_block_chunks_multiprocessing(self): with dask.config.set(scheduler="multiprocessing"): xr.open_mfdataset( self.filenames_list, engine="netcdf4", chunks=self.block_chunks ) def time_open_dataset_netcdf4_with_time_chunks(self): xr.open_mfdataset( self.filenames_list, engine="netcdf4", chunks=self.time_chunks ) def time_open_dataset_netcdf4_with_time_chunks_multiprocessing(self): with dask.config.set(scheduler="multiprocessing"): xr.open_mfdataset( self.filenames_list, engine="netcdf4", chunks=self.time_chunks ) class IOReadMultipleNetCDF3Dask(IOReadMultipleNetCDF4Dask): def setup(self): # TODO: Lazily skipped in CI as it is very demanding and slow. # Improve times and remove errors. 
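# (requires_dask, like _skip_slow, raises NotImplementedError when dask cannot
# be imported, so asv skips the dask-dependent benchmarks rather than erroring.
# See asv_bench/benchmarks/__init__.py.)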
_skip_slow() requires_dask() self.make_ds() self.format = "NETCDF3_64BIT" xr.save_mfdataset(self.ds_list, self.filenames_list, format=self.format) def time_load_dataset_scipy_with_block_chunks(self): with dask.config.set(scheduler="multiprocessing"): xr.open_mfdataset( self.filenames_list, engine="scipy", chunks=self.block_chunks ).load() def time_load_dataset_scipy_with_time_chunks(self): with dask.config.set(scheduler="multiprocessing"): xr.open_mfdataset( self.filenames_list, engine="scipy", chunks=self.time_chunks ).load() def time_open_dataset_scipy_with_block_chunks(self): with dask.config.set(scheduler="multiprocessing"): xr.open_mfdataset( self.filenames_list, engine="scipy", chunks=self.block_chunks ) def time_open_dataset_scipy_with_time_chunks(self): with dask.config.set(scheduler="multiprocessing"): xr.open_mfdataset( self.filenames_list, engine="scipy", chunks=self.time_chunks ) def create_delayed_write(): import dask.array as da vals = da.random.random(300, chunks=(1,)) ds = xr.Dataset({"vals": (["a"], vals)}) return ds.to_netcdf("file.nc", engine="netcdf4", compute=False) class IONestedDataTree: """ A few examples that benchmark reading/writing a heavily nested netCDF datatree with xarray """ timeout = 300.0 repeat = 1 number = 5 def make_datatree(self, nchildren=10): # multiple Dataset self.ds = xr.Dataset() self.nt = 1000 self.nx = 90 self.ny = 45 self.nchildren = nchildren self.block_chunks = { "time": self.nt / 4, "lon": self.nx / 3, "lat": self.ny / 3, } self.time_chunks = {"time": int(self.nt / 36)} times = pd.date_range("1970-01-01", periods=self.nt, freq="D") lons = xr.DataArray( np.linspace(0, 360, self.nx), dims=("lon",), attrs={"units": "degrees east", "long_name": "longitude"}, ) lats = xr.DataArray( np.linspace(-90, 90, self.ny), dims=("lat",), attrs={"units": "degrees north", "long_name": "latitude"}, ) self.ds["foo"] = xr.DataArray( randn((self.nt, self.nx, self.ny), frac_nan=0.2), coords={"lon": lons, "lat": lats, "time": times}, dims=("time", "lon", "lat"), name="foo", attrs={"units": "foo units", "description": "a description"}, ) self.ds["bar"] = xr.DataArray( randn((self.nt, self.nx, self.ny), frac_nan=0.2), coords={"lon": lons, "lat": lats, "time": times}, dims=("time", "lon", "lat"), name="bar", attrs={"units": "bar units", "description": "a description"}, ) self.ds["baz"] = xr.DataArray( randn((self.nx, self.ny), frac_nan=0.2).astype(np.float32), coords={"lon": lons, "lat": lats}, dims=("lon", "lat"), name="baz", attrs={"units": "baz units", "description": "a description"}, ) self.ds.attrs = {"history": "created for xarray benchmarking"} self.oinds = { "time": randint(0, self.nt, 120), "lon": randint(0, self.nx, 20), "lat": randint(0, self.ny, 10), } self.vinds = { "time": xr.DataArray(randint(0, self.nt, 120), dims="x"), "lon": xr.DataArray(randint(0, self.nx, 120), dims="x"), "lat": slice(3, 20), } root = {f"group_{group}": self.ds for group in range(self.nchildren)} nested_tree1 = { f"group_{group}/subgroup_1": xr.Dataset() for group in range(self.nchildren) } nested_tree2 = { f"group_{group}/subgroup_2": xr.DataArray(np.arange(1, 10)).to_dataset( name="a" ) for group in range(self.nchildren) } nested_tree3 = { f"group_{group}/subgroup_2/sub-subgroup_1": self.ds for group in range(self.nchildren) } dtree = root | nested_tree1 | nested_tree2 | nested_tree3 self.dtree = xr.DataTree.from_dict(dtree) class IOReadDataTreeNetCDF4(IONestedDataTree): def setup(self): # TODO: Lazily skipped in CI as it is very demanding and slow. 
# Improve times and remove errors. _skip_slow() requires_dask() self.make_datatree() self.format = "NETCDF4" self.filepath = "datatree.nc4.nc" dtree = self.dtree dtree.to_netcdf(filepath=self.filepath) def time_load_datatree_netcdf4(self): xr.open_datatree(self.filepath, engine="netcdf4").load() def time_open_datatree_netcdf4(self): xr.open_datatree(self.filepath, engine="netcdf4") class IOWriteNetCDFDask: timeout = 60 repeat = 1 number = 5 def setup(self): # TODO: Lazily skipped in CI as it is very demanding and slow. # Improve times and remove errors. _skip_slow() requires_dask() self.write = create_delayed_write() def time_write(self): self.write.compute() class IOWriteNetCDFDaskDistributed: def setup(self): # TODO: Lazily skipped in CI as it is very demanding and slow. # Improve times and remove errors. _skip_slow() requires_dask() try: import distributed except ImportError as err: raise NotImplementedError() from err self.client = distributed.Client() self.write = create_delayed_write() def cleanup(self): self.client.shutdown() def time_write(self): self.write.compute() class IOReadSingleFile(IOSingleNetCDF): def setup(self, *args, **kwargs): self.make_ds() self.filepaths = {} for engine in _ENGINES: self.filepaths[engine] = f"test_single_file_with_{engine}.nc" self.ds.to_netcdf(self.filepaths[engine], engine=engine) @parameterized(["engine", "chunks"], (_ENGINES, [None, {}])) def time_read_dataset(self, engine, chunks): xr.open_dataset(self.filepaths[engine], engine=engine, chunks=chunks) class IOReadCustomEngine: def setup(self, *args, **kwargs): """ The custom backend does the bare minimum to be considered a lazy backend. But the data in it is still in memory so slow file reading shouldn't affect the results. """ requires_dask() @dataclass class PerformanceBackendArray(xr.backends.BackendArray): filename_or_obj: str | os.PathLike | None shape: tuple[int, ...] dtype: np.dtype lock: xr.backends.locks.SerializableLock def __getitem__(self, key: tuple): return xr.core.indexing.explicit_indexing_adapter( key, self.shape, xr.core.indexing.IndexingSupport.BASIC, self._raw_indexing_method, ) def _raw_indexing_method(self, key: tuple): raise NotImplementedError @dataclass class PerformanceStore(xr.backends.common.AbstractWritableDataStore): manager: xr.backends.CachingFileManager mode: str | None = None lock: xr.backends.locks.SerializableLock | None = None autoclose: bool = False def __post_init__(self): self.filename = self.manager._args[0] @classmethod def open( cls, filename: str | os.PathLike | None, mode: str = "r", lock: xr.backends.locks.SerializableLock | None = None, autoclose: bool = False, ): locker = lock or xr.backends.locks.SerializableLock() manager = xr.backends.CachingFileManager( xr.backends.DummyFileManager, filename, mode=mode, ) return cls(manager, mode=mode, lock=locker, autoclose=autoclose) def load(self) -> tuple: """ Load a bunch of test data quickly. Normally this method would've opened a file and parsed it. """ n_variables = 2000 # Important to have a shape and dtype for lazy loading. shape = (1000,) dtype = np.dtype(int) variables = { f"long_variable_name_{v}": xr.Variable( data=PerformanceBackendArray( self.filename, shape, dtype, self.lock ), dims=("time",), fastpath=True, ) for v in range(n_variables) } attributes = {} return variables, attributes class PerformanceBackend(xr.backends.BackendEntrypoint): def open_dataset( self, filename_or_obj: str | os.PathLike | None, drop_variables: tuple[str, ...] 
| None = None, *, mask_and_scale=True, decode_times=True, concat_characters=True, decode_coords=True, use_cftime=None, decode_timedelta=None, lock=None, **kwargs, ) -> xr.Dataset: filename_or_obj = xr.backends.common._normalize_path(filename_or_obj) store = PerformanceStore.open(filename_or_obj, lock=lock) store_entrypoint = xr.backends.store.StoreBackendEntrypoint() ds = store_entrypoint.open_dataset( store, mask_and_scale=mask_and_scale, decode_times=decode_times, concat_characters=concat_characters, decode_coords=decode_coords, drop_variables=drop_variables, use_cftime=use_cftime, decode_timedelta=decode_timedelta, ) return ds self.engine = PerformanceBackend @parameterized(["chunks"], ([None, {}, {"time": 10}])) def time_open_dataset(self, chunks): """ Time how fast xr.open_dataset is without the slow data reading part. Test with and without dask. """ xr.open_dataset(None, engine=self.engine, chunks=chunks) python-xarray-2026.01.0/asv_bench/benchmarks/combine.py0000664000175000017500000000542115136607163023100 0ustar alastairalastairimport numpy as np import xarray as xr from . import requires_dask class Concat1d: """Benchmark concatenating large datasets""" def setup(self) -> None: self.data_arrays = [ xr.DataArray(data=np.zeros(4 * 1024 * 1024, dtype=np.int8), dims=["x"]) for _ in range(10) ] def time_concat(self) -> None: xr.concat(self.data_arrays, dim="x") def peakmem_concat(self) -> None: xr.concat(self.data_arrays, dim="x") class Combine1d: """Benchmark concatenating and merging large datasets""" def setup(self) -> None: """Create 2 datasets with two different variables""" t_size = 8000 t = np.arange(t_size) data = np.random.randn(t_size) self.dsA0 = xr.Dataset({"A": xr.DataArray(data, coords={"T": t}, dims=("T"))}) self.dsA1 = xr.Dataset( {"A": xr.DataArray(data, coords={"T": t + t_size}, dims=("T"))} ) def time_combine_by_coords(self) -> None: """Also has to load and arrange t coordinate""" datasets = [self.dsA0, self.dsA1] xr.combine_by_coords(datasets) class Combine1dDask(Combine1d): """Benchmark concatenating and merging large datasets""" def setup(self) -> None: """Create 2 datasets with two different variables""" requires_dask() t_size = 8000 t = np.arange(t_size) var = xr.Variable(dims=("T",), data=np.random.randn(t_size)).chunk() data_vars = {f"long_name_{v}": ("T", var) for v in range(500)} self.dsA0 = xr.Dataset(data_vars, coords={"T": t}) self.dsA1 = xr.Dataset(data_vars, coords={"T": t + t_size}) class Combine3d: """Benchmark concatenating and merging large datasets""" def setup(self): """Create 4 datasets with two different variables""" t_size, x_size, y_size = 50, 450, 400 t = np.arange(t_size) data = np.random.randn(t_size, x_size, y_size) self.dsA0 = xr.Dataset( {"A": xr.DataArray(data, coords={"T": t}, dims=("T", "X", "Y"))} ) self.dsA1 = xr.Dataset( {"A": xr.DataArray(data, coords={"T": t + t_size}, dims=("T", "X", "Y"))} ) self.dsB0 = xr.Dataset( {"B": xr.DataArray(data, coords={"T": t}, dims=("T", "X", "Y"))} ) self.dsB1 = xr.Dataset( {"B": xr.DataArray(data, coords={"T": t + t_size}, dims=("T", "X", "Y"))} ) def time_combine_nested(self): datasets = [[self.dsA0, self.dsA1], [self.dsB0, self.dsB1]] xr.combine_nested(datasets, concat_dim=[None, "T"]) def time_combine_by_coords(self): """Also has to load and arrange t coordinate""" datasets = [self.dsA0, self.dsA1, self.dsB0, self.dsB1] xr.combine_by_coords(datasets) python-xarray-2026.01.0/asv_bench/benchmarks/dataarray_missing.py0000664000175000017500000000352015136607163025163 0ustar alastairalastairimport 
pandas as pd import xarray as xr from . import parameterized, randn, requires_dask def make_bench_data(shape, frac_nan, chunks): vals = randn(shape, frac_nan) coords = {"time": pd.date_range("2000-01-01", freq="D", periods=shape[0])} da = xr.DataArray(vals, dims=("time", "x", "y"), coords=coords) if chunks is not None: da = da.chunk(chunks) return da class DataArrayMissingInterpolateNA: def setup(self, shape, chunks, limit): if chunks is not None: requires_dask() self.da = make_bench_data(shape, 0.1, chunks) @parameterized( ["shape", "chunks", "limit"], ( [(365, 75, 75)], [None, {"x": 25, "y": 25}], [None, 3], ), ) def time_interpolate_na(self, shape, chunks, limit): actual = self.da.interpolate_na(dim="time", method="linear", limit=limit) if chunks is not None: actual = actual.compute() class DataArrayMissingBottleneck: def setup(self, shape, chunks, limit): if chunks is not None: requires_dask() self.da = make_bench_data(shape, 0.1, chunks) @parameterized( ["shape", "chunks", "limit"], ( [(365, 75, 75)], [None, {"x": 25, "y": 25}], [None, 3], ), ) def time_ffill(self, shape, chunks, limit): actual = self.da.ffill(dim="time", limit=limit) if chunks is not None: actual = actual.compute() @parameterized( ["shape", "chunks", "limit"], ( [(365, 75, 75)], [None, {"x": 25, "y": 25}], [None, 3], ), ) def time_bfill(self, shape, chunks, limit): actual = self.da.bfill(dim="time", limit=limit) if chunks is not None: actual = actual.compute() python-xarray-2026.01.0/asv_bench/benchmarks/accessors.py0000664000175000017500000000117215136607163023450 0ustar alastairalastairimport numpy as np import xarray as xr from . import parameterized NTIME = 365 * 30 @parameterized(["calendar"], [("standard", "noleap")]) class DateTimeAccessor: def setup(self, calendar): np.random.randn(NTIME) time = xr.date_range("2000", periods=30 * 365, calendar=calendar) data = np.ones((NTIME,)) self.da = xr.DataArray(data, dims="time", coords={"time": time}) def time_dayofyear(self, calendar): _ = self.da.time.dt.dayofyear def time_year(self, calendar): _ = self.da.time.dt.year def time_floor(self, calendar): _ = self.da.time.dt.floor("D") python-xarray-2026.01.0/asv_bench/benchmarks/coding.py0000664000175000017500000000101015136607163022715 0ustar alastairalastairimport numpy as np import xarray as xr from . import parameterized @parameterized(["calendar"], [("standard", "noleap")]) class EncodeCFDatetime: def setup(self, calendar): self.units = "days since 2000-01-01" self.dtype = np.dtype("int64") self.times = xr.date_range( "2000", freq="D", periods=10000, calendar=calendar ).values def time_encode_cf_datetime(self, calendar): xr.coding.times.encode_cf_datetime(self.times, self.units, calendar, self.dtype) python-xarray-2026.01.0/asv_bench/benchmarks/alignment.py0000664000175000017500000000316015136607163023440 0ustar alastairalastairimport numpy as np import xarray as xr from . 
import parameterized, requires_dask ntime = 365 * 30 nx = 50 ny = 50 rng = np.random.default_rng(0) class Align: def setup(self, *args, **kwargs): data = rng.standard_normal((ntime, nx, ny)) self.ds = xr.Dataset( {"temperature": (("time", "x", "y"), data)}, coords={ "time": xr.date_range("2000", periods=ntime), "x": np.arange(nx), "y": np.arange(ny), }, ) self.year = self.ds.time.dt.year self.idx = np.unique(rng.integers(low=0, high=ntime, size=ntime // 2)) self.year_subset = self.year.isel(time=self.idx) @parameterized(["join"], [("outer", "inner", "left", "right", "exact", "override")]) def time_already_aligned(self, join): xr.align(self.ds, self.year, join=join) @parameterized(["join"], [("outer", "inner", "left", "right")]) def time_not_aligned(self, join): xr.align(self.ds, self.year[-100:], join=join) @parameterized(["join"], [("outer", "inner", "left", "right")]) def time_not_aligned_random_integers(self, join): xr.align(self.ds, self.year_subset, join=join) class AlignCFTime(Align): def setup(self, *args, **kwargs): super().setup() self.ds["time"] = xr.date_range("2000", periods=ntime, calendar="noleap") self.year = self.ds.time.dt.year self.year_subset = self.year.isel(time=self.idx) class AlignDask(Align): def setup(self, *args, **kwargs): requires_dask() super().setup() self.ds = self.ds.chunk({"time": 100}) python-xarray-2026.01.0/asv_bench/benchmarks/README_CI.md0000664000175000017500000001745615136607163022757 0ustar alastairalastair# Benchmark CI ## How it works The `asv` suite can be run for any PR on GitHub Actions (check workflow `.github/workflows/benchmarks.yml`) by adding a `run-benchmark` label to said PR. This will trigger a job that will run the benchmarking suite for the current PR head (merged commit) against the PR base (usually `main`). We use `asv continuous` to run the job, which runs a relative performance measurement. This means that there's no state to be saved and that regressions are only caught in terms of performance ratio (absolute numbers are available but they are not useful since we do not use stable hardware over time). `asv continuous` will: - Compile `scikit-image` for _both_ commits. We use `ccache` to speed up the process, and `mamba` is used to create the build environments. - Run the benchmark suite for both commits, _twice_ (since `processes=2` by default). - Generate a report table with performance ratios: - `ratio=1.0` -> performance didn't change. - `ratio<1.0` -> PR made it slower. - `ratio>1.0` -> PR made it faster. Due to the sensitivity of the test, we cannot guarantee that false positives are not produced. In practice, values between `(0.7, 1.5)` are to be considered part of the measurement noise. When in doubt, running the benchmark suite one more time will provide more information about the test being a false positive or not. ## Running the benchmarks on GitHub Actions 1. On a PR, add the label `run-benchmark`. 2. The CI job will be started. Checks will appear in the usual dashboard panel above the comment box. 3. If more commits are added, the label checks will be grouped with the last commit checks _before_ you added the label. 4. Alternatively, you can always go to the `Actions` tab in the repo and [filter for `workflow:Benchmark`](https://github.com/scikit-image/scikit-image/actions?query=workflow%3ABenchmark). Your username will be assigned to the `actor` field, so you can also filter the results with that if you need it. ## The artifacts The CI job will also generate an artifact. 
This is the `.asv/results` directory compressed in a zip file. Its contents include: - `fv-xxxxx-xx/`. A directory for the machine that ran the suite. It contains three files: - `.json`, `.json`: the benchmark results for each commit, with stats. - `machine.json`: details about the hardware. - `benchmarks.json`: metadata about the current benchmark suite. - `benchmarks.log`: the CI logs for this run. - This README. ## Re-running the analysis Although the CI logs should be enough to get an idea of what happened (check the table at the end), one can use `asv` to run the analysis routines again. 1. Uncompress the artifact contents in the repo, under `.asv/results`. This is, you should see `.asv/results/benchmarks.log`, not `.asv/results/something_else/benchmarks.log`. Write down the machine directory name for later. 2. Run `asv show` to see your available results. You will see something like this: ``` $> asv show Commits with results: Machine : Jaimes-MBP Environment: conda-py3.9-cython-numpy1.20-scipy 00875e67 Machine : fv-az95-499 Environment: conda-py3.7-cython-numpy1.17-pooch-scipy 8db28f02 3a305096 ``` 3. We are interested in the commits for `fv-az95-499` (the CI machine for this run). We can compare them with `asv compare` and some extra options. `--sort ratio` will show largest ratios first, instead of alphabetical order. `--split` will produce three tables: improved, worsened, no changes. `--factor 1.5` tells `asv` to only complain if deviations are above a 1.5 ratio. `-m` is used to indicate the machine ID (use the one you wrote down in step 1). Finally, specify your commit hashes: baseline first, then contender! ``` $> asv compare --sort ratio --split --factor 1.5 -m fv-az95-499 8db28f02 3a305096 Benchmarks that have stayed the same: before after ratio [8db28f02] [3a305096] n/a n/a n/a benchmark_restoration.RollingBall.time_rollingball_ndim 1.23±0.04ms 1.37±0.1ms 1.12 benchmark_transform_warp.WarpSuite.time_to_float64(, 128, 3) 5.07±0.1μs 5.59±0.4μs 1.10 benchmark_transform_warp.ResizeLocalMeanSuite.time_resize_local_mean(, (192, 192, 192), (192, 192, 192)) 1.23±0.02ms 1.33±0.1ms 1.08 benchmark_transform_warp.WarpSuite.time_same_type(, 128, 3) 9.45±0.2ms 10.1±0.5ms 1.07 benchmark_rank.Rank3DSuite.time_3d_filters('majority', (32, 32, 32)) 23.0±0.9ms 24.6±1ms 1.07 benchmark_interpolation.InterpolationResize.time_resize((80, 80, 80), 0, 'symmetric', , True) 38.7±1ms 41.1±1ms 1.06 benchmark_transform_warp.ResizeLocalMeanSuite.time_resize_local_mean(, (2048, 2048), (192, 192, 192)) 4.97±0.2μs 5.24±0.2μs 1.05 benchmark_transform_warp.ResizeLocalMeanSuite.time_resize_local_mean(, (2048, 2048), (2048, 2048)) 4.21±0.2ms 4.42±0.3ms 1.05 benchmark_rank.Rank3DSuite.time_3d_filters('gradient', (32, 32, 32)) ... ``` If you want more details on a specific test, you can use `asv show`. 
Use `-b pattern` to filter which tests to show, and then specify a commit hash to inspect: ``` $> asv show -b time_to_float64 8db28f02 Commit: 8db28f02 benchmark_transform_warp.WarpSuite.time_to_float64 [fv-az95-499/conda-py3.7-cython-numpy1.17-pooch-scipy] ok =============== ============= ========== ============= ========== ============ ========== ============ ========== ============ -- N / order --------------- -------------------------------------------------------------------------------------------------------------- dtype_in 128 / 0 128 / 1 128 / 3 1024 / 0 1024 / 1 1024 / 3 4096 / 0 4096 / 1 4096 / 3 =============== ============= ========== ============= ========== ============ ========== ============ ========== ============ numpy.uint8 2.56±0.09ms 523±30μs 1.28±0.05ms 130±3ms 28.7±2ms 81.9±3ms 2.42±0.01s 659±5ms 1.48±0.01s numpy.uint16 2.48±0.03ms 530±10μs 1.28±0.02ms 130±1ms 30.4±0.7ms 81.1±2ms 2.44±0s 653±3ms 1.47±0.02s numpy.float32 2.59±0.1ms 518±20μs 1.27±0.01ms 127±3ms 26.6±1ms 74.8±2ms 2.50±0.01s 546±10ms 1.33±0.02s numpy.float64 2.48±0.04ms 513±50μs 1.23±0.04ms 134±3ms 30.7±2ms 85.4±2ms 2.55±0.01s 632±4ms 1.45±0.01s =============== ============= ========== ============= ========== ============ ========== ============ ========== ============ started: 2021-07-06 06:14:36, duration: 1.99m ``` ## Other details ### Skipping slow or demanding tests To minimize the time required to run the full suite, we trimmed the parameter matrix in some cases and, in others, directly skipped tests that ran for too long or require too much memory. Unlike `pytest`, `asv` does not have a notion of marks. However, you can `raise NotImplementedError` in the setup step to skip a test. In that vein, a new private function is defined at `benchmarks.__init__`: `_skip_slow`. This will check if the `ASV_SKIP_SLOW` environment variable has been defined. If set to `1`, it will raise `NotImplementedError` and skip the test. To implement this behavior in other tests, you can add the following attribute: ```python from . import _skip_slow # this function is defined in benchmarks.__init__ def time_something_slow(): pass time_something.setup = _skip_slow ``` python-xarray-2026.01.0/asv_bench/benchmarks/reindexing.py0000664000175000017500000000254615136607163023625 0ustar alastairalastairimport numpy as np import xarray as xr from . 
import requires_dask ntime = 500 nx = 50 ny = 50 class Reindex: def setup(self): data = np.random.default_rng(0).random((ntime, nx, ny)) self.ds = xr.Dataset( {"temperature": (("time", "x", "y"), data)}, coords={"time": np.arange(ntime), "x": np.arange(nx), "y": np.arange(ny)}, ) def time_1d_coarse(self): self.ds.reindex(time=np.arange(0, ntime, 5)).load() def time_1d_fine_all_found(self): self.ds.reindex(time=np.arange(0, ntime, 0.5), method="nearest").load() def time_1d_fine_some_missing(self): self.ds.reindex( time=np.arange(0, ntime, 0.5), method="nearest", tolerance=0.1 ).load() def time_2d_coarse(self): self.ds.reindex(x=np.arange(0, nx, 2), y=np.arange(0, ny, 2)).load() def time_2d_fine_all_found(self): self.ds.reindex( x=np.arange(0, nx, 0.5), y=np.arange(0, ny, 0.5), method="nearest" ).load() def time_2d_fine_some_missing(self): self.ds.reindex( x=np.arange(0, nx, 0.5), y=np.arange(0, ny, 0.5), method="nearest", tolerance=0.1, ).load() class ReindexDask(Reindex): def setup(self): requires_dask() super().setup() self.ds = self.ds.chunk({"time": 100}) python-xarray-2026.01.0/asv_bench/benchmarks/repr.py0000664000175000017500000000452115136607163022434 0ustar alastairalastairimport numpy as np import pandas as pd import xarray as xr class Repr: def setup(self): a = np.arange(0, 100) data_vars = dict() for i in a: data_vars[f"long_variable_name_{i}"] = xr.DataArray( name=f"long_variable_name_{i}", data=np.arange(0, 20), dims=[f"long_coord_name_{i}_x"], coords={f"long_coord_name_{i}_x": np.arange(0, 20) * 2}, ) self.ds = xr.Dataset(data_vars) self.ds.attrs = {f"attr_{k}": 2 for k in a} def time_repr(self): repr(self.ds) def time_repr_html(self): self.ds._repr_html_() class ReprDataTree: def setup(self): # construct a datatree with 500 nodes number_of_files = 20 number_of_groups = 25 tree_dict = {} for f in range(number_of_files): for g in range(number_of_groups): tree_dict[f"file_{f}/group_{g}"] = xr.Dataset({"g": f * g}) self.dt = xr.DataTree.from_dict(tree_dict) def time_repr(self): repr(self.dt) def time_repr_html(self): self.dt._repr_html_() class ReprMultiIndex: def setup(self): index = pd.MultiIndex.from_product( [range(1000), range(1000)], names=("level_0", "level_1") ) series = pd.Series(range(1000 * 1000), index=index) self.da = xr.DataArray(series) def time_repr(self): repr(self.da) def time_repr_html(self): self.da._repr_html_() class ReprPandasRangeIndex: # display a memory-saving pandas.RangeIndex shouldn't trigger memory # expensive conversion into a numpy array def setup(self): index = xr.indexes.PandasIndex(pd.RangeIndex(1_000_000), "x") self.ds = xr.Dataset(coords=xr.Coordinates.from_xindex(index)) def time_repr(self): repr(self.ds.x) def time_repr_html(self): self.ds.x._repr_html_() class ReprXarrayRangeIndex: # display an Xarray RangeIndex shouldn't trigger memory expensive conversion # of its lazy coordinate into a numpy array def setup(self): index = xr.indexes.RangeIndex.arange(1_000_000, dim="x") self.ds = xr.Dataset(coords=xr.Coordinates.from_xindex(index)) def time_repr(self): repr(self.ds.x) def time_repr_html(self): self.ds.x._repr_html_() python-xarray-2026.01.0/asv_bench/benchmarks/datatree.py0000664000175000017500000000065515136607163023261 0ustar alastairalastairimport xarray as xr from xarray.core.datatree import DataTree class Datatree: def setup(self): run1 = DataTree.from_dict({"run1": xr.Dataset({"a": 1})}) self.d_few = {"run1": run1} self.d_many = {f"run{i}": xr.Dataset({"a": 1}) for i in range(100)} def time_from_dict_few(self): 
DataTree.from_dict(self.d_few) def time_from_dict_many(self): DataTree.from_dict(self.d_many) python-xarray-2026.01.0/asv_bench/benchmarks/pandas.py0000664000175000017500000000334315136607163022733 0ustar alastairalastairimport numpy as np import pandas as pd import xarray as xr from . import parameterized, requires_dask class MultiIndexSeries: def setup(self, dtype, subset): data = np.random.rand(100000).astype(dtype) index = pd.MultiIndex.from_product( [ list("abcdefhijk"), list("abcdefhijk"), pd.date_range(start="2000-01-01", periods=1000, freq="D"), ] ) series = pd.Series(data, index) if subset: series = series[::3] self.series = series @parameterized(["dtype", "subset"], ([int, float], [True, False])) def time_from_series(self, dtype, subset): xr.DataArray.from_series(self.series) class ToDataFrame: def setup(self, *args, **kwargs): xp = kwargs.get("xp", np) nvars = kwargs.get("nvars", 1) random_kws = kwargs.get("random_kws", {}) method = kwargs.get("method", "to_dataframe") dim1 = 10_000 dim2 = 10_000 var = xr.Variable( dims=("dim1", "dim2"), data=xp.random.random((dim1, dim2), **random_kws) ) data_vars = {f"long_name_{v}": (("dim1", "dim2"), var) for v in range(nvars)} ds = xr.Dataset( data_vars, coords={"dim1": np.arange(0, dim1), "dim2": np.arange(0, dim2)} ) self.to_frame = getattr(ds, method) def time_to_dataframe(self): self.to_frame() def peakmem_to_dataframe(self): self.to_frame() class ToDataFrameDask(ToDataFrame): def setup(self, *args, **kwargs): requires_dask() import dask.array as da super().setup( xp=da, random_kws=dict(chunks=5000), method="to_dask_dataframe", nvars=500 ) python-xarray-2026.01.0/asv_bench/benchmarks/indexing.py0000664000175000017500000001503615136607163023274 0ustar alastairalastairimport os import numpy as np import pandas as pd import xarray as xr from . 
import parameterized, randint, randn, requires_dask nx = 2000 ny = 1000 nt = 500 basic_indexes = { "1scalar": {"x": 0}, "1slice": {"x": slice(0, 3)}, "1slice-1scalar": {"x": 0, "y": slice(None, None, 3)}, "2slicess-1scalar": {"x": slice(3, -3, 3), "y": 1, "t": slice(None, -3, 3)}, } basic_assignment_values = { "1scalar": 0, "1slice": xr.DataArray(randn((3, ny), frac_nan=0.1), dims=["x", "y"]), "1slice-1scalar": xr.DataArray(randn(int(ny / 3) + 1, frac_nan=0.1), dims=["y"]), "2slicess-1scalar": xr.DataArray( randn(np.empty(nx)[slice(3, -3, 3)].size, frac_nan=0.1), dims=["x"] ), } outer_indexes = { "1d": {"x": randint(0, nx, 400)}, "2d": {"x": randint(0, nx, 500), "y": randint(0, ny, 400)}, "2d-1scalar": {"x": randint(0, nx, 100), "y": 1, "t": randint(0, nt, 400)}, } outer_assignment_values = { "1d": xr.DataArray(randn((400, ny), frac_nan=0.1), dims=["x", "y"]), "2d": xr.DataArray(randn((500, 400), frac_nan=0.1), dims=["x", "y"]), "2d-1scalar": xr.DataArray(randn(100, frac_nan=0.1), dims=["x"]), } def make_vectorized_indexes(n_index): return { "1-1d": {"x": xr.DataArray(randint(0, nx, n_index), dims="a")}, "2-1d": { "x": xr.DataArray(randint(0, nx, n_index), dims="a"), "y": xr.DataArray(randint(0, ny, n_index), dims="a"), }, "3-2d": { "x": xr.DataArray( randint(0, nx, n_index).reshape(n_index // 100, 100), dims=["a", "b"] ), "y": xr.DataArray( randint(0, ny, n_index).reshape(n_index // 100, 100), dims=["a", "b"] ), "t": xr.DataArray( randint(0, nt, n_index).reshape(n_index // 100, 100), dims=["a", "b"] ), }, } vectorized_indexes = make_vectorized_indexes(400) big_vectorized_indexes = make_vectorized_indexes(400_000) vectorized_assignment_values = { "1-1d": xr.DataArray(randn((400, ny)), dims=["a", "y"], coords={"a": randn(400)}), "2-1d": xr.DataArray(randn(400), dims=["a"], coords={"a": randn(400)}), "3-2d": xr.DataArray( randn((4, 100)), dims=["a", "b"], coords={"a": randn(4), "b": randn(100)} ), } class Base: def setup(self, key): self.ds = xr.Dataset( { "var1": (("x", "y"), randn((nx, ny), frac_nan=0.1)), "var2": (("x", "t"), randn((nx, nt))), "var3": (("t",), randn(nt)), }, coords={ "x": np.arange(nx), "y": np.linspace(0, 1, ny), "t": pd.date_range("1970-01-01", periods=nt, freq="D"), "x_coords": ("x", np.linspace(1.1, 2.1, nx)), }, ) # Benchmark how indexing is slowed down by adding many scalar variable # to the dataset # https://github.com/pydata/xarray/pull/9003 self.ds_large = self.ds.merge({f"extra_var{i}": i for i in range(400)}) class Indexing(Base): @parameterized(["key"], [list(basic_indexes.keys())]) def time_indexing_basic(self, key): self.ds.isel(**basic_indexes[key]).load() @parameterized(["key"], [list(outer_indexes.keys())]) def time_indexing_outer(self, key): self.ds.isel(**outer_indexes[key]).load() @parameterized(["key"], [list(vectorized_indexes.keys())]) def time_indexing_vectorized(self, key): self.ds.isel(**vectorized_indexes[key]).load() @parameterized(["key"], [list(basic_indexes.keys())]) def time_indexing_basic_ds_large(self, key): # https://github.com/pydata/xarray/pull/9003 self.ds_large.isel(**basic_indexes[key]).load() class IndexingOnly(Base): @parameterized(["key"], [list(basic_indexes.keys())]) def time_indexing_basic(self, key): self.ds.isel(**basic_indexes[key]) @parameterized(["key"], [list(outer_indexes.keys())]) def time_indexing_outer(self, key): self.ds.isel(**outer_indexes[key]) @parameterized(["key"], [list(big_vectorized_indexes.keys())]) def time_indexing_big_vectorized(self, key): self.ds.isel(**big_vectorized_indexes[key]) class 
Assignment(Base): @parameterized(["key"], [list(basic_indexes.keys())]) def time_assignment_basic(self, key): ind = basic_indexes[key] val = basic_assignment_values[key] self.ds["var1"][ind.get("x", slice(None)), ind.get("y", slice(None))] = val @parameterized(["key"], [list(outer_indexes.keys())]) def time_assignment_outer(self, key): ind = outer_indexes[key] val = outer_assignment_values[key] self.ds["var1"][ind.get("x", slice(None)), ind.get("y", slice(None))] = val @parameterized(["key"], [list(vectorized_indexes.keys())]) def time_assignment_vectorized(self, key): ind = vectorized_indexes[key] val = vectorized_assignment_values[key] self.ds["var1"][ind.get("x", slice(None)), ind.get("y", slice(None))] = val class IndexingDask(Indexing): def setup(self, key): requires_dask() super().setup(key) self.ds = self.ds.chunk({"x": 100, "y": 50, "t": 50}) class BooleanIndexing: # https://github.com/pydata/xarray/issues/2227 def setup(self): self.ds = xr.Dataset( {"a": ("time", np.arange(10_000_000))}, coords={"time": np.arange(10_000_000)}, ) self.time_filter = self.ds.time > 50_000 def time_indexing(self): self.ds.isel(time=self.time_filter) class HugeAxisSmallSliceIndexing: # https://github.com/pydata/xarray/pull/4560 def setup(self): self.filepath = "test_indexing_huge_axis_small_slice.nc" if not os.path.isfile(self.filepath): xr.Dataset( {"a": ("x", np.arange(10_000_000))}, coords={"x": np.arange(10_000_000)}, ).to_netcdf(self.filepath, format="NETCDF4") self.ds = xr.open_dataset(self.filepath) def time_indexing(self): self.ds.isel(x=slice(100)) def cleanup(self): self.ds.close() class AssignmentOptimized: # https://github.com/pydata/xarray/pull/7382 def setup(self): self.ds = xr.Dataset(coords={"x": np.arange(500_000)}) self.da = xr.DataArray(np.arange(500_000), dims="x") def time_assign_no_reindex(self): # assign with non-indexed DataArray of same dimension size self.ds.assign(foo=self.da) def time_assign_identical_indexes(self): # fastpath index comparison (same index object) self.ds.assign(foo=self.ds.x) python-xarray-2026.01.0/asv_bench/benchmarks/renaming.py0000664000175000017500000000142015136607163023257 0ustar alastairalastairimport numpy as np import xarray as xr class SwapDims: param_names = ["size"] params = [[int(1e3), int(1e5), int(1e7)]] def setup(self, size: int) -> None: self.ds = xr.Dataset( {"a": (("x", "t"), np.ones((size, 2)))}, coords={ "x": np.arange(size), "y": np.arange(size), "z": np.arange(size), "x2": ("x", np.arange(size)), "y2": ("y", np.arange(size)), "z2": ("z", np.arange(size)), }, ) def time_swap_dims(self, size: int) -> None: self.ds.swap_dims({"x": "xn", "y": "yn", "z": "zn"}) def time_swap_dims_newindex(self, size: int) -> None: self.ds.swap_dims({"x": "x2", "y": "y2", "z": "z2"}) python-xarray-2026.01.0/asv_bench/benchmarks/merge.py0000664000175000017500000000461315136607163022565 0ustar alastairalastairimport numpy as np import xarray as xr class DatasetAddVariable: param_names = ["existing_elements"] params = [[0, 10, 100, 1000]] def setup(self, existing_elements): self.datasets = {} # Dictionary insertion is fast(er) than xarray.Dataset insertion d = {} for i in range(existing_elements): d[f"var{i}"] = i self.dataset = xr.merge([d]) d = {f"set_2_{i}": i for i in range(existing_elements)} self.dataset2 = xr.merge([d]) def time_variable_insertion(self, existing_elements): dataset = self.dataset dataset["new_var"] = 0 def time_merge_two_datasets(self, existing_elements): xr.merge([self.dataset, self.dataset2]) class DatasetCreation: # The idea here is 
to time how long it takes to go from numpy # and python data types, to a full dataset # See discussion # https://github.com/pydata/xarray/issues/7224#issuecomment-1292216344 param_names = ["strategy", "count"] params = [ ["dict_of_DataArrays", "dict_of_Variables", "dict_of_Tuples"], [0, 1, 10, 100, 1000], ] def setup(self, strategy, count): data = np.array(["0", "b"], dtype=str) self.dataset_coords = dict(time=np.array([0, 1])) self.dataset_attrs = dict(description="Test data") attrs = dict(units="Celsius") if strategy == "dict_of_DataArrays": def create_data_vars(): return { f"long_variable_name_{i}": xr.DataArray( data=data, dims=("time"), attrs=attrs ) for i in range(count) } elif strategy == "dict_of_Variables": def create_data_vars(): return { f"long_variable_name_{i}": xr.Variable("time", data, attrs=attrs) for i in range(count) } elif strategy == "dict_of_Tuples": def create_data_vars(): return { f"long_variable_name_{i}": ("time", data, attrs) for i in range(count) } self.create_data_vars = create_data_vars def time_dataset_creation(self, strategy, count): data_vars = self.create_data_vars() xr.Dataset( data_vars=data_vars, coords=self.dataset_coords, attrs=self.dataset_attrs ) python-xarray-2026.01.0/asv_bench/benchmarks/__init__.py0000664000175000017500000000322715136607163023225 0ustar alastairalastairimport itertools import os import numpy as np _counter = itertools.count() def parameterized(names, params): def decorator(func): func.param_names = names func.params = params return func return decorator def requires_dask(): try: import dask # noqa: F401 except ImportError as err: raise NotImplementedError() from err def requires_sparse(): try: import sparse # noqa: F401 except ImportError as err: raise NotImplementedError() from err def randn(shape, frac_nan=None, chunks=None, seed=0): rng = np.random.default_rng(seed) if chunks is None: x = rng.standard_normal(shape) else: import dask.array as da rng = da.random.default_rng(seed) x = rng.standard_normal(shape, chunks=chunks) if frac_nan is not None: inds = rng.choice(range(x.size), int(x.size * frac_nan)) x.flat[inds] = np.nan return x def randint(low, high=None, size=None, frac_minus=None, seed=0): rng = np.random.default_rng(seed) x = rng.integers(low, high, size) if frac_minus is not None: inds = rng.choice(range(x.size), int(x.size * frac_minus)) x.flat[inds] = -1 return x def _skip_slow(): """ Use this function to skip slow or highly demanding tests. Use it as a `Class.setup` method or a `function.setup` attribute. Examples -------- >>> from . import _skip_slow >>> def time_something_slow(): ... pass ... >>> time_something.setup = _skip_slow """ if os.environ.get("ASV_SKIP_SLOW", "0") == "1": raise NotImplementedError("Skipping this test...") python-xarray-2026.01.0/asv_bench/asv.conf.json0000664000175000017500000001425215136607163021407 0ustar alastairalastair{ // The version of the config file format. Do not change, unless // you know what you are doing. "version": 1, // The name of the project being benchmarked "project": "xarray", // The project's homepage "project_url": "https://docs.xarray.dev/", // The URL or local path of the source code repository for the // project being benchmarked "repo": "..", // List of branches to benchmark. If not provided, defaults to "master" // (for git) or "default" (for mercurial). "branches": ["main"], // for git // "branches": ["default"], // for mercurial // The DVCS being used. 
If not set, it will be automatically // determined from "repo" by looking at the protocol in the URL // (if remote), or by looking for special directories, such as // ".git" (if local). "dvcs": "git", // The tool to use to create environments. May be "conda", // "virtualenv" or other value depending on the plugins in use. // If missing or the empty string, the tool will be automatically // determined by looking for tools on the PATH environment // variable. "environment_type": "rattler", "conda_channels": ["conda-forge"], // timeout in seconds for installing any dependencies in environment // defaults to 10 min "install_timeout": 600, // the base URL to show a commit for the project. "show_commit_url": "https://github.com/pydata/xarray/commit/", // The Pythons you'd like to test against. If not provided, defaults // to the current version of Python used to run `asv`. "pythons": ["3.11"], // The matrix of dependencies to test. Each key is the name of a // package (in PyPI) and the values are version numbers. An empty // list or empty string indicates to just test against the default // (latest) version. null indicates that the package is to not be // installed. If the package to be tested is only available from // PyPi, and the 'environment_type' is conda, then you can preface // the package name by 'pip+', and the package will be installed via // pip (with all the conda available packages installed first, // followed by the pip installed packages). // // "matrix": { // "numpy": ["1.6", "1.7"], // "six": ["", null], // test with and without six installed // "pip+emcee": [""], // emcee is only available for install with pip. // }, "matrix": { "setuptools_scm": [""], // GH6609 "numpy": ["2.2"], "pandas": [""], "netcdf4": [""], "scipy": [""], "bottleneck": [""], "dask": [""], "distributed": [""], "flox": [""], "numpy_groupies": [""], "sparse": [""], "cftime": [""] }, // fix for bad builds // https://github.com/airspeed-velocity/asv/issues/1389#issuecomment-2076131185 "build_command": [ "python -m build", "python -m pip wheel --no-deps --no-build-isolation --no-index -w {build_cache_dir} {build_dir}" ], // Combinations of libraries/python versions can be excluded/included // from the set to test. Each entry is a dictionary containing additional // key-value pairs to include/exclude. // // An exclude entry excludes entries where all values match. The // values are regexps that should match the whole string. // // An include entry adds an environment. Only the packages listed // are installed. The 'python' key is required. The exclude rules // do not apply to includes. // // In addition to package names, the following keys are available: // // - python // Python version, as in the *pythons* variable above. // - environment_type // Environment type, as above. // - sys_platform // Platform, as in sys.platform. Possible values for the common // cases: 'linux2', 'win32', 'cygwin', 'darwin'. // // "exclude": [ // {"python": "3.2", "sys_platform": "win32"}, // skip py3.2 on windows // {"environment_type": "conda", "six": null}, // don't run without six on conda // ], // // "include": [ // // additional env for python2.7 // {"python": "2.7", "numpy": "1.8"}, // // additional env if run on windows+conda // {"platform": "win32", "environment_type": "conda", "python": "2.7", "libpython": ""}, // ], // The directory (relative to the current directory) that benchmarks are // stored in. 
If not provided, defaults to "benchmarks" "benchmark_dir": "benchmarks", // The directory (relative to the current directory) to cache the Python // environments in. If not provided, defaults to "env" "env_dir": ".asv/env", // The directory (relative to the current directory) that raw benchmark // results are stored in. If not provided, defaults to "results". "results_dir": ".asv/results", // The directory (relative to the current directory) that the html tree // should be written to. If not provided, defaults to "html". "html_dir": ".asv/html" // The number of characters to retain in the commit hashes. // "hash_length": 8, // `asv` will cache wheels of the recent builds in each // environment, making them faster to install next time. This is // number of builds to keep, per environment. // "wheel_cache_size": 0 // The commits after which the regression search in `asv publish` // should start looking for regressions. Dictionary whose keys are // regexps matching to benchmark names, and values corresponding to // the commit (exclusive) after which to start looking for // regressions. The default is to start from the first commit // with results. If the commit is `null`, regression detection is // skipped for the matching benchmark. // // "regressions_first_commits": { // "some_benchmark": "352cdf", // Consider regressions only after this commit // "another_benchmark": null, // Skip regression detection altogether // } // The thresholds for relative change in results, after which `asv // publish` starts reporting regressions. Dictionary of the same // form as in ``regressions_first_commits``, with values // indicating the thresholds. If multiple entries match, the // maximum is taken. If no entry matches, the default is 5%. // // "regressions_thresholds": { // "some_benchmark": 0.01, // Threshold of 1% // "another_benchmark": 0.5, // Threshold of 50% // } } python-xarray-2026.01.0/.binder/0000775000175000017500000000000015136607163016364 5ustar alastairalastairpython-xarray-2026.01.0/.binder/environment.yml0000664000175000017500000000077215136607163021461 0ustar alastairalastairname: xarray-examples channels: - conda-forge dependencies: - python=3.11 - boto3 - bottleneck - cartopy - cfgrib - cftime - coveralls - dask - distributed - dask_labextension - h5netcdf - h5py - hdf5 - iris - lxml # Optional dep of pydap - matplotlib - nc-time-axis - netcdf4 - numba - numpy - packaging - pandas - pint>=0.22 - pip - pooch - pydap - rasterio - scipy - seaborn - setuptools - sparse - toolz - xarray - zarr - numbagg python-xarray-2026.01.0/setup.py0000775000175000017500000000015015136607163016554 0ustar alastairalastair#!/usr/bin/env python from setuptools import setup setup(use_scm_version={"fallback_version": "9999"}) python-xarray-2026.01.0/pyproject.toml0000664000175000017500000003064015136607163017762 0ustar alastairalastair[project] authors = [{ name = "xarray Developers", email = "xarray@googlegroups.com" }] classifiers = [ "Development Status :: 5 - Production/Stable", "Operating System :: OS Independent", "Intended Audience :: Science/Research", "Programming Language :: Python", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", "Topic :: Scientific/Engineering", ] description = "N-D labeled arrays and datasets in Python" dynamic = ["version"] license = "Apache-2.0" name = "xarray" readme = "README.md" requires-python = ">=3.11" dependencies = ["numpy>=1.26", "packaging>=24.1", 
"pandas>=2.2"] # We don't encode minimum requirements here (though if we can write a script to # generate the text from `min_deps_check.py`, that's welcome...). We do add # `numba>=0.54` here because of https://github.com/astral-sh/uv/issues/7881; # note that it's not a direct dependency of xarray. [project.optional-dependencies] accel = [ "scipy>=1.13", "bottleneck", "numbagg>=0.8", "numba>=0.62", # numba 0.62 added support for numpy 2.3 "flox>=0.9", "opt_einsum", ] complete = ["xarray[accel,etc,io,parallel,viz]"] io = [ "netCDF4>=1.6.0", "h5netcdf>=1.4.0", "pydap", "scipy>=1.13", "zarr>=2.18", "fsspec", "cftime", "pooch", ] etc = ["sparse>=0.15"] parallel = ["dask[complete]"] viz = ["cartopy>=0.23", "matplotlib>=3.8", "nc-time-axis", "seaborn"] types = [ "pandas-stubs", "scipy-stubs", "types-PyYAML", "types-Pygments", "types-colorama", "types-decorator", "types-defusedxml", "types-docutils", "types-networkx", "types-pexpect", "types-psutil", "types-pycurl", "types-openpyxl", "types-python-dateutil", "types-pytz", "types-requests", "types-setuptools", ] [dependency-groups] dev = [ "hypothesis", "jinja2", "mypy==1.18.1", "pre-commit", "pytest", "pytest-cov", "pytest-env", "pytest-mypy-plugins", "pytest-timeout", "pytest-xdist", "pytest-asyncio", "ruff>=0.8.0", "sphinx", "sphinx_autosummary_accessors", "xarray[complete,types]", ] [project.urls] Documentation = "https://docs.xarray.dev" SciPy2015-talk = "https://www.youtube.com/watch?v=X0pAhJgySxk" homepage = "https://xarray.dev/" issue-tracker = "https://github.com/pydata/xarray/issues" source-code = "https://github.com/pydata/xarray" [project.entry-points."xarray.chunkmanagers"] dask = "xarray.namedarray.daskmanager:DaskManager" [build-system] build-backend = "setuptools.build_meta" requires = ["setuptools>=77.0.3", "setuptools-scm>=8"] [tool.setuptools.packages.find] include = ["xarray*"] [tool.setuptools_scm] fallback_version = "9999" [tool.coverage.run] omit = [ "*/xarray/tests/*", "*/xarray/compat/dask_array_compat.py", "*/xarray/compat/npcompat.py", "*/xarray/compat/pdcompat.py", "*/xarray/namedarray/pycompat.py", "*/xarray/core/types.py", ] source = ["xarray"] [tool.coverage.report] exclude_lines = ["pragma: no cover", "if TYPE_CHECKING"] [tool.mypy] enable_error_code = ["ignore-without-code", "redundant-self", "redundant-expr"] exclude = ['build', 'xarray/util/generate_.*\.py'] files = "xarray" show_error_context = true warn_redundant_casts = true warn_unused_configs = true warn_unused_ignores = true # Much of the numerical computing stack doesn't have type annotations yet. [[tool.mypy.overrides]] ignore_missing_imports = true module = [ "affine.*", "bottleneck.*", "cartopy.*", "cf_units.*", "cfgrib.*", "cftime.*", "cloudpickle.*", "cubed.*", "cupy.*", "fsspec.*", "h5netcdf.*", "h5py.*", "iris.*", "mpl_toolkits.*", "nc_time_axis.*", "netCDF4.*", "netcdftime.*", "numcodecs.*", "opt_einsum.*", "pint.*", "pooch.*", "pyarrow.*", "pydap.*", "scipy.*", "seaborn.*", "setuptools", "sparse.*", "toolz.*", "zarr.*", "numpy.exceptions.*", # remove once support for `numpy<2.0` has been dropped "array_api_strict.*", ] # Gradually we want to add more modules to this list, ratcheting up our total # coverage. Once a module is here, functions are checked by mypy regardless of # whether they have type annotations. It would be especially useful to have test # files listed here, because without them being checked, we don't have a great # way of testing our annotations. 
[[tool.mypy.overrides]] check_untyped_defs = true module = [ "xarray.core.accessor_dt", "xarray.core.accessor_str", "xarray.structure.alignment", "xarray.computation.*", "xarray.indexes.*", "xarray.tests.*", ] # Use strict = true whenever namedarray has become standalone. In the meantime # don't forget to add all new files related to namedarray here: # ref: https://mypy.readthedocs.io/en/stable/existing_code.html#introduce-stricter-options [[tool.mypy.overrides]] # Start off with these warn_unused_ignores = true # Getting these passing should be easy strict_concatenate = true strict_equality = true # Strongly recommend enabling this one as soon as you can check_untyped_defs = true # These shouldn't be too much additional work, but may be tricky to # get passing if you use a lot of untyped libraries disallow_any_generics = true disallow_subclassing_any = true disallow_untyped_decorators = true # These next few are various gradations of forcing use of type annotations disallow_incomplete_defs = true disallow_untyped_calls = true disallow_untyped_defs = true # This one isn't too hard to get passing, but return on investment is lower no_implicit_reexport = true # This one can be tricky to get passing if you use a lot of untyped libraries warn_return_any = true module = ["xarray.namedarray.*", "xarray.tests.test_namedarray"] # We disable pyright here for now, since including it means that all errors show # up in devs' VS Code, which then makes it more difficult to work with actual # errors. It overrides local VS Code settings so isn't escapable. # [tool.pyright] # defineConstant = {DEBUG = true} # # Enabling this means that developers who have disabled the warning locally — # # because not all dependencies are installable — are overridden # # reportMissingImports = true # reportMissingTypeStubs = false [tool.ruff] extend-exclude = ["doc", "_typed_ops.pyi"] [tool.ruff.lint] extend-select = [ "YTT", # flake8-2020 "B", # flake8-bugbear "C4", # flake8-comprehensions "ISC", # flake8-implicit-str-concat "PIE", # flake8-pie "TID", # flake8-tidy-imports (absolute imports) "PYI", # flake8-pyi "SIM", # flake8-simplify "FLY", # flynt "I", # isort "PERF", # Perflint "W", # pycodestyle warnings "PGH", # pygrep-hooks "PLC", # Pylint Convention "PLE", # Pylint Errors "PLR", # Pylint Refactor "PLW", # Pylint Warnings "UP", # pyupgrade "FURB", # refurb "RUF", ] extend-safe-fixes = [ "TID252", # absolute imports ] ignore = [ "C40", # unnecessary generator, comprehension, or literal "PIE790", # unnecessary pass statement "PYI019", # use `Self` instead of custom TypeVar "PYI041", # use `float` instead of `int | float` "SIM102", # use a single `if` statement instead of nested `if` statements "SIM108", # use ternary operator instead of `if`-`else`-block "SIM117", # use a single `with` statement instead of nested `with` statements "SIM118", # use `key in dict` instead of `key in dict.keys()` "SIM300", # yoda condition detected "PERF203", # try-except within a loop incurs performance overhead "E402", # module level import not at top of file "E731", # do not assign a lambda expression, use a def "PLC0415", # `import` should be at the top-level of a file "PLC0206", # extracting value from dictionary without calling `.items()` "PLR091", # too many arguments / branches / statements "PLR2004", # magic value used in comparison "PLW0603", # using the global statement to update is discouraged "PLW0642", # reassigned `self` variable in instance method "PLW1641", # object does not implement `__hash__` method "PLW2901", # `for` 
loop variable overwritten by assignment target "UP007", # use X | Y for type annotations "FURB105", # unnecessary empty string passed to `print` "RUF001", # string contains ambiguous unicode character "RUF002", # docstring contains ambiguous acute accent unicode character "RUF003", # comment contains ambiguous no-break space unicode character "RUF005", # consider unpacking operator instead of concatenation "RUF012", # mutable class attributes ] [tool.ruff.lint.per-file-ignores] # don't enforce absolute imports "asv_bench/**" = ["TID252"] # comparison with itself in tests "xarray/tests/**" = ["PLR0124"] # looks like ruff bugs "xarray/core/_typed_ops.py" = ["PYI034"] "xarray/namedarray/_typing.py" = ["PYI018", "PYI046"] [tool.ruff.lint.isort] known-first-party = ["xarray"] [tool.ruff.lint.flake8-tidy-imports] # Disallow all relative imports. ban-relative-imports = "all" [tool.ruff.lint.flake8-tidy-imports.banned-api] "pandas.api.types.is_extension_array_dtype".msg = "Use xarray.core.utils.is_allowed_extension_array{_dtype} instead. Only use the banend API if the incoming data has already been sanitized by xarray" [tool.pytest.ini_options] asyncio_default_fixture_loop_scope = "function" addopts = [ "--strict-config", "--strict-markers", "--mypy-only-local-stub", "--mypy-pyproject-toml-file=pyproject.toml", ] # We want to forbid warnings from within xarray in our tests — instead we should # fix our own code, or mark the test itself as expecting a warning. So this: # - Converts any warning from xarray into an error # - Allows some warnings ("default") which the test suite currently raises, # since it wasn't practical to fix them all before merging this config. The # warnings are reported in CI (since it uses `default`, not `ignore`). # # Over time, we can remove these rules allowing warnings. A valued contribution # is removing a line, seeing what breaks, and then fixing the library code or # tests so that it doesn't raise warnings. # # There are some instance where we'll want to add to these rules: # - While we only raise errors on warnings from within xarray, a dependency can # raise a warning with a stacklevel such that it's interpreted to be raised # from xarray and this will mistakenly convert it to an error. If that # happens, please feel free to add a rule switching it to `default` here, and # disabling the error. # - If these settings get in the way of making progress, it's also acceptable to # temporarily add additional `default` rules. # - But we should only add `ignore` rules if we're confident that we'll never # need to address a warning. 
filterwarnings = [ "error:::xarray.*", # Zarr 2 V3 implementation "default:Zarr-Python is not in alignment with the final V3 specification", # TODO: this is raised for vlen-utf8, consolidated metadata, U1 dtype "default:is currently not part .* the Zarr version 3 specification.", # Zarr V3 data type specifications warnings - very repetitive "ignore:The data type .* does not have a Zarr V3 specification", "ignore:Consolidated metadata is currently not part", # TODO: remove once we know how to deal with a changed signature in protocols "default:::xarray.tests.test_strategies", ] log_cli_level = "INFO" markers = [ "flaky: flaky tests", "mypy: type annotation tests", "network: tests requiring a network connection", "slow: slow tests", "slow_hypothesis: slow hypothesis tests", ] minversion = "7" python_files = ["test_*.py"] testpaths = ["xarray/tests", "properties"] [tool.aliases] test = "pytest" [tool.repo-review] ignore = [ "PP308", # This option creates a large amount of log lines. ] [tool.typos] [tool.typos.default] extend-ignore-identifiers-re = [ # Variable names "nd_.*", ".*_nd", "ba_.*", ".*_ba", "ser_.*", ".*_ser", # Function/class names "NDArray.*", ".*NDArray.*", ] [tool.typos.default.extend-words] # NumPy function names arange = "arange" ond = "ond" aso = "aso" # Technical terms nd = "nd" nin = "nin" nclusive = "nclusive" # part of "inclusive" in error messages # Variable names ba = "ba" ser = "ser" fo = "fo" iy = "iy" vart = "vart" ede = "ede" # Organization/Institution names Stichting = "Stichting" Mathematisch = "Mathematisch" # People's names Soler = "Soler" Bruning = "Bruning" Tung = "Tung" Claus = "Claus" Celles = "Celles" slowy = "slowy" Commun = "Commun" # Tests Ome = "Ome" SUR = "SUR" Tio = "Tio" Ono = "Ono" abl = "abl" # Technical terms splitted = "splitted" childs = "childs" cutted = "cutted" LOCA = "LOCA" SLEP = "SLEP" [tool.typos.type.jupyter] extend-ignore-re = ["\"id\": \".*\""] python-xarray-2026.01.0/DATATREE_MIGRATION_GUIDE.md0000664000175000017500000001400115136607163021100 0ustar alastairalastair# Migration guide for users of `xarray-contrib/datatree` _15th October 2024_ This guide is for previous users of the prototype `datatree.DataTree` class in the `xarray-contrib/datatree` repository. That repository has now been archived, and will not be maintained. This guide is intended to help smooth your transition to using the new, updated `xarray.DataTree` class. > [!IMPORTANT] > There are breaking changes! You should not expect that code written with `xarray-contrib/datatree` will work without any modifications. At the absolute minimum you will need to change the top-level import statement, but there are other changes too. We have made various changes compared to the prototype version. These can be split into three categories: data model changes, which affect the hierarchical structure itself; integration with xarray's IO backends; and minor API changes, which mostly consist of renaming methods to be more self-consistent. ### Data model changes The most important changes made are to the data model of `DataTree`. Whilst previously data in different nodes was unrelated and therefore unconstrained, now trees have "internal alignment" - meaning that dimensions and indexes in child nodes must exactly align with those in their parents.
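As a rough illustration of what internal alignment means in practice, here is a minimal sketch (the group and variable names below are hypothetical, not taken from this guide):

```python
import numpy as np
import xarray as xr

# The parent (root) group defines an indexed "time" coordinate.
root = xr.Dataset(coords={"time": [1, 2, 3]})

# A child whose "time" dimension has the same size, and which carries no
# conflicting index of its own, aligns with the parent and inherits its
# "time" coordinate.
child = xr.Dataset({"temperature": ("time", np.zeros(3))})

tree = xr.DataTree.from_dict({"/": root, "/weather": child})
print(tree["weather"].dataset)

# By contrast, a child carrying a conflicting "time" index, e.g.
# xr.Dataset(coords={"time": [10, 20, 30]}), would cause the
# DataTree.from_dict call above to raise an alignment error.
```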
These alignment checks happen at tree construction time, meaning there are some netCDF4 files and zarr stores that could previously be opened as `datatree.DataTree` objects using `datatree.open_datatree`, but now cannot be opened as `xr.DataTree` objects using `xr.open_datatree`. For these cases we added a new opener function `xr.open_groups`, which returns a `dict[str, Dataset]`. This is intended as a fallback for tricky cases, where the idea is that you can still open the entire contents of the file using `open_groups`, edit the `Dataset` objects, then construct a valid tree from the edited dictionary using `DataTree.from_dict`. The alignment checks allowed us to add "Coordinate Inheritance", a much-requested feature where indexed coordinate variables are now "inherited" down to child nodes. This allows you to define common coordinates in a parent group that are then automatically available on every child node. The distinction between a locally-defined coordinate variable and an inherited coordinate that was defined on a parent node is reflected in the `DataTree.__repr__`. Generally, if you prefer not to have these variables be inherited, you can get behaviour more similar to that of the old `datatree` package by removing indexes from coordinates, as this prevents inheritance. Tree structure checks between multiple trees (i.e., `DataTree.isomorphic`) and pairing of nodes in arithmetic have also changed. Nodes are now matched (with `xarray.group_subtrees`) based on their relative paths, without regard to the order in which child nodes are defined. For further documentation see the page in the user guide on Hierarchical Data. ### Integrated backends Previously `datatree.open_datatree` used a different codepath from `xarray.open_dataset`, and was hard-coded to only support opening netCDF files and Zarr stores. Now xarray's backend entrypoint system has been generalized to include `open_datatree` and the new `open_groups`. This means we can now extend other xarray backends to support `open_datatree`! If you are the maintainer of an xarray backend we encourage you to add support for `open_datatree` and `open_groups`! Additionally: - A `group` kwarg has been added to `open_datatree` for choosing which group in the file should become the root group of the created tree. - Various performance improvements have been made, which should help when opening netCDF files and Zarr stores with large numbers of groups. - We anticipate further performance improvements being possible for datatree IO. ### API changes A number of other API changes have been made, which should only require minor modifications to your code: - The top-level import has changed, from `from datatree import DataTree, open_datatree` to `from xarray import DataTree, open_datatree`. Alternatively you can now just use the `import xarray as xr` namespace convention for everything datatree-related. - The `DataTree.ds` property has been changed to `DataTree.dataset`, though `DataTree.ds` remains as an alias for `DataTree.dataset`. - Similarly the `ds` kwarg in the `DataTree.__init__` constructor has been replaced by `dataset`, i.e. use `DataTree(dataset=...)` instead of `DataTree(ds=...)`. - The method `DataTree.to_dataset()` still exists but now has different options for controlling which variables are present on the resulting `Dataset`, e.g. `inherit=True/False`. - `DataTree.copy()` also has a new `inherit` keyword argument for controlling whether or not coordinates defined on parents are copied (only relevant when copying a non-root node).
- The `DataTree.parent` property is now read-only. To assign an ancestral relationship directly you must instead use the `.children` property on the parent node, which remains settable. - Similarly the `parent` kwarg has been removed from the `DataTree.__init__` constructor. - DataTree objects passed to the `children` kwarg in `DataTree.__init__` are now shallow-copied. - `DataTree.map_over_subtree` has been renamed to `DataTree.map_over_datasets`, and changed to no longer work like a decorator. Instead you use it to apply the function and arguments directly, more like how `xarray.apply_ufunc` works. - `DataTree.as_array` has been replaced by `DataTree.to_dataarray`. - A number of methods which were not well tested have been (temporarily) disabled. In general we have tried to only keep things that are known to work, with the plan to increase API surface incrementally after release. ## Thank you! Thank you for trying out `xarray-contrib/datatree`! We welcome contributions of any kind, including good ideas that never quite made it into the original datatree repository. Please also let us know if we have forgotten to mention a change that should have been listed in this guide. Sincerely, the datatree team: Tom Nicholas, Owen Littlejohns, Matt Savoie, Eni Awowale, Alfonso Ladino, Justus Magin, Stephan Hoyer python-xarray-2026.01.0/.github/0000775000175000017500000000000015136607163016403 5ustar alastairalastairpython-xarray-2026.01.0/.github/dependabot.yml0000664000175000017500000000033115136607163021230 0ustar alastairalastairversion: 2 updates: - package-ecosystem: "github-actions" directory: "/" schedule: # Check for updates once a week interval: "weekly" groups: actions: patterns: - "*" python-xarray-2026.01.0/.github/ISSUE_TEMPLATE/0000775000175000017500000000000015136607163020566 5ustar alastairalastairpython-xarray-2026.01.0/.github/ISSUE_TEMPLATE/config.yml0000664000175000017500000000127615136607163022564 0ustar alastairalastairblank_issues_enabled: false contact_links: - name: ❓ Usage question url: https://github.com/pydata/xarray/discussions about: | Ask questions and discuss with other community members here. If you have a question like "How do I concatenate a list of datasets?" then please include a self-contained reproducible example if possible. - name: 🗺️ Raster analysis usage question url: https://github.com/corteva/rioxarray/discussions about: | If you are using the rioxarray extension (engine='rasterio'), or have questions about raster analysis such as geospatial formats, coordinate reprojection, etc., please use the rioxarray discussion forum. python-xarray-2026.01.0/.github/ISSUE_TEMPLATE/newfeature.yml0000664000175000017500000000220615136607163023456 0ustar alastairalastairname: 💡 Feature Request description: Suggest an idea for xarray labels: [enhancement] body: - type: textarea id: description attributes: label: Is your feature request related to a problem? description: | Please do a quick search of existing issues to make sure that this has not been asked before. Please provide a clear and concise description of what the problem is. Ex. I'm always frustrated when [...] validations: required: true - type: textarea id: solution attributes: label: Describe the solution you'd like description: | A clear and concise description of what you want to happen. - type: textarea id: alternatives attributes: label: Describe alternatives you've considered description: | A clear and concise description of any alternative solutions or features you've considered. 
validations: required: false - type: textarea id: additional-context attributes: label: Additional context description: | Add any other context about the feature request here. validations: required: false python-xarray-2026.01.0/.github/ISSUE_TEMPLATE/bugreport.yml0000664000175000017500000000725315136607163023331 0ustar alastairalastairname: 🐛 Bug Report description: File a bug report to help us improve labels: [bug, "needs triage"] body: - type: textarea id: what-happened attributes: label: What happened? description: | Thanks for reporting a bug! Please describe what you were trying to get done. Tell us what happened, what went wrong. validations: required: true - type: textarea id: what-did-you-expect-to-happen attributes: label: What did you expect to happen? description: | Describe what you expected to happen. validations: required: false - type: textarea id: sample-code attributes: label: Minimal Complete Verifiable Example description: | Minimal, self-contained copy-pastable example that demonstrates the issue. Consider listing additional or specific dependencies in [inline script metadata](https://packaging.python.org/en/latest/specifications/inline-script-metadata/#example) so that calling `uv run issue.py` shows the issue when copied into `issue.py`. (not strictly required) This will be automatically formatted into code, so no need for markdown backticks. render: Python value: | # /// script # requires-python = ">=3.11" # dependencies = [ # "xarray[complete]@git+https://github.com/pydata/xarray.git@main", # ] # /// # # This script automatically imports the development branch of xarray to check for issues. # Please delete this header if you have _not_ tested this script with `uv run`! import xarray as xr xr.show_versions() # your reproducer code ... - type: textarea id: reproduce attributes: label: Steps to reproduce description: validations: required: false - type: checkboxes id: mvce-checkboxes attributes: label: MVCE confirmation description: | Please confirm that the bug report is in an excellent state, so we can understand & fix it quickly & efficiently. For more details, check out: - [Minimal Complete Verifiable Examples](https://stackoverflow.com/help/mcve) - [Craft Minimal Bug Reports](https://matthewrocklin.com/minimal-bug-reports) options: - label: Minimal example — the example is as focused as reasonably possible to demonstrate the underlying issue in xarray. - label: Complete example — the example is self-contained, including all data and the text of any traceback. - label: Verifiable example — the example copy & pastes into an IPython prompt or [Binder notebook](https://mybinder.org/v2/gh/pydata/xarray/main?urlpath=lab/tree/doc/examples/blank_template.ipynb), returning the result. - label: New issue — a search of GitHub Issues suggests this is not a duplicate. - label: Recent environment — the issue occurs with the latest version of xarray and its dependencies. - type: textarea id: log-output attributes: label: Relevant log output description: Please copy and paste any relevant output. This will be automatically formatted into code, so no need for markdown backticks. render: Python - type: textarea id: extra attributes: label: Anything else we need to know? description: | Please describe any other information you want to share. - type: textarea id: show-versions attributes: label: Environment description: | Paste the output of `xr.show_versions()` between the `
<details>` tags, leaving an empty line following the opening tag. value: | <details> </details>
    validations: required: true python-xarray-2026.01.0/.github/ISSUE_TEMPLATE/misc.yml0000664000175000017500000000075215136607163022250 0ustar alastairalastairname: 📝 Issue description: General issue, that's not a bug report. labels: ["needs triage"] body: - type: markdown attributes: value: | Please describe your issue here. - type: textarea id: issue-description attributes: label: What is your issue? description: | Thank you for filing an issue! Please give us further information on how we can help you. placeholder: Please describe your issue. validations: required: true python-xarray-2026.01.0/.github/FUNDING.yml0000664000175000017500000000007715136607163020224 0ustar alastairalastairgithub: numfocus custom: https://numfocus.org/donate-to-xarray python-xarray-2026.01.0/.github/config.yml0000664000175000017500000000222015136607163020367 0ustar alastairalastair# Comment to be posted to on first time issues newIssueWelcomeComment: > Thanks for opening your first issue here at xarray! Be sure to follow the issue template! If you have an idea for a solution, we would really welcome a Pull Request with proposed changes. See the [Contributing Guide](https://docs.xarray.dev/en/latest/contributing.html) for more. It may take us a while to respond here, but we really value your contribution. Contributors like you help make xarray better. Thank you! # Comment to be posted to on PRs from first time contributors in your repository newPRWelcomeComment: > Thank you for opening this pull request! It may take us a few days to respond here, so thank you for being patient. If you have questions, some answers may be found in our [contributing guidelines](https://docs.xarray.dev/en/stable/contributing.html). # Comment to be posted to on pull requests merged by a first time user firstPRMergeComment: > Congratulations on completing your first pull request! Welcome to Xarray! We are proud of you, and hope to see you again! ![celebration gif](https://media.giphy.com/media/umYMU8G2ixG5mJBDo5/giphy.gif) python-xarray-2026.01.0/.github/release.yml0000664000175000017500000000012615136607163020545 0ustar alastairalastairchangelog: exclude: authors: - dependabot[bot] - pre-commit-ci[bot] python-xarray-2026.01.0/.github/stale.yml0000664000175000017500000000412615136607163020241 0ustar alastairalastair# Configuration for probot-stale - https://github.com/probot/stale # Number of days of inactivity before an Issue or Pull Request becomes stale daysUntilStale: 600 # start with a large number and reduce shortly # Number of days of inactivity before an Issue or Pull Request with the stale label is closed. # Set to false to disable. If disabled, issues still need to be closed manually, but will remain marked as stale. daysUntilClose: 30 # Issues or Pull Requests with these labels will never be considered stale. Set to `[]` to disable exemptLabels: - pinned - security - "[Status] Maybe Later" # Set to true to ignore issues in a project (defaults to false) exemptProjects: true # Set to true to ignore issues in a milestone (defaults to false) exemptMilestones: true # Set to true to ignore issues with an assignee (defaults to false) exemptAssignees: true # Label to use when marking as stale staleLabel: stale # Comment to post when marking as stale. 
Set to `false` to disable markComment: | In order to maintain a list of currently relevant issues, we mark issues as stale after a period of inactivity If this issue remains relevant, please comment here or remove the `stale` label; otherwise it will be marked as closed automatically closeComment: | The stalebot didn't hear anything for a while, so it closed this. Please reopen if this is still an issue. # Comment to post when removing the stale label. # unmarkComment: > # Your comment here. # Comment to post when closing a stale Issue or Pull Request. # closeComment: > # Your comment here. # Limit the number of actions per hour, from 1-30. Default is 30 limitPerRun: 2 # start with a small number # Limit to only `issues` or `pulls` # only: issues # Optionally, specify configuration settings that are specific to just 'issues' or 'pulls': # pulls: # daysUntilStale: 30 # markComment: > # This pull request has been automatically marked as stale because it has not had # recent activity. It will be closed if no further activity occurs. Thank you # for your contributions. # issues: # exemptLabels: # - confirmed python-xarray-2026.01.0/.github/workflows/0000775000175000017500000000000015136607163020440 5ustar alastairalastairpython-xarray-2026.01.0/.github/workflows/pypi-release.yaml0000664000175000017500000000600515136607163023724 0ustar alastairalastairname: Build and Upload xarray to PyPI on: release: types: - published push: tags: - "v*" pull_request: types: [opened, reopened, synchronize, labeled] workflow_dispatch: jobs: build-artifacts: runs-on: ubuntu-latest if: ${{ github.repository == 'pydata/xarray' && ( (contains(github.event.pull_request.labels.*.name, 'Release') && github.event_name == 'pull_request') || github.event_name == 'release' || github.event_name == 'workflow_dispatch' || startsWith(github.ref, 'refs/tags/v') ) }} steps: - uses: actions/checkout@v6 with: fetch-depth: 0 - uses: actions/setup-python@v6 name: Install Python with: python-version: "3.12" - name: Install dependencies run: | python -m pip install --upgrade pip python -m pip install build twine - name: Build tarball and wheels run: | git clean -xdf git restore -SW . 
python -m build - name: Check built artifacts run: | python -m twine check --strict dist/* pwd if [ -f dist/xarray-0.0.0.tar.gz ]; then echo "❌ INVALID VERSION NUMBER" exit 1 else echo "✅ Looks good" fi - uses: actions/upload-artifact@v6 with: name: releases path: dist test-built-dist: needs: build-artifacts runs-on: ubuntu-latest steps: - uses: actions/setup-python@v6 name: Install Python with: python-version: "3.12" - uses: actions/download-artifact@v7 with: name: releases path: dist - name: List contents of built dist run: | ls -ltrh ls -ltrh dist - name: Verify the built dist/wheel is valid run: | python -m pip install --upgrade pip python -m pip install dist/xarray*.whl python -m xarray.util.print_versions upload-to-test-pypi: needs: test-built-dist if: github.event_name == 'push' runs-on: ubuntu-latest environment: name: pypi url: https://test.pypi.org/p/xarray permissions: id-token: write steps: - uses: actions/download-artifact@v7 with: name: releases path: dist - name: Publish package to TestPyPI if: github.event_name == 'push' uses: pypa/gh-action-pypi-publish@v1.13.0 with: repository_url: https://test.pypi.org/legacy/ verbose: true upload-to-pypi: needs: test-built-dist if: github.event_name == 'release' runs-on: ubuntu-latest environment: name: pypi url: https://pypi.org/p/xarray permissions: id-token: write steps: - uses: actions/download-artifact@v7 with: name: releases path: dist - name: Publish package to PyPI uses: pypa/gh-action-pypi-publish@v1.13.0 with: verbose: true python-xarray-2026.01.0/.github/workflows/ci.yaml0000664000175000017500000001361715136607163021727 0ustar alastairalastairname: CI on: push: branches: - "main" pull_request: branches: - "main" workflow_dispatch: # allows you to trigger manually concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true env: FORCE_COLOR: 3 PIXI_VERSION: "v0.63.2" jobs: detect-ci-trigger: name: detect ci trigger runs-on: ubuntu-latest if: | github.repository == 'pydata/xarray' && (github.event_name == 'push' || github.event_name == 'pull_request') && !contains(github.event.pull_request.labels.*.name, 'skip-ci') outputs: triggered: ${{ steps.detect-trigger.outputs.trigger-found }} steps: - uses: actions/checkout@v6 with: fetch-depth: 2 - uses: xarray-contrib/ci-trigger@v1 id: detect-trigger with: keyword: "[skip-ci]" cache-pixi-lock: uses: ./.github/workflows/cache-pixi-lock.yml with: pixi-version: "v0.63.2" # keep in sync with env var above test: name: "${{ matrix.os }} | ${{ matrix.pixi-env }}${{ matrix.pytest-addopts && format(' ({0})', matrix.pytest-addopts) || '' }}" runs-on: ${{ matrix.os }} needs: [detect-ci-trigger, cache-pixi-lock] if: needs.detect-ci-trigger.outputs.triggered == 'false' defaults: run: shell: bash -l {0} strategy: fail-fast: false matrix: os: ["ubuntu-latest", "macos-latest", "windows-latest"] # Bookend python versions pixi-env: ["test-py311", "test-py313"] pytest-addopts: [""] include: # Minimum python version: - pixi-env: "test-py311-bare-minimum" os: ubuntu-latest - pixi-env: "test-py311-bare-min-and-scipy" os: ubuntu-latest - pixi-env: "test-py311-min-versions" os: ubuntu-latest # Latest python version: - pixi-env: "test-py313-no-numba" os: ubuntu-latest - pixi-env: "test-py313-no-dask" os: ubuntu-latest - pixi-env: "test-py313" pytest-addopts: "flaky" os: ubuntu-latest # The mypy tests must be executed using only 1 process in order to guarantee # predictable mypy output messages for comparison to expectations. 
- pixi-env: "test-py311-with-typing" pytest-addopts: "mypy" numprocesses: 1 os: ubuntu-latest - pixi-env: "test-py313-with-typing" numprocesses: 1 os: ubuntu-latest steps: - uses: actions/checkout@v6 with: fetch-depth: 0 # Fetch all history for all branches and tags. - name: Restore cached pixi lockfile uses: actions/cache/restore@v5 id: restore-pixi-lock with: enableCrossOsArchive: true path: | pixi.lock key: ${{ needs.cache-pixi-lock.outputs.cache-id }} - uses: prefix-dev/setup-pixi@v0.9.3 with: pixi-version: ${{ env.PIXI_VERSION }} cache: true environments: ${{ matrix.pixi-env }} cache-write: ${{ github.event_name == 'push' && github.ref_name == 'main' }} - name: Set environment variables run: | echo "TODAY=$(date +'%Y-%m-%d')" >> $GITHUB_ENV echo "PYTHON_VERSION=$(pixi run -e ${{ matrix.pixi-env }} -- python --version | cut -d' ' -f2 | cut -d. -f1,2)" >> $GITHUB_ENV if [[ "${{ matrix.pytest-addopts }}" != "" ]] ; then if [[ "${{ matrix.pytest-addopts }}" == "flaky" ]] ; then echo "PYTEST_ADDOPTS=-m 'flaky or network' --run-flaky --run-network-tests -W default" >> $GITHUB_ENV elif [[ "${{ matrix.pytest-addopts }}" == "mypy" ]] ; then echo "PYTEST_ADDOPTS=-n 1 -m 'mypy' --run-mypy -W default" >> $GITHUB_ENV fi if [[ "${{ matrix.pixi-env }}" == "min-all-deps" ]] ; then # Don't raise on warnings echo "PYTEST_ADDOPTS=-W default" >> $GITHUB_ENV fi fi # We only want to install this on one run, because otherwise we'll have # duplicate annotations. - name: Install error reporter if: ${{ matrix.os }} == 'ubuntu-latest' and ${{ matrix.pixi-env}} == 'test' run: | pixi add --pypi pytest-github-actions-annotate-failures - name: Version info run: | pixi run -e ${{ matrix.pixi-env }} -- python xarray/util/print_versions.py - name: Import xarray run: | pixi run -e ${{ matrix.pixi-env }} -- python -c "import xarray" - name: Restore cached hypothesis directory uses: actions/cache@v5 with: path: .hypothesis/ key: cache-hypothesis enableCrossOsArchive: true save-always: true - name: Run tests run: | pixi run -e ${{ matrix.pixi-env }} -- python -m pytest -n ${{ matrix.numprocesses || 4 }} \ --timeout 180 \ --cov=xarray \ --cov-report=xml \ --junitxml=pytest.xml - name: Upload test results if: always() uses: actions/upload-artifact@v6 with: name: Test results for OS ${{ runner.os }} pixi-env ${{ matrix.pixi-env }} pytest-addopts ${{ matrix.pytest-addopts }} path: pytest.xml - name: Upload code coverage to Codecov uses: codecov/codecov-action@v5.5.2 env: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} with: file: ./coverage.xml flags: unittests env_vars: RUNNER_OS,PYTHON_VERSION name: codecov-umbrella fail_ci_if_error: false event_file: name: "Event File" runs-on: ubuntu-latest if: github.repository == 'pydata/xarray' steps: - name: Upload uses: actions/upload-artifact@v6 with: name: Event File path: ${{ github.event_path }} python-xarray-2026.01.0/.github/workflows/upstream-dev-ci.yaml0000664000175000017500000001163715136607163024341 0ustar alastairalastairname: CI Upstream on: push: branches: - main pull_request: branches: - main types: [opened, reopened, synchronize, labeled] schedule: - cron: "0 0 * * *" # Daily “At 00:00” UTC workflow_dispatch: # allows you to trigger the workflow run manually concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true env: FORCE_COLOR: 3 PIXI_VERSION: "v0.63.2" jobs: detect-ci-trigger: name: detect upstream-dev ci trigger runs-on: ubuntu-latest if: | github.repository == 'pydata/xarray' && (github.event_name == 'push' || github.event_name == 
'pull_request') && !contains(github.event.pull_request.labels.*.name, 'skip-ci') outputs: triggered: ${{ steps.detect-trigger.outputs.trigger-found }} steps: - uses: actions/checkout@v6 with: fetch-depth: 2 - uses: xarray-contrib/ci-trigger@v1 id: detect-trigger with: keyword: "[test-upstream]" cache-pixi-lock: uses: ./.github/workflows/cache-pixi-lock.yml with: pixi-version: "v0.63.2" # keep in sync with env var above upstream-dev: name: upstream-dev runs-on: ubuntu-latest needs: [detect-ci-trigger, cache-pixi-lock] if: | always() && ( (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') || needs.detect-ci-trigger.outputs.triggered == 'true' || contains( github.event.pull_request.labels.*.name, 'run-upstream') ) defaults: run: shell: bash -l {0} strategy: fail-fast: false matrix: pixi-env: ["test-nightly"] steps: - uses: actions/checkout@v6 with: fetch-depth: 0 # Fetch all history for all branches and tags. - name: Restore cached pixi lockfile uses: actions/cache/restore@v5 id: restore-pixi-lock with: enableCrossOsArchive: true path: | pixi.lock key: ${{ needs.cache-pixi-lock.outputs.cache-id }} - uses: prefix-dev/setup-pixi@v0.9.3 with: pixi-version: ${{ env.PIXI_VERSION }} cache: true environments: ${{ matrix.pixi-env }} cache-write: ${{ github.event_name == 'push' && github.ref_name == 'main' }} - name: Version info run: | pixi run -e ${{matrix.pixi-env}} -- python xarray/util/print_versions.py - name: Import xarray run: | pixi run -e ${{matrix.pixi-env}} -- python -c 'import xarray' - name: Run Tests if: success() id: status run: | pixi run -e ${{matrix.pixi-env}} -- python -m pytest --timeout=60 -rf -nauto \ --report-log output-${{ matrix.pixi-env }}-log.jsonl - name: Generate and publish the report if: | failure() && steps.status.outcome == 'failure' && github.event_name == 'schedule' && github.repository_owner == 'pydata' uses: scientific-python/issue-from-pytest-log-action@v1 with: log-path: output-${{ matrix.pixi-env }}-log.jsonl mypy-upstream-dev: name: mypy-upstream-dev runs-on: ubuntu-latest needs: [detect-ci-trigger, cache-pixi-lock] if: | always() && ( contains( github.event.pull_request.labels.*.name, 'run-upstream') ) defaults: run: shell: bash -l {0} strategy: fail-fast: false matrix: pixi-env: ["test-nightly"] steps: - uses: actions/checkout@v6 with: fetch-depth: 0 # Fetch all history for all branches and tags. - name: Restore cached pixi lockfile uses: actions/cache/restore@v5 id: restore-pixi-lock with: enableCrossOsArchive: true path: | pixi.lock key: ${{ needs.cache-pixi-lock.outputs.cache-id }} - uses: prefix-dev/setup-pixi@v0.9.3 with: pixi-version: ${{ env.PIXI_VERSION }} cache: true environments: ${{ matrix.pixi-env }} cache-write: ${{ github.event_name == 'push' && github.ref_name == 'main' }} - name: set environment variables run: | echo "TODAY=$(date +'%Y-%m-%d')" >> $GITHUB_ENV echo "PYTHON_VERSION=$(pixi run -e ${{matrix.pixi-env}} -- python --version | cut -d' ' -f2 | cut -d. 
-f1,2)" >> $GITHUB_ENV - name: Version info run: | pixi run -e ${{matrix.pixi-env}} -- python xarray/util/print_versions.py - name: Run mypy run: | pixi run -e ${{matrix.pixi-env}} -- python -m mypy --install-types --non-interactive --cobertura-xml-report mypy_report - name: Upload mypy coverage to Codecov uses: codecov/codecov-action@v5.5.2 with: file: mypy_report/cobertura.xml flags: mypy env_vars: PYTHON_VERSION name: codecov-umbrella fail_ci_if_error: false python-xarray-2026.01.0/.github/workflows/hypothesis.yaml0000664000175000017500000001000715136607163023521 0ustar alastairalastairname: Slow Hypothesis CI on: push: branches: - "main" pull_request: branches: - "main" types: [opened, reopened, synchronize, labeled] schedule: - cron: "0 0 * * *" # Daily “At 00:00” UTC workflow_dispatch: # allows you to trigger manually env: FORCE_COLOR: 3 PIXI_VERSION: "v0.63.2" jobs: detect-ci-trigger: name: detect ci trigger runs-on: ubuntu-latest if: | github.repository == 'pydata/xarray' && (github.event_name == 'push' || github.event_name == 'pull_request' || github.event_name == 'schedule') && !contains(github.event.pull_request.labels.*.name, 'skip-ci') outputs: triggered: ${{ steps.detect-trigger.outputs.trigger-found }} steps: - uses: actions/checkout@v6 with: fetch-depth: 2 - uses: xarray-contrib/ci-trigger@v1 id: detect-trigger with: keyword: "[skip-ci]" cache-pixi-lock: uses: ./.github/workflows/cache-pixi-lock.yml with: pixi-version: "v0.63.2" # keep in sync with env var above hypothesis: name: Slow Hypothesis Tests runs-on: "ubuntu-latest" needs: [detect-ci-trigger, cache-pixi-lock] if: | always() && ( needs.detect-ci-trigger.outputs.triggered == 'false' && ( (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') || contains( github.event.pull_request.labels.*.name, 'run-slow-hypothesis')) ) defaults: run: shell: bash -l {0} env: PIXI_ENV: "test-py313" steps: - uses: actions/checkout@v6 with: fetch-depth: 0 # Fetch all history for all branches and tags. - name: Restore cached pixi lockfile uses: actions/cache/restore@v5 id: restore-pixi-lock with: enableCrossOsArchive: true path: | pixi.lock key: ${{ needs.cache-pixi-lock.outputs.cache-id }} - uses: prefix-dev/setup-pixi@v0.9.3 with: pixi-version: ${{ env.PIXI_VERSION }} cache: true environments: ${{ env.PIXI_ENV }} cache-write: ${{ github.event_name == 'push' && github.ref_name == 'main' }} - name: set environment variables run: | echo "TODAY=$(date +'%Y-%m-%d')" >> $GITHUB_ENV echo "PYTHON_VERSION=$(pixi run -e ${{env.PIXI_ENV}} python --version | cut -d' ' -f2 | cut -d. -f1,2)" >> $GITHUB_ENV - name: Version info run: | pixi run -e ${{ env.PIXI_ENV }} python xarray/util/print_versions.py # https://github.com/actions/cache/blob/main/tips-and-workarounds.md#update-a-cache - name: Restore cached hypothesis directory id: restore-hypothesis-cache uses: actions/cache/restore@v5 with: path: .hypothesis/ key: cache-hypothesis-${{ runner.os }}-${{ github.run_id }} restore-keys: | cache-hypothesis- - name: Run slow Hypothesis tests if: success() id: status run: | pixi run -e ${{ env.PIXI_ENV }} python -m pytest --hypothesis-show-statistics --run-slow-hypothesis properties/*.py \ --report-log output-${{ env.PIXI_ENV }}-log.jsonl # explicitly save the cache so it gets updated, also do this even if it fails. 
- name: Save cached hypothesis directory id: save-hypothesis-cache if: always() && steps.status.outcome != 'skipped' uses: actions/cache/save@v5 with: path: .hypothesis/ key: cache-hypothesis-${{ runner.os }}-${{ github.run_id }} - name: Generate and publish the report if: | failure() && steps.status.outcome == 'failure' && github.event_name == 'schedule' && github.repository_owner == 'pydata' uses: scientific-python/issue-from-pytest-log-action@v1 with: log-path: output-${{ env.PIXI_ENV }}-log.jsonl issue-title: "Nightly Hypothesis tests failed" issue-label: "topic-hypothesis" python-xarray-2026.01.0/.github/workflows/ci-additional.yaml0000664000175000017500000002162515136607163024033 0ustar alastairalastairname: CI Additional on: push: branches: - "main" pull_request: branches: - "main" workflow_dispatch: # allows you to trigger manually concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true env: FORCE_COLOR: 3 PIXI_VERSION: "v0.63.2" jobs: detect-ci-trigger: name: detect ci trigger runs-on: ubuntu-latest if: | github.repository == 'pydata/xarray' && (github.event_name == 'push' || github.event_name == 'pull_request') && !contains(github.event.pull_request.labels.*.name, 'skip-ci') outputs: triggered: ${{ steps.detect-trigger.outputs.trigger-found }} steps: - uses: actions/checkout@v6 with: fetch-depth: 2 - uses: xarray-contrib/ci-trigger@v1 id: detect-trigger with: keyword: "[skip-ci]" cache-pixi-lock: uses: ./.github/workflows/cache-pixi-lock.yml with: pixi-version: "v0.63.2" # keep in sync with env var above doctest: name: Doctests runs-on: "ubuntu-latest" needs: [detect-ci-trigger, cache-pixi-lock] if: needs.detect-ci-trigger.outputs.triggered == 'false' defaults: run: shell: bash -l {0} env: PIXI_ENV: "test-py313" steps: - uses: actions/checkout@v6 with: fetch-depth: 0 # Fetch all history for all branches and tags. - name: set environment variables run: | echo "TODAY=$(date +'%Y-%m-%d')" >> $GITHUB_ENV - name: Restore cached pixi lockfile uses: actions/cache/restore@v5 id: restore-pixi-lock with: enableCrossOsArchive: true path: | pixi.lock key: ${{ needs.cache-pixi-lock.outputs.cache-id }} - uses: prefix-dev/setup-pixi@v0.9.3 with: pixi-version: ${{ env.PIXI_VERSION }} cache: true environments: ${{ env.PIXI_ENV }} cache-write: ${{ github.event_name == 'push' && github.ref_name == 'main' }} - name: Version info run: | pixi run -e ${{env.PIXI_ENV}} -- python xarray/util/print_versions.py - name: Run doctests run: | # Raise an error if there are warnings in the doctests, with `-Werror`. # This is a trial; if it presents a problem, feel free to remove. # See https://github.com/pydata/xarray/issues/7164 for more info. # # If dependencies emit warnings we can't do anything about, add ignores to # `xarray/tests/__init__.py`. pixi run -e ${{env.PIXI_ENV}} -- python -m pytest --doctest-modules xarray --ignore xarray/tests -Werror mypy: name: Mypy runs-on: "ubuntu-latest" needs: [detect-ci-trigger, cache-pixi-lock] defaults: run: shell: bash -l {0} env: PIXI_ENV: test-py313-with-typing steps: - uses: actions/checkout@v6 with: fetch-depth: 0 # Fetch all history for all branches and tags. 
- name: Restore cached pixi lockfile uses: actions/cache/restore@v5 id: restore-pixi-lock with: enableCrossOsArchive: true path: | pixi.lock key: ${{ needs.cache-pixi-lock.outputs.cache-id }} - uses: prefix-dev/setup-pixi@v0.9.3 with: pixi-version: ${{ env.PIXI_VERSION }} cache: true environments: ${{ env.PIXI_ENV }} cache-write: ${{ github.event_name == 'push' && github.ref_name == 'main' }} - name: set environment variables run: | echo "TODAY=$(date +'%Y-%m-%d')" >> $GITHUB_ENV echo "PYTHON_VERSION=$(pixi run -e ${{env.PIXI_ENV}} -- python --version | cut -d' ' -f2 | cut -d. -f1,2)" >> $GITHUB_ENV - name: Version info run: | pixi run -e ${{env.PIXI_ENV}} -- python xarray/util/print_versions.py - name: Run mypy run: | pixi run -e ${{env.PIXI_ENV}} -- python -m mypy --install-types --non-interactive --cobertura-xml-report mypy_report - name: Upload mypy coverage to Codecov uses: codecov/codecov-action@v5.5.2 with: file: mypy_report/cobertura.xml flags: mypy env_vars: PYTHON_VERSION name: codecov-umbrella fail_ci_if_error: false mypy-min: name: Mypy 3.11 runs-on: "ubuntu-latest" needs: [detect-ci-trigger, cache-pixi-lock] defaults: run: shell: bash -l {0} env: PIXI_ENV: test-py311-with-typing steps: - uses: actions/checkout@v6 with: fetch-depth: 0 # Fetch all history for all branches and tags. - name: Restore cached pixi lockfile uses: actions/cache/restore@v5 id: restore-pixi-lock with: enableCrossOsArchive: true path: | pixi.lock key: ${{ needs.cache-pixi-lock.outputs.cache-id }} - uses: prefix-dev/setup-pixi@v0.9.3 with: pixi-version: ${{ env.PIXI_VERSION }} cache: true environments: ${{ env.PIXI_ENV }} cache-write: ${{ github.event_name == 'push' && github.ref_name == 'main' }} - name: set environment variables run: | echo "TODAY=$(date +'%Y-%m-%d')" >> $GITHUB_ENV echo "PYTHON_VERSION=$(pixi run -e ${{env.PIXI_ENV}} -- python --version | cut -d' ' -f2 | cut -d. -f1,2)" >> $GITHUB_ENV - name: Version info run: | pixi run -e ${{env.PIXI_ENV}} -- python xarray/util/print_versions.py - name: Run mypy run: | pixi run -e ${{env.PIXI_ENV}} -- python -m mypy --install-types --non-interactive --cobertura-xml-report mypy_report - name: Upload mypy coverage to Codecov uses: codecov/codecov-action@v5.5.2 with: file: mypy_report/cobertura.xml flags: mypy-min env_vars: PYTHON_VERSION name: codecov-umbrella fail_ci_if_error: false pyright: name: Pyright | ${{ matrix.pixi-env }}" runs-on: "ubuntu-latest" needs: [detect-ci-trigger, cache-pixi-lock] strategy: fail-fast: false matrix: pixi-env: ["test-py313-with-typing", "test-py311-with-typing"] if: | always() && ( contains( github.event.pull_request.labels.*.name, 'run-pyright') ) defaults: run: shell: bash -l {0} steps: - uses: actions/checkout@v6 with: fetch-depth: 0 # Fetch all history for all branches and tags. - name: Restore cached pixi lockfile uses: actions/cache/restore@v5 id: restore-pixi-lock with: enableCrossOsArchive: true path: | pixi.lock key: ${{ needs.cache-pixi-lock.outputs.cache-id }} - uses: prefix-dev/setup-pixi@v0.9.3 with: pixi-version: ${{ env.PIXI_VERSION }} cache: true environments: ${{ matrix.pixi-env }} cache-write: ${{ github.event_name == 'push' && github.ref_name == 'main' }} - name: set environment variables run: | echo "TODAY=$(date +'%Y-%m-%d')" >> $GITHUB_ENV echo "PYTHON_VERSION=$(pixi run -e ${{ matrix.pixi-env }} -- python --version | cut -d' ' -f2 | cut -d. 
-f1,2)" >> $GITHUB_ENV - name: Version info run: | pixi run -e ${{ matrix.pixi-env }} -- python xarray/util/print_versions.py - name: Run pyright run: | pixi run -e ${{ matrix.pixi-env }} -- python -m pyright xarray/ - name: Upload pyright coverage to Codecov uses: codecov/codecov-action@v5.5.2 with: file: pyright_report/cobertura.xml flags: pyright env_vars: PYTHON_VERSION name: codecov-umbrella fail_ci_if_error: false min-version-policy: name: Minimum Version Policy runs-on: "ubuntu-latest" needs: [detect-ci-trigger, cache-pixi-lock] if: needs.detect-ci-trigger.outputs.triggered == 'false' defaults: run: shell: bash -l {0} env: COLUMNS: 120 steps: - uses: actions/checkout@v6 with: fetch-depth: 0 # Fetch all history for all branches and tags. - name: Restore cached pixi lockfile uses: actions/cache/restore@v5 id: restore-pixi-lock with: enableCrossOsArchive: true path: | pixi.lock key: ${{ needs.cache-pixi-lock.outputs.cache-id }} - uses: prefix-dev/setup-pixi@v0.9.3 with: pixi-version: ${{ env.PIXI_VERSION }} cache: true environments: "policy" cache-write: ${{ github.event_name == 'push' && github.ref_name == 'main' }} - name: Bare minimum versions policy run: | pixi run policy-bare-minimum - name: Bare minimum and scipy versions policy run: | pixi run policy-bare-min-and-scipy - name: All-deps minimum versions policy run: | pixi run policy-min-versions python-xarray-2026.01.0/.github/workflows/benchmarks-last-release.yml0000664000175000017500000000510715136607163025662 0ustar alastairalastairname: Benchmark compare last release on: push: branches: - main workflow_dispatch: jobs: benchmark: name: Linux runs-on: ubuntu-latest env: ASV_DIR: "./asv_bench" CONDA_ENV_FILE: ci/requirements/environment.yml steps: # We need the full repo to avoid this issue # https://github.com/actions/checkout/issues/23 - uses: actions/checkout@v6 with: fetch-depth: 0 - name: Set up conda environment uses: mamba-org/setup-micromamba@v2 with: micromamba-version: "1.5.10-0" environment-file: ${{env.CONDA_ENV_FILE}} environment-name: xarray-tests cache-environment: true cache-environment-key: "${{runner.os}}-${{runner.arch}}-py${{env.PYTHON_VERSION}}-${{env.TODAY}}-${{hashFiles(env.CONDA_ENV_FILE)}}-benchmark" create-args: >- asv - name: "Get Previous tag" id: previoustag uses: "WyriHaximus/github-action-get-previous-tag@v1" # with: # fallback: 1.0.0 # Optional fallback tag to use when no tag can be found - name: Run benchmarks shell: bash -l {0} id: benchmark env: OPENBLAS_NUM_THREADS: 1 MKL_NUM_THREADS: 1 OMP_NUM_THREADS: 1 ASV_FACTOR: 1.5 ASV_SKIP_SLOW: 1 run: | set -x # ID this runner asv machine --yes echo "Baseline: ${{ steps.previoustag.outputs.tag }} " echo "Contender: ${{ github.sha }}" # Use mamba for env creation # export CONDA_EXE=$(which mamba) export CONDA_EXE=$(which conda) # Run benchmarks for current commit against base ASV_OPTIONS="--split --show-stderr --factor $ASV_FACTOR" asv continuous $ASV_OPTIONS ${{ steps.previoustag.outputs.tag }} ${{ github.sha }} \ | sed "/Traceback \|failed$\|PERFORMANCE DECREASED/ s/^/::error::/" \ | tee benchmarks.log # Report and export results for subsequent steps if grep "Traceback \|failed\|PERFORMANCE DECREASED" benchmarks.log > /dev/null ; then exit 1 fi working-directory: ${{ env.ASV_DIR }} - name: Add instructions to artifact if: always() run: | cp benchmarks/README_CI.md benchmarks.log .asv/results/ working-directory: ${{ env.ASV_DIR }} - uses: actions/upload-artifact@v6 if: always() with: name: asv-benchmark-results-${{ runner.os }} path: ${{ env.ASV_DIR 
}}/.asv/results python-xarray-2026.01.0/.github/workflows/cache-pixi-lock.yml0000664000175000017500000000272515136607163024131 0ustar alastairalastairname: Generate and cache Pixi lockfile on: workflow_call: inputs: pixi-version: type: string outputs: cache-id: description: "The lock file contents" value: ${{ jobs.cache-pixi-lock.outputs.cache-id }} jobs: cache-pixi-lock: name: Pixi lock runs-on: ubuntu-latest outputs: cache-id: ${{ steps.restore.outputs.cache-primary-key }} steps: - uses: actions/checkout@v6 with: fetch-depth: 0 submodules: recursive - name: Get current date id: date run: echo "date=$(date +'%Y-%m-%d')" >> "$GITHUB_OUTPUT" - uses: actions/cache/restore@v5 id: restore with: path: | pixi.lock key: ${{ steps.date.outputs.date }}_${{ inputs.pixi-version }}_${{hashFiles('pixi.toml')}} - uses: prefix-dev/setup-pixi@v0.9.3 if: ${{ !steps.restore.outputs.cache-hit }} with: pixi-version: ${{ inputs.pixi-version }} run-install: false - name: Run pixi lock if: ${{ !steps.restore.outputs.cache-hit }} run: pixi lock - uses: actions/cache/save@v5 if: ${{ !steps.restore.outputs.cache-hit }} id: cache with: path: | pixi.lock key: ${{ steps.restore.outputs.cache-primary-key }} - name: Upload pixi.lock uses: actions/upload-artifact@v6 with: name: pixi-lock path: pixi.lock python-xarray-2026.01.0/.github/workflows/benchmarks.yml0000664000175000017500000000545615136607163023312 0ustar alastairalastairname: Benchmark on: pull_request: types: [opened, reopened, synchronize, labeled] workflow_dispatch: env: PR_HEAD_LABEL: ${{ github.event.pull_request.head.label }} jobs: benchmark: if: ${{ contains( github.event.pull_request.labels.*.name, 'run-benchmark') && github.event_name == 'pull_request' || contains( github.event.pull_request.labels.*.name, 'topic-performance') && github.event_name == 'pull_request' || github.event_name == 'workflow_dispatch' }} name: Linux runs-on: ubuntu-latest env: ASV_DIR: "./asv_bench" CONDA_ENV_FILE: ci/requirements/environment-benchmark.yml steps: # We need the full repo to avoid this issue # https://github.com/actions/checkout/issues/23 - uses: actions/checkout@v6 with: fetch-depth: 0 - name: Set up conda environment uses: mamba-org/setup-micromamba@v2 with: micromamba-version: "1.5.10-0" environment-file: ${{env.CONDA_ENV_FILE}} environment-name: xarray-benchmark cache-environment: true cache-environment-key: "${{runner.os}}-${{runner.arch}}-py${{env.PYTHON_VERSION}}-${{env.TODAY}}-${{hashFiles(env.CONDA_ENV_FILE)}}-benchmark" # add "build" because of https://github.com/airspeed-velocity/asv/issues/1385 create-args: >- asv python-build mamba<=1.5.10 - name: Run benchmarks shell: bash -l {0} id: benchmark env: OPENBLAS_NUM_THREADS: 1 MKL_NUM_THREADS: 1 OMP_NUM_THREADS: 1 ASV_FACTOR: 1.5 ASV_SKIP_SLOW: 1 run: | set -x # ID this runner asv machine --yes echo "Baseline: ${{ github.event.pull_request.base.sha }} (${{ github.event.pull_request.base.label }})" echo "Contender: ${GITHUB_SHA} ($PR_HEAD_LABEL)" # Run benchmarks for current commit against base ASV_OPTIONS="--split --show-stderr --factor $ASV_FACTOR" asv continuous $ASV_OPTIONS ${{ github.event.pull_request.base.sha }} ${GITHUB_SHA} \ | sed "/Traceback \|failed$\|PERFORMANCE DECREASED/ s/^/::error::/" \ | tee benchmarks.log # Report and export results for subsequent steps if grep "Traceback \|failed\|PERFORMANCE DECREASED" benchmarks.log > /dev/null ; then exit 1 fi working-directory: ${{ env.ASV_DIR }} - name: Add instructions to artifact if: always() run: | cp benchmarks/README_CI.md benchmarks.log 
.asv/results/ working-directory: ${{ env.ASV_DIR }} - uses: actions/upload-artifact@v6 if: always() with: name: asv-benchmark-results-${{ runner.os }} path: ${{ env.ASV_DIR }}/.asv/results python-xarray-2026.01.0/.github/workflows/nightly-wheels.yml0000664000175000017500000000221615136607163024127 0ustar alastairalastairname: Upload nightly wheels on: workflow_dispatch: schedule: - cron: "0 0 * * *" jobs: cron: runs-on: ubuntu-latest if: github.repository == 'pydata/xarray' steps: - uses: actions/checkout@v6 with: fetch-depth: 0 - uses: actions/setup-python@v6 with: python-version: "3.12" - name: Install dependencies run: | python -m pip install --upgrade pip python -m pip install build twine - name: Build tarball and wheels run: | git clean -xdf git restore -SW . python -m build - name: Check built artifacts run: | python -m twine check --strict dist/* pwd if [ -f dist/xarray-0.0.0.tar.gz ]; then echo "❌ INVALID VERSION NUMBER" exit 1 else echo "✅ Looks good" fi - name: Upload wheel uses: scientific-python/upload-nightly-action@5748273c71e2d8d3a61f3a11a16421c8954f9ecf # 0.6.3 with: anaconda_nightly_upload_token: ${{ secrets.ANACONDA_NIGHTLY }} artifacts_path: dist python-xarray-2026.01.0/.github/workflows/configure-testpypi-version.py0000664000175000017500000000221315136607163026333 0ustar alastairalastairimport argparse import copy import pathlib import tomli import tomli_w def split_path(path, sep="/"): if isinstance(path, str): return [part for part in path.split(sep) if part] else: return path def extract(mapping, path, sep="/"): parts = split_path(path, sep=sep) cur = mapping for part in parts: cur = cur[part] return cur def update(mapping, path, value, sep="/"): new = copy.deepcopy(mapping) parts = split_path(path, sep=sep) parent = extract(new, parts[:-1]) parent[parts[-1]] = value return new parser = argparse.ArgumentParser() parser.add_argument("path", type=pathlib.Path) args = parser.parse_args() content = args.path.read_text() decoded = tomli.loads(content) with_local_scheme = update( decoded, "tool.setuptools_scm.local_scheme", "no-local-version", sep="." 
) # work around a bug in setuptools / setuptools-scm with_setuptools_pin = copy.deepcopy(with_local_scheme) requires = extract(with_setuptools_pin, "build-system.requires", sep=".") requires[0] = "setuptools>=42,<60" new_content = tomli_w.dumps(with_setuptools_pin) args.path.write_text(new_content) python-xarray-2026.01.0/.github/workflows/publish-test-results.yaml0000664000175000017500000000252015136607163025445 0ustar alastairalastair# Copied from https://github.com/EnricoMi/publish-unit-test-result-action/blob/v1.23/README.md#support-fork-repositories-and-dependabot-branches name: Publish test results on: workflow_run: workflows: ["CI"] types: - completed concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true jobs: publish-test-results: name: Publish test results runs-on: ubuntu-latest if: github.event.workflow_run.conclusion != 'skipped' steps: - name: Download and extract artifacts env: GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} run: | mkdir artifacts && cd artifacts artifacts_url=${{ github.event.workflow_run.artifacts_url }} gh api "$artifacts_url" -q '.artifacts[] | [.name, .archive_download_url] | @tsv' | while read artifact do IFS=$'\t' read name url <<< "$artifact" gh api $url > "$name.zip" unzip -d "$name" "$name.zip" done - name: Publish Unit Test Results uses: EnricoMi/publish-unit-test-result-action@v2 with: commit: ${{ github.event.workflow_run.head_sha }} event_file: artifacts/Event File/event.json event_name: ${{ github.event.workflow_run.event }} files: "artifacts/**/*.xml" comment_mode: off python-xarray-2026.01.0/.github/workflows/label-prs.yml0000664000175000017500000000034515136607163023046 0ustar alastairalastairname: "PR Labeler" on: - pull_request_target jobs: label: runs-on: ubuntu-latest steps: - uses: actions/labeler@v6 with: repo-token: "${{ secrets.GITHUB_TOKEN }}" sync-labels: false python-xarray-2026.01.0/.github/PULL_REQUEST_TEMPLATE.md0000664000175000017500000000040215136607163022200 0ustar alastairalastair - [ ] Closes #xxxx - [ ] Tests added - [ ] User visible changes (including notable bug fixes) are documented in `whats-new.rst` - [ ] New functions/methods are listed in `api.rst` python-xarray-2026.01.0/.github/labeler.yml0000664000175000017500000000454115136607163020540 0ustar alastairalastairAutomation: - changed-files: - any-glob-to-any-file: - .github/** CI: - changed-files: - any-glob-to-any-file: - ci/** dependencies: - changed-files: - any-glob-to-any-file: - ci/requirements/* topic-arrays: - changed-files: - any-glob-to-any-file: - xarray/core/duck_array_ops.py topic-backends: - changed-files: - any-glob-to-any-file: - xarray/backends/** topic-cftime: - changed-files: - any-glob-to-any-file: - xarray/coding/*time* topic-CF conventions: - changed-files: - any-glob-to-any-file: - xarray/conventions.py topic-dask: - changed-files: - any-glob-to-any-file: - xarray/compat/dask* - xarray/core/parallel.py topic-DataTree: - changed-files: - any-glob-to-any-file: - xarray/core/datatree* topic-documentation: - all: - changed-files: - any-glob-to-any-file: "doc/**/*" - all-globs-to-all-files: "!doc/whats-new.rst" topic-groupby: - changed-files: - any-glob-to-any-file: - xarray/core/groupby.py topic-html-repr: - changed-files: - any-glob-to-any-file: - xarray/core/formatting_html.py topic-hypothesis: - changed-files: - any-glob-to-any-file: - properties/** - xarray/testing/strategies.py topic-indexing: - changed-files: - any-glob-to-any-file: - xarray/core/indexes.py - xarray/core/indexing.py topic-NamedArray: - changed-files: 
- any-glob-to-any-file: - xarray/namedarray/* topic-performance: - changed-files: - any-glob-to-any-file: - asv_bench/benchmarks/** topic-plotting: - changed-files: - any-glob-to-any-file: - xarray/plot/* - xarray/plot/**/* topic-rolling: - changed-files: - any-glob-to-any-file: - xarray/computation/rolling.py - xarray/computation/rolling_exp.py topic-testing: - changed-files: - any-glob-to-any-file: - conftest.py - xarray/testing/* topic-typing: - changed-files: - any-glob-to-any-file: - xarray/core/types.py topic-zarr: - changed-files: - any-glob-to-any-file: - xarray/backends/zarr.py io: - changed-files: - any-glob-to-any-file: - xarray/backends/** python-xarray-2026.01.0/.devcontainer/0000775000175000017500000000000015136607163017602 5ustar alastairalastairpython-xarray-2026.01.0/.devcontainer/devcontainer.json0000664000175000017500000000111115136607163023150 0ustar alastairalastair{ "name": "my-workspace", "build": { "dockerfile": "Dockerfile", "context": ".." }, "hostRequirements": { "cpus": 4, "memory": "16gb" }, "customizations": { "vscode": { "settings": {}, "extensions": ["ms-python.python", "charliermarsh.ruff", "GitHub.copilot"] } }, "features": { "ghcr.io/devcontainers/features/docker-in-docker:2": {} }, "mounts": [ "source=${localWorkspaceFolderBasename}-pixi,target=${containerWorkspaceFolder}/.pixi,type=volume" ], "postCreateCommand": "sudo chown vscode .pixi && pixi install" } python-xarray-2026.01.0/.devcontainer/Dockerfile0000664000175000017500000000071515136607163021577 0ustar alastairalastairFROM mcr.microsoft.com/devcontainers/base:jammy ARG PIXI_VERSION=v0.59.0 RUN curl -L -o /usr/local/bin/pixi -fsSL --compressed "https://github.com/prefix-dev/pixi/releases/download/${PIXI_VERSION}/pixi-$(uname -m)-unknown-linux-musl" \ && chmod +x /usr/local/bin/pixi \ && pixi info # set some user and workdir settings to work nicely with vscode USER vscode WORKDIR /home/vscode RUN echo 'eval "$(pixi completion -s bash)"' >> /home/vscode/.bashrc python-xarray-2026.01.0/README.md0000664000175000017500000002243615136607163016331 0ustar alastairalastair# xarray: N-D labeled arrays and datasets [![Xarray](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/pydata/xarray/refs/heads/main/doc/badge.json)](https://xarray.dev) [![Powered by Pixi](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/prefix-dev/pixi/main/assets/badge/v0.json)](https://pixi.sh) [![CI](https://github.com/pydata/xarray/actions/workflows/ci.yaml/badge.svg?branch=main)](https://github.com/pydata/xarray/actions/workflows/ci.yaml?query=branch%3Amain) [![Code coverage](https://codecov.io/gh/pydata/xarray/branch/main/graph/badge.svg?flag=unittests)](https://codecov.io/gh/pydata/xarray) [![Docs](https://readthedocs.org/projects/xray/badge/?version=latest)](https://docs.xarray.dev/) [![Benchmarked with asv](https://img.shields.io/badge/benchmarked%20by-asv-green.svg?style=flat)](https://asv-runner.github.io/asv-collection/xarray/) [![Formatted with black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/python/black) [![Checked with mypy](http://www.mypy-lang.org/static/mypy_badge.svg)](http://mypy-lang.org/) [![Available on pypi](https://img.shields.io/pypi/v/xarray.svg)](https://pypi.python.org/pypi/xarray/) [![PyPI - Downloads](https://img.shields.io/pypi/dm/xarray)](https://pypistats.org/packages/xarray) [![Conda - Downloads](https://img.shields.io/conda/dn/anaconda/xarray?label=conda%7Cdownloads)](https://anaconda.org/anaconda/xarray) 
[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.598201.svg)](https://doi.org/10.5281/zenodo.598201) [![Examples on binder](https://img.shields.io/badge/launch-binder-579ACA.svg?logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAFkAAABZCAMAAABi1XidAAAB8lBMVEX///9XmsrmZYH1olJXmsr1olJXmsrmZYH1olJXmsr1olJXmsrmZYH1olL1olJXmsr1olJXmsrmZYH1olL1olJXmsrmZYH1olJXmsr1olL1olJXmsrmZYH1olL1olJXmsrmZYH1olL1olL0nFf1olJXmsrmZYH1olJXmsq8dZb1olJXmsrmZYH1olJXmspXmspXmsr1olL1olJXmsrmZYH1olJXmsr1olL1olJXmsrmZYH1olL1olLeaIVXmsrmZYH1olL1olL1olJXmsrmZYH1olLna31Xmsr1olJXmsr1olJXmsrmZYH1olLqoVr1olJXmsr1olJXmsrmZYH1olL1olKkfaPobXvviGabgadXmsqThKuofKHmZ4Dobnr1olJXmsr1olJXmspXmsr1olJXmsrfZ4TuhWn1olL1olJXmsqBi7X1olJXmspZmslbmMhbmsdemsVfl8ZgmsNim8Jpk8F0m7R4m7F5nLB6jbh7jbiDirOEibOGnKaMhq+PnaCVg6qWg6qegKaff6WhnpKofKGtnomxeZy3noG6dZi+n3vCcpPDcpPGn3bLb4/Mb47UbIrVa4rYoGjdaIbeaIXhoWHmZYHobXvpcHjqdHXreHLroVrsfG/uhGnuh2bwj2Hxk17yl1vzmljzm1j0nlX1olL3AJXWAAAAbXRSTlMAEBAQHx8gICAuLjAwMDw9PUBAQEpQUFBXV1hgYGBkcHBwcXl8gICAgoiIkJCQlJicnJ2goKCmqK+wsLC4usDAwMjP0NDQ1NbW3Nzg4ODi5+3v8PDw8/T09PX29vb39/f5+fr7+/z8/Pz9/v7+zczCxgAABC5JREFUeAHN1ul3k0UUBvCb1CTVpmpaitAGSLSpSuKCLWpbTKNJFGlcSMAFF63iUmRccNG6gLbuxkXU66JAUef/9LSpmXnyLr3T5AO/rzl5zj137p136BISy44fKJXuGN/d19PUfYeO67Znqtf2KH33Id1psXoFdW30sPZ1sMvs2D060AHqws4FHeJojLZqnw53cmfvg+XR8mC0OEjuxrXEkX5ydeVJLVIlV0e10PXk5k7dYeHu7Cj1j+49uKg7uLU61tGLw1lq27ugQYlclHC4bgv7VQ+TAyj5Zc/UjsPvs1sd5cWryWObtvWT2EPa4rtnWW3JkpjggEpbOsPr7F7EyNewtpBIslA7p43HCsnwooXTEc3UmPmCNn5lrqTJxy6nRmcavGZVt/3Da2pD5NHvsOHJCrdc1G2r3DITpU7yic7w/7Rxnjc0kt5GC4djiv2Sz3Fb2iEZg41/ddsFDoyuYrIkmFehz0HR2thPgQqMyQYb2OtB0WxsZ3BeG3+wpRb1vzl2UYBog8FfGhttFKjtAclnZYrRo9ryG9uG/FZQU4AEg8ZE9LjGMzTmqKXPLnlWVnIlQQTvxJf8ip7VgjZjyVPrjw1te5otM7RmP7xm+sK2Gv9I8Gi++BRbEkR9EBw8zRUcKxwp73xkaLiqQb+kGduJTNHG72zcW9LoJgqQxpP3/Tj//c3yB0tqzaml05/+orHLksVO+95kX7/7qgJvnjlrfr2Ggsyx0eoy9uPzN5SPd86aXggOsEKW2Prz7du3VID3/tzs/sSRs2w7ovVHKtjrX2pd7ZMlTxAYfBAL9jiDwfLkq55Tm7ifhMlTGPyCAs7RFRhn47JnlcB9RM5T97ASuZXIcVNuUDIndpDbdsfrqsOppeXl5Y+XVKdjFCTh+zGaVuj0d9zy05PPK3QzBamxdwtTCrzyg/2Rvf2EstUjordGwa/kx9mSJLr8mLLtCW8HHGJc2R5hS219IiF6PnTusOqcMl57gm0Z8kanKMAQg0qSyuZfn7zItsbGyO9QlnxY0eCuD1XL2ys/MsrQhltE7Ug0uFOzufJFE2PxBo/YAx8XPPdDwWN0MrDRYIZF0mSMKCNHgaIVFoBbNoLJ7tEQDKxGF0kcLQimojCZopv0OkNOyWCCg9XMVAi7ARJzQdM2QUh0gmBozjc3Skg6dSBRqDGYSUOu66Zg+I2fNZs/M3/f/Grl/XnyF1Gw3VKCez0PN5IUfFLqvgUN4C0qNqYs5YhPL+aVZYDE4IpUk57oSFnJm4FyCqqOE0jhY2SMyLFoo56zyo6becOS5UVDdj7Vih0zp+tcMhwRpBeLyqtIjlJKAIZSbI8SGSF3k0pA3mR5tHuwPFoa7N7reoq2bqCsAk1HqCu5uvI1n6JuRXI+S1Mco54YmYTwcn6Aeic+kssXi8XpXC4V3t7/ADuTNKaQJdScAAAAAElFTkSuQmCC)](https://mybinder.org/v2/gh/pydata/xarray/main?urlpath=lab/tree/doc/examples/weather-data.ipynb) [![Twitter](https://img.shields.io/twitter/follow/xarray_dev?style=social)](https://x.com/xarray_dev) **xarray** (pronounced "ex-array", formerly known as **xray**) is an open source project and Python package that makes working with labelled multi-dimensional arrays simple, efficient, and fun! Xarray introduces labels in the form of dimensions, coordinates and attributes on top of raw [NumPy](https://www.numpy.org)-like arrays, which allows for a more intuitive, more concise, and less error-prone developer experience. The package includes a large and growing library of domain-agnostic functions for advanced analytics and visualization with these data structures. Xarray was inspired by and borrows heavily from [pandas](https://pandas.pydata.org), the popular data analysis package focused on labelled tabular data. 
It is particularly tailored to working with [netCDF](https://www.unidata.ucar.edu/software/netcdf) files, which were the source of xarray\'s data model, and integrates tightly with [dask](https://dask.org) for parallel computing. ## Why xarray? Multi-dimensional (a.k.a. N-dimensional, ND) arrays (sometimes called "tensors") are an essential part of computational science. They are encountered in a wide range of fields, including physics, astronomy, geoscience, bioinformatics, engineering, finance, and deep learning. In Python, [NumPy](https://www.numpy.org) provides the fundamental data structure and API for working with raw ND arrays. However, real-world datasets are usually more than just raw numbers; they have labels which encode information about how the array values map to locations in space, time, etc. Xarray doesn\'t just keep track of labels on arrays \-- it uses them to provide a powerful and concise interface. For example: - Apply operations over dimensions by name: `x.sum('time')`. - Select values by label instead of integer location: `x.loc['2014-01-01']` or `x.sel(time='2014-01-01')`. - Mathematical operations (e.g., `x - y`) vectorize across multiple dimensions (array broadcasting) based on dimension names, not shape. - Flexible split-apply-combine operations with groupby: `x.groupby('time.dayofyear').mean()`. - Database like alignment based on coordinate labels that smoothly handles missing values: `x, y = xr.align(x, y, join='outer')`. - Keep track of arbitrary metadata in the form of a Python dictionary: `x.attrs`. ## Documentation Learn more about xarray in its official documentation at . Try out an [interactive Jupyter notebook](https://mybinder.org/v2/gh/pydata/xarray/main?urlpath=lab/tree/doc/examples/weather-data.ipynb). ## Contributing You can find information about contributing to xarray at our [Contributing page](https://docs.xarray.dev/en/stable/contributing.html). ## Get in touch - Ask usage questions ("How do I?") on [GitHub Discussions](https://github.com/pydata/xarray/discussions). - Report bugs, suggest features or view the source code [on GitHub](https://github.com/pydata/xarray). - For less well defined questions or ideas, or to announce other projects of interest to xarray users, use the [mailing list](https://groups.google.com/forum/#!forum/xarray). ## NumFOCUS Xarray is a fiscally sponsored project of [NumFOCUS](https://numfocus.org), a nonprofit dedicated to supporting the open source scientific computing community. If you like Xarray and want to support our mission, please consider making a [donation](https://numfocus.org/donate-to-xarray) to support our efforts. ## History Xarray is an evolution of an internal tool developed at [The Climate Corporation](https://climate.com/). It was originally written by Climate Corp researchers Stephan Hoyer, Alex Kleeman and Eugene Brevdo and was released as open source in May 2014. The project was renamed from "xray" in January 2016. Xarray became a fiscally sponsored project of [NumFOCUS](https://numfocus.org) in August 2018. ## Contributors Thanks to our many contributors! [![Contributors](https://contrib.rocks/image?repo=pydata/xarray)](https://github.com/pydata/xarray/graphs/contributors) ## License Copyright 2014-2024, xarray Developers Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. Xarray bundles portions of pandas, NumPy and Seaborn, all of which are available under a "3-clause BSD" license: - pandas: `setup.py`, `xarray/util/print_versions.py` - NumPy: `xarray/compat/npcompat.py` - Seaborn: `_determine_cmap_params` in `xarray/plot/utils.py` Xarray also bundles portions of CPython, which is available under the "Python Software Foundation License" in `xarray/namedarray/pycompat.py`. Xarray uses icons from the icomoon package (free version), which is available under the "CC BY 4.0" license. The full text of these licenses are included in the licenses directory. python-xarray-2026.01.0/CONTRIBUTING.md0000664000175000017500000000021315136607163017270 0ustar alastairalastairXarray's contributor guidelines [can be found in our online documentation](https://docs.xarray.dev/en/stable/contribute/contributing.html) python-xarray-2026.01.0/.gitattributes0000664000175000017500000000040415136607163017734 0ustar alastairalastair# reduce the number of merge conflicts doc/whats-new.rst merge=union # allow installing from git archives .git_archival.txt export-subst # SCM syntax highlighting & preventing 3-way merges pixi.lock merge=binary linguist-language=YAML linguist-generated=true python-xarray-2026.01.0/properties/0000775000175000017500000000000015136607163017237 5ustar alastairalastairpython-xarray-2026.01.0/properties/test_indexing.py0000664000175000017500000000415415136607163022461 0ustar alastairalastairimport pytest pytest.importorskip("hypothesis") import hypothesis.strategies as st from hypothesis import given import xarray as xr import xarray.testing.strategies as xrst def _slice_size(s: slice, dim_size: int) -> int: """Compute the size of a slice applied to a dimension.""" return len(range(*s.indices(dim_size))) @given( st.data(), xrst.variables(dims=xrst.dimension_sizes(min_dims=1, max_dims=4, min_side=1)), ) def test_basic_indexing(data, var): """Test that basic indexers produce expected output shape.""" idxr = data.draw(xrst.basic_indexers(sizes=var.sizes)) result = var.isel(idxr) expected_shape = tuple( _slice_size(idxr[d], var.sizes[d]) if d in idxr else var.sizes[d] for d in result.dims ) assert result.shape == expected_shape @given( st.data(), xrst.variables(dims=xrst.dimension_sizes(min_dims=1, max_dims=4, min_side=1)), ) def test_outer_indexing(data, var): """Test that outer array indexers produce expected output shape.""" idxr = data.draw(xrst.outer_array_indexers(sizes=var.sizes, min_dims=1)) result = var.isel(idxr) expected_shape = tuple( len(idxr[d]) if d in idxr else var.sizes[d] for d in result.dims ) assert result.shape == expected_shape @given( st.data(), xrst.variables(dims=xrst.dimension_sizes(min_dims=2, max_dims=4, min_side=1)), ) def test_vectorized_indexing(data, var): """Test that vectorized indexers produce expected output shape.""" da = xr.DataArray(var) idxr = data.draw(xrst.vectorized_indexers(sizes=var.sizes)) result = da.isel(idxr) # TODO: this logic works because the dims in idxr don't overlap with da.dims # Compute expected shape from result dims # Non-indexed dims keep their original size, indexed dims get broadcast size broadcast_result = xr.broadcast(*idxr.values()) broadcast_sizes = dict( 
zip(broadcast_result[0].dims, broadcast_result[0].shape, strict=True) ) expected_shape = tuple( var.sizes[d] if d in var.sizes else broadcast_sizes[d] for d in result.dims ) assert result.shape == expected_shape python-xarray-2026.01.0/properties/conftest.py0000664000175000017500000000137515136607163021444 0ustar alastairalastairimport pytest def pytest_addoption(parser): parser.addoption( "--run-slow-hypothesis", action="store_true", default=False, help="run slow hypothesis tests", ) def pytest_collection_modifyitems(config, items): if config.getoption("--run-slow-hypothesis"): return skip_slow_hyp = pytest.mark.skip(reason="need --run-slow-hypothesis option to run") for item in items: if "slow_hypothesis" in item.keywords: item.add_marker(skip_slow_hyp) try: from hypothesis import settings except ImportError: pass else: # Run for a while - arrays are a bigger search space than usual settings.register_profile("ci", deadline=None, print_blob=True) settings.load_profile("ci") python-xarray-2026.01.0/properties/test_coordinate_transform.py0000664000175000017500000001140115136607163025067 0ustar alastairalastair"""Property tests comparing CoordinateTransformIndex to PandasIndex.""" import functools import operator from collections.abc import Hashable from typing import Any import numpy as np import pytest pytest.importorskip("hypothesis") import hypothesis.strategies as st from hypothesis import given import xarray as xr import xarray.testing.strategies as xrst from xarray.core.coordinate_transform import CoordinateTransform from xarray.core.indexes import CoordinateTransformIndex from xarray.testing import assert_equal DATA_VAR_NAME = "_test_data_" class IdentityTransform(CoordinateTransform): """Identity transform that returns dimension positions as coordinate labels.""" def forward(self, dim_positions: dict[str, Any]) -> dict[Hashable, Any]: return dim_positions def reverse(self, coord_labels: dict[Hashable, Any]) -> dict[str, Any]: return coord_labels def equals( self, other: CoordinateTransform, exclude: frozenset[Hashable] | None = None ) -> bool: if not isinstance(other, IdentityTransform): return False return self.dim_size == other.dim_size def create_transform_da(sizes: dict[str, int]) -> xr.DataArray: """Create a DataArray with IdentityTransform CoordinateTransformIndex.""" dims = list(sizes.keys()) shape = tuple(sizes.values()) data = np.arange(np.prod(shape)).reshape(shape) # Create dataset with transform index for each dimension ds = xr.Dataset({DATA_VAR_NAME: (dims, data)}) indexes = [ xr.Coordinates.from_xindex( CoordinateTransformIndex( IdentityTransform((dim,), {dim: size}, dtype=np.dtype(np.int64)) ) ) for dim, size in sizes.items() ] coords = functools.reduce(operator.or_, indexes) return ds.assign_coords(coords).get(DATA_VAR_NAME) def create_pandas_da(sizes: dict[str, int]) -> xr.DataArray: """Create a DataArray with standard PandasIndex (range index).""" shape = tuple(sizes.values()) data = np.arange(np.prod(shape)).reshape(shape) coords = {dim: np.arange(size) for dim, size in sizes.items()} return xr.DataArray( data, dims=list(sizes.keys()), coords=coords, name=DATA_VAR_NAME ) @given( st.data(), xrst.dimension_sizes(min_dims=1, max_dims=3, min_side=1, max_side=5), ) def test_basic_indexing(data, sizes): """Test basic indexing produces identical results for transform and pandas index.""" pandas_da = create_pandas_da(sizes) transform_da = create_transform_da(sizes) idxr = data.draw(xrst.basic_indexers(sizes=sizes)) pandas_result = pandas_da.isel(idxr) transform_result = 
transform_da.isel(idxr) # TODO: any indexed dim in pandas_result should be an indexed dim in transform_result # This requires us to return a new CoordinateTransformIndex from .isel. # for dim in pandas_result.xindexes: # assert isinstance(transform_result.xindexes[dim], CoordinateTransformIndex) assert_equal(pandas_result, transform_result) # not supported today # pandas_result = pandas_da.sel(idxr) # transform_result = transform_da.sel(idxr) # assert_identical(pandas_result, transform_result) @given( st.data(), xrst.dimension_sizes(min_dims=1, max_dims=3, min_side=1, max_side=5), ) def test_outer_indexing(data, sizes): """Test outer indexing produces identical results for transform and pandas index.""" pandas_da = create_pandas_da(sizes) transform_da = create_transform_da(sizes) idxr = data.draw(xrst.outer_array_indexers(sizes=sizes, min_dims=1)) pandas_result = pandas_da.isel(idxr) transform_result = transform_da.isel(idxr) assert_equal(pandas_result, transform_result) label_idxr = { dim: np.arange(pandas_da.sizes[dim])[ind.data] for dim, ind in idxr.items() } pandas_result = pandas_da.sel(label_idxr) transform_result = transform_da.sel(label_idxr, method="nearest") assert_equal(pandas_result, transform_result) @given( st.data(), xrst.dimension_sizes(min_dims=2, max_dims=3, min_side=1, max_side=5), ) def test_vectorized_indexing(data, sizes): """Test vectorized indexing produces identical results for transform and pandas index.""" pandas_da = create_pandas_da(sizes) transform_da = create_transform_da(sizes) idxr = data.draw(xrst.vectorized_indexers(sizes=sizes)) pandas_result = pandas_da.isel(idxr) transform_result = transform_da.isel(idxr) assert_equal(pandas_result, transform_result) label_idxr = { dim: ind.copy(data=np.arange(pandas_da.sizes[dim])[ind.data]) for dim, ind in idxr.items() } pandas_result = pandas_da.sel(label_idxr, method="nearest") transform_result = transform_da.sel(label_idxr, method="nearest") assert_equal(pandas_result, transform_result) python-xarray-2026.01.0/properties/test_encode_decode.py0000664000175000017500000000272615136607163023417 0ustar alastairalastair""" Property-based tests for encoding/decoding methods. These ones pass, just as you'd hope! 
""" import warnings import pytest pytest.importorskip("hypothesis") # isort: split import hypothesis.extra.numpy as npst import numpy as np from hypothesis import given import xarray as xr from xarray.coding.times import _parse_iso8601 from xarray.testing.strategies import datetimes, variables @pytest.mark.slow @given(original=variables()) def test_CFMask_coder_roundtrip(original) -> None: coder = xr.coding.variables.CFMaskCoder() roundtripped = coder.decode(coder.encode(original)) xr.testing.assert_identical(original, roundtripped) @pytest.mark.xfail @pytest.mark.slow @given(var=variables(dtype=npst.floating_dtypes())) def test_CFMask_coder_decode(var) -> None: var[0] = -99 var.attrs["_FillValue"] = -99 coder = xr.coding.variables.CFMaskCoder() decoded = coder.decode(var) assert np.isnan(decoded[0]) @pytest.mark.slow @given(original=variables()) def test_CFScaleOffset_coder_roundtrip(original) -> None: coder = xr.coding.variables.CFScaleOffsetCoder() roundtripped = coder.decode(coder.encode(original)) xr.testing.assert_identical(original, roundtripped) @given(dt=datetimes()) def test_iso8601_decode(dt): iso = dt.isoformat() with warnings.catch_warnings(): warnings.filterwarnings("ignore", message=".*date/calendar/year zero.*") parsed, _ = _parse_iso8601(type(dt), iso) assert dt == parsed python-xarray-2026.01.0/properties/README.md0000664000175000017500000000174115136607163020521 0ustar alastairalastair# Property-based tests using Hypothesis This directory contains property-based tests using a library called [Hypothesis](https://github.com/HypothesisWorks/hypothesis-python). The property tests for xarray are a work in progress - more are always welcome. They are stored in a separate directory because they tend to run more examples and thus take longer, and so that local development can run a test suite without needing to `pip install hypothesis`. ## Hang on, "property-based" tests? Instead of making assertions about operations on a particular piece of data, you use Hypothesis to describe a _kind_ of data, then make assertions that should hold for _any_ example of this kind. For example: "given a 2d ndarray of dtype uint8 `arr`, `xr.DataArray(arr).plot.imshow()` never raises an exception". Hypothesis will then try many random examples, and report a minimised failing input for each error it finds. 
[See the docs for more info.](https://hypothesis.readthedocs.io/en/master/) python-xarray-2026.01.0/properties/test_index_manipulation.py0000664000175000017500000002344115136607163024543 0ustar alastairalastairimport itertools import warnings import numpy as np import pytest import xarray as xr from xarray import Dataset from xarray.testing import _assert_internal_invariants pytest.importorskip("hypothesis") pytestmark = pytest.mark.slow_hypothesis import hypothesis.extra.numpy as npst import hypothesis.strategies as st from hypothesis import note, settings from hypothesis.stateful import ( RuleBasedStateMachine, initialize, invariant, precondition, rule, ) import xarray.testing.strategies as xrst # Strategy for generating names - uniqueness is enforced by the state machine NAME_STRATEGY = xrst.names() DIM_NAME = xrst.dimension_names(name_strategy=NAME_STRATEGY, min_dims=1, max_dims=1) index_variables = st.builds( xr.Variable, data=npst.arrays( dtype=xrst.pandas_index_dtypes(), shape=npst.array_shapes(min_dims=1, max_dims=1), elements=dict(allow_nan=False, allow_infinity=False, allow_subnormal=False), unique=True, ), dims=DIM_NAME, attrs=xrst.attrs(), ) def add_dim_coord_and_data_var(ds, var): (name,) = var.dims # dim coord ds[name] = var # non-dim coord of same size; this allows renaming ds[name + "_"] = var class DatasetStateMachine(RuleBasedStateMachine): # Can't use bundles because we'd need pre-conditions on consumes(bundle) # indexed_dims = Bundle("indexed_dims") # multi_indexed_dims = Bundle("multi_indexed_dims") def __init__(self): super().__init__() self.dataset = Dataset() self.check_default_indexes = True # We track these separately as lists so we can guarantee order of iteration over them. # Order of iteration over Dataset.dims is not guaranteed self.indexed_dims = [] self.multi_indexed_dims = [] # Track all used names to ensure uniqueness (avoids flaky Hypothesis tests) self.used_names: set[str] = set() def _draw_unique_name(self, data) -> str: """Draw a name that hasn't been used yet in this test case.""" name = data.draw(NAME_STRATEGY.filter(lambda x: x not in self.used_names)) self.used_names.add(name) return name def _draw_unique_var(self, data) -> xr.Variable: """Draw an index variable with a unique dimension name.""" var = data.draw(index_variables) # Replace with a guaranteed unique name new_name = self._draw_unique_name(data) return xr.Variable(dims=(new_name,), data=var.data, attrs=var.attrs) @initialize(data=st.data()) def init_ds(self, data): """Initialize the Dataset so that at least one rule will always fire.""" var = self._draw_unique_var(data) (name,) = var.dims note(f"initializing with dimension coordinate {name}") add_dim_coord_and_data_var(self.dataset, var) self.indexed_dims.append(name) # TODO: stacking with a timedelta64 index and unstacking converts it to object @rule(data=st.data()) def add_dim_coord(self, data): var = self._draw_unique_var(data) (name,) = var.dims note(f"adding dimension coordinate {name}") add_dim_coord_and_data_var(self.dataset, var) self.indexed_dims.append(name) @rule(data=st.data()) def assign_coords(self, data): var = self._draw_unique_var(data) (name,) = var.dims note(f"assign_coords: {name}") self.dataset = self.dataset.assign_coords({name: var}) self.indexed_dims.append(name) @property def has_indexed_dims(self) -> bool: return bool(self.indexed_dims + self.multi_indexed_dims) @rule(data=st.data()) @precondition(lambda self: self.has_indexed_dims) def reset_index(self, data): dim = data.draw(st.sampled_from(self.indexed_dims + 
self.multi_indexed_dims)) self.check_default_indexes = False note(f"> resetting {dim}") self.dataset = self.dataset.reset_index(dim) if dim in self.indexed_dims: del self.indexed_dims[self.indexed_dims.index(dim)] elif dim in self.multi_indexed_dims: del self.multi_indexed_dims[self.multi_indexed_dims.index(dim)] @rule(data=st.data(), create_index=st.booleans()) @precondition(lambda self: bool(self.indexed_dims)) def stack(self, data, create_index): newname = self._draw_unique_name(data) oldnames = data.draw( st.lists( st.sampled_from(self.indexed_dims), min_size=1, max_size=3 if create_index else None, unique=True, ) ) note(f"> stacking {oldnames} as {newname}") self.dataset = self.dataset.stack( {newname: oldnames}, create_index=create_index ) if create_index: self.multi_indexed_dims += [newname] # if create_index is False, then we just drop these for dim in oldnames: del self.indexed_dims[self.indexed_dims.index(dim)] @rule(data=st.data()) @precondition(lambda self: bool(self.multi_indexed_dims)) def unstack(self, data): # TODO: add None dim = data.draw(st.sampled_from(self.multi_indexed_dims)) note(f"> unstacking {dim}") if dim is not None: pd_index = self.dataset.xindexes[dim].index self.dataset = self.dataset.unstack(dim) del self.multi_indexed_dims[self.multi_indexed_dims.index(dim)] if dim is not None: self.indexed_dims.extend(pd_index.names) else: # TODO: fix this pass @rule(data=st.data()) @precondition(lambda self: bool(self.dataset.variables)) def rename_vars(self, data): newname = self._draw_unique_name(data) dim = data.draw(st.sampled_from(sorted(self.dataset.variables))) # benbovy: "skip the default indexes invariant test when the name of an # existing dimension coordinate is passed as input kwarg or dict key # to .rename_vars()." self.check_default_indexes = False note(f"> renaming {dim} to {newname}") self.dataset = self.dataset.rename_vars({dim: newname}) if dim in self.indexed_dims: del self.indexed_dims[self.indexed_dims.index(dim)] elif dim in self.multi_indexed_dims: del self.multi_indexed_dims[self.multi_indexed_dims.index(dim)] @precondition(lambda self: bool(self.dataset.dims)) @rule(data=st.data()) def drop_dims(self, data): dims = data.draw( st.lists( st.sampled_from(sorted(self.dataset.dims)), min_size=1, unique=True, ) ) note(f"> drop_dims: {dims}") # TODO: dropping a multi-index dimension raises a DeprecationWarning with warnings.catch_warnings(): warnings.simplefilter("ignore", category=DeprecationWarning) self.dataset = self.dataset.drop_dims(dims) for dim in dims: if dim in self.indexed_dims: del self.indexed_dims[self.indexed_dims.index(dim)] elif dim in self.multi_indexed_dims: del self.multi_indexed_dims[self.multi_indexed_dims.index(dim)] @precondition(lambda self: bool(self.indexed_dims)) @rule(data=st.data()) def drop_indexes(self, data): self.check_default_indexes = False dims = data.draw( st.lists(st.sampled_from(self.indexed_dims), min_size=1, unique=True) ) note(f"> drop_indexes: {dims}") self.dataset = self.dataset.drop_indexes(dims) for dim in dims: if dim in self.indexed_dims: del self.indexed_dims[self.indexed_dims.index(dim)] elif dim in self.multi_indexed_dims: del self.multi_indexed_dims[self.multi_indexed_dims.index(dim)] @property def swappable_dims(self): ds = self.dataset options = [] for dim in self.indexed_dims: choices = [ name for name, var in ds._variables.items() if var.dims == (dim,) # TODO: Avoid swapping a dimension to itself and name != dim ] options.extend( (a, b) for a, b in itertools.zip_longest((dim,), choices, 
fillvalue=dim) ) return options @rule(data=st.data()) # TODO: swap_dims is basically all broken if a multiindex is present # TODO: Avoid swapping from Index to a MultiIndex level # TODO: Avoid swapping from MultiIndex to a level of the same MultiIndex # TODO: Avoid swapping when a MultiIndex is present @precondition(lambda self: not bool(self.multi_indexed_dims)) @precondition(lambda self: bool(self.swappable_dims)) def swap_dims(self, data): ds = self.dataset options = self.swappable_dims dim, to = data.draw(st.sampled_from(options)) note( f"> swapping {dim} to {to}, found swappable dims: {options}, all_dims: {tuple(self.dataset.dims)}" ) self.dataset = ds.swap_dims({dim: to}) del self.indexed_dims[self.indexed_dims.index(dim)] self.indexed_dims += [to] @invariant() def assert_invariants(self): # note(f"> ===\n\n {self.dataset!r} \n===\n\n") _assert_internal_invariants(self.dataset, self.check_default_indexes) DatasetStateMachine.TestCase.settings = settings(max_examples=300, deadline=None) DatasetTest = DatasetStateMachine.TestCase @pytest.mark.skip(reason="failure detected by hypothesis") def test_unstack_object(): ds = xr.Dataset() ds["0"] = np.array(["", "\x000"], dtype=object) ds.stack({"1": ["0"]}).unstack() @pytest.mark.skip(reason="failure detected by hypothesis") def test_unstack_timedelta_index(): ds = xr.Dataset() ds["0"] = np.array([0, 1, 2, 3], dtype="timedelta64[ns]") ds.stack({"1": ["0"]}).unstack() python-xarray-2026.01.0/properties/test_properties.py0000664000175000017500000000372115136607163023047 0ustar alastairalastairimport itertools import pytest pytest.importorskip("hypothesis") import hypothesis.strategies as st from hypothesis import given, note import xarray as xr import xarray.testing.strategies as xrst from xarray.groupers import find_independent_seasons, season_to_month_tuple @given(attrs=xrst.simple_attrs) def test_assert_identical(attrs): v = xr.Variable(dims=(), data=0, attrs=attrs) xr.testing.assert_identical(v, v.copy(deep=True)) ds = xr.Dataset(attrs=attrs) xr.testing.assert_identical(ds, ds.copy(deep=True)) @given( roll=st.integers(min_value=0, max_value=12), breaks=st.lists( st.integers(min_value=0, max_value=11), min_size=1, max_size=12, unique=True ), ) def test_property_season_month_tuple(roll, breaks): chars = list("JFMAMJJASOND") months = tuple(range(1, 13)) rolled_chars = chars[roll:] + chars[:roll] rolled_months = months[roll:] + months[:roll] breaks = sorted(breaks) if breaks[0] != 0: breaks = [0] + breaks if breaks[-1] != 12: breaks = breaks + [12] seasons = tuple( "".join(rolled_chars[start:stop]) for start, stop in itertools.pairwise(breaks) ) actual = season_to_month_tuple(seasons) expected = tuple( rolled_months[start:stop] for start, stop in itertools.pairwise(breaks) ) assert expected == actual @given(data=st.data(), nmonths=st.integers(min_value=1, max_value=11)) def test_property_find_independent_seasons(data, nmonths): chars = "JFMAMJJASOND" # if stride > nmonths, then we can't infer season order stride = data.draw(st.integers(min_value=1, max_value=nmonths)) chars = chars + chars[:nmonths] seasons = [list(chars[i : i + nmonths]) for i in range(0, 12, stride)] note(seasons) groups = find_independent_seasons(seasons) for group in groups: inds = tuple(itertools.chain(*group.inds)) assert len(inds) == len(set(inds)) assert len(group.codes) == len(set(group.codes)) python-xarray-2026.01.0/properties/test_pandas_roundtrip.py0000664000175000017500000001374215136607163024233 0ustar alastairalastair""" Property-based tests for roundtripping 
between xarray and pandas objects. """ from functools import partial from typing import cast import numpy as np import pandas as pd import pytest import xarray as xr from xarray.core.dataset import Dataset pytest.importorskip("hypothesis") import hypothesis.extra.numpy as npst # isort:skip import hypothesis.extra.pandas as pdst # isort:skip import hypothesis.strategies as st # isort:skip from hypothesis import given # isort:skip from xarray.tests import has_pyarrow numeric_dtypes = st.one_of( npst.unsigned_integer_dtypes(endianness="="), npst.integer_dtypes(endianness="="), npst.floating_dtypes(endianness="="), ) numeric_series = numeric_dtypes.flatmap(lambda dt: pdst.series(dtype=dt)) @st.composite def dataframe_strategy(draw): tz = draw(st.timezones()) dtype = pd.DatetimeTZDtype(unit="ns", tz=tz) datetimes = st.datetimes( min_value=pd.Timestamp("1677-09-21T00:12:43.145224193"), max_value=pd.Timestamp("2262-04-11T23:47:16.854775807"), timezones=st.just(tz), ) df = pdst.data_frames( [ pdst.column("datetime_col", elements=datetimes), pdst.column("other_col", elements=st.integers()), ], index=pdst.range_indexes(min_size=1, max_size=10), ) return draw(df).astype({"datetime_col": dtype}) an_array = npst.arrays( dtype=numeric_dtypes, shape=npst.array_shapes(max_dims=2), # can only convert 1D/2D to pandas ) @st.composite def datasets_1d_vars(draw) -> xr.Dataset: """Generate datasets with only 1D variables Suitable for converting to pandas dataframes. """ # Generate an index for the dataset idx = draw(pdst.indexes(dtype="u8", min_size=0, max_size=100)) # Generate 1-3 variables, 1D with the same length as the index vars_strategy = st.dictionaries( keys=st.text(), values=npst.arrays(dtype=numeric_dtypes, shape=len(idx)).map( partial(xr.Variable, ("rows",)) ), min_size=1, max_size=3, ) return xr.Dataset(draw(vars_strategy), coords={"rows": idx}) @given(st.data(), an_array) def test_roundtrip_dataarray(data, arr) -> None: names = data.draw( st.lists(st.text(), min_size=arr.ndim, max_size=arr.ndim, unique=True).map( tuple ) ) coords = {name: np.arange(n) for (name, n) in zip(names, arr.shape, strict=True)} original = xr.DataArray(arr, dims=names, coords=coords) roundtripped = xr.DataArray(original.to_pandas()) xr.testing.assert_identical(original, roundtripped) @given(datasets_1d_vars()) def test_roundtrip_dataset(dataset: Dataset) -> None: df = dataset.to_dataframe() assert isinstance(df, pd.DataFrame) roundtripped = xr.Dataset.from_dataframe(df) xr.testing.assert_identical(dataset, roundtripped) @given(numeric_series, st.text()) def test_roundtrip_pandas_series(ser, ix_name) -> None: # Need to name the index, otherwise Xarray calls it 'dim_0'. ser.index.name = ix_name arr = xr.DataArray(ser) roundtripped = arr.to_pandas() pd.testing.assert_series_equal(ser, roundtripped) # type: ignore[arg-type] xr.testing.assert_identical(arr, roundtripped.to_xarray()) # Dataframes with columns of all the same dtype - for roundtrip to DataArray numeric_homogeneous_dataframe = numeric_dtypes.flatmap( lambda dt: pdst.data_frames(columns=pdst.columns(["a", "b", "c"], dtype=dt)) ) @pytest.mark.xfail @given(numeric_homogeneous_dataframe) def test_roundtrip_pandas_dataframe(df) -> None: # Need to name the indexes, otherwise Xarray names them 'dim_0', 'dim_1'. 
df.index.name = "rows" df.columns.name = "cols" arr = xr.DataArray(df) roundtripped = arr.to_pandas() pd.testing.assert_frame_equal(df, cast(pd.DataFrame, roundtripped)) xr.testing.assert_identical(arr, roundtripped.to_xarray()) @given(df=dataframe_strategy()) def test_roundtrip_pandas_dataframe_datetime(df) -> None: # Need to name the indexes, otherwise Xarray names them 'dim_0', 'dim_1'. df.index.name = "rows" df.columns.name = "cols" dataset = xr.Dataset.from_dataframe(df) roundtripped = dataset.to_dataframe() roundtripped.columns.name = "cols" # why? pd.testing.assert_frame_equal(df, roundtripped) xr.testing.assert_identical(dataset, roundtripped.to_xarray()) @pytest.mark.parametrize( "extension_array", [ pd.Categorical(["a", "b", "c"]), pd.array(["a", "b", "c"], dtype="string"), pd.arrays.IntervalArray( [pd.Interval(0, 1), pd.Interval(1, 5), pd.Interval(2, 6)] ), pd.arrays.TimedeltaArray._from_sequence(pd.TimedeltaIndex(["1h", "2h", "3h"])), # type: ignore[attr-defined] pd.arrays.DatetimeArray._from_sequence( # type: ignore[attr-defined] pd.DatetimeIndex(["2023-01-01", "2023-01-02", "2023-01-03"], freq="D") ), np.array([1, 2, 3], dtype="int64"), ] + ([pd.array([1, 2, 3], dtype="int64[pyarrow]")] if has_pyarrow else []), ids=["cat", "string", "interval", "timedelta", "datetime", "numpy"] + (["pyarrow"] if has_pyarrow else []), ) @pytest.mark.parametrize("is_index", [True, False]) def test_roundtrip_1d_pandas_extension_array(extension_array, is_index) -> None: df = pd.DataFrame({"arr": extension_array}) if is_index: df = df.set_index("arr") arr = xr.Dataset.from_dataframe(df)["arr"] roundtripped = arr.to_pandas() df_arr_to_test = df.index if is_index else df["arr"] assert (df_arr_to_test == roundtripped).all() # `NumpyExtensionArray` types are not roundtripped, including `StringArray` which subtypes. if isinstance( extension_array, pd.arrays.NumpyExtensionArray | pd.arrays.ArrowStringArray ): # type: ignore[attr-defined] assert isinstance(arr.data, np.ndarray) else: assert ( df_arr_to_test.dtype == (roundtripped.index if is_index else roundtripped).dtype ) xr.testing.assert_identical(arr, roundtripped.to_xarray()) python-xarray-2026.01.0/properties/__init__.py0000664000175000017500000000000015136607163021336 0ustar alastairalastairpython-xarray-2026.01.0/CODE_OF_CONDUCT.md0000664000175000017500000000273515136607163017651 0ustar alastairalastair# NUMFOCUS CODE OF CONDUCT You can find the full Code of Conduct on the NumFOCUS website: https://numfocus.org/code-of-conduct ## THE SHORT VERSION NumFOCUS is dedicated to providing a harassment-free community for everyone, regardless of gender, sexual orientation, gender identity and expression, disability, physical appearance, body size, race, or religion. We do not tolerate harassment of community members in any form. Be kind to others. Do not insult or put down others. Behave professionally. Remember that harassment and sexist, racist, or exclusionary jokes are not appropriate for NumFOCUS. All communication should be appropriate for a professional audience including people of many different backgrounds. Sexual language and imagery is not appropriate. Thank you for helping make this a welcoming, friendly community for all. 
## HOW TO REPORT If you feel that the Code of Conduct has been violated, feel free to submit a report, by using the form: [NumFOCUS Code of Conduct Reporting Form](https://numfocus.typeform.com/to/ynjGdT?typeform-source=numfocus.org) ## WHO WILL RECEIVE YOUR REPORT Your report will be received and handled by NumFOCUS Code of Conduct Working Group; trained, and experienced contributors with diverse backgrounds. The group is making decisions independently from the project, PyData, NumFOCUS or any other organization. You can learn more about the current group members, as well as the reporting procedure here: https://numfocus.org/code-of-conduct python-xarray-2026.01.0/doc/0000775000175000017500000000000015136607163015610 5ustar alastairalastairpython-xarray-2026.01.0/doc/index.rst0000664000175000017500000000443515136607163017457 0ustar alastairalastair:html_theme.sidebar_secondary.remove: true .. module:: xarray Xarray documentation ==================== Xarray makes working with labelled multi-dimensional arrays in Python simple, efficient, and fun! **Version**: |version| - :ref:`whats-new` **Useful links**: `Home `__ | `Code Repository `__ | `Issues `__ | `Discussions `__ | `Releases `__ | `Tutorial `__ | `Stack Overflow `__ | `Blog `__ | .. grid:: 1 1 2 2 :gutter: 2 .. grid-item-card:: Get started! :img-top: _static/index_getting_started.svg :class-card: intro-card :link: getting-started-guide/index :link-type: doc *New to Xarray?* Start here with our installation instructions and a brief overview of Xarray. .. grid-item-card:: User guide :img-top: _static/index_user_guide.svg :class-card: intro-card :link: user-guide/index :link-type: doc *Ready to deepen your understanding of Xarray?* Visit the user guide for detailed explanations of the data model, common computational patterns, and more. .. grid-item-card:: API reference :img-top: _static/index_api.svg :class-card: intro-card :link: api :link-type: doc *Need to learn more about a specific Xarray function?* Go here to review the documentation of all public functions and classes in Xarray. .. grid-item-card:: Contribute :img-top: _static/index_contribute.svg :class-card: intro-card :link: contribute/contributing :link-type: doc *Saw a typo in the documentation? Want to improve existing functionalities?* Please review our guide on improving Xarray. .. toctree:: :maxdepth: 2 :hidden: :caption: For users Get Started User Guide Tutorial Gallery API Reference Get Help Development Release Notes python-xarray-2026.01.0/doc/gallery.rst0000664000175000017500000000135315136607163020003 0ustar alastairalastairGallery ======= Here's a list of examples on how to use xarray. We will be adding more examples soon. Contributions are highly welcomed and appreciated. So, if you are interested in contributing, please consult the :ref:`contributing` guide. Notebook Examples ----------------- .. include:: notebooks-examples-gallery.txt .. toctree:: :maxdepth: 1 :hidden: examples/weather-data examples/monthly-means examples/area_weighted_temperature examples/multidimensional-coords examples/visualization_gallery examples/ROMS_ocean_model examples/ERA5-GRIB-example examples/apply_ufunc_vectorize_1d examples/blank_template External Examples ----------------- .. 
.. include:: external-examples-gallery.txt

python-xarray-2026.01.0/doc/_static/

python-xarray-2026.01.0/doc/_static/index_contribute.svg
[SVG icon; only the "image/svg+xml" media-type string survived extraction]

python-xarray-2026.01.0/doc/_static/style.css

/* Override some aspects of the pydata-sphinx-theme */

/*
Xarray Branding Guide:

Primary Color palette (Hex):
#17afb4
#e28126
#59c7d6
#0e4666
#4a4a4a

Secondary Color Palette (Hex):
#f58154
#e7b72d
#b3dfe5
#8e8d99
#767985

Primary Typeface:
Acumin Variable Concept - Semicondensed Medium
*/

/* Increase Xarray logo size in upper left corner */
.navbar-brand img {
  height: 75px;
}

.navbar-brand {
  height: 75px;
}

/* Adjust index page overview cards, borrowed from Pandas & Numpy */

/* Override SVG icon color */
html[data-theme="dark"] .sd-card img[src*=".svg"] {
  filter: invert(0.82) brightness(0.8) contrast(1.2);
}

/* https://github.com/executablebooks/sphinx-design/blob/main/style/_cards.scss */

/* More space around image */
.intro-card {
  padding: 30px 1px 1px 1px;
}

/* More prominent card borders */
.intro-card .sd-card {
  border: 2px solid var(--pst-color-border);
  overflow: hidden;
}

/* Shrink SVG icons */
.intro-card .sd-card-img-top {
  margin: 1px;
  height: 100px;
  background-color: transparent !important;
}

/* Color titles like links */
.intro-card .sd-card-title {
  color: var(--pst-color-primary);
  font-size: var(--pst-font-size-h5);
}

/* Don't have 'raised' color background for card interiors in dark mode */
.bd-content .sd-card .sd-card-body {
  background-color: unset !important;
}

/* workaround Pydata Sphinx theme using light colors for widget cell outputs in dark-mode */
/* works for many widgets but not for Xarray html reprs */
/* https://github.com/pydata/pydata-sphinx-theme/issues/2189 */
html[data-theme="dark"] div.cell_output .text_html:has(div.xr-wrap) {
  background-color: var(--pst-color-on-background) !important;
  color: var(--pst-color-text-base) !important;
}

python-xarray-2026.01.0/doc/_static/dataset-diagram.png
[binary PNG data omitted; Inkscape-generated image, not representable as text]

python-xarray-2026.01.0/doc/_static/opendap-prism-tmax.png
[binary PNG data omitted]

python-xarray-2026.01.0/doc/_static/dask-array.svg
[SVG markup not recoverable from the extraction]

python-xarray-2026.01.0/doc/_static/index_api.svg
[SVG icon; only the "image/svg+xml" media-type string survived extraction]

python-xarray-2026.01.0/doc/_static/.gitignore

examples*.png
*.log
*.pdf
*.fbd_latexmk
*.aux

python-xarray-2026.01.0/doc/_static/index_getting_started.svg
[SVG icon; only the "image/svg+xml" media-type string survived extraction]

python-xarray-2026.01.0/doc/_static/thumbnails/

python-xarray-2026.01.0/doc/_static/thumbnails/toy-weather-data.png
[binary PNG data omitted; Matplotlib 3.3.3 thumbnail]

python-xarray-2026.01.0/doc/_static/thumbnails/monthly-means.png
[binary PNG data omitted; Matplotlib 3.3.3 thumbnail; the section ends partway through this file]
{6h[QI#h͖llj3Ue3Q)?M/uҐٞ)MN<JJuY1 `X|} `=k: ÓTkO Zi=VaHKevjP{a~u̘F, X>`y2p,0U`C(,?9a;N?gQѯB3~` /nfFEW\/t`! ";X~gFL#Xa߫}5L,t\3QѯBmih+Q(23*r1`! ";X~F, WޠK#he;q`%|ϩ1#8K eD5[ejw*`fbj XgcL#Xa,F1=p,0U~J+FWe6gU_6;lEXn,Ȳw>,0`Uwi\ "i~ap HGT}u'] eXYj5, X>`}1 `X|?ZNAz_3*]hPehwyyqWc3`1`! {6NۻJݓqF”g(4̺δxk'4l;?^ZEw, X߳}S?`OSC1XjY}L31ʬQX?tL#Xa,F,`X|>666򌳮9 XaP5 ?.Ew} :J?q1 `X|}܏.T{b;C`! ";>`wlL#Xaqclist`fqcb3`XG_rG4.`fq?Nʖs뭷6iҤQFӣ;t0~<= նmۮ {GlgAAAZiӦXX} NyyX\r 7>ZXXh*R%%%:uꪫ6nܸ|6my睶{<#:pE}?l]}zk߾}EEE\jmӦM6m`X)*;wjsΜ9{4kW^!EH} i'|嗝V\ioڵ?`XO?tѢE3deeݻ|[nUXb,qeXqԿ /P3f̸l*Qŝ;wkW^>dȐf͚ua֬Y!qƴX)A?XSfǎ6lwQn]{͜9f͚SN}뭷XVyXXb,[]}r;CԀϭǴϿtҴe85|=:m4|G?vlYR ,3&##6ٳ?߱c}ԫ)ĖUqxN{[Niҁlу>h5ecu<5jԬY377*R#F0;w15) %%%cB7(Μ9p͛/䒑#G~ᇌUa?~a?{nݺ!Cz~7 *Fl܁O_vޝ9p۷rD1 ƍkԨqo_ݐ롇2>}zڵW^]yDw$g+-GqC5Xm~KL;'3gNba|`X(l۷oohs w q,ν)A'Vaqޯ?XJY =`Xaq,1 `X|}ZfDp, ";>`@g4>p,0️`! {qw^W->;u}vF,jq, X߳`Uޕ1 `X|;>X,gqL}+GL#X!BB!B!,B! !BX!BB!B!,B! !BBV%%%=z9sٸqW^˖-'Mdhoy׭[.z7R}ݻ7nj]훕eCO7jԨYf< /E*O>1>s=wرI/]4-A}Q5qXU\\EY:y8`U7oz=x;w˖-9s6mZΥH1eff?QZ=zE=3vXh̘1W]uUUM|衇:u-\03337޴ilk֬9ޥH1WFuϞ=y~ ~g 1doJPXFsLo~5nxǎ٭[ƍ޽{j ŋgddJ׮]^}uxqϞ=|۸\x"***7o^vvĉy_~]w>P%*~C7hZJ{,_CcƌS`U'O߽^zI׹7o maÆӧOo.E*}L4F۷o1g6Oh6~p깏,TơC˗n-,['V5BmC3fo~viouYݻwoHBsfgg =oס:7֭=}`SMMퟶ+VM6m^RRr̭d'j-[m۶isРAvgϞo6 ;vXΥH%FT|jk }`Se-Zd/='G`q#Gt 6mzjX^z]qׯԩc187]}W]uզM͛sϕs)RL^ziúl2ᄏ.eB4:4===Whٲ***SN֭KIFwisܹsS/%ӧOVVV֭g̘qKJ26lآE{/>Kڵ&/K5qX!BB!B!,B! !BX!BB!B!,B! !BBlْu-;׬YSVG}BBd4s̴g}V~iVϕA! !tׯ_Æ mf}mݺ޽{,!`!N^{i޼yϞ={챚5kYkBB(.]ZF Sr5BBU߬Y3cuq5BBU ݾ}rABBٳ^xuժUk\B'M6eee 6L'O6Zp!W!,>رcnnhOIII^^^ƍwA! !UPPPNׇ;nZ~޽{s}BB!B!,B!`!BX!BB!B!,B! !BB!BB!B!`!BoF9HIENDB`python-xarray-2026.01.0/doc/_static/dask-array.svg0000664000175000017500000003446515136607163022031 0ustar alastairalastair python-xarray-2026.01.0/doc/_static/index_api.svg0000664000175000017500000000667715136607163021737 0ustar alastairalastair image/svg+xml python-xarray-2026.01.0/doc/_static/.gitignore0000664000175000017500000000005615136607163021227 0ustar alastairalastairexamples*.png *.log *.pdf *.fbd_latexmk *.aux python-xarray-2026.01.0/doc/_static/index_getting_started.svg0000664000175000017500000000761115136607163024342 0ustar alastairalastair image/svg+xml python-xarray-2026.01.0/doc/_static/thumbnails/0000775000175000017500000000000015136607163021404 5ustar alastairalastairpython-xarray-2026.01.0/doc/_static/thumbnails/toy-weather-data.png0000664000175000017500000005271415136607163025302 0ustar alastairalastairPNG  IHDRee39tEXtSoftwareMatplotlib version3.3.3, https://matplotlib.org/ȗ pHYs  U9IDATxy\gz;8^D]e;6=xL`{n|{C2N/_ X pcxp\;*l< 1l)KsQsgsnv7d/܄QCq~-&{~N:_|jQ( E{ B!(+ BPV( AYP(BP BXP~衇jڴ-5mP?%~״ C&(kemC6AYP&(k[ֆgޯ|徠,(k-^akG[-̂6m2^8΀l} +_|ڏOeǵ ږ. WX8AY ohȖ.nnںmkX4SOe؛N]Ycv;Y<k[/  !B- 5AY-']Δ|zv?34L>xs졑9ޡ`Ea'CtK!,?}AY-ސ)%l됍s\!,ўPB,#{1wڼFP,:HS=W?*ͱO8͔c@sajN~ m)@!~U(lj5k#R62Kū f\U򁡙z|'_}? 
{/eeښmH| DYy~1{ȨP=?rm׻Ssɉx쿆5۴r2@/x4Z['( ʂ6/] Lܩd ˀ].$$3a5A!?KgwUT3!΅r!v%qܮeAye7^H#(M2"]@kY$Tpm";ẂknqkEyϮJSyDPE@N-z.07`_ Ɖ9ypJb;QI]jPɫu)j]Q}!( ʂԖ[H~n!CFn0H Yt}2kˊMω!?{p"}/r iߝJIFm̚:WiQτC&N6|L5҃YP;GGWLVvv{ ܛї}M2eU[cʔeAYP^Q:&Yqϐ}!6p;p~:)8w!( D5#V=)AY[r˕|ʭv7Xm2YuyTZT[l % F~1ҩY4ݙۗ ڔޝkd%dkn&fXC3F#@,oo(m dsLml=<;e|tJ&` IY]I2A~яkUtjK G*͉RmuJ [OAy9a e~HB`ɬɦVtr9Mt9\.L9L >(炟,( ʂr , %8qfgK&~ǷzTהcc"Bs4x=|ʂ,(w5}R4`qdjB^y;c;ҒR!I p;w>湘 ʂ,(w 2 =&5`n.#q3-z|zOw a`.!ӂ,( ʂr{nŋG_dl0gzqU)⭁Vms(vzq.> ]*3 - ʂ,(ƿ1 ^H51LۤldGE;Mfan W< a0eAYPn-s@foG|T;:@@Cd1 }oGU8%띏.Ugt􈡪0eAYPnbւsnKcs JVL֚[<;F@UIsދSd:!o8[S]- bҞmyNeAYP^\ENMe{M4\NnXX(LOn?(C q!S_bg>AYP;+S8}wU LydkgWHs-y%{ sH?cmA[nK(?sb0gC ܿ6eAYP MٲDe#vj7P6+U.Ng;2\0L#ӂ1~m勰%AYPdxW^+RU}Shͱ0u!{|S]~K]3 ^ Yw-[,?wd!C|_dHĚ5keAyѠNk!sb,:HmD } y[,]=u`A.xډE9 t56A,zdCLx hʹ>/t~W8W-A2UQFN ʔYKe=6v?7ݙttF]IW"cբ"^-:083k_JquCiῆ;E-7?b{}+Nhxdӂɒy8_P frkC/LǧZCvz.Z/o=}o2b/s^\ @}vN!lr<)! 'Gur{@^ )[9zlk1*{O_m8qNƧ\d*1'Vs{^ԑC@bCQ~{"äEφ3!NpC.K]-(} )[$9>2Թky&WpN/ ^''mȰ78"1gy>w!RW_Vp,(wʖk^ zU# NmұbRv(c;>^{oЏl=mr,<7:]pnZwdʂ,(wOlBws3Rel^V 36//~ikE6lȚ[pBSeA;4e Ig]hU>8>(L/WF(P>"BPUnn-eIkְŻ0@ǕdЩO 덅70)5i@ԁdd;}LyQGP~4Z['(rz) u=r eݲX-|~?(շa$b2F 둩szMj^[NTa~j7C_{ <CLv!~l]qZP5"_0b`$lA n?#rG0g E7o5˹~|";(5N͖K"tmܯSPn(FSg輻ޝ G mg17:9cZG6v|HC^;N-v|PƐk0嶂ra0Y4@uvuC3EEOYeAٛ''|t3=7=C|8d!rzєiӨ!+A؀C,` )[Iܷ.k6o%.s¯/2V$ b"g0m8v^F5ذUӡۻcޏJUܢCy_!/FI,( wQXQ1ՖF3\UKlдjS=?st-U]|Njꧣ%P>KB|y ʂ4FM)uQ.Ւ#7`~)E5sjӘsp^B0S!geeiּ Hq}ya>$^xqYԣ -Yr3tWjaZq>SPn;qbz-ʙ-R#R4|UP<Νײo+c>D秅 xASD)(E)h %'+٪Ju'CRU_b9$WсAm29*( ʺ=vXԳ؃/5hz3pdNqo-YFԁ'( ʺܺsV7Cf`>IHR976d9 :Axks5S"SآضCB3bXQl8s0XV[J` * tr8:踾9ywXƘχ,( YtdÂ>>z+; 83ǡ?\gc2}:,ώy*75>Qh^Q(l@ܡM ,(w2OJ5˸(r 6U<< idȹ7l49d[3ԽdR)@'t,( ʝ)_# 5ŀ> ֗`6DsMĜܱV[ krM&3fz_miR (eAy\W.WdY\l9ϡk2R fmF4r4V'(SeAyA܊AN?/4YK#oҨ?Ad dlbYxj-arH5VJ//d3)E,( V2˛yFbwSC}pH%28d椩#ȁ2~> nZZ={cTSCeAYPng(jpȘ- 5=&z~)$K8U;^ޚ5Z ~\P5: ʾ@o7X \+faڃ)<=΄}W2TݩUJе"#֞4/2%!da! woI!*WIiPNUb:R` x^_ȪA^@S׼,Wxy>!kb(lUs-Quɉk:^\.\]3|5N,(ws}qfkngEl8kmیmRd";5XIŏR,( meݩl&^%bE:tceh{2{J `g=3OPu2I)O#7 ݖBC>91ֱC:1ϱWGTjmgۼC)[e]#lE 8$`̿b9å1QdÕLS!I\#ŵ8L'yWhe]c3@lת'b;v<bpTYhÑ+,8tE|_,()2fU^>&#ww.Oן4k_bHt ȂktE234UPݐ'^Kt50{ chFgLWwpeD ykt[5P3ǔ(ׅ.~4 u&xJڱ+Lٲyn*0 ʂdzm &AqdfG)M#]c1I5Y~#`E>o <΂sx_U <>AYP5:"SNuۈ%oL&h.1"sNdkffViϖ)l;SrLBmknmdHM=DAQ5w) NDSmuz6AYP%ӬW `Hfk3lx,S.ƨ?'8!^//x- }{xn/Gi6AYP59Mʺڞ@<βS_.gǡ^@ חGf˶׻L2Vϰ9]E3X,(mL17д4l!<7e~淪tMN-Z|,( ʺF@Eo6E:]Xʔ[h(f B"AR|M(APΔ[WE.Nve5xCLAf͇5LܮlNUYhr(C?C豯Re]S511z3Y\LڰY}1\@oNUlg8D˰ѭњ'm+?φ7C|&p ʺF'T_4TedHVs .z#[DV l=3H92dI@,_uNSNm@̕"6$ΤMH=놵0'(OW!~MP5ʭ4=F99gsx'(]!%U@P qXfk,w-?K&@lUkcה ʒ/Le90{ ǠFCtr"YaxE{dm@ǣ-V% +);<$B~<&#v~YuAI?$Nb(Fb.pdTX FҾEG&"VŇOTe|shzχyc d*) dF2[4dc' ֞?%(<:: ́e7t''Bf=V zV mǘY'(wI&AY۽90YUM;YR+6?Ʊ<dg[ӟ#՜?H"( ʂܹrBDI)=VtaTƌaǤmqeʂ,(w\™vT&+d[>ڡp8cݨm={5 9,( ʂr)ֲ88J6X[xy#7Еɸ_:2Vptvǹ/8U{kZH,(wS^q&ۙ}\NTel<1wizNlǹɢbπ}4,@,( ʂreȹ,vυd;eyK>.M^;6P \H] +M CƠJDPeAc7kNLsF*qI!t>@E>iʂ,(w>d1@ѱ"68Lptܱ,,,k=nت7)B:a T\kfvZ1MOxتU_TL)eAYP^~-a86 ~1 6i $ʴʩz`dd{-;?˾”HPeA4R5}csɮiXy-(p9I=ei;OVy[ʂ,(fɩμܨ%Qhh}((Wn6TzpYd"&CvO,(O8b T_bg9͚vtfZ(|}5Yf>,(h(4Ī돇8gB ܿ6j 7T6~hqٚF`_fY3l72REj/H|uK&ddSE52V$jh^w3^7zj|Ug8AYPKB q.,vM?c}(}D䈜4x){q;uJ>Y2x4D,{"fq.jeAYEM#1[VkxqFb2&>cY·<ǣZu}m=4((F? 
ʂ62ꃭ-h6^*-=L2V*"$CH$#c3{A3iAYPL6Q$oǥTãs <1o811G6#atmׁ٫w53eAYP^E-(&!%j^ˍR_V59z׻EiZ4,qlf!/gNn>}yӡ }.p,( \߀s\l%ȥ)N%HMεc=gDQU4jAYP1^̀8zXibRʆsCbCޔNQ<'ʂ,(u|Ԏ*]*1E~"R59,;"M[l-iQ{fM-ʔeAYP^t=x>)@YVR7n3\## <-LM1Aӽ#Ҥ_>2g6!MYPEr>U౐!ZmR~֢u 6(ZgO&RM!U}"ĴcXLmd ,Yʂ,({lRGO΍jzp)M#^q΀N3olٙHȄS}fzZʂ,(l/ZF+*P9c*Dr|&;>4"eT[9q/Vb/v,([q5R rd 嶁p޸,.n[mp> it\vw hZ s:k/y>T嶓-r7zsHhL,ƍ wc74f4b #94cAYPkw݂-YiϦn_"Vgay1hܩЖ/L^=>Y@P^#ņr+sŗ/!b2JS^8M9n5E6$ʺa<'by3 J>5rUq+c&塚DM 2By!2iAށ8 !~̚< U[ㆵ9{ 6s~KAM)i$7gdY8pOJrfhLv$NAu7E[wbLSySL*sq>6 '@hWehH(+eAY%q [pʊQ@lh,&ߎm8Vxx,8̭*odEkҔeAyI3B[Ch7ąCN5YvnrjQvlASƖzRǮ̵&tnABEEY~z?sX; 1=w g#79[宁B|(u!i]U+GB'֬Yf|9^!6R𶕏9{WK'ϧ SK;, f?Z2ωKx~!?ԖK&( ^}Wܾ/; j;20 q[l,PmHOY0 qꋑڑMK^)g}ZlR—*˽61) ])&!6l9 \??-[?ʔ ,rdÀ az?W.jk`Asɸ͠>g0@)Aat`pSGeA<߅&|q&2qeI;YS8w`ntOFqj6XG}2fH5Ta&,Ebn.=Ni宂Z7: $tkI- d[Vi@ s 68YKl |m݂gw>|5qlfkʂrM7C_sg҇_졡g'8\ߝ' J[CaYmP+6_d\'dAJ ~ vJJmrY}YGJ-Jpd3)Yl I qt328N-Y&luΩMcɎűO ̳&/tc,( ;vYsRI\Cـ2.m/5H+Rqq6쯃& P;6z0";ZoES$ҋeAyEA/+f(0lEͧWBf"3 kfLaڶ9e5" euF}dmeAYP.|2L7B0AFkɊ+f8BF/ =sxӆlR7OB܏J62jdḞMnB2Sehj]V̈&?QC0٦M`ϱ+e~ ϕ(i9 954cER#GKaH7uo/#,V׺dCt [Ssghʸm86޲\a2=ܳ>֜X3~D(un,(2$d(S=F5co =7[wyx!`#L@v h\/6Ɲ1ѣTeAӡ$Rƀ4z}v DYlxr R tֳŻlk6+b*.Άe]CPLZEf(oizυ\? *[(~y]V4Ƽd9DPeA-lntP8֗CB̯xG<%I8;ױGpu|_eӇ=/CeAYP x! Mog<4<&rRɐ"{X y*\m@n4^K"+TOeA z5؝6f:^;>V/o OLM#O\d`x9$7/eT4p٥!,(w"Z”+RNjdOJ^*M3YLs"io΅ƻSM%Я|?:N[V,( ʂr[@邬2WlՄYfxkIT[1dAHv4eAYPVi*Y`.%E6fy!;b#$`z9^6=XaZ?Qx.#yl>L^/4i_!,(w&8"cg4Ba<ɾѣI~ڇevbgMeAYP)mй8"rqGƊ 0eϏizhnċhI{K= )eAYP3 1۷:r5-l>j zN]ͶWWem.X.^1?>z Lx%_d+g|xd nFeAYPsUu!֖_ )1UU]H:PrrU1̯?3O M}k% ʂ|`tK!,~KBF7 7.C2 eB:Kh@vũ$Lэxj+.x\GV/,( ,$Bd  RN1=u`/&F!SsqrNg =[-M97S{/&oFAYP a{$qb͚5 %{!#4TQ0i}ivsr'cd[+y3A`$@Oǘ0P)&`Rhe&CTowj AYPedHMv!~3!v%q[-u(Қ.M p܏eg0Țs20jNi^0}N`ʖeAYP^pk6#g'^ẙf5e퍂 31̌+ ʂPnus.S2de /,MtĬ[3:2sY+5dQKO"CZV,(7ƪ?YLrŇ[Cn&7o4Me+tQXKYxQ2*{ ~ۅ.i9} I,(d װQ5:k~mBHl("91 bE1)c׻Ss1|SٵcKSeA-2eW$:8105Jz~@=~:8v1 pI`٬+㜌pj%. ޜ?Yu ʂD0kfws ?}GFk5=CF;k2c*#eI;`{^⍋ ʺ,RAɱ[ elT2akCa*!r95!Sߚ.N=? 
>+3eA2e31+8$#7~n<۟bɤE=`"Ώ%&n5p>X;̳2@XPД\t+r聏1t-n)fmIsFXheql¤+( ʂrV_LB8rЈ72 EJx1Q6^KN,x63>YF}A;5AYP>S;M#3N>{xhi&MKopȦ{:!r4*1x61囬FAYPBS5&]:e~G~U\{ͱ9%r[Kc}u(SeA/,qCt]` DSyqŅUTg52HTh3$2> 4,VJ?eA+Y{HL)TyY\f:tNjd߰޲j(o&( ʂrW@o٤8}-]9<=;04]h:W|lVQ AYPںsi;|yЅVXG4' f 2"K>P,( ]lIv@]J 2`i%m7rM$V: _ kheAYPJcd6{hﻜlD)rSS&cǝ.'0Z,( ]<"_' ,8Heȣ=66Os*_ meLѫLW=)SPVE]*䌸Lβ׎f'PN1D3}4b&D'[J {|&m{̈́eAk5VFE66X[ GPaϱJ0R8AYPy$3{K_aesʪTHRD6gO^Mz;U!( ʂG~)ĥC]?T挶9r6Śη2S@Ї96/o2P{3|gZPB b g%1d xϰ6Uc-Ɠ=^eoUcdD$( ʂrB !_%%UǢ^,3 \Y9mļ1(iݶLYPxO,UK|krͷbCԥ()dmCQYPR soDb@,( ʂrCPzt#!k֬YpMyK9-$#6,JeĴqE3#S'Fo^mKe,^P^5AYP|10$a2lX8P+S0Nw %n}f2\!g NNݮVې&Rpy6AYP,0~ ~ޜ3?:rutTE,eAYPZ?՛S#a '浜kneQ,( -ͧde a.2O&( ʂ܆~wJ KBW.|=!~ tϾʋ˥ZDO}!ދ~^?hO\'0EoU!~eq~חB|ZPֿ>{C{s$_g?qT]!FB|-xx.ğDCg}W)/,pBJw{|q&2x $Cj(/( B# B BPV( P(BP([b!<?S,BP^0x B1AY0`X!^-s#gC91_r<*wC)wdBAC!B }bn>Z?*kM@1mKWGuvcSBw=f{,_)e;LUc/Ď2{s +(?c$zc(1 NX*w!q_WP?LQeE%/ aR_K=񏆹aEAYc)=PZ\+->w }eB B!(+ P( AYP(eBP B!(+ BPV(?͜*XakEIENDB`python-xarray-2026.01.0/doc/_static/thumbnails/monthly-means.png0000664000175000017500000124764715136607163024732 0ustar alastairalastairPNG  IHDRt YQ9tEXtSoftwareMatplotlib version3.3.3, https://matplotlib.org/ȗ pHYs  OIDATx\UCe@B !E'EEÆ """";tP:߳ɺ3N@k~O3]^e_ @ @ @ q@ @@ @ !@ @ q@ @ y @ !@ 0=w~JW %<>͛w &58m3Su֙u>cc{ k$  /u>0sMV:,!|gL8>,!@`6N86ymO0|j>ͦ{f5czy}艹;8-Χ@ <$Ͷ&Ox&ፄ>]Y.Ḅ,D~@n&$LvmVMخKXWh؄X$X[B6$d/gM֦p:f=~Z#6G&\`|_nogV_X?pig7lz`3:L {2cSqٹxm ;eo9 %ix 6~;&dˈۄ97K-λbevngsծ&ηP_\Q&Hcb3 ۺх8)ᎄ7u\a qxq>(-uN,<\t8ʶ)t{-:ᤄ7yv Xg ߵmǬcfz)O3a[81׾c--a'{N{ X@}kba\ϯ9}Farȶy0õy<3j;m6Y&6Wkqu߰WuJŹo?V@hW29ߵtssYw{0!m<:&.'ޗG6?7O\=,bv*g)qpxyֵeYL?;|X{=KҿY|ǫOq^vl|[U@@ X_¼o)4jYkw =,4{ nJ[1zeq6l'9myuJ$mnS;糴#-| E$X=zjdÓXC9q~Xw!η)LNvsFϻS!57ga+[h{!P_۽{8&2B@` t -?iVnٜX{_݊!lG:q-dzx2/.,ۗ@n6 v#gnSjR}ͱVmܾm}@"5ZK8s~sgB/WovڣX)Dt<^|ݎ-a@ qKX6|WObݱĹk3|ΖOy\y,o7$WYoĹUួ%/7 i7{BgwgV˟^`@K`~}%{9p=|Gֈo­;ݺqю8l52B@`sfΐQw sN 0v- [1~y-̪UBܗrbOuVz ]Wqӏ-W{?"qY=mZ3x:k|s];ގ ˏ~- FoiCjXqys®K-XpO5qnNΡYOWx}sUu{; g$]֙IFn632~9_` >x;f7wm;{  @ ;q 凬ܫWﶶKPK t^L$=oF*8gXm\M8[nymsކ8Ɯ~Qцz:-.0dGk<ӡ}[E\oכ6l18V{ދMygXʜ-g|{$g#,cq>ҊfVԌsu<`i2n69zO[ 5|wܶ}k,6&|z[x~wص,!@ ޏά(@ 8@ q@@ !@ q@ @ y @ !@ @ 8@ @ y @ @ @ 8@ @ y<@ @ B@ @ <@ @@ @ B@ @ q@ @t l]SZZ?}I`f4ʧG'綄l`zSp'g sO7Jx%a5O3 BHpsuX{!m?y4᭄ w'!aqfI6w%|]8MxO#W%-UѪfݝSYbkhڰKLَ>6z6av/]#,OpH|ކfn IبXASs} _Lڵħso!?pyqݐ^Fn~msz^ݹt'}.g"3oTǓpq†f<f W+|Ͷ]*qꢅy}kCnEoƭ{Hb:֪x7t)K9.܁@/ί1ۧmqަ]pa֭rm=m^wG .v<<yX'qRp[BP\`Cj턵oۦ8_%.d3>'X۶HٽRӎF[bsq^l qLd9ιql![Vk'5ZsQu<~q =goWzR[]6}s[>Ҟ Tko%λ=w qf^m9S/yvK]\d|S\slZjV9SBU2ф:5_X@Cs-;ۦFOs@ qA4T7< 򄯨9_*'@ a疿utos@ qa0Tj!/[hjucJ>DQ"n6L~@ !@ @ q@ @} -PJ+MU[zjAo*HqK5w-u^|Xrsq^x.dP\Ap@i >,P|(.woR|XraW>̅[iŅ9d.¡-pX\8佒*`c\>wq&Džö& :?Ɲs!y>,m0Y 0>,0ؕ Ud~ 03{p{hH êqFws[^!ëqiݸqWlceqC6ڲ~;ڲ}FMfGҎGF!+Nͱy۰A#FUX McFQFSֱc1\cFGuCұ4"wë9/WͺZrh5aGV -|kðA.tոsNg3zŪ1W?o5| [j젥1.KG/h5rs盧Z~yas] cjlsTC眫1?r g=OM%gZ|Y',3fn`L5w.ү1OsY~}uZb|\'`9Sz 쟟sHAK.j/eds47Z>;LJ#o=ggwhԘƻ)6oj<^p*yI獃?W/U\˿WMGݫISMqju5oݻЁդ&=&;mL>,IVkgV&SMzj3GTĿW&]RM|+o o^o0/< Koz3^| gaՓ||qul~Sj#T|[8lz꟏e]]hc2"MγZ.'=="~/_;qڸo;.wVzZ/g;zᄼ/g7tN]LzO}oe}x/emS}۱d_ja[Uo?z눭:=~;g|z笯i"1:RŃx;>|7[ToeA߹z|蹐yaɅCυƇ z.cćpUxyaɅCq!sxK|Xr!S8Bx>,B=>䚸q }\qKkxCϋO\}p]|h~<7{޹}?| =7fN3>,w;>…psxszLe>Lﭸ<gCDž6Ò ssa_Ba\-WuEWo?xc^~Uo=z{=WWo=rk5mo>Pn7ubEG6Mx^xzW&N||7:5g>>r7yzcc=OTSճ/Z=2_=[|c!W?P]r Ln{'TRs[}S/5/r|Ew?]xʍvu5GTzRo^#_Rm+ʉ&Z89ijPvޡ[=z9KSv3}zݪؠf?QЮU#Rt^]M<7Yv!+'S=wLuVW|?Q]GKV\:겕V.juޠ1չGUg_!O/|u\C=fT2˲nZpxuBëY:e! 
ka1 kV~~gZ&ߛW=ώճ 2pgP~; ^=wȏ2xf<χe52xVy>''<[}8ؠwwֿ;]_؝=;}5Ͽ߹'^x%G{OSWꟐ7Ct*&Փ_بzf (?}w)'{5˻kt-#̟:4@xxHK6DRM4u oHSN9 Q/1100z}aHad!>TaqzlCBЖ:5S}$1tzu|ן̇ p\G0…^ QƇ]8s!1Bχ ="0ŇNYP\幐v𡸐HH_r=Aυ\p =z.Աpmx/yRtVrz.Pl 70w\H,5_߼}2}ÅHW'ѥ3Qr  IB9,#6߾?՛O=E5Ψ޾PO+/eE3EkA4#tr bktDaqt>M97qOuصV'hw5j\T?m/"{F"؞up~MpJ"8 q!ǣC$y:Y=[w,D1B\:?W]# u;b>myg->2/#opCg\[@#9mz]U,6_< sOuNý/}Gdz~Yg0z!S:8q·qJK?Yzp#u=@7w{E͏Ͼ N~WUm8aգ`|/oFӇM/cDoFbȀ0cC{|10e  U8!qa?=trcxU* miqDZ-o Gy~dl$Sr\N8ćBE y>o\(+Pp빃)% s!qJ.T'dɅ~Ņ}K^r JBA+O3grdD|0s`y yxYDžY˅.TY(>xͿodu HFOeqhra6[7ۼΈmyZΞL;}'H#d^hx9]z8"G糸GzO꺇˂o:w}ޙ5'sOmLޟ?Tv~8 )u 7;j?^Q3>˪u< |tS?ׁG6xؙ?'FN$yx{Y%i}~џ~={oo}.O%X ǣaxg @Gxnվs,<WcO$ApՇ{CtNq{4H%d#^w'9Q"]495}?t.=}S Q;)5_Yoo<}  qx ɾz/kݮa' ryTXsB2"\BB=̂*/2oq=}aِxu'г 'upR6t)B%G ѓЫ~7O [ OD<fvJc- ߕz.ćZ'>\( Ńq!N"$?q!EχBχ %\(.Ѕ 幇=.N-.%CNP^s:)m$bqNX:؜͵pOYH \Caq* G=gC ՇeʠN';':m6\!=co49#a⟽y'30D2B\1:Dž.P|<[|&iυ^{.T'}TC\;ʨ!3_r:HK.Bχ+AqĹ9`?}Bֳ]?|XlRǫIBgqNgfXzO|BzC|(.TG{ʅ;\yچg(#ss9x s^yD*!lK`3cכs tƻi7gQhF ؆zˣ">sw6w>ZU#{f?yy=k9bei'mxup]\BkBTs-+Waܸ=I@mʕvY# Y_z>G#"^uBI!HS#F%xxO:Px.m1G#д!LNH<ބ#%Og+pvǾsLD?Sy5ﯡ;q·v>{Y΢PZ}Q6wMs5t9^s!q7̅\޹rW~Fgs+1:F)F'&SsCm!\(FϛK' a9L(/ FƝro1ƔW2SPO[ΧB潗Lƛ#ϷrebaG3e@>2"ᖐp Coo@\^&{C2J-2dEu5HWE`Kxkzu'Rn)GXRH q\/!E )ƧR5R9nJ-ao[`E tA WW҅sBMď6P]|(.|: *e| z"~WxUMB[2f#.TT~TvS{Թ鹐)=( xE! = u,x{=\(OBz>dЇBrC}Z/BWܭm.TUu -j&.s|rqa'|Xzͅv}R&;{w$89!D U8+<*-8p<xw=܆}rDJ{N#9=6>Vv8ZKV<Cυ ǰN\'qs%q}ԑc ŜsvJ.T|Ʌw>ϞŅ <S }ۮ:BŢ< 5|(.lSʅ*>{XDžLvBa_sB-rsa'|X5ypa"0EG*cf^-BD^B:A<r,b[ aG#t Cǫ FtO6Ѽ/.m#,Ǝh|-~ߒ{xVey~OPWr[Ni y:@Ӊ015@N:4(Yx,gM2і|N#ꦗִaud5cBF#b**Va92# 'lq|tǣsD7"p tkd=@*b77œP>K5Wnފi+כ~DX ~D[t`^zͧ{>3%ܗ0(aVQY4a{qݾNe)Bs1 Ҝ/Pa+|S2>}ȦrKwm%1N}so*\!K4{oQyT`KϖDay{**<(ɨ-:ޓ#Snmy8y(['eJ u!2J) @tƼ/1Jm3FV/xs&7a5\/ ͽ[ɇ%pŋBćM.TV\h\ssCυ7J8{Q.>B~j,+\(TPa%*BuTzO<Jh{^| q\XF ..T籊j UOyx^t!yx{+Ň y >S ^poo H^{\7\ptssk&A˞u"9 s '}Y3~`6S0vB³BU%/6m%ʁkH6KS9Ő0: Mkӌ.b݆i7*oϐ7{cT<;1ί\^_N؀ "R+K\#81aƐ͓ H٧,,ǵj*( L]s)G2Ve\tK+]h7bAg#c t? 
^U߱C߮\ϰ_ %2W\\Ȓ ي =z/hU촨K3C?|SÇV$ɅS:%~|"pqrFZ"- lD};> .l$dxAq~Mq$]#+1B9x%f8#\ IGp xx 9G,#Y gu#cξD9 Bl+.a>s;Ǥ}0U>KJ6sM4ztBz#w}pV}~͡KcnnEE*4cp;>$u iGOjk5rN;Wغ )q)y@קbq=}asznNmN  ==Wh;œw h)W7?~6pm-t& ެ )$T-BX e^cJ+Ϫ MB814MV{uE0Xa*..qjvv2{Wn|3[Ք./Sv_N*-=C2.}S3(ewoI+] , "yoˍbJ#ΕbS{{|waPpbI9'2Rž%1De&t"wqTuLbW1Sp.Ki|XDž+P|Xp *<|P|(.T* ߧBχ*&]rjw\ 3 w1㫝B_@M\ȼBs_BuR\#Jm)%}x:+K>T/oqBu4Ç Dq,#:XaB N-.,y|狶BL}˅ygy8ϝ0 8GRKy/3ax*$.xi#58樦`F&L} Mh:\^w<䙳cUG/\SMtF xU<|q< U${"s= $ ^#iV3l3x'~:mnyZDZoD6a>眐uD8 9t18&s?xUWYuҁ ;m3^r%S)sMqr}) 5yL_ !y00ȓBa^>|SF Qr&]^UBF+TZՂ)°Sy q%42[\m44@HF+X}񂓫qB^f; Lyp0~f*WFdk_{SZkv~9LAW!dpNyVkcPQBINQP${|JߝeTubYW"qn PΞ\(Qr.7PB] 0w|,Ti]o $}8RZ4dP~h\_%=r\K }uQ)*Pb %uT4RyJ=Y8ttIQM.4ͺM>,̅tn‡S 5N3.ڒ 3/΃ xzDM iGtҮԪN7xF#t}X !UYuF!G(#g.FA7rřGVx[9`[+QN(;}aSÞ%~U#7ǜ\EV(z*#ԳXm*YS:-u!ZBNI_Ob]Æ!49V&W9™z/:Puvfc4<q[EޘJ#csG6;؟fuy(=ƋB=K4Ozn7zBǿGoϿ !5 Wo8yejhGGX@-s#Sŏ$́ ec@$'ػ2F:B59F P9*XTG{hYF;p|ԇW=xz(漿f8+&WȨ .)0,2>A1ej7Ng{UF2|Wpi.ԏ@R@wq xUJ%FֹwACmaRNmNf]<՜k(gD*0pyf *vi5:<\|Ź)݋rl4_·ÅJЌ5J>zőخkpEɅpjbx./ LK.ޣ( \h υhzN|蹰yL)oaST.l1Y a]*="ŇS C{ށ8_mE[r!4~sC{ap'?t<[QJqxPE|evrSU$˼myő^B؆ q!}ɅÇ#.Ts u:ϔraɇez]ʏF(=r,RB=?׆%-PU3z.4apjq9w\[>Dy+.fQ5& *Jը59f-z9S !#x#xi#6WZG9W;]XW'cqlx={O9qyDduё@'<\~YpZnx+sϡie7EdYdS/ m#x5Zn]cgڙPx9x*Ȧ!πYV~9^~ιrey5Kσ@{s!snԡP+\H4R .=,2H1,]5ٗ*: Z-)PWu1Jq^gLzQ!CӣlWƋrto*+{|X'ߣpQ\tD:9ٓ$>\ޯPP{õ4lw,1 3aRpNN_xZn58\ Uw\òކB\Nʒ }Ay.|7oU(Aos!EB֒ ߚqυpО W !/PJ.˅u,✭:,FNsCυMzɅ .z.$t }5>j\p!{ɇ5f]%3[ǶaQ#Z^dZDk!4lBx' \oʮ*턻#YG;m% CG^s"_a7 ٥y>xs듀f]/ 'q#~c9ǟrM:D-g|N+`:>о/W_,H'%E0}'|U{>ܞ8O"2%;#vKTs2$->gTEdSa䅤 ?qk}5` 0 3.2GǘɧWF6LKǨP%T%ڨΫqg2FAyJEzqe2HU4Ua$;B:aּAHթ'nxwxgtDŽfm|lɻf(UfU(E5,̙&qVg,a-~GBPz.QD\Ddo:)?υ*w C$ 5FxּSuSrxϞZ.Ò a\^:.|P|XzPWrĹ: ރsaY(wVz>υIo򡸐yÒ Ň%5@œ"̍ {ˇ̶XK.!W^9sA']"q4@Qq'tw Fh=+s΍svx:?<sGuJ.=uf9$rwx³WN4$YV,{EX"doW9{yXB.pugQNr&Hg0/O:"vDrҁ&\ڟy别uK\{r O6 4q a* 98F;R!> 3Wh\ܐK{ 硃)ys:w:?I|$~8F)CP,õ(SC 4{"aC,̣n~8RjZDcJbI Uax0d8iLlYely%sgzoseB_}r%# ]y:0F%%7!ꬼzFڄP⼌$ gcqD9 o's6\Ȳ:#.n| \(^rs!BaɅ.J{ϕ<ۗ\=uCUܽ8\+Ećeg..TTP\w <(.Mz>j\(yy?:b-,7K O Ub;x#"xD t҈b7x#)7:R :s3Y;݃Ur愲5GӉ@yxʳ&+B D #:zVp? I \dhB:D3j9Ƴ,C% G5yf6v'1dX_ӏ!ad|ku1 NqULe<<[rZ{)'O{gvKӞxYT$qu_Yxbއr"yQ'B}8u8ʷP1+~x(o{Dۃuz_idg.qnEk>R%h2z;{-<:Fs87` .laEd57$=zwPgυ-jdj%BwBB`7='!j:m/v*g : \ ''DovY1G[?צ Mnt"spu\Cj, 80syר@\q.OѤ9>c`#Fái(31)F Uu2jüEc( |bNKk 9 :r>\Ղ07NK/scaRy|w_+BF7L5욌RHե"ҶL;ֽpNNދ.q-[!JRć523ް,G(cUp+X\X tuXJץK.\(>\HB=χSRqaoqB04ƅ yIxQ\ABO)t=trsTa6]T;;B ܯ^YTX8[x˻)#To봞eD`a*d q7pc)\ō0*%XpվeuBBk2C1͇j Qã8-=Lȏ}a(ڪZGsY cts S_X2HpQ^&&F#c{.DucB:qaiJXB35ߖ`R1'ôq+U٤G Xi=Aֈs)]|蹐y!( ).T?WJ.%"CuB+>\(O4pBu|؊8S@iԅ[qOP;OÒ \L= sLLJ 3jDž=8K.Cg q>-pa1߄}F*m LRTp&s:0d)|{T^^ҸTw:P2}3ֽBH=y}A1KQ+ ,sҙiz=FP覆ly~ ޾GC u([zAq> a U B~\s^r!0/\1~ %V \P!|!~U\(^(P%ru\%P]Ʌ:.\ w]߉:N|+s˨ò|nz.i s aɅ8ϡ4.D=O\>_i6D3 Uq'AL7ʈf@!AOX;ؔH F ʇBʭ!,NN89yX$ތc{O;7vx^0qru{Qā<nĴ E8ω)G"֕qQ!xzt瘯ˢF4C_s:&.[iXR:Zl1"!? ! cN!W9200μw@ 1uJo Q<*&PTu?$Z!< q\%eq!<z.0WB vzP9>ȋ:.V˂ =u\M/sx. 
K>\(>\0 sq!d|fq.>4.-"יK.!ΧO.D#)FȺex;lC#َM9똯6 |pk* G; F﮸/w#NB DlE!Qz Ѯi  >4wy%Ptp,̓C)_=_y%Hz+Nn$sE;@ ohs.c9+ȳBpKtj,Ň;Q ~9q-ÇÅ Us-P%ʣ\r*×\(a܊[yՙЊ X/z,ʂ%,ò\s!Ò suu.\@}?~r4zO̿xK.q>=r!D9o%:4{K/8˄k(gE#({)\aJa[nCI׉sC&D؅s8qvEF#)NbpxM9v*,cTn&Ljc6 F8&7< WvDP'}e]`'5SP3V[zB~> ItYSqBB f4ZɅ*Wr:*K.[ %%Ń*UB`)K.˅úh"nsK>]|z.\Ʌop기Np.]C&qފ s8xu3YpSQAyЙgzPQ|!u`ֳ#"4] 7^'U]_Ͼ:]>j.t*Ώ^hX5~k8^o}}QRvU*ރ^.4L)]œ40/-}-~ sػx1q0u8C.SqۡT7Z:j}9\kBLJw .T: Cq7vK\aɅÒ 9xP\z 5<[Ʌ , `J.Sz>lQ$.,K.#Z Ňu\Pe{9O.]z.0z.\(>…[wqW읊\%s8 Q^~MTmgsńuK<>9n4"/;uy݁yDZs\?Ÿ|M8"x#1 tylUs Mr9rUTy3zVOCс {yk?9BÙ88S><@h٦%}{D\+@Cy 'ؐvȠ:5y:5>Uo sL ]9φJ\)N NYtD-+y)[9  R#BU1*Uc::Ʀ7H5n/*ۙWNIJ9z싘g~(!R9o{2|c_9^24ex*CZy0Oije*t|KS[y1Fl y'ć |4$..4>…Їg.X<).xpqaoqKB0bӣ8G W?xU]éFDc^Wݽ' iGsx XD4#Tk~8G">59"'N H#[z!8 \:יrsNysL4Z@fYfUBWǂg#Yp?sSҫ`ý!PyNyMA>8D(PBqrɅÒ Ňu\(>^+Ǿ쬬+;,=V|}Jzq^rCuTjX5P\H Cp!| 8/0WBNy+.+8qXq!тhP+o7Z֧cBpڿgi7zsr=IKkm G "KUoí!ٗ &. ۚ"U{G,+ͳcuôSGF#pՆvh*ʆ^5}Җ"ri o9 sIsQ<}  7b2Ftb#]x0B0X0d0x00-CMSDQydɻzVUeAʼB8}ȼe*hudDLʛ/!ôΣ^]$uy߸̹S\bzN{4H҉GHaLϪHqy-fGy(~dv?8QD@5a?ZsO>a}͇  ŇV]񻓷wRM<^s3\sZJX8BBB ޒ k^昗Utz.s%%'݋s_ x譆[z.%"y,!<6q>|=aɅC .xK.+}ͅJZHB0!hB5B5`aѦ$1qvi~Ag,uPD`"Uu\D)1+!.ϹĪ^\ȯ$~*G>b 1q1|2{r[ " 8wolz"]N^' -++\t^:C ĺ,=G*^:5֯C R/Kzq2D5/c{o Rń&&ݬLlK?:ȧy_Zz+>Oẇ3N̅Cq!Bet*\\H񡸐cw^EIj*.DԊ<=Jj?_5]|.XBE4Ň%P2V筼>{ke޹ }R&ڮsq!\q]-17~F5 9ޝvNy+.Vq\(qP; .LaBȃC !G%G#)֦ } Tsg_@w !xWq tr#[:N_V3 [BYV9fD8U9dNj̵bHE"bَgg> 9Wn*+oXʩWp-S"2"ej/񆱦KskHS 7<- g9+C4um2u6@7T8Pi2{Ky0k7L}j^rx5 %Մt+)pNAQQq8/UU/2 m[Dx(;ruF5c(C ^aɅ=’ŅF[ߤĹPU vP';.T>zYUbsrPB*SqJ.:ā~B_…t9>l[u\'s8h[r!9oB ވsrFcծGD Q Wsl#XFX#)":ooO5œ+g, 0 EenVYs /&c(ENf狆c59::T0/k"KS=ܹ}IMx@'c ,َ8lnyۄ?mN[$a͑ .϶BFNy., n!00ʕ!=L100%O^M%te)M=S Y["U9QF2@KMSv*x:}an^\ыqqpGɅ ,q/#V~,tI\zɅ@k85.DÅ^Y.\8&z.'>¼B x.-"7xK.!V,k0D"O:xt3"eBhq#񸳌Pa1sqn,}7|+P1.* c Q"b@]AQQT+bX1$HR`A ;窹Suk{_SNW; sx czncDgy= @MӀqShS2d!_?/Ǹ.@,vm7@j_5ml3@,瀿}-gQƀxp0*Ϲy )\Lx.p_7;"ssߋp d6ȹDͩ}׸c /)^`= ZBYֶyڳx-wll[!sNY6.mpU*m~w'3JdZy!-:\tBù9"S1;E#:d:1 :vSލlyF?q h7.9=>E7=wk=FS딎Ww-hQsP3\͐L-uHc2`%;AUAgnQ~#qdӬҚUܸ٦v[8yziD'ߊpK~F=Z(jeCI-Y=}Zh9¨biuM yuZh@fs@ϵ0:8EuZ7ˣQ F-zb#C"磴èCi4&8_]{?p>08/y H'ܤ5qi8'rN38 ynމs"H =s ~"3Bc9~ξx=y_>¸L l"I8Sq"9v뷁^M)1/ypoM9y1H \絜'k;։anـ .<_@ȽM7r=y>9] Be E ̝ O[ʫf/"݅ܦ4xUq+_Sk>CIk/wvyiߵOGF/i`n88#Q8 p,=::1*/nL%Fsg91"u@S{.۾.:=<бNiSZWǫZKGEguNiVC9vzk 9pAѥ/ 4wHq:S-%'"F2t?Л^;yMk7p>p8SGi᣿I#whVC\-75q1׼E#i1" QcQ -ɵ,N bu0F-sOGOQ--^be-VFQ MmB]hayN]0Z_=7yӻj!n|PZ/1$ozߎOXpRrkʭډ \4 qq@y/5.#Zψ6R~(7ޛq`*E@Z EX|xxRML.cBu85H>9|6cj㼆4wLMcbCG 3e0ܨ9g;N>8ͬo=8; Op^5?WйWpfs8_rEwK D荞[@ kf]kx檽ߔ6煆p/͞icJhv_yN 瓼8;esGJ+!qt+c3Gcz6n<:u>'q::K浛h/Qqn>zQM3c{橝6H^:Hcj{Lk$rCJi8FK릸'GT8yeѝHQrHq01>'eۅx3J.D?fDKއg0k9v+9sSۅ`MPf_4ߨ?FlszF7k{TmKG}z|n7/()s9e@~WLCSF,㐖ȩSte>uc<{p"p@ ř:1eдvXh8c$jWuyf97zݾO|ϘnZm<3{oU{Cjȑq<5+sFqjۭXgHb^2¹6YQ \y~`s܀8O= g?7݆kFDcs`} \<_#|D1>珿qe@!sy88.M7ZשD|^.|BB1ع{0(5 d}`CGʔ!p^mmLՉS{w9to箳nqzynOp>m0: i՜0':q>:2%v̪>Ψ)N٣CFdq!ƔnGGUYAı{>##7:t_}{$`NNi<'>fpN:ϏNuh=M{3S#GX^FHyLk 9<䞺/91?9 kǁsEh$( Eͣz;%m8*诵ǀl SPTzh {{c3)H<}y BHi7ly7oҀ/ DIܦ;?|~~;ƌ;%Y̝Cn'xϗG 8 ;sN97< |W1`8iG礯A/y x;s߰~q^_kzZ8qpN)lSjUjg%U%888: Ne/ gi¹ӹqzȎKP^:vʙ!5B4×~hxq2^[aX1QkR88zlketHSڭ9\]9"S usx]S',R\!Sly@:fs3":?Gj#8,Fvަ /#gMs[J{rid:ZYqVah!AG\]TQ =EbPi1-jaLi=8aFѣF=i9Z=h!nVHä!Zhzz|Օj!a}y5"J(IoLH!u$cT{8(瀢 (YDZ@X75AKD6y)~[f!> s{}nZ@x ֆkkIo`xNF#e3x>Q{њwsL~61ϳ>v9|N.xΟ|@3ǚ9 <%r=Ή`ߜ+꽉X}hwhι4_ZOlJ'zX&>><ǹr1^MiLq9_.R,X/-A=ԍZ4ץr9l˜:bMzj=ƨQ }~XFPøMKb&AºX9qEaBvpF-DݳYz:zizT m0haz9+wB[M3=7ވ*rInn-G٤ # r1h60UtW7.I6ٗ <_^ t=:?6wqqq?~yԮqqtzomG֔Fx2 ɀx@c#ϢY~><YsI,oWz<68 hj1uy3ٟ86YNo4Ǹ Qԁ N؟՚F7͵㋟s>}5fYA/S8@pFq4s#8&81!'F'qppN 98iF;5 fwU:ʹirYzk71N#lF;9*ڐ)G (c=F"cCin>"ь,M9D!1)DWjw225#1Tq\J ~BꡋSh!U~C$3j \ZkZb \G=̵6:5gD+(V簨ci!: ӿepQQ`cO/+Gp:B@G c-z0Fs-@|ƱQ 
a%.dnȅŘ, sÅC˅a[LB 1hB`SܜB€7sn3ҪN#|L͡l3tL.k՘3B:򥒚Ns~4ȹ5}cx7N^54amt`[mŒiV[`f cԍg3njsJ\BԼaлsҐs非Zγ8͹sUb֯bQ XaW\;& QR<LDŽHc$혢 !:s~(N%b%;CQ2-!X5"qH*;)aHm#2ĈFhMstEױk6朑s{#S^ӜCTⵈ&A"mӌbùGj`9bSS993|U5uQg_{1lΕ 3Ҙ- [1.ʅ8r!'*r!MZ.bBrBlZ\hk5z^Zzr$f5re>ʅʉ AD>T.dUd_.$*r_7>\H\H>T.-œ.\41h(f- ]5E:ڬM\ǣфÔkT\ݬ;16b1kZ/e jлuSFlȶi8W4CM5yIFF10:r6^UW/Vp4Ùsyuuv}cο0i,97ǝD)Vc(՘#BTkH S!jtA&5imLᄀD*& :)D D((GЇ_IK>#Fq_*$y I qL~7qIcmF(- RpvlSD)FuAOcZpx.bB Q4e0Cq@@/s`E׈ Q)R?||5˜-Z>Աd\PiȅB[.-K\HmڸPW.lkr!\ؚQd# :vĚV6N.5!CP}1žW'S][aծqn:1_>ꌃy,)$ ? zss7.yn6ZG3g)2b9V27! XD!@$ p؈hbf"B"u{Fql9)(D)Fq3zȑ MMTJh=ы0i#H`T,YRZg[Ĩ-F(9CtBc`^؝H4 RsGT)Elea??7^/h&J7FgA"R!z˂g? #6޴A! p8>\|H.3q*HBX.1WS NB$r!r!|hEʅ03rBm\,G…nRZ=raiRpȹm ҂%\htp!S>$ù8>s˅Cragr!pčӜ 5rwP`Ɯkj<mŹשcwmwcߘΦo40lۼψ9Loy!pMWႀFiՌxAX6^kV2Қ.kr6i]f8h4=[gjR)Z:;͹B"׌xNݦZ?yK|ϟMm4s~̃1<}Xs.,lpC,€o$~dOZsOb>/vֺ9G=%Vk.U"[!DpQ xr䢪f#7mk MRShF{8 ]ۙ~Xi7"!q {)S擵 :0m"*գW]9Eޮ5"Fq'JRjғ(׸>Y ' $1^.u>Ada07tZz7$Rˇs\0s!ć u^8ʅB\ƅZ\H3mP;+W^ʇj}X.Ԇsʅu/e`YB@vlZ>,4Ɍ|\- cvCr!T 2 k!y9_\8(>Rs^Azs*ӴKK9s|nj-`a֗3+nw/Qo3 Pxu֟ۺs6u͔/uw o ԀQqW0\ ?\ӡ֜k*F  ٜӻI4Mݗ9kv-USwŜ->`(%rQf~(ݜ?l#̯PT.ǐ-K\h9WMB|˅9O9q$\H>\\j$ƅ֜+5ld  Yw>r!P.DJ;Pqa z^/s>T.-œ/6 &99к}~2rʶ4(/;0)gtqF`qrqsާA;&]GٱmuniWlf$>*_ 8lsiYƜߖ7磸p5s#Yi9K"P2FAʜiNH1N^œE; 6 YWj4 ;MHg/t8_HΪŠ۴Β9g7Fs9݉@tF `α`be@HA2=Q4i~VK U|X76CB~0s!;!4Bɇʅʅ7Z.TQ.f.JP$ -MgM1[.l:R>#+`fε(ed |XB jɅLJm c\C!)gr!|4bpz~}ishjZ5f00jT9 fv[>|._l9jЙcNXcׅMW豩gs9d:#nGis8ٶ1Ng4L]gͿFonԔo\4LGȶܤ׎4xߴ6seŘ3忑ޞ fhS~_5"^nD_P07\Q0>&MK# uh!$)&~#…'a*mB,gN5M֐3u]yP9wrNX[\mѲ-rvo[,\%sC.yi$PF=̅j  p|+|Mp|\Asb\MךZ5b"kR\g1ԕΚ9ר9#|v>Ԩ/[Ig]íy!cʹ1\vEc^B"伝F5R --cn#| TT_; X_cV.o~7gV]}EvIZ{pe3F7磌q# !Wl0Z6l΃q!i Ch@hW ZD S;˻d! D]XEk]>nA5*>u_*>XcRͤĩ3l{9Le-u15֜5=s(m1t6PB$穉VnD\ fE)_G%1#Em+H P: _|ļ8#+|mԜ˅a͜'>q!span W]]VB5Bp r!Gyjt%<6nyPp8.u.96>,X^,Qsn r!9 z)|H.ɇʅA!05ʅ((!+ \W.'rp\D1DIjg0tT}lunM4bH"5Vj%(L x'x"݋it3pr4Qɝ@9toa5઀ߧ) Y>)s‡ n >Ĺ qn c&F/'+sNyntOSee+[S ێ뚶lv֝q5vk42 QFv;MjclaO֙Àv%t\G0֐19yL1t%c^2 cιm-޼?ۇY}g3uߜ-=z<\ Xei[Ԝ6ss> *&H9~&G99GA:t`ʞAhiKs #ҜZۙMٶ"BuV H{(:aotq m>~\zLo/zZQVޚޮ J35ZDs^2%As9AǙʉ P닅<7F3!:q/2 NmDpC"]$FլAFsD),R?~̍s12rb"Yr%.pA΅ʇʅq_B/Uiɇ 9|H.O>.dZ;~{ʅʇʅ\\H+q!8z}]\H+qn.EK jK\҂e[ܖK}BÒ9Wn#ra^w "JN>T.sCB.4ʅqʅhwʅ)Ús~Lkhjܘk}UpJS/=[͹xߘs6c|1ƮMl[#%#Mn :|uKЅ[+w|ql]SsFٹjm7ޭ\YGٴkˇܢe ɇ%.ȸf?saiƜ7+ ՜FLqrƅ4xBr!ȇʅ, I\8Z>9_V.fOT9[k\Sk\ЈQWMfJ bnj.ד0SYg\q^Fmu54mkujؙ⮩4l&55$N;Ӡ3 _26\\%.d?.fY66}&tۼoyAOj\q4M]Fٲ]ةh‚T)5HL{m!9}1յ?3|OХgk˰?CQAX)t:S4s n H݄`Mpp}00xIR,QV}⾸O 6zԦٷS+J6cGcfkEi[tqJF59\)ZD!zK܆Atb"C~bw F٨( dCyx@)s` n2 90)`ye.9'*k\draJ_|\Tv$pan~sʅ76LP}9B\EK[.$Z.?…7.l[f5^tƅ43]{;SCb\Pq֟) q* QnA>\8R> !i3r!pH0^йp\_kn]ɠk0ks/]=OsSFDՠۨuF qmFF6}+kg| w֡i48.g]KMz  zg7J /jmW^05Ӎ~/"9kNSnK!L}e20k:{@#AO=hy|x\/qs>^kαMqhXehΔ!Ga6J^,HBB"F!8 ,)qAu> Ao(i!&oqyC!Ci 9wxu]6)؆b€ZƥI ͒ںwl "F *F Rj;jS8hNƼ \>&Q!jcS.*= 1}H?ϚS3#] cN>T.DDP0s!GDҙ Ѹ|\9 Ƀ:~ȇʅm\HClr!W; Y4B9P0 i-v*e)ڴRB6+q6/J\:;b, ʇʅC;bu%.h5˅ƅxć ɇr! 
3~ʅhʅ!;9w.쳾\GosگF΍+ l I*֛3rΆp1jlw\^ӜTtk FK400{:?mYvw1緛wwH9#]Ǭ{ZZFmf}E b {0OA9 sdMFբl (ey5=;\)|f[1ŭF>#?AʎƱ{0rN84O[Gde 8*T-* c2a yU.RB6\yCB\hC n\*qL_G*e %.$ i-niݸ { Ҙ5˅ A9׾Ʌ;s ȅ&*5CBgC˅C!{NkBs\8 snץ:6ZM95zhl29"hbYsS&tD5W9iamz[S;:-9!'jfFa{)s~WcQtӔh:ͩ,z4RݵIRF38So0ȹ6k3皾rfϳT#>>7t}4>{ jYɨc8ju b0xt<#EW hȕRQCBEуIsD0q݂m#!8DXB^q'p0@m_jdⓏEt>fF5Rai2&ݦӵ;;-imdgkĨ9\{c Fud*;i5BĚ[Nymad)5Bٳ``bST E16߷ Sٽsɩ?xQ4COy= ҅.H>M03|\HUB́z[L|\HjImƈ]$-2r!zkCBˇjK\h#˶NʅX6.ԅ^kαBzidҼstڎB1'2\6ӈ CC8^a?rh0vnB'V΅k K\l4ZX0j[͹FFSp6Ơ9gĘs֟Ӝk6G1ҍZ#Иjnkx400یÔܚtnn4j̙tvA3qkFqLn1&qyZ'\5Y綶D`S7*^C2UFʙƎ:E08 %T i;ۚ{k(Hu SmTJo݊Y{&p7&}y~`28%*)15jcp gDQfdA544tP-wO=ԺSunխU{Uk\U飃ĹgG^L9BFGu/Jur}%LEEXث\N5ǣR菌fGug_^s`|ë8'bNIбP}vaL8,tUV@_U(q'8IǭEq #ֺz){ncӼ\zCΘֆ*&a]B*5j}w.[ɟkG_weS׊,uFZ#PdSSYIcR!YY</,!D%,z)!b!QU[!߾q)qŠ# |L9Nj.++BK=zy"{3rB*I{|¾8㷩vȧfB@cQ ) q\K\d5;!=qDO>89s!dZՋ܏8?ON%եk9eC;.:kSBD^CBeυ^.Q?zhʈ*ˏX(^B&*UE,WN66:ʵ_\X',#XбPD:6ca:cXx8XOv8oBb?NlS~,/5-}I%6LJ} ӄg}_sSg#%]K{t~t׳*I*c\"KЈ}oȭ#}6K[EB?׾)ƇV/tqN3ө4-g#S""`Yf5"0lrl8 K%N<#7kGKD:u֕%̐HQVp<"݉iSȷ*48[5]wq^5FREy99!eDHJDJen$BK9}2ؓW5Cvd<}6u?Vc!.g|xXHqx~B(xX߮)W8JcX9M('Bm4%,UED3b0b!"O*|2,бOR:"΅a   5N2/c!'($XHlVT+j}se`-b)s5*X8`JBǫvq^.\9-Or2b\@N HMܹ=7:Gӿtq޹x>\UtoF;WdU5˪Ԉm-V SaճϬߥ޴Mʷ~Zq9hr\e>FIU4RrTٟJ5#%ip BH!F,R]eͣgф$RV#XTS$SDVTHsgEl R!.Uqܚit*!ո:]C[g$ι3 GEq.Bmc)97qkUHUB~3ѫܗ8œI<.xc94:,ɰPؖ~FU 0 qбpy0ps,2+ag'B@murW,eXy" eVaR.< Xx8 e|FCBe&Baa:0ba7>U,.&q׏hBs(x(s =9sHxYϔY_U^9qsѽ'q.&N\~ketN.Quxees72vB"Gk{Dϑwcˆރx%Γ2u'uɗV'g{&UC'pgOfx~oyy窷<8_"2DQ':Uq~%:teDDU6 <G'"x*˄A fL8K NNDEbKB<\<&ygŦT|M\ i̜_pRĴk/-e7.g~Ke]x{xtkWH)$Kc:&p Őwy*]%Dtl#Ώnt:ƅoyV+ ' yKXHx(,oVxXOڹ0w\q,UP= )c0tue{B ~0a4.΅.,k"rPXʞ^-PXsᡰ0;'<4,K+<,aa>px8o |P\%QMW39gk!9sH1lko=z[2}Yэ]rG"8se^~K)s?2)g^rkgPFD .OR<Xpr[ǎ+Wӻ8ʖ78.k,p7@Wc5;X>o[qy26A҇&&R:ay;꣔qƜiLB!d"D| E"I'Ȕƅ-/dOȈHD"JD U齓ʴxr?W~=2q~JFaR$qQ٘ n\>JHYsA-_FH|1"jtL̍,ˣꭌfH|V)CJ63 2 s7:wlify[Gd&BߋJ7E-wj1}v+ilZia ͆ Gc};\W_c!6KdPX uԽ;&¦wG,'G8:;/aбЋc&C€5q"(SXhxX„F,}/aa4Eo>}I#[i+.γxfDʼJKxu>^Og>Jg{ɺtozq*Gdk9[fpI D<[k $Σ@WAxnJ֕MLt]is5Z]tiz>S ^#I[]}@ݏo-n3./qkb|t'|!sVg|v[2G&NG{e:[!Y=Rfq%rɅ* qDiy_."9'y>H!YHD!_b4Z>vY\cU rw7Lray1AN* >*H'M NM*KM~8ODTYpLCN!4 z `>6ec8>;w nLJ9}W/%Ņ;ۊc!\x <4,Lo ::F,9.uR' UX(Xʣ'B+a㡰P"IH=%,@XYBpPxPxXÈ].Cw\s,t޷B]٦,J%ɂ{h%s~w7gKo>$Vcw&pѮb(.-{ݒ@.+c YZ!5\ߦ!UՉw{nW}eHFe=\cD1R_z$!ܖ /ThSz2KN*}WvG\AJ+ZkV#܋+"Z`T ːD^ ~qZqѻ׊cJ~:c!o!8߭  K-0 aBaB:Q1qQxyU]ce NJۚybeݱݳ1[" ` qKe//q.>Wx״!Eޗvh;*Fq{>c*&<{Uq谮c%V\pqqq;c8w9ۯ&zm@N}[G_7tn'q^V*/Any$EaU(dV|3tU?I{&@PTž3ӃxOD(!M+qNvzHdTT.=6* )SG>[+~VRaQb9EHUY E_3^QC>W>)u9;וXѯ&.13 *Ad dQM٤J~^;Ǹ}؊c!|&0(r,ԉJc0bg B \X鄡cDqBeK'( #6*ܟ Uv_Bɰ0b0b7BwsWCBo8 <(}E BzÈizPxcͱ/q-h`ŹJ]<-έ</m}#xgNЫ' ,5RR%W$DO"J*!AU)Agt1vzQ/%L%MdD/d+3OdS.((U)*rzX9!m"~8R.ؠSdQ?Nq$.`α,jz-/rz%{˔qVT%*LNY竈1%eK)Tof2̹dV͐G/Ϩ`?OV:zI{_XqsψntMNKk|XcTF%l%9F' b]|M.eˣ5aI 3rJ |<-nRsBG!C*Wx-}[~_%mv{= yr~C/Pl#랽S_oߋ8ߺ3D8*^oɲo18cz9E)Cפri̹ юJ!C' ) Yd@4Ǘr)fp6Nj8)2(3"BQ(PϔB4!p˺/,DTY)*̙w,cϋ>JyLd-2>Lws9݈J5.b"^_&c!U_9Ie-cxF~ ф*SqF3F"ԓ>1uA;&"J'ZT}( );M_-Ϩ]c\c!Bc&0&$e̅% dhJ(s,n/z}C'r39˂\ʞ{{is7秲q^|{#9{v>s]#s生ߛ܏8SֹuwcnKqp |iȼ}D.^+@<%)_\Qzˈz| (=>W%rN"0%8e2$G`JWE\MG)Zʎ+>xhv~T=?dW}z!R^+!c$ާ 8W\ȥQw(w[L'Bd׺GHS)*kw h}{]!%3EhS 9[27)ӱy<"[D(Qf(KYc}!.ŭ80:;x8-qI*Caa6ǃgr;7]X1xPxP]Ȯ,7KIyY [\3Ջ^^vl%;?3Nz̜:g뤝eǹP)7g/W`U"3.foѝO2.M..X]~uIxqrwun;6ϚpܞUҖf#L9^ea>-BZ DVnYBF2dF\sȏo{PDTY"q󌴋]CH+CӉQnW66pJKy]"fqCr'0B"J Jb'F)F)'},&Dj٠zv3CʮcB"2yWt!Nh|)?h]9dI5""}0'!ݻq~%Ņ/i <f7ʥX(N xw>c07O掅z:jyX^rE,'+%KX8,,TϹpx?oBbɖ8x7F[SH|pů_ Hec` Wg: .ϤU&{>\ɼtݳ2.tSyeBs u߯xT>x8Wa sse0Ϣ{O*bun^#K+wX0[dB$uǫ)!~ϼs(/N/!U\_cIm!܅yTv|!gZq> gUFO:Vet٢% ٘gf>>PHYϖ#e}oz#M09˽32vtGu_dT) _*3%R*e^H6nZyS*qoY BGGhReR (eu{loۊ&{SV$< {I#ybf#΅ #  nc!8'\YsJsC7 w-!mr1k^ҮuYu/w#8&Ź* pizgyq-{fYŭg'佬 
T_,|j3/߁~>ܹ+ƞcO|ǂwEfwWiܦ>)HS;u6a(5c!re |S&@j8T}*2N!D4GaK5.b&p2?U9H/ϥ1C?,2MRDD{!D=s.hm ]qd>MqD={e s4!L1wʞ̜Yd4 xY0ڻYK{y+(.,lpXʛ3:VBބsc!((q^]S,kqy\Wu\^D~]thk/Ehs%gneGߕ1ӰuY5wIl{Ȩ{н^n& .jSY !b> s}q$ݺsw'J. ;@WBtFq~ۗ?ҹ]oB.kA_}x lTNWĜfl+.ֳE]!S)ce+S7HdNYpD*CjD;qDQ2DTy&ev<ʸs,S1mRGL׶N G f;)hD5RY'is9CD_.RK~z}3I~5!&(J?֕*1.;80va!`arxXB\ ɘ  \M"U^B"bP'D qkÈaXI;zyBcNhl[*oXyt0bбpM lBzC¾3Ո>Bŭ6zK`x=eԭ=s=.sƈU5EKKP)kg]tq$X*kVg]1Wf}EcxFq+MeEr7oSl( \; s^Vۉ t=tH$J޻FY?z҉'I"]#V"C?QO8_{aYPFE !Ĕ )FH]ge"J%UN6؝5WB ,Or i,aD4:|ˏ_9&c:LjSenC>n+.-8jsJrzG"> XWU%%DF\SEvexdgeDc)W%!r&29XEF!ŻKR>V 4 댇B8NxX(r]XxX(cZzJXǣŞqB_B.K;J刅-9,$BUi]I;::$tBca2ל -r27CS_ۙnFb^^hR Z{>;e9^%ʨz:b\ʜ93ֱ7K[}7馔c c\{F[Z! X_q>Rꂑ{k0} c'v,wՏ8uFd1~ [q3uL*L5M-D<$QI.H\[re AD-BC\xIp%2ѸH"Bҩ@ues.(Kr7iԑD "ʱ|{<-9˄ʼnzȩN(~\2 b.̣jb'Q.e>GN }gbLϭď`'>_ؾ.,x: K!B玅0bбP%WPM92)Xߧz)Da^`V^93=82ų}^4C\[q>`4 ؆=a!,vUM8cXWԊ9LF#ZQܭfTNּ y2|ˣT.yI[/8U&d DT<2O#Ӭ&+/s5}&/36s*k!F^'+vq&,!f%|_۳~ օj9GoX6t<,aZz { uݱˈÈ?x(  9"H97N流x}folg}j+kay4U}Kbݟdz:ы8ʦ߫|s c7mۊ CXS֙g'5)ŭ8g#](kDCY;2PBPee q7<QedB3̕q#1G+ l|\W?9^Q?(AZ{΂`;TDbIPVDR՝!TxV/Zqva6hq,DG,Bě0bб+ %Թp, zzÈ͈n (aLcuB3F,CBaaC!+PcfL?/X܋8_x8XXq]"yW{vb)s.1|x_ftca̭.kK{.mW_kς=.neŢeUzzH0Usrww8܅tt/e[x&K{<=#snEBJ%r_3ēL : dD  4vG3|]}r#/=WFGe"9{uxH噺,!aPfI"^ M3n:4 Ml:+}o)Ms\Xo弌" 0;JAab5$BʾzWߥV76eξfToj=:TE[qާNs,w'<,`p2,w߃:"؅ U0NƊ!B"bW,s5,|B,\.,r1<4,LAaXj#&<4,쫬qހD/||q =R :d5FM5{+gdVʚ;ıܦQ2IGA2w[eZR/NV d c~ˈL-ie*;ObHdf 7-BH-C6M.2׼{ζƸ7p<;{ wlaaWfҰ0 &ɰnaB>;rбPs d F,TO (bcsap0bDz Kxs7 ]X0xX~k'$X7w.f2KM6ΪvphPu".J9nr"M1SϥڕIBT*38W6EJy^R9`V18'#KBks"BDȽ3\k[W1k|Nl7eCA0`a S5Dܿ2BS;PtgFWvCa8ͧW,x(,"Xł F87,LYsc,!%ĬM#K'mYW8ϥ.K"]">b׶ dOWvS4mȆHSY#z*P!ADx0-圞5]*t!R %׹rJh?e8T4f dJ88W1SGč(a v~]' ^W_G Y\~[+ Hi,c!-> 'C  CBi:i^Xqg XT0sa!\xXy2t,$<4, gGXiC#CO鶸0gDBbWiN 7spƙ]ޝ/:S)s=r-g̽K N q(~Ά/6qNJNKB=riq>nxg~Yu8oy3!MbjݹePCmcyG'w[f RB>;#.V"0HGEgvSj.ǰRJ8!;5Ł8Lۿ5l77*$~Kˌ!8NABA2;J/Ty$OF*ט4marqb<yVzA<. t^:^W98VkeUm#ʕ);Ӗ`;[.NEW;oMTe4e(K|xM3T0GUV@_=,z_t?b7M*ΫU[*.m8=gVqAgGBF|6s>s␍i_56h-&ĤH(fHi<p7Hqäz|-R9CUqB !rN ̏aM`<\Ie\u0,T)XPY$U1;d+Q`Ig_ΐY,;aVߦа q:/c!ecpO^󈅚W"=b^:4{= t?9=+s8Fniچ<^M D&PVo+ |3Hwqg'tb|}oEo]řv}W"sp9qTE)}DȖ[n"@,#ef[{Mē1BOJ4)d!d\PIh ו1)u3#M)eK&nNLv' <6SY)nRY*ȩzEK\){ıʌr@OoNK7T}ֿ2RT͖΢.wq~kN4Xw-Zq>Sx8H,LKq'13v]X9#N^X c uR2bOp,q#rYE Uj_‰0bTĹp>Yְ =cpX؈b3 iن=¼͢K+Ӟ8' Źq>"cDD;IyL ӚY*,z1AdNeDua?Pe*SG,}"KI#|n.Q\"K%b^)Hpce|$HᄔY5L[B*Jc)3*3y/IJSfÁsĐaϫCazʅE,#zIc!8m.,a!0i8,o7r,#| >z%,d 83F< Fe<䚶8XG>Ĺeu«lY5B|},MW:%g߅+HȠ{2w"rmvDyu*sg3V.y:2~?eYN..Ue|\q}i*A v6^vf1{ɜ οAA/I.U@o<\:;7 !<;MH)DM7q1Z|tZuX"]$E 0O}dTÿ_dBJ%"3%EJ1hiXzlHDɨtLqzɳ'A??w)ƨU1o-y.IJߊa-iʞ󌇎sN^&,tքxHܱPnDBX eK#.y"yxK g8vĹg'uyȶvnb_7{&щFT#"7K뿸q} D]M}W[j7/c;(+JusG)c4&Ի e8WyN̵8_?2Nuy?LeL{.=c۬W8f^3Bq;JDV>RaѓND1BRHETLD*%ӗ| A"I'kDR;C*yΩ,W酌NDlEXEVEDT e{3蜰Yww:\sh9̀850q7<b?Њ!^yU б1P %B9XCOM0#  '::  9x8p5b!ъf7XKKSĩ2Q;?]1%.a.qǐY*sW ]fuSY# M _z^I d|2痟tO.E?x=)m03~x_(<vU|ʀG :X)G`9C( INl!RqQVJS%q8M륌D,Hy|>µ˕o BJ̺8ƛG |p`!Ca!ZXBa a!UJaX(l0"TZ"@XHx Ys.q2 ;' TVNL%k>RgOܴT%\]c8ok+~y_』Qjdu\wv&L%߾\攺W}]#ӪuD>۾_2kx8<9" ""5±Q}莅0`бPb]xخZ7`!ъB7JL/,Zn.s]DWI2CߜʜKqV?]ro1LAvN.Wq:^wA1{{ =ZM6ǠםJ:Q \^0W_Fpkʭc\hsBS;Ѯ9z{1쏵|Xp`!}Ԙ!>#*cPYͱpPX(<> OV!`{D5k&1(?׫;^y[WO:Qkϧ,7@W|Vs/:[qފ):ȅ;NQlE>!Pm!N&HT%2*R/Q0ԅuy &}Xieڊ!ù3n:È*CXƩ/]Xix8y8nnY``KPjJ'.5ru2V{8ȏi*iv!k<W_tåhy+ΧHH1fқ)lQ^ ̐J5D@5sWO&Hk!e1pĢ|hq;W۽, dͅg #RF刅ޣ+w<xا8F,$>yV8@/sĵF1G# {-7ws׌s.#'r\L| N_{QgMˊ|8"!%[G /"ǡdѝXjK\=?G zrNzE2}8λ:\e?ޣÞ9(8s.! 
DHEV9Bu=G޳ΦgKuc݊a -Bʵ%,T8r;& e),TMN펅p;iXH,}ʣ[qފ)-D1=i0ר5#uWɤ#.qh'[H59½5Yk^^Yp{Zq+:oV|Bzy{7 qۅ,"!uˡ]B @r1BJ5'x&.dB fEN{i8?wQe?߯ hxXH9бPxX~X(ފ9{ĢU2,!dꏔ!2~nyUAFWn88d4һG2b/sq^߭-!7 È-6c]SB3Xι8k<\XX YΠE譮m+#.7K#ql\ <oM!u[5//}N|nX|5֮,ƧuL`dOB|Uvp!]7}~V9"fCE0Ua\2u7K{F 2EALy mKF' w;lxtm^|9y9֏[q>x8KX(q.,9]x(_ ]Gᎅ=XZ, ?B¾vlBbVs/ %HylU76̹guѮ9b\\}d5n !IVmtk؎$,uR?ۅ0ҭq>ruz-yX:'*v*!<EÜZ: r]dB:# ԏuߧ1kсXn~4{,y?z#92}_R %*ΏX~~~}rϭb%YBcNF &, BaXI|BW,fzLYE9X|cB'8 K{yUO{V 4ͷ~e<$X2 g ^]""l!*c!J8'EM%lq_8?ow.\{h.'t}%qw:}4/W|NޓWq~^!m`L⡰wC &,t'w@aX8\ Å^xX,${.]uU8a!m7u(z E=UcU'!n^.܉oTIgǝ]D4)e(AЈۇB{3E3'Ksf("VܡM")YQs 8?~V5[/uL,BKXxcBBo3L8  V ;< KbxMy8WB( K3WX8_pc!BV%*m*)+sƪIk9rr2Ζda>q>ޫ{|]>!B]]gYpUl0-u2;ArJd~=8Mg*.:[ K{}W*S?#!xq6Q'-^FN$\ʝ1K5䥎. =1 I\#\sP)!|S[!!\d^||[^/?qug]N[9\xPxX xx&&a~vq.bQE`\rnW?3B]vs;!Ǘ$4K3]~y3ZƓ#VA\?MNwMk8]'k+"bY}%X[t>o;UM&/;3[瓎_PEKHg 3:b &<ࡰbÌBMxXxXC~Pc>0ba=׈kA`\b/(Wo'jqlLK]Rr]^bXot})gh~"eU5;ISy{wUr7:sxD]1+,O-R)-OBJ};J7&9i,ounɕ8&41S %'2`W1 "I@6!d} NDdҹs:?} ??|NOD92"FϓϘL>c>;hKF{'$Jl7{ɜ_a2z|{27WqV|v-gxVxXf&,$zȜ:5.&n2-\XIDu\}%s-q>sn#B%1zte)uQ1 5e9,+r}@ѫ'lhp8λ%>ua~PK' ttn+[r-;i*Iv2CֈlRFp/RJ10f)%3F"wV#s-1k#OBdxoR)b2JV(O/~)i=<}%rxH)d]Id <.X,q~I{s).[v𜖵W!y{i.ߛA zCB SUaB DŽ%,TOB"b!   ;c! |n-&w鹍XHs>es %=k$Ugj4#&WJc_2ʜN)ƛL1K%.ƽd]=~yJ\'WGI;1gߡ.k),Ik?nkώNtQYM'#B$,1Isb3!e}d,0ߒB Ө/Ԝ[eʇ}A @ 5:B[D@+u^S''r-$Q J=w:{)t#Ŕ-GO֣b\z{*cO!RxI^?o#fg9w<~wj б:uCW ir<{;> %Y %%%׏tPtqruZRG)#,Qt[R,.^)&nKc]SVb|;?՛8m0pnɞ URؾ?_lxꙣx>D>wh"-r~Hl(AL!@!Ĺ2%2Et"-þ AB zɦ($t7~ÛMd-d|Le>)]Jl癝!Ԓ'JKqe_uҠfqUy>\{O9umʢX88;%ƞ{(qxX(ۗo>Vڼ}\adO{Lº*.GWY81stS%M̐R>H)2PJ>!VQI$d{.,qDV.N%VBHz/@f Ʉxa# !eB Y%,}\Br1Bp҄@(-q'GM˾ԓ8^9ͅ90R+vH>9>dZ/ΏޮKkq0rV %<4,L Èt9d:3.xbaG{y9{21e="]YxeK(R {ĹZ֙^ G;ŻW.{+'7Y9{,q>#X:iz^"Vg8c\pZ_2r,D  e F,D$rwaaCpOo.,]x(,tqo 1.,Go֍XHq~Ioy 7:@+.7(5FW \2rpn9+yi_gt̯]- 2,}3=v=e-)ӣG×ګœj圔fRΩMU:1e?0'ODv'&($se(đ^qz&)U+Dix>>G c[ǜ+|n8",dR.auШ:rwv um>C=(ίu] ]{`dO[?~[om^{HgJ1Je4BR !%sqŠ#묑\<_AZI'%95rN6)d/"_wnGTȩr܈n ׏8?ccgTϗhpxs.S*<8q΢s=Hndλe4KD4u>W#4V^K; 룿59RL_L0w⽑5RK:,0HrǁGb2NlH0-@nq~LROÈ􏗰!lBCx8X  (c!(ەLΏwto[_yc8Mr#B8)터5{;#!485{@*ӄPNF&L ֝N^Lo:$fM?I@8!\~qy=<1<t<{4Z?2skaB.; $αㄇmCa[c! #va++qF,$Ҋ ⼗o.Ӷ.DhD%-0 9$G$DMD"ٮʙ&]|^ЉWsڑb|tV|9 JWDobI)j $G;Ai9eJ)dBH_%$!T_:UdaLeK+pV,"LK#,[q-?IT^O+[±pyBpPX(XBrc!"]xXH]xXVxXȨ. ՊVעitz,EHQ9'DqNI'"RH'œ`A\Ջ9Ost7Y 7~"3Yp!lj60Uկ>%BF) r?^ si^"]\Vx8#xP=b!D,Ty #F,TPXuc!' rXc!B~yOYVX8e瞹uqc5nMū4bm?p֮{UPu\lu?[K%}N hB)sG_rm..hy+·tAH\"ʆQ (儔B4~T9((>.X,' r AT焓0Qs9^=35YPexN>oJ!Sf)՞8opFV :{c"GOwWz,ݰֻV|!,&<!2 Qe)ch!B##"Y2}C4Ýl(-z"d s#ex~4կIo'Y'5RBڏ8wwNR}'Zq`NVF,F,/>0b!sc!X&< ббб0/BKl.' d5,A`xxƢDKq/hy   G,Dg<"Èбgz`c!ZxXP:R.b M7v~qbqN6||y ٺ3D8-!bfb9"ǹNRD:Sֈ2IN7Ć ~.s)2W|ƈΦFޓGcY?o Z<,DP KX86b!CBeЅB0PxXy9xK^ '决%*fIR憞2s6^v8GKLʊN=s9ϳޗ+[bg_yK+]dp1;UM*\8*h*.'U< QRZl/'SX\ͧԘ ʤ+3\Ǥ,ldDrO<ϔ矲O>_2tvPV_:=qZq`PxX%,$".sʠ+KER)x'kC"sDV AY?YnVp¢_8=V~W+[<( KX8lJX(I_tZ\xJ&AΪc?Z#y/}%.s ;/F,8:g\oH`1*cpOꌹ_Y1Fr*n~9% *Or-[T9 |,$_2?QR9fII%DTY!PS}OzSLe^\JNGxjS}\*6dz,u K mpn񰄅B s   gb܉zx||X¹[I0R8lz|ORVq(I)=y%%Sp /GaP&h܈/[9ۊ_q*ɣΑ#MdludkdZvWtڞvMwQf TO{ TĹɉ&"n'2m2B6oRډY#1\cPPƪrVy/Z8wxx\,-bFϊN-j:N&΅3Kzڝӄq(|(H/$)AlPIHNsv&vl՞oΦ0w_{=Za߿g=;5\B oxhpy" { v&pi" )}0qE=Ɣu;9,'w<:ᑯq5G :> n6*<׫ԯ#e>#sn1PK8Zբ91 C`q;6rf>R9aȡns OlƒΔNx;cD2H 3mu %K)hxdI9_9:6*~xVW d݆"玅hHySWx3bqnG"ίO?ŹC#}wB~ ̛}bpdH' ȁu 9n5^C:&#8t"HZcב st娿#-7 :> 3@JAPgJFH4O|_sEz~f8w,8GX,Dc3pB֔[,2]9P#Z,$*O,tq޹8!&,D8i펇c.KX7xh4Cn`aIaBW,9xBCb!oB)b⡋ޱy#"{;703qcA|R!E}8ŮU#:&.{rsht3!>@O)Q| ғx^yʿ4+y?Z8wq>4Ү-A 1Q4@[pGPE3RR a[qHJ:;X;NIHT͑I*&xτωBr ܮIv8I/s;:mY%l&.mCqS!NyF>Dun8P-a;_كSXHKB=HLѹYoGQRӏ8u  QFJ,IZ0:MxWR֍ f%1Fx9g\ܭtڭ} LypSSWb6P8!  xXZ.R,CB'*s( -*.X؉8zX;#· sC#_dk9\t֯ɜs8iNy+/9%ϑZttjzX."? 
uFaZŹ88cE?}_y?:FDEwĠC'$d$  GY/2 Ge "͊H,A邫8k<iliDq,>o<3]"i%ҥv"O\hi\8uq>xhCC%,\˥yj:D>s=P}*>Źq&}Q4@*st,FbO/A%)H&"+$B "(#k"Hd6HFBte]6c#vkg$)'qp8"jnO\1%5s .G&?zZhOI率cޚ)BBe[,NG"-b[BBӇQ'8wq>dD4E x湛#[!JQqbH)'8F.Z+GajsNJq.p4b!ŹBfY,čR  ycb!8P7%!p. uub!pxHB# 9-η k8w,4\k]cTK}%TwssܞvNuO)d&]kLHOtX4Dz_ymRڋW?KFbc|Y}c8wq>/ E]^- 1!0Ls "5G HDQ:H m :FD t`:1 xۍBHSAgavF`2](a?o0pnv%6 \#@ATIFIJG w|v.c/3lssápX&"xh"%,DX,[ܲXQ lS,$ ?B,Bf [,T<~A XC: kmܱ9m)8֏#($](楫{ lsF:46V#ؒ`9lb#<W;;8.'<7g>8v7â+ Al"}f nc'8wq>Hc4Zx]ks9I(H)Gi9 k psD #l`ܺJ p|؝݊t:t:g*;GAc?n˸]lw|>l2B`IF9?^&>k1Q9;8F率c!p8X,"x,ab!l8BbPܺq1h yR8P`BCBRÎ~ ;qބ玅kB(5yN*k{zOEG;"Y͕%TwH.džp%GF8q(y#7tZ0z$yn:c|Ź JJUs"p31)H_6RtEmO1_2$1+OV:ѨsZuHJA8k.vfc#O:E8 n}8)H*mkk=C~YuqXCl[oo]|ZU78q6NsñB-X:bM%B;rtBCBA'{;Eij5B"(BI)"Q!QS4Q$S6B" 'AJ3sqor4svHe<qY#=w}#Ώ 6NŹb!Y,{nPoV*-d %,{%,$*XH:R-gGל9k9,((켮[Rֳ0O69ϪwaplLg0 a([V|t?I߻:mߑ8kR;B!=.pniC!N ]^vuq4n4E @Fć$bGuHHNIxN"Nx9ĶRM*a>/BLXg©8k)IjG%b;)^Wߚ',A|-uEh񿇇έ=xNwgU?3G//'6NscaxqB+b!8t-94xb!k b!#*%r{B`$[(.<b}EsJK$ε9`cש8?b!fY&/1+Ć"O(gtKB޿󥉔2͑B,_ )H]!P8L6MRG x>Id&q΀cOAMXtw69<ǃx{1S=|^.~_>y㻠{qA9]'=bJ{$G/YĹs8z率#NC",R[,^B!;+",b!o6-****gޘWBN!*HCBCyIv"7 k8w,މcvvbPGW"> tm ǮN\ǚotF|Jς[sJ8Ylu:tQ{NN84+]yҨAanyLm  sI:7SDp_1;cqW{մ**m18Ǥ<.ΗFBtgodD Ku:F&I˄m.R4 v6clĎxYEJ'9"D7"$Fdz:0 r @Hﲑ9R#d%&(] 1dc>s8VN="1UB IRsR;u>MrrN093T=C05nk.9+/֩8zR!$45OX-Jn99$tLoes];v#CM?w{[LF-ޗ.0;l-9L9b!^q qX^ c8HP,xH,dz|6B!'*aB`CB}+ԁx8X؉88&,Dz玅#wsNל""p~s0(}*s֩Cn)e]HuZSz"yfS9J9u_:''=;IҀ\Dzi_8WQMpqo:c˃yH<;C_)+*)B|= uq4REVyAPcՒ3Ab B " RG)~v,# I~6~^vlg&on2V;#MWV4EUj"TNWw߹RT8Dt~}j*Ziuqx!* !e(b!v NT,$VX,XH7b!{wX2]›x1jZsP,5- qÂxX !P3  kxXl ^ Sc wڈ\;.^J9LSkZ;źsӶ*L=u.9s$)k<4CDk9OOzvu9ycΎN›s9׼Qq57 tmgŹ~Z5OЯ:cs츦ւ<אDdň]"舛 J$ᘅE2wċ G: t1nJJڐ #*^׺sRCX$V~>9S2Y} jŷ6"!Z%k.Ѭ*|Qx 5#|Wbqs.B6W,uBuǿɄHf:BrBv^XȑlBWtۉԮbf -FnV**x!AS,d)***B ZRqMmBĪsqX='bOT먪#KQnŸnS,ƥNfzX{nr:yZ٩:tvMv~zvЙYcNnekPQ<6^f˛qZkn;x&}"+U7pL|1/ 1;5vcC(!Ip!vR q\F4 A aGx@\/#t8@ZٱX:7~ V}Lo')e8% L$)8g 'I)gHg$QƒrlNqN̖`3Xg]Ucv Rpft) ~"}|n}ϛ^q7];q s?دM7޼XHb!S--2b!pNg+rloR*Z)AFrv&c L 5szu=Wo,ptb!y:QЧ~;fX,@X!xBqlgްd@\۞7*YN,xHBp)h9;\Nf&P& XXa/: ,ܱptVɅ-5ιLacô~-ucb-:k˙QM/*pgM:ilTgcԢÞj֙rem 9WQNLc8u˹|"}E16#F;'uSͿߒκpA~PVZ;#)96 1@@JY{Ih} rLo!Նq$ZNĔœd b"]@0Ul+Tqn%: ;1"qµy@G}8_Utpt`B<焇LoDZ ,2b!pxh5,)aެ$b!No(ZXiVkZ;mnV&,%<8_/&,Dιc(Mi*ٙ]z0Wqδ f{1lQ#]s.h:!qb'1"JF q$Y$]b W$#q}a1( t% Le&_SխS:N_%& \;N$3^)1dB=Di^}W〯8w<'}8X1<,`!b!Y,pBސX-R+) - `!k{Zt%3%,2q>eǩXXi玅'ЇrUtS~M{fw|$F!޺8>6x5VYSZqƯ4w֢v=Gc{LiGy?E[.7(jM)n^8e{ )V.Y16(.4IE=cc GDLjskQgIiFL Is]%FZNLuWn#t>i#%̙l8NiS8;w&S"|m}uKvC?Źa k)Ԋ!,r6BBBvAX#,XW,,ᡦwunPhZx8qNMXpqX8ZB.M_Q:NSu&vj9/պB6מ'\E1LoOY3_;bq^j?܈hcL999w+{Yٻ_b.. J-LNQ@8!)F:$gDRv4#BJJBFDd0"G"ҭ9x?ɔVgFE/vup6]~tT LՉ83];-&H@6İ;2ݓ d|5A:'1H ݊t$t ~i/puYI!iWqQΝn<9DHOK|_vK8w<s,b!0 xhl&Xul"GP? x$vqn4@X]sa kx,ĞCߢ +8w,cqH ]{tua:樲$ȵ3sgntsv8!I&WJ!۲Xwމ8;/Q1y.]͞]#)G4AD'FφI dzL%)IG:+cp -tQG9d;[!%*̵2v _gPr۵xew$,ҸXN^4bW':‡XjYS;P,fxH$*&*ʎaB y {C`a?nE#"VXoy?ca﬒ȦX0gCHfsi m7u&q{{xlj6,=s湫8~5BA?mϽS?,s羆EL8;(0M6Ib$᜕p!|[M}9KK7%J9i}eJ]#Zt%>N 4ˣQsryg"zvB^;|{\NC(mD C k8!-,  n'!<Ƞ}?w΍RSEX!cmO9:늱EZZ{X{J\m!rq=! 
ܜunb!v_W,lCx2Xwl7-|6,$0~炅qk|ͭlBv& {o;v|dzM4ִ?eөPO.{UMݤNCs[ϥ)Qk η<֟ŹREwEřxdz# c/H:H1?lDrVR"#k~4cDB$>/v>KHu/;kZ5ƀ;qHh$9'63Nbz88Kn qPھ!Wb!g yRB: m;PPpViI<,&**ZqNך,134Q#TFៜ\;]EmsUkv:ꎗs>[-?qg1&.}`{/HtuMd&*aDytsd&4+QǑmh v-v$F gtX)3DOPNt')%#:g8ē}b tú3K95X0=@Fuqk4Ĺb!%,`XX77 iT;B,Bbam$*ְPŹ`!8_}˭:Saб7!=ךǶc,]EAFotΓkN VM׮Ysy8O\Zg^z s=gZXژfjꙷc]wqװh2u.Vטv3#%j ǓAR׈a#>ޗ:(;t43y5SJe16Bzʖ{fH qcu"7?~F7c.8=CCU}<4XHjS,2b!źB KEŽ0a!n8PЎq5,%[R+\ v +H!QC>X,$Z,d4 )-HW,TRqn7Ԙ|kn[|ӓWc\852~ DJA,$ 6AHM!)i6J+l̄Cm m]9dT:H%HilPGB 靜MdN997c/Tc4У!M񅪞p4m"aa  -  U+*Z,$Z,TǴO0Ź Jp/0=!d%:^$aUeQBJ$5RKu9^'!{X/$5Ŕ")%!E &\nτT]#GE;(?v$O| )pX{Tδk|Ov/Z^Cz+b!1bXȦq *+Ĺb!XBCEb1*b`xԛw"W ˾3qXk')z(5y3T7~&_wlOs᫦UpVQ5H^sqkR%$>vy~5 wx$ $Nz$ZSkh<$͘GcR<'m$tanuM)γcĔv6C')Re٩ヰ?9FG^pi8w<+,7]sBaB N;b![,H,$)b[Źb!f>b!5`!P0~vb!oTZ,D9`a/!A7a!╓sqX(58gXMǽ4RL]cM(tgFt l-{rs$qoDf]4u9NzKDYkuG__m8bptdXOyw/1v~$Ο !~b;&!%IB֒3I@PA@z)ݞI*)9EI"z{kD2B<7)8{ScK{?)|z̻/8w>DyIqnDzm긊q۠NE:*؇s‚WW]ſqWW|bLڢcqB((Z--r!L]O5j#;}Cg k}Msb1MQSH>Q[Hgd$Uj-s=eC$uIKbN\g󒌒|j(DՒj]=>/'c628`);=wcc񰿰"xhц + %<,e ކg -**ΞxkBWvL/uSךrpFa|~5zroU`[^rmm9 |ɥ{ uҭX_dk@ﻲz)r#q/kT"v)D"5#!Հ*% Li̚U)%asg(i4MK<~.|G D BUs*!Y݉8_US?~{16ל;A`:5,M jb!FXȱi<,jZ{)vsu4W9kY^RjM׆rMyɽfy+/(w?HA'ˡ7xx7~ q!_&UXn]8G|ğS{n{]=gT[~k169 K8wKb9P0潎[mB+V]:ŹcaҌn`WݦkP=s>TJ_hP7W˗n(KEvߦ{S`pW?}~.95bBcc*'"R'sI%`:)Geq4Ff]),Ew8j=%]C]"FbMQ:B >gJNp^\,aEsDīn|sZ7W]\vex&8wqkĄ,NyE-bI2F'\ )1x,/̳lO86uX#IbZ"Vsymt1ǵiZ*94)(sRQߟ|}Nv8blv%. J g !zoBXXBWpCWz 9W,lB֔[,d b޸|ssa iBs~؊ ʍJNtIu{fk#7{=LtjNGS|+&>PA8;Vb,ƽ]8D#HhU& $dlkPlGwH}:*h{L6e56ZqFFsxH=Od 0ا.:R;AH B 29D7ujo*C/vqxWXKXH7\֊Xh-kN5zoM`sbasxYT "%,]XqN<X؉8_im*. {vۊRqͶk^b+!kSKܺϺ*h#q-7ۦw'K&¼N{}ZeŹnG=n @876cZ9")eֳDN49H&Stu8u.@$J8r؇Aht@o`^oj7c˃/rqx؟xhP(*OmXDaWb⡽Yxht^C-ц9o&]Wls羺BH4 dnPxNqy#tR-z,ZɟT]v#ܵG6ںiYyhJM> Eh'eHF)ߴjPmŹa_"j_* mXHߤӗ9k t$5]Yq>XX>x?A\NigU+b\8q$tPH"@n #lj%2E6#1ZLsIXURn:v~ow5@1D> So>xNf~JF) (՛++=/o'0T/U]ms:sR58 JH@0g8 Xy|1#!uIe*9$dzS&3}_hT(R$xd)>w!1z]oI;?p}1v率}mXDŽ 5,Lba-HPGPlCC* -faxH,}®X "0pxX}竮U#"^sZ:?\ékl-k%`›Pkp'R:{7w:ZnӋvvqWWy Qq(ƪЁQ1Rr Rg݁s]6T#H6 !Nluץq%ڙo1=׍`Gv|/L_3D2װ/5>z[V^yP;X늱ޟqqx؟`aRa0Xށy e F]SlyP}}8X\<W,w9(ڲ ^q玅})Ku/wh5\k\ܦǗ)ʙ:o9\sy"`X9C8S=u6q1V@y钳 {vsJ5Lojy} 펫bd\85B= XFLF,*]x0w壎L\Ĥ9ts$Ac 4bC#\j\N+9zvkoŹ tԝtbS;AnRI# wlL@,H#RޙK.Nr֕^M1ZJ^#gtj9>oho֨ n{\Y]wّ8 qoC&P߇ q_X`a|Z,",2XPP1IfY,u*ʄk9 /vGб?֛S4w;O57=O`P[GZiQgڼ1[ĹVk1ץ¼42my@KUlˣt$ÚH7K,uB_ AlzAlC|B9]9k'_M\ l\KN*ma;DԦbBJBςF \d]%R8"^UŘ빝W1!7 1+KB8!5nXxn5 SO6,8xhjm Qz*j7\^E9P8H9̆8/WH)FVj")H lL2SFnpb)RiE8E֣+In58 tfjgi:G)un65"e&('q?sQY=g./٫[[xZZ{ylϐn s_ㆅiX Q{R۰xh< "jNH8?b!qx$*{! >BO^mF,DLZn[ҫxX]Ek8so7Bq^|Xc!hhN[D% ԑt5B:'Ya! :6tpT+YdBn5]9'St!H)j+)K^{-jfS\ۺnr>\uɭAkHcWqn)!Sn+>rޭ4 B"]bngC1{XcR8ET 2k:F AG#Tqq v)X2I|S5G81$ =6 $ |f:gG68c?ΑƮ3U=}SL~xw5p<\ FAW}"]=kO쫻d A蠄}pD"a%K#Iúpu} 2\'[%)Xۘ]mG o/ONIK'CHh$#s!=0`3N,!;ZsB_y3{v)Eh5sYMWbݦ8mZo̗G˕:njFgKɅ/x2Vt_#]] '80DDIPG (E;H\#I©Iq# ĹRGLHS]wDJR골/V<:A! b,9F]yP ޼k99{9j-ۥz;e]izrˮ᮹RY, ᜳB8۰PgB`b!`xXxXBb!5,dOxA'v ឿ˺kXsveOKC)}*v~f7`RuYIhu>?#_fC8WycaL5Ǘk>^\3n"#~~KK;;oկg]ic5w,kkun`͹8;~~~˯կ?ߙ___sGߩ_RVVVBBVVVCگӯկ}uz߁___G#z&ʯӯկ};kkk;qŹs.=<<<<<<<<<<<<<=?BGIENDB`python-xarray-2026.01.0/doc/_static/thumbnails/ERA5-GRIB-example.png0000664000175000017500000006664615136607163025002 0ustar alastairalastairPNG  IHDR2G59tEXtSoftwareMatplotlib version3.3.3, https://matplotlib.org/ȗ pHYs  mIDATx}\UDSQ_*  PAPYDXi҂T(R!%@! 
C $P%onVV>g]﷒{̽s9۫}MBMnnnnnn$9A9A9A9A9A9AMo4Xχ[5?NyH{-~nUg/8-^T?+oQñ%ؙH vc[lLO&q=ЂzuT\ZKo=gn J}K<^[⽎;]+4'7I_(u9"N^f[2 㱶3Al'6 Nc- aO"rkQg?"~v@YQ!A0gbHDZ S& ǟ3` ~q?V6A׏.g{D;$kA|?ج`KF<6`/@l1,`K߉oIW{)v`78*~lij)6@ۗIлZ?NXRuR<)~{%  scJ{]C89Pb>Imަ^4ȸoGiQS&A\[%Ć1yx1|7\oD aƊʿ/>*υb)JqWy\ Z Sw/ _,$^nK\PT׉}b(vqo55oc̜ gx_\bØ$Pw[ G7'" ~R9;₲nd\)A7~XxX|m.H@Syzwkjl7H5 >wI6x=He#2 bݘ<_C-XUT.A{U։ v 6#V\"e#y.k V1-uؼX6Qj%q10ݎexc})N"RTnɩb:>nJ ubuԜxymVH#lb#i3&}3=CcΫѓ_pssnN;iZѠc op8##Npp8p8A8A8GGDqT񳛢`DN*;K=p8E{фSjiAlpt AVK4O5~+JTop8]ŠzoIIOA8NY ?-SJ Ld "AzMUxol )'ɸ'??|ϋ31ٰ֞<$~y +֯Ij !D,| c5"AlyªUo8A8A8Go6;oIbל Px}Ū"11'< p8 ./>R GOm+|Q,5"1Np8zCs.0bz[y p|?;||ڼe8 FN7GRr= p كe>N>)w `C['cOPئiOupp8=9upNc -!ɳF~NNӱrş}nlϿT,qH G&\?]|ů>wq7<p8:^O7AwCOuk0-|NN vW.|εaa 9W5Mn+ˋ^ GcsKY{\qϬsDC8 p8ZGvuI?p3ρ7_''3K {%Ɖ^E08{׌+upp85Ͻ|yæ't6A\`SN \O'Ѯ`4v;.9hڪߐ' 4: A?+{l`lk'ю `a3Gρ4oz!M ݂ 9A8v 04hGS fx_2)I$= CP2~NnPiI#)HiBzs@bĥq w~5:\fvϛfI7OO~ŒG^/ρwy_vȴ–1acVQW'wNNnh-4̒W^0ϻ_ G'c3F05x 9A8A86;' ?{z%3 u 1''G;Yݮ|bԴÀR@\M&Prэ_{SNNvT:q# ;HGğ]Xj7rza蔮{Z9wNcůQeLs+'<3n4OFqpU6A@Vaˆ0٥k=Âi ѦY#70OF7=V(ǯpcO`^Ko_CσMVg\{?||׊_½ӻvNg,owpq~ 2v4@ O/~y̅ůhpB[W58 ‘C vl%_ xgc3Œ[%fH\p bcf!ӆ~}[8A;A Gg)=%FLHpF"!LJOW?KBZK 0 ڜ Py!K!2wᎧX#~XC媵G>2wI{?_TףX>5CJ/ :]=0Kt׎ I_2)wEI//9)n̬KYWmoH!SG ڝ U hwt0֦k=m~w"&RW@X;FeIgtQe=ÆQ|=tʼEsVwGD6U`O`L`SM:<1.:$rQkʜRNbkSpQA(Oү9?BN q`bX _]֊ 7`GvTOu2bv w͸⇎JGW7\YܓԢWr(P{ʥABE~_iϭw|xN}4R"€< Yx)O3 R*윾XtNcKqG$lBGS5ݻQQh.52IwC KQ|nvSS/;w@|X߬zD,5\ c7nj>@(nߏ3̆|–#~1+{7$A^Ų ME#gt/JX-{5kDj/E9>Rr,V)xu]!TZ[ C!7PW88"؃/ogy$ i'>T R8x;>oƈ[]zV5GW3π\1PiW@LׅM<CW m$#A,}uu ^Jͱ[Xz }+*aQyt܁7kWݨ]:‚\d9?igTRr T 䧎bѧtسuzKy"YЪ* "س 6+3|vCJį7a%Gr ٞPXixJڵД-},_j|^Ms툾-4J5: _5.I7N(&.G*0hBw3%‡74Io 3ѡD&=B񃅰~8 󆭎O0vTq3~kfEi['Y BA'gkwãݟu-]:Bwk~Z ]P8w)~DxUF#¸O-slm,Xȩt7ʂ qd( @[ >ҋuhcxC>̼%G6%$pHSca"tj+H NeB9 ?J]]SU;)Ngtyr|138 [jD;xDŽXu:=[v#ϴ{@\K/j} 'pr="m@=kæNĢjqK<6V\>?v靕R{EA *'M)= !>a>{% \)EK"d3 OS5I !>qŦij2 ` E8$m<1KBIk{_CtxcDD.|$MY1MxlvuO3=;`HC0wU(7MU  -QҋR LNA =}"q|Yw~:Lyf-L>BvgH 0  JuH8;dDY!q$,)'n2Kv/o ..Čq֣ã2ncfjƿwޚ%Qj"{|!sN*W;g,Ԣ*g_x5<OnX܅g0B= JY}ͯsǽ(U[9H ڔ d_@\3+>Ц1,eH,X,ey />WG/2/CE&KEyݥԆY$!HHL:mgcn@KY)Em Ro/V*gt52sEu'CT "ѦqUFGF)lA,bj `B#@ pQJIc&7,F#1oIqN5|Br/`OD-kxPJ!$!`wY@57i񞕡UN@x]V K,GNmJF3G!+~Z(l&iSl$k7`ałCF  ꅑY^ChGj ̣ 5 l-Iyj*fY(xn( բw,.7_%Ju{ql-di˱ 5wfvXeemKUOw:v$|Uo]$AMn덫XO`߂c<yZ,>pL{ ?h';'u=O dC8`1{K¤Ǧ<le*I檤 2-)S2/qQ@ X'bX8h 3\YH=8a){-]*$5(*\fI 1lh*CS󣹘&LfwrSCH.<7aA`m˻B-d3>uֹ XJ4OBx]PR} 7$^Rs&@iU}}ǜRQB{*) =<_C@7å9˖`%EJwL%=YZ)呋lWvl((s4 <7y}m3f!`Gr*aHh"y8c%&ǚB$SNkn'ayv#|wڱK<&6]ѲpEmAv;وr$Bh' rVy:ʔ8~'Djwv(1n^)5^2:;tv׺_)uXSՀqTxNѡEsZϟB F,)jPYI)mNwtlp|˘{9a2!.-H)&~UP핳"R(pC+k-BKx"g'DN \(r|@98qj%L+FT)"K^U\EIr#gIug@JZ춉3X<.E ! 7R#ո0Tc_HV*Ȧ lV\гzyXJ:6Bfy _wڹ =E]UZ=#gl Kc[iDpe=lcF2TXZ !%(P{ B @##tś@V<8! 
@2#Z8stsUvKtY&@oPfߖe|f'*mfX,E5k\~ `T|T2;Z JZ_waZBW{>Twlf]7`WҨg_p1Cv2Ų;TGfeӏǸX+h]%9KmMf,5L7(q!@̒@+ xUZ|Ҡ.!{vrU=l.lhjl!+vl?əK^Ap^WQ5E =++< 5Yާ oxuKk+)^x*#؎ わk:fp SaJjcq b:vnSX* ]zW:kfV/㓞ʞK,iR}{R">Y#=.v<[J'9fs y=Zo4x3|)il  b͓}p\k[Xp!A\l)n1=ݯip j#"+)R,\QBA6YS/ĕ$ Ze8Y ~?pȜD9R (Z@ZM-EX0eyU&j5mҲq}WU(M2)UZV2LkXѠCuG?f>-Pg' aIoHd&uXJld<`|5Eˌm`lg2cAȪ|+cYj`JZ"V*t iT*4ƲTrNZh8Ʀ0ZԒ\4/RU,2 ∻EƚdZ$\83z2X!#\2 xvBA2MSnpm}Be>*u %IqJRgκ HRi6A0 tkAJHݙY I]6'=4rOƟ,I)O {IJP>kPM5U;W$Ij 'Tl9It RUńg"{A:UQgIφ\}`!uA[ +Fȩm2 {$x-5 LyO(Y*.UkDžдBI/RXZ2cyV{6QXz7N _etSn|Fe:KUC!YH8xloFuDXOFRYO X$:k+Õ\)tXf*+X 213f!S 8HW=j%ΐ`xJVAi$"sg1Xr9 Z!a%ӍCaxd3az,p)"Vi/)MrQ7AB7a!bvqQJJjY䂕RdGes-ƵkxwZ:T0T e)jbcGrr`88Kvrf<Y]xAgGgEܘ}Y@=*ـh Bk>`6tȊ1tTXՀ : Yڧ\m|@bӥ#RƜ r.VIǍ K@ƅI ԌTA9>Ua2Bf"B~ b.Ǫ;nZG_ԜfQ]7eM+ jj,D5 59=,bz΅\m*MRn'Hs]r@J7,"%$+G&.Ů -:grB&k-E9/KR̃U'1'k̃PΒXD}c" p=fH)pxX"9DyCd9qs֖2/gDb LƣU-A*;u(Mz{RV)~eHkV+Alb?A %"$x&>d,O󗥻#)a0կ;x*M)}.N=0BR't,ڻFycT1s4Ρp4B+,Mh\+˲uɥN>sִ2C]JEmtd̜y)]ME\[XnxEr2Uhj*ɫ<\C~{*fQh]F#;vb\ Zt޳:@K)Ь8>'֠PDjV0gt"1`Ѳfe&7.0x3ođfIYy٨uV  f-'8' ]y\ؘﱆV0Vbi$SMr1\{CXZY\IcUb=|Jc =L͕1\춰ܲ%nBf&%E-L 31"iXMI71%%RBt~,Li߀#UO7.gVߧoT r!޿jU)Pb̞ %Ǝ78AU3"z)ioR 1KXCwa2kɊ bBOlДZ Y\jK!rsVm9.2W1{3 &3AH i0ᗊjט:b\Fv1;:VEX`P UE5D3p$AX%bu͋pċg:.EВ{NU7Ya RR+,'FO sRV32vģPڹzY5bJ=V]+Z@zزń<_X3s. ܗJs-T3b2@}&[Is 2W-Jp  kI[pwI=V`1꣩;/ %B#9kO02>@KM*/nDRG&v+ADr1E*ͪ`o IzZLʅeН:ou~k¸&՚KTGul(:ZҚ]B Gtu4y늭T*1 Ҫ Y`R@6ZS5%Ŀ] zy,Q&<1=0V,d9Z2LH^wY>11T蛋%8dW1㳸t9A]vc\⥌I#5sSRNUl OrA7^ylcʛk>H֗<ÔyT"UP0@,gry]F)0~N&ר^YFibc|@A&B), 2OOh#CT5\BWY+`O&Y]K׌c4&GK,F6Ke ]UMw,q}<. LRIay_iz'XEࢲ1\TA`!LU"Y,z=O%6 ܨaC77rF +v InXibx;L'&F"5ZTJ$L7AltNb,M`]jCaSDX pW#h=f5ꊭTWⵖkYbA*\ 9+Y2$R_&XÛI'=cLGw-Uy`2،blRuѥ;0G RR'Kbb;+YʺBJ/C[Vr_JS+͜Yy* TH7[-n/R@Κ(u"Th*^òwko$*CNtU ZdMCE햩VYtlGӣ~Iθ 9ZαX9~GL tYYfnTYslHPUMo2ʐB,Nu_9Io:44R󞵤KAu@˂ VB^<5ádr*dhDk\ zk(_*ΟRF}k6YfNUgMޤw#;ʳrA' XңaG2 ":QTa}S! y踣vYU(VR؞Suy4t<7 rF6qWs#Z2ͤ/+GXAw))5T cJ XcRzbR? dij-3  H/]P,IqRsIi*8%긖Ez5 Z@&]UZb`[;W|N+sʼna]9*s+7Zj*Y~~9VT]s- B&{YnHUsyRL7bMȬvz6AVHLꪫF’`7{V:އҦvʱ9Rr~o,t=j.+2!#L"+Y9$PRZƌlR)e'`U"V/] MO Iee(r޽ғ}d;[qY^O.^CtJE1;K6\j$IHqu+DOS㒥;!<9cްa$CV5UM^VǚeFt}A# d ˥F*&K8$ 5*ըË ^>=_.X t+BƤ.x,9 a#BUc\DtGj*#u;⬙ŀGJ&Sa@`7XJJ,cѹ3vP|r; %& B+b_AKIa"a͐V= pV%=@I^Vc7r#Z$ -@,Z!Bf9vYءI )ilW{pE+;T?@{[w$I]kM48TXul,!5./T'ϕa2',vdz(XٲJbȫ:F0Җ ✆!ҡ'ڇeלΘ EJ5[I^܃r-3-9rx1#B70JPFlF#Y L Ӄa䰒s+Ue5J6SBN @MJ P=xp*ʃ jv:gbI㚑Ϲ##@XXYŕsĐ$vY uJI:.ޖJC^Yr2[Z$n'$j}LMM0k 깄Uezֲ|W7_-o ;*/Xk@Ug͈%SeF+@W;!AM 64Xf5GK1cz}ba ܉# ͅq1RLY&7} ezDWX"[ AEκ}ƍIH)RPpAz#kIp<)/U&n-Y7R WkZfo׫<ҧBN*r "5I4s=+bez1=2_`ϡ0~Pݻŋbt)mJ br̲Z+̀e6 vdr׌*/|VɲĚgv} ~zWVjF5lFAX tElJMcqjEĚL sI)Ey/.lft3!ȈY󷵄 3%ےF*dt %b5]&6vAlDϝlF{춄Wb_e(=: 5,)rEuL:#$ 0ޔx62(:+Ql f%V*D~1CXX89}ФAs)+I)\Z/"\ݗH)ZTߘ2|QΗuBedRQ'[ ZcO :8!+.b"YK`OhoT:ۦꢍ͌=-[63^oLDe$l49N)?QD9 rs e&t_3swz@vdI`]),r.rANPyIUa)Q"bHώ԰+IsyՑʯtk I'S{KK)/l Jp5_82*}gاz> Z@S‘ek)s)]2'f)r蛋`!CVz~J>:•u_ g[HE'.[ `1^pbChȮuլʥis^P.EʍLG BJ,`3IO{|£ Q40D;ToNS%N- FqH.C4DWe7Ք$pctq#lwbn,CJcL#*z']X)K9nkXCix0 V HZX%=Ry,،O )3 /0RB*&C7D){'6'9j9ŋ$`%1E ;\Y*nI]+b8:|0!aŗKZ,$N thIjFŚ0\Ln"K' @Iba9=-7 ZxouFjv rLrf!'#,\!H+Rqd8ͨ.ҀgÉY)P@}&.։d zLު'<1(ɭ庭.br9 ʼBZ򨗘EX\qZ { Q;h9 )ul-f+AaqQkLt@?ⶐH%3ǭ˘Cwǹk=Vorra{e.幒Rz%C}$A.rN!Iπ3B jQRn@,5FœT?CJR\.W]k1[ODl]nT ݧ3eɲ[J{>f8& S2!BweU'涔;VPN;ilTA*DNƓcn%k]D0\h\O%d?ޫzjФR : x\Qׂ:b֬lLa|ЃE<Ɩ~V`?I픬zwҙLb)`-drRd'F1YfKK~#XiH`*t^L{ԩ&SyiPB=C )nN4ǚN$j2:8Af>aK;FւTT rλMIr N!zru&r`ɍ#RFȦIGUfxדIzU-`n\,t תUg<; 1Hb1 BVV覰 $"%!^ΤXj`*Qw`qƨP_..%kZ Qar(KSGk~Te%\=6jNj`V3 c#sc i+ *_s8RM S XV'"ټkyVRPJfuk3OKKLP.fD~)3*_wbai ÅK'{BVazH/Zf;aQZ4TxnR>r90I/W|Hl@;(uJ9AtAH}Nqc8 &a] t2ĥR@9z˕BD)3 = e y+#S^|!i@[4Sʻ^@8Sn|RRzb3bu7$9 "%8}J/E'ZHPiȽY[aWNHRS @jYĠ@Q}noY&k"R2յy*1d%Kjh"k ? 
:VY`l Vi`yE%Y*YすLh籠 : &DvR^azbHO-vY#:!+ < 1RSv\GRRCv?SzY bu7* | u \5*H_dżfshKd+>JTlBbb:NNmLU9&2ަ1Gg%:nYEA^վT!*ˤbj]uNHOb];@K OijT/G2ؐk7(ab;K7 Um J°Nm)R&ȑ\jw@LJ%8`<:R!JQ(1Pb:4.+RvIJbtŒՐ7`RHSJHp$sHR2@汜 : ܂qô[hp7(Jawa̩LAϩ`n-hPѕ K~CL^׺ĐzΉ2zih;-AY k5/Fȕ5Ypn#}64IyEAA5,º[-5:^e"u2 %&"-0v-;brѢ)Ǐ d({ݺ @{$#26O3BD\x٬]^ y#U Nq'>NZJ4=SS9ے@2G4>0Ck*\9ژ~)a;,UUcRR[5GZLD[hl.17[bY\Jl |ߩ\($сd#s }Ǣ[N7CrhQ9-C XpAk8%zy2nP}+zNmХJIkJ @INY9y#>%I`98A2m .GXGJ9ɴfuH)uk[RZ*էaAr²e|6/`-C+գZQA"wV <Ĝ`=;.;Ήu wmwc ĀZ어Laʲfݨ}YGkޥ ͝rH`6bn&\D6I[֑Zܪdo5tY+zxU0ar₨ Ko!#\f @-Y˵G@&C=> :уM?KE_3778..,zg: +yC`'m0YrCXjF+*$OW^"6Lra!h`#jlKPdpZu-ecKP8( 'UasXB4?4R1VzRKb?@,(Td9琞.,fNzT-All#`|I%:0I Uҩe1I ͌ #?e, R;;`) v(FK0cB(`NN5]t)%NtlS0F@Zяrq@#ՠkU>͝<9'zǥe&HךA)=,UXfh69A8A/ '\UUݴy)g_oQƈK$"fu *%}]6kTKxާt|m- pQ1g`eUvrjנQ!k1[ULHL`pP;uT P2Ci|x,Ֆ`h ĉmPcp&9c&J,wDҲY"j3FYbT*TJ}?]pT߯Rk(`h͡L,‖BDŌlCr%{UH`'vSyO/vnҼ GYc.w-_(=Ŋ6~Gs`Gu֜ BT(^#CjV$*Dݡ(y L >q%a)G> ?BR PQVAYg)@Rup AX`s0T*^6`'jؠ"ntt>8E3Ֆ qlIzH8T p!! $ذ&g\C((a<<T;H]|n40 !5kq`xM֥njI]n,E|΄L"S aӺ$2 JK ac7]I1Yshon$=F Dtk$ x[9gt ܑ3Ƚ2"Ahu[GgExP/S=P 2h#o!QQQptU8y@Ff *5Z.DfI8A8AtCE69rqFa ktU7T}mH'601P}*0:L6n@\ۤJ΍bNN t MGBGU *(]ZZ/kD;K C.[ઝp 8]X6K59Ҷ`Ck( ;At0vBDPj*艙"q!QNXlZǃdAX޽r8ƨ{k&6E@!S :r0أ6{;_l{ }jrA%`?)smp8|4Ne?)y'^(9ןa^N\w+U gxG;dQƊ0ұb ,rG%L>MBLos8 Fw}Eϡ}NNCp Gq푹K8A8GpM>sf5ħ_p8]e;9(w@? p8GB M&p~;dZa~Cs @wpp8RN^[' p8 Fwr?oش+Vp8•?Y$̥!~B G{+8m''vINN^.}h?!NN }8A8A8.@ Xf8<={{oc۷㬐[vz9:{UC+?N'A#o}&Vxn \*Ƕ_pOZ]urT{}2l{V:62N0>ܳm`_ieD_Ӄ&.}f;qlvqNv/ s=\={Bۑy'ıb^CְN"ŰTb6 Llk;.5FoV>]io@$Ƈ7mq5?s y!<H((-/n}`c'vj=a!$7~I.VLbM]c?%mcgsS(ܵH[2Cװv$w]zA{9y:RvI#bWܩm'vT胸 ;FjM6ŰJy| !ϯj=ul13Dj/d<5m "`C}UG߈cm־;n-878/E 229;6A{B]P>",GǶ9>oTڸFȼEְ$CDm1*cbh?c+V\] u{Ib=j9W[z09<6T}SM{1$VmKl =~Xa[|8. 76*Fcs7/6N>[ %0=$?z]}?."N-AKDz1A3.]+ռS] mӎNe5"b,䭂DD䏏uƔ [/zcsnۺ9'cK.~fMfNL^Z?IN,<;\{e%N"-o-'#yúl$R:$г9@=1itl~sscu!˱[;W<6Tz.N%\8!pcC|@,Qۤ;\*|b"Z-5Vo#c"XEFm(u<;\jx_<ÛL -!|1y͘?*9#cs_C_O=6?wDZzjw@suD2w!gDɉvfldCU s|l~sscVʤzvd}$~똼M<ك:a%r{l~sscԖlI}7>/j3NhRY~l~sscIt~R&ި=6?wDZzj;8莽SHNw.ǖ"sm'5}TS݈Hy>6?wDZzj[]|=뀣 md?B8uݤ=6?wDZzjK'8gW Ky>qQW:!۠ -?W?w8_OmKdN33FO硝}`Ħ6 .?W?w8_ONfqD~7أC*LgFr|l~ssc~,b~|퉢&wi.ǖӪ'"qdfGǖcԖA5¨eΖsdl`lᅖcs矫[>-SDa"q1`9ǖc }D#8*Wo*?W?|[ޯv' ፚcs矫[>-SNJ9A9A9A9AIpsssssp7-9WRM U u57c?,s6 -^ N(/w7Ž&;%o*lkXן[KaZ-Szbj[w70Nl g_0'lNUH "0;um8g`Y|-t|~,nZq}ZBǟA3Dx8.<şFy'u X 4r$AXu)qUaĬ8wE6]gR{l:Gj r{8?G)~FA8A Fcw* })Ok ?Ǟ[]n6x7?INGx爟_l8Qv2L!19A ?b M =Mk ߋw㱽c*x x{ ? iS`Ƅ[[ $i_;XA`S+0R$3^` CL;Fhf*BL ; f<א T>0׽~:A ΍Y8-eӲ"|P\pϯ 6Ij$"I}LRWB%SCFS"8οğɘ\?_ĥ7rܷ|$OR;A9A9A9A9A9A9A9A9A?^oģ^l$IENDB`python-xarray-2026.01.0/doc/_static/thumbnails/visualization_gallery.png0000664000175000017500000011324515136607163026540 0ustar alastairalastairPNG  IHDR59tEXtSoftwareMatplotlib version3.3.3, https://matplotlib.org/ȗ pHYs  IDATxUֆEra` CF̨HYe͢k .JY]uYú*#Jd F\_sԭyܷos^PARRQ?: ˈexj/?,'6 waT)`?o'Fb'ѻ.q>H (n'4#iAA0RJ@ǒ3ܠMJ+.)+1uRS-Ԕ/,"~ *Ni(Rs#bz]H c8MT%%Lj~#:Bjpb WՌe€s(_G7䆼Cjh#HD~,R|;~a_f=k7cч]WGp1qlT=5 &1d0PT Y ='2{e1aÏei*R 2^2\Cy?UpĆSL<8'#AO .5۹geo93V,;x' T"6@_`z:-D~2UM=T枖b&cMA%`' 58~S:e+ʯ,2Zj'S1=3{c~ ksOQ17H (99ޖ6eA_2M!8!RE6V~2kS{hۨ-](u{)WS3~ͺ!5f7wض RYďĄ˽z_~? 
g$-!5PcjL*g%*QE{-Fjz܅x!{jƋhz yާ)* Y1S34d3T T JjsW@럣 5q-5 ï_Y$DZYB,H ԚῈ`hsrps!Ss_=GD|OqJ-I<a s+2P{?>%5pk OZbzp+n+`.{Ǘ}:~H=<؜/Ʋd͊>4Mqe}<< q7H Hm,T;hp0Y <6!RgRRHwpKb!&؄+)˱O&f~Y]D/wi 'Bxod9*ؚܓ YCWqO2D6nsR3mL;snygw֕>s,ƒߓIt5EtIM5>{ R@jUjyuCz%~]MI$ԫa!Sڧ6f&idS+ԎHvZu$џu̫7ORލ|09{$If65ZX;aŢQ݉`*׬C햂V,d* Z$5[t\LA!kn#om:4lg(OQ1 פ~u[X!wF,*kSv$5FU8>':G {U-G͎8=0-=:"C6hGLHCn9[,;g`߱1#o$Cbqi>n߰IX sbFg~D yg_Ek稽O44X.c#l:F=u'l+ΘF~1a[9xN{I)orYr{"^vK>W1C_/YsG>>O͢93|nHߔJmqRԔ UŅCxqO긊o(30 @j 5_j\w@ 5H &-RӛfWmDS73$*bE<uOVRԔoiNRsu@ 5H &-RӫGUoL'lڟOϽ08%:q R)RӢR5oT@j 5HMZ'I͖ͭDY'zduwÊ*Bj 5!5$57Vo& r'DʎOH_…%5baIM1z9d1}]?O). RpDi>}i ^c,w>I˓MԈӐJAR '\Cg`x4 !H]Rsryx&K)!!{)1Zxdw>RT BJG^q>ts/~IΈ}גLg-p6,4'=\Oog ~L~R\ki$%-aITEe޸@j 5HMU Z8I5&S4n 5KM{ku $궸a~H R/5ݫx6paހ$'!S~W5PxmE( @j{SjgԘjRp@j 5t^[1IK111a'ӛ3"tCj 5q$5Hıy+R @j 5t%YL@jЦru9Dg}BjAj 5HM $57r@j 5Q9pp iTd={z>t՘V7=i"5X?fB/-\QߚD\[X 1)rG-<'_ʸ8\$џ^|12S{zM0XiM2)5R~An?`ԅfޗTd)8Ҵ}w?CW7ftpK4r=.HK1丐e2'QÐ{>ѩ2Z~<ȉNrK׫_@N>{s[s~ȴVg<`ф~P&RcoH߮r Fp{"b7X^~ -̠"H &Rӹ{5ĥqRdUh%'=5 ]jvPRg!p}IMRwiORd.Aj Yj".-7H &Rө{uuNb$5E#HMܥIͳs!U#S FfRbR !5JMvk;;G$RtZ{1[ q!'RԼHMzj 5K=BΓ9Uk鵽k'cOm-OMqBECLM6]nHJ%  +=}OHS/Z25 D,z^DJAu1P^Ydߚ$+oܤct tj}Ϝn!A6jz/$&RS8ҭgoow{7k+bWIn-\+˿e_2!%CK$N-5!_Kfdzk_Xjq[_LywŘזtlt1Pqo;_mوg#S525[~N,RgWھW -bYʃ 58KM夢KMqH &RSF>Jjvqqft(Rgi߭䪾N0^rHM̥SZެGf&A毐H &=RER3c~N 5H &Լk@$5ˉJHM]#+t@j 5漪U{o3e@j/5mH R$5x/8d?H@c ^!|kNOb>mhyh}S|u.F6 E% BRNtJ3iZj?c}^:[[$niׂ[v,Gk̘B6 L]"d* I KbG?)W>ĉ|/<=|-p<R" 6lN#Q/⢧r"_k;v%ٮHYj><@ 5@j#5X~H RTGnO,@j 5%#5$5yp@j 5QRِ#XRԤKjyS?R@j"IMc $D,!v20R)iER3a'H RIj408HM}VHMen}oܒc@j 5HMr~H DղZA'䓋R26F4DZ:nOk{o6Lh${Ylpkeߒ#2dJ.(z;a.LW.:B)=/}a_+ \mCkDsBDXcS]$mh}rEc|t@2HjnZrHM4z7?x$abV/#"-Iq1l!dj.(Su1G)2] -s ~u[,gIh{-)KMzޒ3$Ncjfm 5HMHM NtTB)~ڸ'yEJeH &RO RwzN 5H &ҬMx+/:9UnN4'H &}RӬKCo;@j 5HRӬziĩTKQ1N @j#5M44N 5H &4o䭽zX qEwnMͲth-gnRan e#(Xv.A^IɑE1xL#22.%Itfto)8~L~e7>6oRSTJ5Otɧ<9w_ZK,5{[&TEĀ|K[j>,B@zcyTeuSJZj\BSFHMٔyKj%Nml.|B,K44&` *e)8wэvYf 5E^mZx@̏ĥZfxwǼkZRc{"NKQ{Hz98͵61בZ7@j 5n4~2'E6~ˏ&ZN{j-5Rx7B>﫣|3Y3F F8ҍ>氹gOΘ<#UѶ-+)I?_OΫ#u${U)Zr|Q I?#2%yN9+-R {]IM6T]@ᵥ9PؤWOw%d|+h~|L7`FU>+<>ɳK*- R,r=-+>1Gʎ|A=FnOŔ5cFwWo!rzOH~Uߖ+u l!e~./HM{j 5xKM ׯwRn~|9bќE"C RԔs❁@jH RSrRSFo^Rn}ĩJΔʃARtlQls 5HM'YH<ūAj 5HMM=@Y@j 5y?z48I2~b0߇@j 5i k4H R;gIj&+̀` *L玚Hb8|dkhLvr`pFީ>Ҵ|-f];$ab!9-43mcg'Ͻ.IkmlIr%fӦs+ޮ-ӻe A<’g|rٟss-R fJHMĀ޳BkɄ_Y/%.QFj$1`pdɏluGn0HE:["."ϾIʕ|k+Xio[ LһuJCb |~[ů _m[)KM>z&X Z+~mS9RԤOj<:H R&5_>y.8"n$g@j 5IMg:@j 5H߁._HT辎XL @j 5%/5;7J@j 5$qzt@jb-5[zR@jOn|(ʳ|Ľ3ħH &}R RD9og'@G|O 'DxYw O˞1ܼ= ;)a#SmRr֕"dU”:'+o|S,bat.|_a# ZꢕRVz_g!Sƥ4dٹ#tF>>>6щL2 S/Rd9/3}Xpŵ \TTH5OꤢKMqő>=KTm-.ۗo~}ϗ?찐3cRpZȂZ\ymLt/-Ӷ w]R,`)zYȳL 9Eg%}OW}HܲLiߙDQGdH R"5Y$5Ow)r@j 5$5Sw1nCW`d e 7R }SSI9@j 5.OA.b泈]gp%lZgA6TkVRԴ=>I XHVHM{n`$5 D3Ш=5H KMV1RS,q=5HMs@{fMXMS)8)5BjT$NOE#HMܥO1q?/LNJVRi>Zjdjs y>ozMKT %E% r:\bI.,)%L.@j.5zrF &K=Cj 5s VO6Ij'nH RSJ=5s@j 5HMTK$5;#~!!5HMM+R@jR-Ռ¸ԔdzmR@j"}z̷ S'SԲJvnfZVdAd!H⢷/4e+mK 0ts,옯YҐ}kOld!ɜ6R4s#S1JIN/^Ȝx-] c}|FpK1¹|_tŸ%ܺw`5 &b@,@)Sk|N!𠅣%8{ XO˕l|uAK)3s2]zW,|Gʏa>V}M䋟c!k#IIYd Өm!SEڦ|IN"z;|Smd%/g#͇, #5f(C:}XICGSR _zǘkH uf/@j 5H߁޽%)h[<_H(5w$.Ny*%7RԤAj!)-[fcBj 5&5KgRs {ɒ,t-@j 5鑚|FPRչR){R%±sz(ĝOVHS\0niz–H Rs͓e_oKzq 5HMHM} Nɽ\R2+NI\M)d-?'r ˆX*5ZJ%=Pn-\#FY(O_BSKr"SruB/δ [$,c #ӯ AF=K1nNh2.S>dj|/2zO*>4kW5ȩAL`8 NIK2vO/mwI2%ZJ gӓX)JNM"u:,)E# Gj!iR:d!H}]:EvP ) R\]qŃ_ϛ@TFkR FtI xJ4i&9:!k<N !RoUBDr>Z:m#B# ӽ4j_خRb ‘xdPi+e?Iu-U+sGdXBzz 6L“w{Η^j"hB0$єD!,?1R 54>#6̌<=SP׹.A-e)r@j.5HjgD&1$Ԙ2O˰9:;_o.}R4G 5KM.IX*d"0_XS)=ynCK"hY2!DʴۻVk$r{ } %8"U`oS!Dsw0R #l(A,1r=bm"B -/Tm)'瑕FF >RQyN2^QH|J6B&tAW#%(Ch@Zb$DHlXrklAmdJ9;ϲPz/k){B~B" <:-_sD'oi.q18#,QYaFKj8 !)D0j̃V 5HM^$Rr@j.5]HjlH\SoAX@jb-539Ah@j 5q{$1Af=\§H & RsMH R&5=qV(F 5KM@j 
5HMr7`<RԤEjj6_R@jЉf֪ԤӱtFI763V䪾G6Jٸݱp-MaR#m,+dF-2#,dHYQNdC޺U "%߽G!U|j¤'R :_^B;r=>S+@j"JMvۤlР݈LJD6R1sI a—@ r8UVMUU2_HQR#Ej)ZPR%.L%-WI|z(m ;&ysw@j 5HMڤLj<'H R]{x,&8MgfFۘ:1H &MR4yiH RB˾$NRs[4 5HM<'H RIjr{x.*8I|RԤOjz) RD=IS6'c iI 5H IMLyN 5H & Y]{3o $NR.*7 ɝmݵ/b)$yaMw IaRӔR 7ȂGwL׆^GvLM ӥu.)!r)h޴|BJ<))'asGKع'fxHM4zL DHqϿG6J |F֒:E]4Vu ,~]HoX gG DZ I*AC y؃Nw`뢒J*-'%Cܑu.sε=Cv$5H Zv&.!.%{G!58KM->9@j 5(%y/CO 5HMqϹӝ@j 5HMt@ 5D[:H R>{t'H RUjx] E#tJ  col! +BBJ/6<L=-FǽNՖ3g<n]H"BʠN1LNE&$?Ojٕ>gfs NA@jN 5զEF!|wŷSG%,;h 1ꢛVGGc[34Y.yy/ZH ۷B«tPY F*8eWI Rջ/m7 /Լh;-5f`fn(V$V !5HMRs4'Yj3@jʾb 5,5]qom-biNjuFJ]Rsr,h5<b$/@jR0;ӜTp)8]jZt~@Gv1QK .s!֤fkyt/0Cj 5p9iNrqR4]@8Px08wUז&̯5;hmjO\kҲRk9h4']j+d6Zju|~I E{%5CdHQ8 ǧ[`H AwISĨHq=5KM%, $NLM9QzsqMa;v7/.GK!]xP61a6HA; k쥬2Z>G7aa \ϕn+VUHR~.:X|e[Oַp}V <7 a1R S)(qg~W5DLB' Z [K$~N7!RT~[~+=jN[E屒TKzh l;[.r"+ysvS7iŻIj& _5M*H1<7RԄHMV^8-@jʾc)^i;Ijj,y=8|1}卉Y,HCj 5qC⤂KMH &Rӈf'eYIZj=zL H &RӰ]gԇ-8i9ћ7 5HMfd'H Rm;{'=I q8`$H R>9l$'H Rm:{Q qtrihFD7LW㣋XxtVxQ>e%FzeZR A ua⨅ӕ.MԔk<',2bن>1y|0IIjRl4 W[dGr:%Ck;uuZ,Zӱe,p} +90үes}]w\n}8>Sk @S(Hu5qRs.RS&:@jHMA RSt:;\(Ќ@j 5铚#$B!|x9\=n}1w-y>AH KM_ $N RԔiqx'LNׇ%VBxL/RcSZ֔dH R.:@ @j&5GrTf3DObVօ@j 5鑚ڭ:yOz;8\~:lTiƚ_qH &MRSw:a!+nGl r)5w&C!5HM& $RsZ1b^ol}•zkpq਩Rpҽe!Ơbu]uFˆUJ.)ߋnqH1^%ZdTڮX2,zZr\C-Xy>,5$Vgd jsRsWԜ\^f-Ro]E,uJsȒ#S5aF,ڨ|ٰjQ4{B-Na2*{m~y\OoCn_|/a0%)KMlo[ $Nږ0B&&DT B00B&5a=zMIH-5Z~BS8'QE|*;ˌ7iQQ/?&hS&56Q&䱰*.R&ЄIMXO(B&}&5z=h)Fjjed{}ǾH){x;RI"w+#{jk+@aH R3@$5*yj.I=DL@j 5鑚z2$mSr!sKjќdRTnH &RSEG '/ރ@j 5i$58{H &@$5k 5HMfP?;@j 5(hvKIj1D7dO @j%5-AG;@j 5HRӬ{ FjsEuf[#Mb߰mu^-rX6O&<|,L~"+R>pB=-MRtcaS )+7dBːD\EM嚳PlRS&'Yj3thȆUYFw'tD%Bܧ#N@(^tS|c.AX{0'Z cZ\Ȅښ5W#f~IqYSPT՛urF<HܥfCuf'#y(!5K.Tp)8^jv:]\ qxΞ9"C/ 5HMT't8]~*J@j 5W Im}956Vs'}oѪ2Rci xHMQHu 5XKM&v38IAl,-@OG F詁@jtI8zj 5^ $=5)"kjv^OHM*RS7ݕN*W@j.5UZX Gv O)'4eWVwvP\rARжrHjiRjt㪬k*HUȔQ_W>&FM[6L'r„D7!HCY!b!cb]r=\Rt}$5UIjZ_.8ңe߰T۰Tlo)$zJ}\ ʋNX~>RN x,D|Ln _[J^O\غ.ž 4j:@G8SR\U%W֔dɕFSYio@j N*[@jb/5 x-_ 5=U-7 R4 &ߋRwҰb OAh#n({'1L 1RYj4H R$5 2'L$BaܷO19RZjx\R@jP~+1)l[w;oS9D& ݆5>Zt74"hXc|4b)},9h ؇5-Wr?u% {"NHYjfx  $YO&{HRTl'H R>*L@j&5kR@j"IM^î R@j#5yk s@j 5v3W@j 5i$5R@jPVS~@j 5鑚zR@jIMz ԤwyC=ai~u_OmiA~ 1IP~(kpb?GbF0~`? 5'=`~`?G 5c?e}?ھH RP~nw2r^ވxX7EꟃJx?+sD^ގX[1,^R:O}0s),,Q؛WGs X8R&+\b 1$&\>Y\ڼr @ /,ÜhKx/ r_J### TcmjT5xk o-[A1h7M.sFA܏ qJ8☼Td$'IQ##7 T|Gx|O34m,N%?D[Æ=c5wYt@cKx?'TpJ!.Rג~,Yi=? ؗ#qqLRRfv_i R\G).e5 \Nu UuRR"5frNL!5Э jkoL&>%V&*r)y9:\g僫Þ+&֣[mNy|ARSSq=GC}n_J zjR{[}3B977u;9 5hX~,:!f|H6\,5Fm0o|)aXϰ$㞗C։^!)E<<ȏ=aK&Q5~aWRHM BHM?9ny!52QN*cjXj N 5wn"9]Rrcc'^oBZƊÉp!**"ο 5\id5?VEjdz<:zOOwq$#HI<^2iRS{M娮5]R?|^U;et"qY/=4cQYĽHFjjOmQoy]sk 5qq" RW6_0?=Rj?!O^%h$!51{9e90Pyf=%Δ/ O$>tHMdLaq;67lB>#@cqV̥0|7Rb H .F\eDZ)RQGI^>+H @j@HCY.➁!/TRm&Ү_NdHUJ 5hYN R@j@j 5@jvW*q)ĘvS@cVq@jIk <%=8,'{ԠZLՌx_<~SD~| N|Iy ~-zz,@!39&^%_)]5M5U}@j*,S_֞21B Pos|xnsbwȜߙsќDUXb*ߵcGO &R#Ѿăj%~˓nvg N4_kǾ >jӹt|`cۈER%noޏ Tj˹Gu؞bu'`y%bP,+ `lߛSBٛI ǜ=]g.;Ģ|"~G=&='=W7ŷ?ss}$E|sgp|8R#9'I ǝ0o1ѭc 5!5zy>e.|J\p_JwCN;`ഓ"HͿye0[Xoݮ0sě'cis%Ԯsw4ǂ2^2!~Ftmjlm|9|91qMH97Lmbd/CT\].!{BhSԜ"ew߀c; NHMiHvY[]mB0xcK#Zj㞣)YzԘdԘ$9x}ەLҲĸR0%<-[Hpl VrQԔԜ!۲D_#?˂@{;RwusޗΓA_: [/h >v%~ɀ1@~ν!r{?%!E9[ckeow8w5+I:(c9r{ާr,ӰM.>KFŀ^nz}mcNG Q=G͎F~1a[9xN{'~SvrYr{"^vKW1>7j!_޳8澏|N}xŰ?9Z<\O#xG~fy3=p8\S9N9G>P>O͢93|n0Q$.Wj$m#G&6xšGGdHM?}0yȭGxπ} ;B>&c8>N9) s6 07!+Ynz};aGM{pu6.X>G}MIcy_38sM>s7CqRԔiFRsEvN 5H & {Tv.H RI4TͻjH RBU7R@j"5-HjFUkR@jJ͙@j 5HMZ&Ru@j 5HMz|e 4brX޻>"#eG7z}Ǿ#E t׿l!N>ggZ)ZZEOw}`!?8O|zzG>'L2r2I>&IK-N¤Fo*%Iͭ5;:D#ZrȱSzL-7 8f#o!EKʏ!TtS-4xj$9#_w!)?O). RpDi>#?X t}֗۴80!~͛2@j 5HM{kut@j 5QޣaS@ 5H &-RIR3NH RBU5[@j 5i$5wR@jе{Uoņ@ 5H &-RӦru9N 5H &,2H RIԴ%yaH RB.I/3! 
W7Uo]υƲn,])\OK1iZ$r=>3}t*L%,rSƥH}12S^s>rA>G˴Vg<ӄDJC ?Үr ƝTd)8b={z>t՘FG 4nҿ4Y 1:[mY,R#'cB>j9<>R>='z@N1"ϧ56=.O"J xM˒HQ3 >H~;w}e@ 57D[b 1$&Cj 57YUjx3tqH RSd}@ 5ć| "g!5H$5l$FRS8]jrWf&`q)ߦxps ud@j 5$53[tu#)RV!RkDR3k]@*!U#S F詁@jGǪ539GAO &Rݭ΁-&!ˈyyJKj~_F&B0:xCOHM*RSJNb"5E#HMܥ#IkAj߯K$r 3Mu$OU`J n~f`ŷ?,f>͕H\ 7:ѯ|#z))Z&p"!_Kfdzk_Xjq[_LywŘזTl D ȸ7VXZ⯶mij})-?'-o<2wE*(zMNb"5E#dzm? $futStgƹɹ)ItҦEItc/e"4=]pbV£KL6H!<$a@cJ8"$Yy-g&%L ҭR=s<ۤЭ-$FSCZS_,kL*#H R"55jyouRѥRwi߭>ж'Ɵ1C D\ .uAę ҹHMSZެ:{р@j 5'W $Vgv|[J '>"&` @j$5j{sH R4gc+$ʶVx(J[jHJ 58KMNr@j 5Jt=D(\xM-Oy|[iK?@jJCjx RD:_ $#̝ŢlхVZO//p^Vrz!A:wO^;gv'?,e.~LǾV߲g?Z^g> .(S׵ BRNtJ+ BK~#S|~^ yr{~xH4$ذ;}>^G]T@ˉ|m;w[ؕd"`ԹnuF^g#_tRt|iDG'&'oONeM˒wa6!,TeH OXLcR|Byr{ZVJ*)EHG_6N09GJ.l*E̽.GV֌퟼5-R!kE@0aa@j 54IMޜA;@j 5(Z׻mĦLBD:\Rt!lN 5H & $5y$NR3?`bH R&_כ'H RV]y $D,!v$ @jGjr2 RDen}֥ $RSg |h+HfH &RCDZA|J,Bt *DGQjbRR3nɱ&o~ H % ⫭IaltHc#'e6Rxj>ג(ADbRx}]ѕ]YR,O|{L].J3Z-/~Nٗ 9oZ{?Y->Z$oTHFe}^1>ZSލO$Ncjfm 5HM)IMwʶ|bLK3J~%7}?$\-{_VjH#)'#?k=C"ҤKc $NR$ {Iޝ~5NiH !5McwR'+8]jwnفIj5WKUH &-zoIE#HMܥQ&ްO $nO&'F|N/~ʸxDj֭!5JMW.vRH&!5KH/%j0BO R?zf6:Ib#詁]jԜхI<Gb;Oķ=ZI߇/RԴn}UN*[@j.5 rz'}@$5]B$:4 Z& 0X`鴿~2ƥmXm~z~#GV9%p Ażu2Zʎ\n2-FRVtʸ%)8eXL6EYHGJ]s`/-x R-w\$%BjY鴇uQ\HYWY_#GLһh |/.1b,S\`#=<^O)(IQE,e,inW'4.I-,QxK5tx0;yQ0Rs'r*&2,RWF9K@j.5Ij}i qs )tt(RkiПTt). 5KMfF'y8 5HMzw o79@j 5HRө7ASRzFAj 5R@jP户 RH }+oӓ@j 5HMtjՁ@jRĥWOw@|IHYB>&/+Sejgk'yvWS]&1r-GʹOIjזۓ ZΘ[Ax'ЩUR-q;Ԥ&T'h,bFZH9 )){ Wʭʓ\m!5RԴ~~!'H RIj:pqģfNbRIt~~N 5H &w@8I2~b0OZiB\ZKbE6JHD, L A6a K~ a.G) Z\~<ٗr?hymŷ 6mYv qVzNveXuߖeSN 5J"E".qIt#udC'Sei{ZTp-?a.sOEJaV61I߳hqɽ.Ikmh邜Rs:l~twN]B2[p聹RNd 3>9O9sιW8I$b̗s 5HM3IG8@j 5s5}L\@2߯M@j 5钚/t@j 5N@-sXM,=51H &MR%uN 5H & ;7'AA2=Q C!rˣԘ KÐH & RmmK@j 5QFOAj$^uQ$fd!5I>];۶R@j"IM%m7 W.0x@G(5q|iz{bAR|"S8RrCUۗo~}ϗ?찐3cRpZXȂZ\ymLt/-ӶuAKHY,52^ˡB UZiX>׹/2?HMjr~HM& Kێĕ[кR>e?s,L,mb!gStg$.Z)eEy2e\K'Li$r1XrD':ޕB/T_sgg\4kz)a=5HQj3G|Yw\%z|<7oH &Djuc'Qkb">I|?Tj8TU1HMٗ)4◚j$5퟼5\~oL &P<$&Cj 50b]DFD_XioCxĻ|?V'xp`R"H &RҚQ!T!rLH Z{j*EW`d~e`R ]BTfritAx˘ԤG 5HM|%AĔG3tCeŜ5E|:3tDmjO Xj R?ln \ϭ Hl&:wnt{ ,E)8RY=H R@4f?ѭ%ߌSORTЬ]bpWd*߿+@jN.E)8HM쥦]+IwrT N i,qM.?L(5QFfAʗi"w6B;bRpd,%|G-52y->,l!DFJAnSF 9_)1YS4R R\E+-, Oy֣>?!%5m\/#]%h./#ԸR R:}/ ֩]h!w6 1-E G[̺}|,:ߘ5I,Q1\D4tr%e0<)2Pg_[F]gIqR)'R# X乤ϑKsâ}nZrPR#eV'3S)H &Wi^aLjLFizq~W5Pxmi .8@jZYIb!5tۛ8L\B\jG F90.4"jbAj 51&#̝ŜQGv<DT)IMH &RӶU N=5Bj 5ט2ERԴ7*Af$@j/5lR@jJMϚ $俷Cj 54IMH R$5m2 K) ȔV+VɊKdzEXq@]xP|S׋t1M‹)3s2]zW,|G [-f牅,d!?BFȦARoߵBb'ߗ|/Z;hwldE H)?Omږz7HMjzzHM4vj-E NO>O΋).}+‹)&YܔDv3GʏA̾5ϧ 6dX)^RUFgE%E.ٱ>ZVzDpp03nɱ>.|'XJjHzj`ZVHԔ]B6U@j 5QF MY)fi9Sqn%%8PR _zǘq@H RI@j 5QFe@j53,H]R,ZT˅H ӮNJ+-3c!5HM5u2 5KB[TRе RԔ'_I)J_V9n 5HMٓۦRfua+2 t-!RjdڳêtoL)2ZL͖`R#g]dpbɉY X'uzL\ۓ~znԤw>1f0 n !Ѱ,iع/ RH k(ΜsL5{؟G/#g[Hٲw5S6NR Uv]Stoo;Ruzv֌2ӿY1  )'q9S"y^RN my&,Si?}z e@jL'x&1yLb!69f*5Z:\BSR%4$FIa&_fOqKר0Rc )4(5Hjdϗ #Sna ׈QV &lR#FKJJq MaFσSr)4-˅)4ZjZj&(+ <^X}It߽L@j 5鑚$5?l/?r]5A'R)2NLY T1ibbH R&ۚAYnH RYjԄ @⊒9?R)ARbK 5H & ʀԘJ<^w~IJ̓FdLq @j'5~R@j"IML/{|^ e@jN$Ù%)58"<׀@j 5鑚={[H R,5tE7"-ތ٧KW,.(pE*DTee9?>x*KQPJǢ,X)1:[nCDoپG^Yn]հuEDu5!Iu-ҫ$)BJ"eӓ-v]ONyfTF?Aj#4ؗ 9_NsUV VXee9?l W,8-Y?肐Yy}Xhqˆz>)'0M3NۖjZH1Z:;څC-{W($^j"hB0$b$IL@j 5nԬELqRԐԌ $RAvU87n˄aY" a@j 5R/J*W@jb/5.C&'͎gLpF:[cxi Rt+_I XH!5XKM  $NR\G,,1u~=5H KM^>?IE#詁^jZd+ NR3@ˬ'FxDb ;.?Aj 5IM.IVTp)8@j2M'ݓ^$aMU(59YC”B&tڰF*4EW`mS!ϲaB^^Ѕ/hX \iug>[rL6HYB U[ <:-_⇤Tm8GgJ^I\RG6']|Rʄld]+[5D!ϲaGrDUZ~w >rAKRNc!ד))+R, 3Se~XR)g#!GgܑyapAO!Qco@jt+_$Rr@jb/5Ij $RAX@j,5IjL H Ru6/8HTSH RSRӽg%mnwx2re<>eCQj~lRR@0p@j,5]RTLJ"8x+1Nn\َǩH KM3+@j 5IͬU9I5m&1x8*qK3J:q R)RS; LE _ J׷HMvۤ( nl~|Nd#%|)"]WKSX2]XȔo"eG:m^O}$\! 
** JCS*|)IO磥uloŶFWL%dw齶['lnxzBb1xؗ׹& 1KMFn|bR:tzllfU}-[l acRp¤F62- ,Z,duG&Y␲AN"u8A'Cj{Nҡ%Pn\|OXZ'%CKع&'WI}H9K9@H%ջ/Td)8Ҿ[-1^Nd.o# +BBb,B2 Ӎ%Z{m3\>u[.rߵH!X/?'yJIղ+$|\%8w6IH4Q ̤uf)H^>2Q R8Wym .5G 5HMwS`3Ϲ< @jܴ2'qT8^jfz:-8ԼC<NQ|Zg2Ճ@jb+5;z׾IE#MZV@j 5L $NR`"tuSfr.v"@jb/5 2NH/?B<&\~@jRsIG 5HMwS@MAQlkS{}zn7~OXCK}zaK#U8_ň³;݄4RV~[|nTҍj}1ҮBZ0]Fo%_MbN6rQ 2sNJiҾwS TG:v7/".B6RRtAX6:=X6Rtl'/2Z?/jqXC-s\ a]$Ht:v^/|Ň볜+ynŽc:$5NHF,l' Ex^ΖZL,%,֬(THMqVTp)8^j9nJ q˪sG!58KMv &ߋRiz̔@$5,@j 5%#5 IjN}xH R$5HjH ZDoӟX@j#5vc'H RUj<98HY>OqRIk C'H RUjH.?\;ڣUS "@ѩ)ODHo;⵭pBSePP h0_es-]w~%8 g4*5 &W(i\ A60V7a))ac:%FzuuZyS] w Wt6ZZV~lCK c>S$5&aUH R:;N 5H &R,!9Aj.]i@j 5HMNay:@j 5HRSwD QAj 5CR@jIMKnH Rݪw𤷝@j 5HMT9[y˗#6u|/"A@jE<sjqz4we\8rPEUfޗ}nu"Ӹeoh{bt +({-RtM^d,FKlzaR% M>X2|zraD-W}>Fjps qԶwRdd{}Ǿ$Ԙ*}~]b%+u˄ԘLo)BdkʐH H4劸o'@j 5铚>7$`D ѓX/Ҍ9RԔoW+wl qS<6"&Cj 5HM^vʶ֎@##nRjLS?H @j 5@j $NR3_,@j'5= \pw>2'D-bQ?@j !5HM9$5'DtA[@j 5鑚;z]~Imѭ*⿻߱~T\@$5ǘ_sD7Π0Md4 ld tkq OX!0JX1GOz{[=4 y \UsIګ9!2J#%k8B7M>irw4w❒4ub+#{jk+@ݫ kReaA);Zxt!FXDEp}}BzRɆްaS Rr|Lo_>eH"+.>s WzXF oP8 L#[#MbRԸNR3y'l/&2GYO&{Hb#H &@.5"FJL!FQCj 5?"OWqRԴwHܥfc!'eRqSI{8ӌE#HM쥦zsopuG)PR{JR@ ēnoK8LY(0hLTL#H R"5 xKMH RCR@ GD&Aj 5q* Z{OIR{@+DH GH%#\j@#Ƕw%juv CJxnZ5Ps)0uR*Gо9GG_,AԘ8bqR']'%bhY6]>k^ E؛oR){%zqLϏ%@A)qKM?k~n#fq,!r6!%J3ݐz[8Sڎ?qZGlcȒHQ##7 `=H 5H HM80{ OmO?uo*_X`Di9<-Q79@4iM0s%j4QwinT0ڔRܵ?pF^nj:*W)ELpЪ_/ә?>n_J ~aHME0:77u;9̯bq"@bv) F+~ ŶLm78|)!X1ʵNLBo[/(J0k  @jBQ_+`=ߟY`O£"F{7'k FMlE rw3K<h-Xp/\ @AAԠQ'_ F\׀`X}[c9J^ m|;B0:sWk[9U_;z6>@+w_UNܝ610nW$CS>XNW|6^oy]M= @j&J {^ FҬ(Ȗ@S)k%4oFKx:YP癡b l|1o"# b;p`1@p/[ QU>V)b ###=_/5D7 @JqN:H Y!H 9`ElCJq_`JqH|9P>+qqqR@q9";!AwR@j#dE]"IENDB`python-xarray-2026.01.0/doc/_static/thumbnails/multidimensional-coords.png0000664000175000017500000012043215136607163026760 0ustar alastairalastairPNG  IHDR|/9tEXtSoftwareMatplotlib version3.3.3, https://matplotlib.org/ȗ pHYs  IDATx -WYo!!H<݄$$@ ilQT4GDQ B ¿dTB(d #nf…$Zuߺ^p}]yާjWծ]{]]DCCCCCC#444444hhhhhhDCCCCC#F OI~¶hhhhuu ?®9 Ha_݄>p>m - '|.omF6 'ܐ!C 8 Kv6s'pW 2\Kp?J$acµz=LsĹ+~$byYZ؞F &''ׄ "Jn±!$ulo KJ;q-CO!LL9̄8 #4w;% {%6 [zBIqBA47"zu¿$pL/uZDx}6hزV?cc>/1~YG'|:fW N_C{}@| u~dh%~ g lO& a1CB n 瘿Lxla ?ݜx{JEc6D2jCxwK,0uH7qNaاꭚaL- ^yR~h—8v W{94Akr b]ӋC+x(SΤ L=q_c%:_m_;N.\Dl>^R…4mA 炔sCKؿk;8㞱7Z+7- ď:2L9"L ^f=ϩ~\w=N+6O6hMaB94ͮJLA?H*%ܙ6)[k6S:כl|i|1ׅ~~9ץĝN'ܾqLהbF4j;lS* {>4F CN ~My|LHָtc?WdMz{DZ?lALJ5_:-HwbߡCg >`P|7U>DWNALH)_}#hWDž]}M\{!q_ iײ61mt]C%Я , '!c}#zy'þbߊ, mQLaI%E!x-%ܖiDZxi mD`}7^# z}?|z h>9[imo%w<w?ӝAg ɃXߓ'r>ҞF A444444hhhhhhbkOb}fCwqhZ?xоCRێ;i2o N1ӝt=`=wkv]qس;t'ٯ;y=v-Y?}_ӝt՝I 'w&=r#>,!ϣ _g>) |ow_ou^xv[?u{ gXG^ ZׂQ-SRߎcu}?-/_ߟk?q6]_׮G=_x# x ?/8&<3l =E۵MSyúwqT~F>A@Ⱦk3޽ݛv=;c#Wxx C۴Oi;4CxJ/VHO4y _%В>>qQ~– D`Z7G,%tkZ$KEɵM7gO|x~Qh]!CUP a ?/ l 5q"mPպHAK((뻻?|]~mw}g:@9"(5$=M{{J3E:h~>-D,L >}]{"%Нt>A $uz|WdI| Κ(AB"JA>@ޣ-b993lw{!xv|WQGC߇YK\+ȡ& BCϔFѤLϪEBZAYa@$x6MF BWN +# R*,K8$q]ն4A"4ݰu0$ph]@ ZBSz\ih '= 1@?a:~H` 0|l izΧ$Д$45t}T .}'1]A׭kֹ} Oh]%I׹>- Q8I&6&4>w>8خss=;yF&O|}gX>SϽ%5}E@ώ#iCLI990m ̨)`#+L{8͎?{\in$\ Ռ 6ٓ'[jЃPK|z`" !CZDS1iz@"h.Dh}z-rU΅В"}:5LDZ"PΣZ Cשk9^}f# {{ihz;@׹4znGO`MK!M2ALNGzw{,USsS7֬Y35>`n.KƖwSf"s=N*ljE7#$`JA'ᖠPC,Y 0PD!97 W>Gd!Əltuo00<>}Cz;&B@Rh%-YwS#Xj`"/ 3HB7j<`~oufDJSAYEhhLPDϛ" bZ&}_ &#˓0xKA{FPzh5#z#HkD@>eMBpz-B| Ьu1b->K+O$@kpr;:1iGM͂E ȁN,Lu-ph&%]? 
9p>& #tWA0t "=gzvwfOC`+2 84OۅmLL H!C)VjPf".0af}8FL >Dz0Pk9nZa,a( Yi&'f$Hh`Gs$|%PuOgb(*9༺&ȩȱ.+158BL2 |I G0k~h'''uT>) ͂ϛ$tB^N6Dgi"s{{&9\D Az!O&Qh1vorRm.YNu[I]4C^D h 0ǟZC:W>yd vf z !BHOHvs O '!apŏE%2IKQ8AXT%R@KA.=/<ԗѹ} QOqm f"'?G+EZrIaH34^h(=0EMk@3n Xơhxt7V OrD9UY#zIZ)ˑMl@|EHȣ0>=h ڦ 퀇M$nj0I$S 3l+N[ȡPsuNH'irMe]-.|> ] ڊMLI} $W߲&p;A8 vr;ܜ%^Z$(<+x>;k o %C$1-0=C#U(CY½D⡙$y!&OrY!~1/I1:bHz37xȧcA,K=(!'% 'q׊ӗH1!1o ;(j\ QԦ&' >Iң 5*TCdݤ) E}7;9n4|!G LI 7'=G>q"gapWWiled zbgRc# Dz@ĆChya:k@k]%hb&^ځG6@#a< 14[ϾDJA(d;ou~G~xvs-P&!LPj pYwC|775A\j/!N.T;k-Qq5Z7Q- Ow@&='.$}D1-79 A4h b> ³G*XGxLINubn`$Q %u'>B8zY Ǘ~n.?Dr6zO|uv?w}w m:FȄ"L3siLg\Hp>a\ xn$)jW.Q  7=iu`S 9/^tz=H&KLzF! -!U884ʊ~iz چP' & :FBS@&\y~ ۿ; D4s3sNUI\ DȞV\NB_h. (Bڄߐh:F$5GN-~>£߬8Ɠk?P m < χ 7ZdA892!LC`;GVLD3AnB7JP} 8A8)kz!ፙ?KNLڏb )R`Ax^LA9CZd3"ʰj"\@N9 =Jl*uHˉ-B u=ƻZ^(y:^{`MEE=w<6G9F~ׂ m“+ٵ#Nj=d 7CM pWl5z6*ȋAN; rͧH'PJ3ڃ #dȺ F/BU3'H t\aH%I\IKY?֐!MΟdݏ.k f!HD.GΩ:~4 }'Bb)kNքBBi=$ܯ꼉ZQvۗ(]'ÆWCI kA#0sȂwC#AmLvh$fd3 #?8W!j1ξH |\>C3xU"YB>=$!.@&G0Wk"\Ihiоtl[x&7(IHdUF:9Ϻ_k$A2~ }o2^ 2@; D](uӓ ty 9x`j-.Dz.yw5YoڮV !k(}Y!ȳGVNEx3 ⴝb|}~: NspST F*ȑDIHQ. 6xbtNmGa'r(VR$odPvJ']$恉)_()7+JDBйiFI#q MC}p_rp@DXmHAL')MP?kAZ:Ejb ‹bn ٘l'Dgs=Nu}gQD_LKAxsQheRoIj yɣA=4!qY)2zTk[9ff6Y &A>k !5× ./Ӑ/OdZ)!;5i$z4ZL懙Xx??kg[8- JS"\B=H5;lYސDmj{U7AyYُ½1$uzyRKzyMj!etFD# @ͥ>pDA5ܤY8Ɋƌ $1!$L7+~eI'(bτ Gr"ֳ@nΚAB\KDf, BB=iD@zn ulA}@}atr{ֈr\]fS/}'2;1E61uKB}^ 0wzz}>]Eq A8FilHDfێBwr@ wrak&ÉU')6&h.<A;0@ f 9I o" Bf4;rf&E2k¾/#!R>O:>KBp!^-8Ǹi&79j_Bu2+%⢅=_^LO2_ëOԥkܯ}3MukEI5٘ I{yKi$忧!KoA4Zˉڃ>:kExRB"co@("N a pBg3G*]"I~#K:yHARz(3;FPiWΗ=h5Ơi~BI`tM`A~O/eʽWGD9yoXB_}gqצ'v# Ph:K{5WLL|6B!t LuBi{$F%נLh% 3\sI"sTzoΙ,b\*|x&$LO4b2z11fDr]_\ӈk+Daf(w-Sw0Zo}}z-ҡ_n!Ą ǤsHK}OZ'I >~iA4 .N:G-cIMI ۔0!AP!⨍ "$CpP! DhD:QVeк΋σj$ܹ6g9B8=^5{LGX\~J!c@Zk:_G$}@X$\ bX.gtkb:?dDC@݇c΄ ʮZרh% Y2D j&rmrXO qhbj?9i ?wW]к@ۚG VCl3D+9UZMmdesd9sNZ:I3"\W&8Z(;* a LA~bjc5Q h.CO^É TXx(opOԖkt.RA3i1KyDLRtL]C:8ćOA^HB"i>b:B#-Gߕ݄KlZ봌'c޶`=ګ_:IP(M4=9rbω`BZ& B ?fG4-AdbP^$KTXo@GPHTU> !e{ \PY589pnp"dDQkaYCc"(J@Tug'y/ Oé-hyRsB$Ȧ2+>[ ~( s-.&VF#؄ Kx%˩Aj  3/VFfz`%3|Y PT][1WiFerLH+YȚ9MYHTHm 8kbpsFD136HbpKN}нْ2Ao/ סs2r EKyXlR9e^F#8: ƓDOchA_,A - qw%\liMrMU ZKn*Y^4UG i,ٜw[$Qi)@C7m7gb&@7Q( 5$QsG9jRg)&s0/ab*~YCaݟ3@pKvrb.yu`L:yᐃ sPEqϐc@ s(<Uf, pTQ;4'pA, *zb.vrS k֬^.-R>ЃEWpk5?'cQD1s! 3 s["əԉ-:M*߭ (kMmD-MX-0 3!AV "* B}U',ga=ˉx)ukMt k"tyv9oGEZqG2N݅$RόdTh4 =ϯH05iPQyi.Mؾg§[ Aq߄ '!dYɛuO6U\,.mCD}h! iٕɓK4'xvL v޹|qqfy\>pK½ 7%<3u³c*7O s/F9IM|L*z5b s״\OYm .?|M/Nq$BxBeF/ʹȕPhĒ\Yٽx>{]zh+v܀c - $f('Z0QM I>&>1G{k[/==ipuz/5e6{{!Z( mD@HG!A%|"SLoi=j-QnFD h :~>R+ %9_LI)y$QRBudbR&44.q~jg( ͆1] ܣ=,5y9ewA ,GiNủ=dTW:B~}QL}f&/O^K;QYPk=] CZ~Jvʃp-OgL| NʅAcvݥ$v g'~J&iߦ7x_8v OL:~Σ롧3A3 K|r*$3;E !$2G`"#O5&,>A)0sK1EO@E]௮½ )լHz)qiYvTb+|MzUA9a}| #m64Lcve(LV>{Pmm> D 5x}Y B6Lr3)l;uʦU:Ad!-{ZA~E8Vu7xDNz EcNs5P4*Ha*^5*{=k5Qܯ+׭SS @u@Yulgb  f(?r5t^ M (L@NEVf O8'\ϙ$`2f G:)|NpPbjd7xAc,Mmz5vtR=D4A ^vzNwXO*r?&J{Ϗw(QN49ƈ(:ߠ5A u\jU!Zwt}y*3{NR_8=klPWe B5uiB&u~'DAPXυS3yw*sr?%m!2  g(̬RW{7uAÉǵT9JMf&JɌ,]2?!~GdJZDhZE5.dbSO{ASYZ0 I"ө|*Q4E֬P_:oX{eqH'I'T; Q' ½ WNZ9E {:'cAl%d ВׂTX0np=;٤}Zh1-]ͧfXT {-Ḱ(sZD&mσJE :YV&Aa3osmc12iL(b%iM=\&f$Q@pguCM4jQlU!t=ݏdL39bIi91uUS!=w$AL$T6X9!m0!h 8P)L#BX)^hPe3ZYסEq(WIU E6DAInD" gF]'>X቉O+MPN6Ajdzi! Dݚy@SEޣ.3 {`s_e=sIvԄC}~פH&<pa1ĺ!C 4{61 L?HXpbF$R[5+Q0AU%! Q_ if xW/Jdh&AN' D9MgOuBt| MtƆ7PlB ! 9&+KMzz5deH%rtneKڳޛ#"z;U%:\NIpT-}2:q"4AzH밪vEz-$ ڭa ₩b!OD9h^ߨ#ǝdC 76a2Q,%KDt5+5Rj,DtuTU$`߀)|>i-J=Ds;2Ks(S6W53#"u"|N)lM]Ih) :I#(Yϡn=̚`wwo*U[! 
[binary image data omitted: remainder of a preceding embedded PNG under doc/_static]
python-xarray-2026.01.0/doc/_static/thumbnails/ROMS_ocean_model.png  [binary PNG thumbnail, generated with Matplotlib 3.3.3; image data omitted]
python-xarray-2026.01.0/doc/_static/index_user_guide.svg  [SVG image (image/svg+xml); markup omitted]
python-xarray-2026.01.0/doc/_static/view-docs.png  [binary PNG screenshot (gnome-screenshot, created 2023-03-09); image data omitted here and continues below]
6mZF*146]&$G{|SW~zߤ>ϾGI7ϿW{8yo{o{={/=<{s?zo{y~oo{ sO/}}{e&Ͽүɳ?{vm~y{|5O^o~^y{= O?w|GO3x|_~~k7^g{~Oxiᗾo?;_^;>˟}y<|O?~5T\=CC<4  yfnm1,THW`18W4ՙ=cFV כKJlNBKЪFe%uI `hXMӋ%UHyƨ04qy@R0qO~|x?,Bk5,I"YGŮ1hn޴(Hc-e`e+%Ez'7B/ĥu:MyƮQa+IʜO,[kLKc&qZt&)#L ò srZNKqZ€Y:ڶl `n+4PY.\iɻlוfa RY.Olw@U@H"]afsǹ˰nÄY VԀ07U"0֕rɠ i*pdn\إE;g˭ ks SjK60Pqr ȈCx%:)6@]azWDDE7Cdl C\J* H#jJݙx(k!krhgm:gW+@wgM( teE9I[&l="(P" +TD;HL؍hcA "6³2jD;4۽M tƣly I 1\i<2'. &9![t,c@44Mh\ښ:ò3ō0tesES XmZCL7 j!T@k3,4h @rn zv&=ba<:.ɨB)&I-1,IΪha;#0@bZ@ȇ ٨9gH&U7f5(h»l܆y1Z[Kʑa<.-l-krDuI-vܖUπ 9,N StpXo- O`$KUn]f8Ѳ).8lOI2)7gN,-ƼX3'@[g9-(sxo}_싗o}txɏ:~㏾o p?|㽷?~?W=/G_x#o{wk1[/xx}ߙy~8_'7{W?8{fD@p\_Ͽ3sz3^/|G||o|c䛟޿7̋ݿK/}˗'|K}~SϾ_w^#/'o^|eYX A8HAF1, ggPXle;% ˞,X#.JV(jbA,!Bd{u68@,@@nRB K.vF}o} ෿k$EKVFQ]cg8E܆}!+WLJ* ](";YpHZYaQ\Bc.JEi(N\ X &tt`FfH!lq\,=K*RQiM& bRd 1Hs憁4 !`KvD, 2.%`-O²Jg/svHCX$ Q QG% Sؤp.F1,l$ zUr{!I'̹J% z80E@22 .AMXEes7F]1e9@ƀ 9t E1 `lB 84Z"(#.{j( X"XfU,D8]#f]1`;Bq bk(Ua%%D䀃QCsBV(.^g-pk tV JdI IcZ@paB5 g2*@t`22nR1D$lM+ iό@-Ȍ%,0Hs Df'B9 ZvbȠ$!%Xj!iv҅ aOw\A$Lkpi 15'&s!\wU&FEU FZ67 \,q W+kvv xOү|3/kDn*),졡؀L Q*L@N3{*Ex`Ua6#Cbk p 2 M5n4-\ `uFx@ , ų+6H:ܳؐ҃Nj 3UUK"`Dc HpTeȑ+[e34GǞi K㰏uu'TA;98M€sI0;Qp/C0Ǘ/?ȯ~kۯu??o?w|׎cО7_^?W?/g?g?/?ȋ쏿w/~~?ˏ/?~?}Ιy>|p^g_>?ŏA7W|{?~o|{ǷϿ[?by<{wLo׾ֻ(`͇@a ta yc#8}^Dy4*:#n V/SWiJDC$Ƚ΄$84tuNI"=gܸ^ij'6ps_ۿko|-i}Urd.^F& ݡ]6ݤy<ŜpϬϳ̎xr.8дŅU_DWkYi'ɭN 7\'(+3D ǚΕ4wnDEǺ$&zfv%Dm CGuS7֡ac" UAفb3a\b8d>g$=ݩ4{:pԻy6 @y['{q)xN 9x^7åIiϳ5yX+r9aG…{i`S; ̺a6r q3 . r f^ 2#66삀ڃn1kc5.;va]ƤG\|ʪI :=vvv:.`p8Rv N1̞:CYš6-6;d^ r_sF[>vJZ*zG֏ۯ?q?<'OWo'[oIo_{wzo3:s=\o ?UnGνNp'??OŇ<&n鈮H 8ѣ;+]q&+N%X0IAE1lCu(q`Bpbhy]<I80"!I4ہU3F ]@P4K vXHi,ơdi7_rxvnwNV: NDw%\H-qW+隬 2D**SN\1'NB"j 0V1olt7Ab XBEgtbΞ +"gۆm!؛-l,f^B -7N 4^"B1.@+;P*P;^cȡ:w9y6R9d sc7qpvy/c;03*6ʼnbe8 [g4肃иK€96: N. ,zMA58Ŗav%ci\/"vXD,BEF;RUpL*(ZkdmƜm{"vis6*R]6٩t$.rbci(ANW, u" ݏ-CӵK9-8 lK{GF]gprddǢ9g!F蘺iK3&4D1hQ#ى-takőE ^Src/nNQPk2Cx*V1$&ǹm"VSg (0& Q|H'l-2F 4ZgְHbRr!-hoJsQ,ZS/z(8-e m%b`ca[l(cbsk{ܚ"5gB%d WvV\")Sv׋e w2$8baZ^ƙqk8"IbYP/z.%P*jVf̢ic6< Iޏ\%ĹuܺVxT Lɂ1qJpzeb`T: uN"XBgs#YNK:xyY^G_|?_>ywnϿ~#6`W?pɯx{ڣ޽΋?w~޷|"_Ͽr#W~>yOΫ$fn=|7߼ǜyb_z /oo;7?;??~wq ,Ҋpƈ0iXo kh:B.h@ @KK5̤ĄpyPi]hHl 1: `ڜ=H&^ AႴJQ1TRej0*̶aQ 2h%W#4$X ,8bh+땫ංbq ] 2:LЮQBQ4@u5YmcXs &; @`A(M8@8.tqǁ+]" ZvLT@k+ a,9#9KNX NTP( J"\UȔb$,P66ZA0D CW`C C1a ic.R]Ak6r3ɖ6CufHjAĢ%@VӨN0Ćt zGv؆ňkPBrZfC:auV]j3 @Fj!HH …y#RI*4A3 m'Z Q0tVJ"#Dāj&Vb"lD %E6趌1,DpDmD.FV\)1%"`.c/v%ip%I:0H!dE`me`] ,s> 16Lp"%` j@ A&Ք(GTPvr%lb `\RD `.a",c.+,0.]Z @D9Ȃ$QԬMUs]dž@-j' 0(q0*A$($PZpbאqag+[ K0ȀfU$,)H68z#]yd٪4a0\D` _?'oW7^{<|_>X@9g_$\/}6^/>o[>w^o|co~>y[|~9\׼k?ӯ>]sƁ뚇8_vA5q]/?_}~_~[{{w_=꣟y0"RG&Ʉq!0Ε!Y(nt;ӰXa/$Ӕ K0 hgPpO 5Ё0EbǠ ]@5drt/ qiQ$@$Z&XQHBfp3KythC UE lb qCB40Ska\VVdB(VwRcܩ 6\m{1梠bH桍CH300iw1hT ,BeЀF. ^HC#m#M .4.1ph X80,&TvHtapv4ؽJ`EAiP8@$a%[ C-b@ ab%āQ2ChYha4džA" naXYP.(A\vp2\„$' LcڝZB( K&FD cg ! !ژ t.Y$NmtvYaaA!֮%r1c2D @!a$2 0&0 -!\mJHcA;r!! -rh2aGwbБJ`6ldaMp(A( \%Cp,\q;6 ]pCa!,)Bop*dUiq0F%k s H 1"i&WhK`jܩkb[\F 2X, AX!3$`lyV/#!) ;,خ\),*@hSIkLee*YAI)Xil vA&Āĉ !Kkh{,DifJ@UH/gSrb.X$ʹ0In0j/j܉n ҃ dG xmg wlhl t!65h8(_~o>oz+p_/_/;ӧ7}?>"/~٧^|[=>@q^>ÿw惿wo>է!&wlD1X TA$zɵQds)DEpvi0&\aAN,0RS, h$%"4$Q aA \4!EĠ JDkuM"i @H^$m^;"Ԓk d FJ (2aLиТQf%Xdm庵28(dᚤI) @l;V,S(,#Jh~FEb- bm!ud,&dPm@UBP"+\uj!6V sDr64XǦRL 7h(\.!a! 
AV`,DEЄ kz_쟾k곧/>~ͷ_vUw|7_=/ ?>|Տ{{?ßn^O~/|t^5擿[\?㳯~ۧ_w߼ǿO~vs?g<_ɷO_>ٳ^_wP 5hG@J@^U^ۑ!@PE l]8Re6޺bSҰK,"$ &-L` jM౽ $x-4 I i<^#1 H$aLC7bm4U X(A80aɑ 1+.jDNtn30!!LMg^\I fAH%=.)<ǚ!['-Y[0IZ᚛B%k4 :50jV:8E88$ E Q G#,eSD쎱ld\ 6rsd r\BT2#İ\\HUELL6"f'@H@$Нl҉deĘJ!Cj/)+iD( @Qf۳*.f]B4W6sb*аD&8m 6@4!"XVx&,!68 YK %^H7\- BRHIM2l0k VS 8EM|kw\'SLB$L@b )$$(@9lef7u`9Ʀ؄S!8.L $mc@RAƘCKWH[L9n@nJgO- ".va* K@M)5 @;d(fP+Va !beȞ=t`Rb6@j$®h@(MG 0] QfL jQ]@ȐR2 Qd( H 'k"N`HZXI2!q99@`n4FE{}8PE SfZ۹a-={}AQ: $ ThDhRA՚4!-,⬺CCRL4,\Ya\isMr `=Ow+ЇGo|oևx>ƛ~'M?ٷ/oO?//v~/~營}~wwzF`>z߿=xW?y+}|?G?wηS?~O7OG^?0_?ޟC|?|w{kO|_??~G[_yy~/?x!+)4!Lc-w};˸S"ʨ<Uь{~LFX_Ծ[_`2w-z}i<apBn2/̲J)Gh h0ƸpXܬ'<ۑ4̱׽zpje9u.KnLxyKF9nzsj'F,7ƙa>w  ܧ{>2ݥw9 blنG gxlقTVv.0{&q>e.ܒpeCuɽNxfp%gu ;\Z:!i C8oƽKtO x`6ܵ.ts^gg98^w:LA* ]yvwtqάBz/*ن}2NNϑi6v-q~8&UOgE{Թ>tv.ѳ=m$\d۶y4ouWB9sWJn3#Tٜ=㝙3.&y 1Sdm6owv՝vZ  6A1xɻ=)Hөo#҃9s.Q'l csH>5]9by.6>|l{^{;;?{?ϯG/|Ϳo~ _Ϸ^|O>_k現?[///~ _g_W?y]f>yw/_܏pf2n6_g]o*/4=;2w^9;yNS/oz>_`ݹ8n>/CdBfDz ǫCݠlY7>6 3ъG:Ӻfv!q"hm{KOo.XMw&E1w+{"07=8.K^Kmu7m3cs,ǡlZg$^BZn! ;5Krwii3OO'9k*BGgFfde=v=^vC$lx sm.}'rw/tcLtz9w\x>><&|GqrN$ xbXZّY^<_^(B2›7͵a 樇:Cru<00}mtr/l'{) {}잃/G"=8{\psxӜAy=Oh.tח'=Zr0tyih qwvs[Ep}aJ^dtg67Zlξqder4lw#ײt],<9;f8; {umxrח/9YVl}8wgWgg<ٛ|J0sstfYł윟/ԝ/={:r5nJ]i9vv.t_Cp#0qjh9q|?_~y'W??_c֧k_?ڇ8}}v?t<[/۟ol}=(}}Wo{ͷ^|r];[_~__oG//?}~oo/;C|+?w?|/~_W#W;O7#\և:vpmaZvnNa⼢2v>Sv awo*pGeVG-t&ϲ15wdp v+LJ6Nl KH=O.d0I..aw&$l=^&݁2 (&Ng1nLNvr6Ƹ4 ި4'iT!bap2{iw5;p>i0=6go'gӵv4]o c>.ά2XX N Nf3.@CpyǩzV:,.;l7s X2CgN ;,ASY`003Yםfv&(6 8Q;#ܹU gL@iAjf7 a jqǑFWGQL9ps/Mi0hYH `;=]@y BY }2pp-8 58n8(s3 m{#<|yS|O/x#_[W>˗?zg/?7ӏwo_O>Fo?МW}|O1j~)_W^,z=_ox_˟O?z|ڛԙi_y~ܙY/?_?or^F__>͉!$(LW_!I`"$0 uT# If9.:z'uZM4@*um@"R26`+Wڵ6 ΁ji (*p \EX-(aT` |6YB):3I,& IJh3&'(*,@Mcmӂa0Zr25EZTRю %]$M f=q)9nd@:3TNA "6 IJ%` Ð.5,lhH.Dl4;,, H[G1F3ʐ:ɲ 7"Lk P"teq-$ P ! MEP$  D%@.@'d-`LI"MsN@KOF E`6rjB%Òc붱pUY3D!tu D:Z{嘬) bB([Kg`icI00*BDl;fX\ .l+\P!]ʜa`JZ2аEQ&D%pdZ PY:eQ'>4 6BHr+ibH*AeS)NT˒ZN4 $#D,Nc4B;C €E,ܡ] ,Y!iM VvA4 Zі%590{3iBF5*!LH-HSlΤH&Ab!&@$b4 LF! Dd K'ƍj\2PˆyD5L Gw((`fS 2$,fH$YІ B4, 0&Mh2AlC % SO?9Go_GPfx|ݻ7g#g?|[9=>y`ǻ|/O?~Q}/o>; >^~,y'=ޜ9v>~so7+/}壗2}u_?6˻+/>y3ϛ/هmoޝ>}y0rAqA05nhs滹KҎ;̹,!fwvIg4QXt²!m#vZٛi0D4d2ӈ#99LV+  2͆2ֲ B3*(S6lA(6kfqA7{\f#=4l,2=_ PÝ]&'fkE&)RMbgaAy$Td(, l.0s!g# T‰Q0q`aRD 'vYEm̒;LZX&ՈW #xXEL$tAa 4F X *"02Y8- dAvu s;fقv7gGaZ7ø!m80  g&)'hK @;H XuE,"f P@@@P@P@\\28LRiwpns,ݭR<0ǹZxӻgK'Lw\pw3go9{'|Gaì'iF C;y .A 6dB+]iuaY4}wY{2ǝ?MGSt:W{MCs$ [lsnS+C؃Ox9[p^.]# `,ut"33ܳ[ a&6e>=~u;0.97wkH74zx!{am򜀜Ίj;6Rry F霳\8 p(6Eq<4v=p(= ׁ ?]!W>ݝ=\wrW͏ \m1` ^r4gF#0ev=盂Sg>;ٙ[ey<_mWu )~wxca+<ޭDNgfeγygmԬw7sұ%} ĺ=q-sj<˅2_3,p?8x=Yh.9❐Uw[Vɰ]iR߼3k;v{{vF3Rc6]v2L,;#vgwf;=x0BFl/نp붾5^tg .Ww!}Lme!nzd]Nw@Ϝ3x>Ҝ5,o:O?s\n}7Κ͇lo˳p;~8ÎriD=A;eO 2y -;nvu\Wפa~~EogzҲGG;6Zn K!o8 _cUQdYݻ@n?g7Ígxmk\lڙlΧNlV$+z:m@D_;7;SE퓞̮m }Cla wa*ȰFYu[˰TЬsjs;,nَc-= Xs!8wޙӍI0{;m`Cn)w,c힑3qS7 o2on{m;nUaWn>DKFٰэ~oМ #Y񑠌m,;vmPٙ{Z2F`r[vV? N+ Yѡ3rf]SG ]kE ƀ]60EZ2k). 
KK;4(pc7,#+۰в)eaiis]( #^nwh @:[q [Af"?K]/mgLq֐Zɮݙw@ΰ%.ݨp6o 8@$I$VwÅp[}۷4{v퐮_{ݛ-߶4woܹ;Uo~owt/yencokyϞ6g3^dd5Qf?vkIjIjϾzוz-޳1C8==jutiͩlK7w:jAiyzUz5Ә.fCX^ 6fPc7dylf/khlom<{=7A'f-G[[Ц-}f:m+j^WV=]6"6kmZ9u6Ɯz&~uTN^W.V;OXrL_Gv^|fZސֶlf7/myvnO̶X16g{ଥ>?4hL ཞNJ[ٖ[ AwVW+'5x6x3"!A{~qss(o߬~:-2hZζF};[h v."C <[uavlӦ\-fjM;7=C^k+;TF2}Mry?gؼo{jՄm鱥޶))Kx{ʺMbF`TƴkJ:͎ƾV${}ں{&T{zo6Sm0{;Zjݷ^_X!s^Ϲ;7{]2;3$J%,glB6ʭ>v[YXd_ۯv{jzZksqYҜ|qzQUnmv7Lf&ScWܾ#_l=m،Ӱ,o֓V_,,coj9Gy٫zhˌɮY6;ѳٳ\xnϼSkkV[6<ՅӢy[xs__O=_f3oz>oջqyo ecVO0Rض}M-[&V;e}i6)qK"yf~o x6xuԬ6z;=7fjuٽڕmڮ1`oiFs4=v>ov^[6YW66{&"=og[=VO.[i_]ޖwQsYilMxuɣqMcʆ^ض<ˤ]6Z1Zަ9Ƕyz= kmAu4ކ_{tgnc/,Gm}fWnֽ&Vֶɡ-SdlK:{}`Yձycs~}h{Oez孭:7Y{^^rӈ&iWWklܙ-{ͮŶgv6 6Wy6u1ng]foN^h{_y[}ZJ]mx^k/Bۛ_jLu~6׺˚nog,heiD&-=5Zڴ{3w^Z콼maf\^c{?mzb\VVw_6Kkvl~E*^=E:CF݌}H+IΠn^̘l歠}Ϊ=49fޭ^nI{gGF}MxIn>nV6[Ի=M=]k'Xʾ켛16OٗH:qrfԶY{>q_/.\ͩ=fmӻ=kZf׶xҊ{,*/!Z,cOr/BM!罴Vdm{~6뭵 \yvYw5-kv{۞kwGRyWz58o֖nb;YF3lX5=ުk},')n{n+mk`{g qF@K>+Wg^<&1o[<+^¥Z}3Hau߻fo{mˊkM+=mm'OiRmk5zջkZxwʉ3=-E^6Fg{ fj{{y͚ފlߴg{h6wzkEms3۶r_&}:X^w7[SŚkKm{޶WstkSKoֵ4㵋3L_nl nҔ6T~ylmv{V3 Z-޺/?].F[kdc.g n{zm6}ފG߻-nl]hf[֢߬ްG&[5Uov__>W={fc7nn%﷼dkk1=5Bݼ^Y;yسEszFy׻UmӋۯ_k;M6[̸qI2:fm5jwcmYkB3LveJgyS&3_ov#M޶~:mmDjmp0wy}FoZLc-ۋ\ۆng^Fsz띵0]7,{t7yq=Glwt˚e-̲yt3z%ֹnJ6:i{X?޿vmǝ6~SUݷ:jm۽R6}EGǂs۽|ۣv56{{uW~ͪ8om}lVZuuٴܬԲSޜw杷m^Ľ-fo3Wλ7snNE^Ϲϲo{lov7o?t^Zz޻헿)޽ԩα1S-nߒ.Kْoꙑ h4s_KXVYX],Tld$ͻ#T[̾};r㊔q?>Yy]o>jwIsˮ\4$%{KT#޶vVmxٲ̱/[vO%ղt${fE||^7$9Dݜ6|uf-[I_,~]'k%297vstIeh. ɸlI.l\EwRao]rkDjl{6.|חR[dy/b9.߭jmivGkzmH%2ݕFeKr[/I?v{XsuY}IdefK2O&&bJ"wIxgsϲ}n~\>ӈevYBlJד$ݖ]dg}/ٲ%kVrW%-tzMv擊,}w/4Rɷ.,։['^,]n.5{,[^Zer"owۓ]ouu3]l#{n[{6ۋ_ْo޹T$/9Y6ldZr%zN%?Þ+go]ﻬo݈/[tb̷l9%x˿ml3異(?n;l|Ke˷/~yILcK^\,^~$]D,V).L$^ݖCkȒ_W>*}9mv0|&3;mR'ݲE|t}Юn$I.bXMr߷Ų-/kMajo {~ɶl_Lr?ܝl٧/_d_D\;~,OMonΧ ^;Mz.TrWث3-w6.w[B]Kf r9t;ɇ(Bgݺeߎ$߷/[k$ce/ ]Nd?̧_Kd56N.|\'M+y[O|vTrmV&;Iڥtr,dqR\rYX$&&,]E~ض;}CrΙcopd8w%d!wdmw0,&voeK^,>L! 4!/L6w|M Sem#v`ܩeKEv:% Ye: Yw]ؾ\Kw'7,4ٵ"dٵ:˨JL/g%m˚$lMnɋtqYSM'%g\"j!b5c/wn4Xl3 ?0ۦl"Nxr \^&!t bDH,{f;ۓX&옙,f:,βL^%˲jݖ#"{mͥ ecѭG<9d=rlbYluޯ5⒓cA^Ѯ2_^f3!M>-FR7Rq'O&DzjdB XgdIٓۗrm9 d%YX|6Yz0XK"XYoB&c-e,gyd, 貳m:Arliԭ\"$kF ],4m9$y$ !ir=%ɖ%2iՂHg_T1uv;I}Ύʅ^n[%IbB\T,eA!$JB3X< @&'o"2[sR=G}' .~mmcU  sm[Ǝm Raqb ,9%K%-,Y_#:ٖɪ䬱[do[&I-шE|^='Its6M6!$Er|Y2}rdKV߾,H5_mwgG0}#$upcͮvHf>lTbEHdo`—LZn -sfsI.-V[M#.ahE-"jw9!&X$YzWȈX-У/2$~n&IY]TJs[%&-fm,ebl"[^>"Z6c,$rmۜH\$Paf̲mTM&L&n.{隴m\L,3Yvn-d3!$VelKX.ٛ.InDy2 vwo.DȸKDLR7˂RkYbK^bYdUwjal -lmi <6nqa"V,Sd[^ȞdzZ]na wm2lYR˱]d*YU0n#-Zv IB20L6`g;uK4m2_omU`i6nȖ6Itd2Yvy`20rEY-,=rm-HbʉFV%Ӻfm,zf.V%gض=ٲ $Ƌ%d6m$rY R;jҷ*abtnaz [b&֥NK'Xe0آ,IXm ̤FX"9Fnn~$n,N-@m36J}]D[fK&-Y2$PD,l$,Ĥc&"t,F,PY% ^`zN$Ftz dn,D6O2UY-7'[2=2-.m˯?"m1MҾ1ܲ;9gD"y#d,|iYfX9[Oʒ 4[$ڐtn}IFrbE- ,r3#rU[Lrgɒ Y@H"Vg.OˏIXszrt6D4tz̈́&IlYܚl7aB|$ l1fi(XB,ŷx[6oB-t\\e pɓobY ߺb"1n_< B Y. 
"m~\beeb,Д,[ꫯBml,޻Ŗu\ry5t6gU{81yoTKe/]5W[n9ZܳMsg̒"^۲%cm5Y L;9g!=·'Ğ{jq6_v"t[m[tfIr#;!׷M=v[n{z2lDhe*/t[neݼv"u 4˖5i|z2vÒ%Nmm6YLTs˙od]nd) ۦN]2 xuI狴m,qY0&n.2}||R]ﲜnXf˶X$9 e$m\{o˄cL$Wܶ|[e^beMEHC{SI[$enվ|l,H;~ɬT"˅ˢrd5ݲKnݗ&NbrYF_܍d*mu2x H/ns"4yv\\%O* KfLqɤ3$ܚmI'[HX_PC\}]{fIN?o'bdzeX#[7/$k}=7?1žK4Rwx,pn9Dbs,Yɷm&n]j|eloVge;[_^ͷEF$z׎/o?džiڤJɋ۲"Y캞}ى-UNnv6_4-X"t7 $lM>ߗvO.wn,evtmnR,hWlK#[fnKVe3LOO&˧Γe2fo~LYd.f=9YzϷUzIe㢛m>5wA*_*o5X2mB߶o܌`XeI%].qvϙ Dw7K=;IˮdK ,߷Jlٶݒ4Ml3C/]fJ HHX:iu$ I$ nP˚#\n LW,6֏Wo=o,=[KXndzW!~Zw>J1tll5Uy巴ȶv[y?6碷l;fKbJzym,kjV^OG+on;{۾oڞW;ǵa/Jмe MVӶN~=k\ϫMlXmfcyuve㰒/o{3EU ,w۵e瀽f޷7[_IYy۹;.^۶9|޴mV8k%av̴]O=mxsۦk{ٺz~kKn={Y&5Hi_wHomζ}쾌hs4m&7f^5c̷4&V[e H굽v-~W [6v,^9wjҶٶNWkqe_V7wvaگ"k66xolk^o#v;Q{ol5yٶ!.?K޼دKj.1TOo:;{{˵56;S{v\ewY ZoϮ =?'۹e=nN䞕U ;ӓRv4ͪ]c[gɲ=*0aN$k3KUceLOxVLwokO_f&3muW醽^yOq.7oYdlvN&?}>?xޅ_^⶟ٽ6\E׊ζ-zu\lJ &k]ާso|[kۭ{svJØve=R٭iJ{c{WϮYҧUK1i=;f^k 3dWuisY1u3=ꩭm=lZXem;11n ;mVpc]g_޳8Y]i8^#.{U]bGY^OͶ7ؖzmӓ[m[{{8Lz6g͘"cԾ_Ͷx=[[/{S[嶛ސ*0,k7iCÀ]mo5^ ^7Or7:Ȩsm1᩶ۮ;z*h"~q%q'asԛ}r[) [[n}{% f3=yBڶ^a10MUÓWyO'ӎc&7m4y~1O;1fmmNîݻI<~o~>KWԳvk]m ni ^~C no~Oc 9~CC{؃O=^^7l6{^`kovOf/Y3a6浾ҶqW.ȳ+操}_Y&dno#n4{VVD g_wܜlz;o-=~JζِMl'綑^-ݷwl/myukg78I0y۳ნGڙ?gϫn-oѫm-=_MI{Yz[[i̶su<{McooWe9)p{ެ{/&uW[7=5*xzUo,ݬҪ$euYl+=mlZ 7}m S^{5~֯67b^sy55nUכؓٔG5zk=;9;XԞ;[eg[4oZKt[OnRݱem˶nC5Ms)s˽mxLԹscWϦ;[O޸6zښte}lnX6˚.7rln=3~~slsw;\混v׽jKﺸTpEpiTn_̓i{=oM Mrޭݕ{m̧W濟^EzOt[X{ҼwcL;y;{ 1&MۗLg={[Ul|Ws=u<+g;^ռn;lkc<{۾o?n>{MޫƼߎ5s;k6l^{>ytzZU[Sy˲5uמt[W{uw^O sۥi\7}{Zmnˮk#5n_^Z3mR%G^6S3 ό m~3K2ܨKt{־_׬qK{7OafZcT^[~,uVjpnWm.}lNkqdۮo*6ޡ1o=rnݺ;/O[6[ڛ]mz6POVϫ54\kti+vs{n*X5nHzwֻM?{@V7~Rwnennjk٢7o-7]͢Ө5}OG33nU-jv-b ?5jWsIcղM:Rߵǻe5f6ՋV{r~shlֿ@Ʊ_oy+ގ[!g(۬{9cC ޞGk,EoiO1ۨkwhkwg-Rq z/em5iv-[͵y7λ6 "3%ӞY2;[veiߟl{~{J̘m/:~s=]=CI\S{V/ݺ~[EmTױK{~D־߻^߮wK}nzٖǧiuL/Eݻf})/NۘOWnyWϮ,޶i{{￈iu5mn(ŴcngL\<è{ߺn]]=Si5}Moee7mZMm{_o &jQˢ;:W[[V۷eb~yWӦu[koys^]:}{&pC"&m;7o{u{km2KvfSUlu/[uyL/=n~4~6x\Rq{n4-Mܽ}}Mo[mUͻՋ-1v+MOeώs뽌1ymU[gϾem[Mٷnx{]e^kQxVnz~鷹Q|͹zCٝ=l7!_ڼֳmG:3'{ݍ]~=z:{{׶m?J {v ۋ̬vc2V=[.^F2~`qÚ~}ۺ6P1$ؚs?Li\{{js۸>Ϧao{ݯDkq};_~Z[޽yno{Iߝ-v]eml:O;K|rnysYְ_QիgսE.^{[5Oik%\6٢nOڜW3wם7Yuw{ks=+ 6*Z9]ՕiѪu+kޥVݷo;m{8~jJ&e+VW= a]Uh(k8P}XY# 8ط{Qaks< *.$j5};'A8_@Q8Ϣ:XvKnW[Eva 5L.?HLEOOn-^o[~|wzlZ=noMuC`8긎N@QK$;Q߾]"N߷x awxm?9LC qe >l=6Yyt<_b?>9/Sd1pѷiHKՁ_w jv??2iwE!L݃74^9Xǜˤ|4'%WC}S${n\p_w&cE4ٓX_ uy ],98,siv?Cʝ>kc}z򄷽1PV=[ ߯u|q7G}`l??'N~4uPpjl3a],Nc]ǝh4;.*v?\_ꚯט2/?w=kW߽:jW#mǓ7h|1a mw  >_$r1 ͔/4xӃc1T|98;QT}1EfjH|tkk{D/wy'`Mt'wg_*Ý^aN]㒘0.ck㧷7*_Nsz:48~l N? @;A@F42ۮnyv;u'Xާdmg=Kryß=}8L8q9Xw^Ǘm <:k7 _~}Kٞ/\Z8o C}e7ޔ)(ro󁘫qwYcDibx~LwncC}x O.= ~Y3p8:h8zn1m7C0~>~I~vF{1!Qc:Rt7~_p_?Og]}yo߾HyO>}#q__}6{ w\"'N.W5ZCx^o\ރ;}Xyx\wCAI >o`>:A=3{:tdtѽ{'㞗0q{uod+6̷"<)a>o?N6H|Hy{eҾ~py8q:n)cvqAY28> 5'|V_EoͻL{{1Wx|'F6wOME}%=ߒZp#`xQ׎o ϩ>)0m/{?}w]ꈾ?;~\y:8yw!:gYv;=§ytoCTcmpyq^~Yh/*ztCםGs_J p]{fߓQ;;޼G+;8@I?p]>,WZ7nz#x~a|u ՟f:`?@nYTi;SЎN`Bs>e]}-9 83(0ƭCPǮ>F!}AG/mע3~jp^~MFb'-}"D{ūǎ[xo~?/-i?t~+/^wO#6(cUTBJ#:7Z-"8{3VԇcȅLvXm^l8L!KcrD@a ߀- z .;OS Q"MG&tQDܯȀvrl4%h]!8q;BQʉUxU8t6QR~B |y$Zª=t0=٥W8^OA,8}qU80ijzDU&qr[$+ zXtyPs]L߆b舄3$8e<$p%y:Q<8@Cш*ϷۤJR ȃ:m8ouq$Bx`hs`?@Ea" ϺlZ1>OO'z'.00Nsi!ĵ48iJtap'(cEeɽQN]*2AJrMti+3!]QyH$bH+׀0(! SBH}ٻ Ƞ )D5ݎ%:AX (q£]ے)GvQ0P9.=54(=ٺJĢqQz~D1(=G z#\8B0ۉV,J ?> Ǧ4j) ώ8ad yy8V8sg%QWwL@t@ΰCeH\o@GWuTYI+/vʁ4DH@|\ljrdU<M*>E.:  :c|u=ӚFȦI9MQn!Im@Ӳ'$cԑ: .}.Bu~_ݿٿo$!HPARA;@KE аx"!0 @  @T 8N%QB :@%B&q@܏?7*B)`*QLWt )D ݿ?gPn伍ԃpk4墀(ʄWúH 49NDӝ '1L= 0,`ׄMKv !RPgA7 + CI3^zCX DD4Ex4ǏgQP(Z=T^}Y<4"kޡ?C#HBCܜk^]Э. 
;IIx7^!TsUA#!5P@I܀0Iz=MQ!qtI8λ]s #zƒgYY,3@Xu v pG D7D4I(cwXv}ْ*QJ3<ó-hL>Ap^7"N.Pǐ;(,8|G_yAeOqGu e?^U YT%P$=%@!͚~dk'CSTSqlr]^D|oo^m.&aZh$"s[S @iby" wp;}$)f\0h2RIŌdVxOʮ4|O( p),GzmGpѮ=WWQ"Ic8h8C  wB~,O4Ňq[ 2'}@(G*0͏6_`>;Fu $x$!0nד-8?B`}F5Nz6A( R8:HyfG灏n4-vO\'qp1%!΅%ߺ0u\G*H̚)5 @Dl0:!}!As9LqmV t->HZAⓀ@XنF4f;'^<ڽ_c1R}"\ W| ىwm/2o68/xΉIaG!1򽀎R8;xgQ/=sMa'hq9";c)TuU>-td  E@a\u#TXN1<p]b@C"H#,xQ@ DJ ̅\Vi U*@S'W(R]JH8c=$;?T68@v"tDi];$jxGqysn'I|M~./ ; dۢ\ofDȁwE&5fB:.ЂSR9!).9^8`taj!(AGh!H!!Tt85ᄐsJʘ󇁄v!̐FT*8Žl:_ΨpKNOG 0$TqNk^0(U!1p.C(,=2$< a0APG3.: ?LESAa(DA60,RD9z荫e< 0"GU\n”aiqMzy:1Dt(8%n Ձ˂@ڈ QBb*H5qcg@1-+^d88*Y@jJ֏ QBLD0 yY88(tz  9D 0E! VSuAX&NMK.tz@H (U\8] *x! DtpCB@gCɍܤ"T@]H]N (ӏ3cTUpn(".h&ZDA z1\ H f!/{ L3B2BÆ'Q! 8Jr 1Dա%)^7 QHT!iaXO&[*0$ ,&l~ڑ4ץ e8A`tjc#u#d&(WnW)9뽯ΝșyVUuv[FxoљژǝM' NCqBEiV[nS }fNw8/{$nJnhNݽw ,|ͯ#$6;}őڛfGf&9ӷsH] stdG?uu?o2$}̃&1t_ ٩~WN* 931CnaFGxM7p 00)L0"w;74ܐ;eN&aI*$ {#Cx ͥD|%nփLf`[qTaaf_@ɡ8GK3߬@d7* *c77"3]^Y:6R]iO!uhڇcdYdz{7Pތ =W5Dу㬪3#cPNPv'"&q.\HR W9 2l3RUD4U Н[5ޓ4_rX偶s4{YՂ!Ӏ{ݓCA||m4"kx7""eGY w~8΍t#p;@t@b>ᕼ|_cq|y:Θs6_ޗ> ҹқ]Xo缸;z~>4w|*/
zt,/ /9n>gq0(ˀ ؚGa;2cix;Y^mHxܻ_?ffx` ΨX~q:c ]C17uxjlE/G:~3pܭ!C3;|/yG?Hz\OK/ޝ߬Eu17Tޏ͜wiΟ]83*0?-̓nyuü=5q\_Ww5tܵ 1xܛ4Wdqwjfǻ6bkrsIG z Q#6۽ }PX񜕩{L-76C37toD؁!{8_ݾ#`lWr+7׫kj& o1 :qF};zj>?,OߐWw:w/'^ &m8ջw 1oqw#廼{-Ooꮫô$߻|_7ǿPrᱷ$hȻzљ|3W7oy3/;Ncӌ W;ə?Tˍoģ{y S˻~N|Ww9-zO~/Wo? xBXqqerUip]Q1=kw ;{ðQ?;t;50|_>vn̘wqv; M0.`"fdxA8RgxFi_4fk7Ӄf!M;rt$80{݈ }`ay x雽z:9_Pw]@p%]q_x3(L?{s 3~|nf77OTG/:wMۍ9}=*f=c8x'I^ăO:>(O?(=|%v=s[;%79ج*4QFV 'Gp:3pz;#Q8*u A'hsX $geIʡ:7'f\4 uoqþswOJC1c]a\7IIS2QXW{u}1Q@zc@ԛ08fH^;ŤA/Jg;hD2gq6\BzNb3 {6$O)ƌ؃8.b)aPTf9"Wg'07p̭$wT `*U{tL&/APNiz{܈s97%#ӻ7V)Lr҅&W78rA P7s ӔT)=-Gx* d?I:n@I7gx*̕8\ {\ y>;I߁?`/^'M 0ٜ#ra]R brW `ie //i/n$M@m-]8ċNsamyq:s%*jm>f=':9UAdsqp :P58;| 2q;ߡu ʨ2~J5*wNHwS Z :z j1arsɯ \;qwv'@䊻2ko4<F8#3\o>`D!+J.ӳCJn4D?@^ B0Z/Eqf`~ɌT=&^6W4ɎźQ!eh"0ƶ(&DLDjA4:' nA I>"{!}tD `"R@L-dqAcqj [ؖur$nT.k'!@E!(X@xtф8Ӄ/&]'pP`P F*N<0  )( TP4U<EO%$A*HyhFFJ0t'! t(FMH#dE(#H Lb0tp "JBE![VgjvهT -`9GؤM .wtFтuN 3,lJPA8IP@81ZDD`:':#@&'`De`rBAw;]fr`A&w`|QAԂL |#p/$ ^:&6!\d,F=), S׎GQ&Zy_0Q#FPw*aYt R 8$J1PN;hąףgQr +%cLL:I= #)fw&)ǡ,Šrճ8@đճ] v!Ԃ 3D*-L4`'[bE"ukhP_*nlKQ.{Nr-TE~"W]}3 aQ慅n{C(Kk#,03:SL ~Ap;҉Lŏ~D7b0;h A!V&Y=!)tH V uOYQ.,sGǮG'DU'H .TiP;6:9> c 8"੩QG`gC3.ό b9Nt!SPh)-P&!)C|øX)S T %ȽZ=qKoP&b &P!zh2xȉm5**̆PV\$YYá `  %rHz` HHgHvEꞜI:Mg z@磯,.MAB8IҡoEMU} R$JB.a +A~4pƟR&t |݉.#lL* Nd|*=8.<\$ ]*wc <@^?}TidK BII~RFn(R7.*FP2(qaYH R¥ZNB%N&='oX%ZDVPC BB($qZ_20 /;Xڱ0.\aQz*@.JAĮ 01@TvϺ$*AEǵח(ra@ Tv*AM8Dģ:$FƂf @8ˁdg0tK-@U/$ `\V|u?IBд;^m&W6!App9˺s6Ii4LOc+Y$ ` Ȯ]A0VЕ_[=APdtEoÚ_Gwe~y*~B6[:9m3GvdQ" NJp_^߯vYQS'{ד!]9xz{1}\hCD{BKF{Z>/AFn^ivc#8vۂkQIPv'A&g{ ~l~ˑtEӷ^_a9}}SeASsAȎ^ls @o,9[l~Po7xA"0o֒}?zD/ =g'àj=oeX9욋4权۠ǀ6pn(Pj)}V4.߷WjPޗx 焽Zpu׈;O^Gt_}-o~ey'b;$U[rblߣ CTX/}p,9Qe3o -p}[{DZ% ()xg=yW#;ߒ\GP|nE qlyp\ip %BMDuCR)Ё8)xCyu*~@Nc<:Nߑ79|n'//9ܸ'EȽNL*(~tAR .A._{)9.X"r_77_?;'O9@S9Kx EP}L ŋ3&qlNg*i( Ņ\-ܼq #a;QAxu@~x!{1ݞA0vd^~9~AnX@x(wqݓ$S+Ip%,~|ҌCWu]P)5+.}Ov+Sit9Iy&y'_:wp:l@Ǹ 00ǵuKƇ}>G09=,{\g~(y+q=iݗVuxbR\+$~z߽ctW߯>z Exu܍/+`n1,oѱ"ط󇾿oۅ}Iqn_ٗvGOM;v t<~G3j};ΤƨCՋoq^OwM'n'9U4p}:v_{S}t\9vswϟsg #bW}uvC qs t^78׵n3ݯqH.O%b゚߮o~8:v'}7Y_~H<ّϏ^_Ͻcw߿߷{4^> >8S &5A~ݸ__{6nH=\} ^;C\?џ罆P&?{qCz56|]_C恖w\9'^r}J@o{O7Aqt{EuzWߟ3~i;:nl{-u Gvbpuw㿏#MֻMзE!/p o܍9v ߨvӂt_9N6Ի>9o?~]ƅ xy5=3@ YZNw`9@';;}m9׬m3\J1\gqɞK^=Q9vk{,iҖP]w>ܲts4Y#lڍyݽjvt$459t[SMv)2S^iq_WػLVyəY;i\km;);+8s˱mv!ImMc$}N&is\dr du{3fO\9;/g$849WlsCMf68fWӓ4Ivdˮܮq619شt'd֤y}6L;dv+mr-٤I}霤:sحVҹv6hxu祽b:{贱]LtFhHǜx-6k~MuM֣m[ct_/_÷/@ (P (@@K %%@o;>ݕ+Vu5<߿~>%@%P(T   @ wo?G?#znW[$׿ (@~_??xٛϲDt':h۽uz(@PB  R T(PP@*B@?NRW$U=;[cso?/~xz-@ J }͟qj}3Rʣor=lմ3yȽ}u?ismWzMCTm#JksbJvIN*2\y3mw[Zi̕Jfviisi͕q^Tv(f3IsN ݞți'Ns{Ror;46M%뤛"aq'xt7zEeFAmrJ4i\zVJtm$fjy$}tbZ͵}2yacFfO-;nzD飽ZS=idd!ʞM7wsMu6koSHLN4fHzZI= *mK&5gʠ\!G6+6W71:ێ/lٌLqiHƎԃk7Jɾtҫ{}XٙaT:QkSI$G]=^ɽ5#]v0M6LV+Ӵ݈Lۻ ZRit}eGfte6:r̾f $Q{LwI6SͤLtRӓivzD$6s)y{GHIAtٴ7>?>o__{iFi;-Sӧ/oms__czKF}7Wxӟ~ݹ*a*JL/t<<w_?+*t5f>_{y.><~?Id$D0M4F*pjDLVJfI4U*&I!!$\ۊ)Y GvZIf `´h++$^ mvidV%V#9mf4&͚촒Қb;Ӵ$ 6F:-C*,Xlǒ6mL]ɤӵ=Mu1Mn5.QѲk$.-mS)$hfc*D]$ӒiDՌqi0LUlIdNrsI24a9L#ӎ$6<6HjIX1 " #pCkj*Z(iN6J84iIg%:I:KZێҸ2ֶFHajHZvhFIFGݺy[A)iF41XIhBa0D' =VLTIRjSM) bk4I )KtFRG٤t,QI$&M\3KSNLzd]\L6QBRm4ES!I'#FILit;־ׯ|hz+*i.v?_~,ۧ_}w?~ݷoi HZ=٧__W@ۗ÷_o jmɘȶ?|_o?<F6{ywͻLmIkD;IiϦ{*dfL%DѦBiM%+5Rکt$Ѥm5"j2mnMٓ;&-MMZ#fLf^RΉ6&D&ɐmRӌe;Z١MdHEQT"\Fg;I\qd$jMy]l=YɕЙ +v$+4LfI2MTtlGJ6^H"ZfFC`*RڦIBڶ aZr7ʤ$29'ĴF) jI M7!6(˶xtml4:d44:&urJ]jJUZ"kUJEKؤ6$aw[SlA[Yi2ն{͐i7#a#mTd6!M5iRF#i&"vSf[Z6-}dI8.œtU+͔ N$W:"ӤӝLQ i'~pO?|~LfmTtgᄒ>o===_}/gnHĪ!?7>>?=}m-;@hH"HGJ4WqF66D¦=J=ыPFw2rD ] &'=#FH'jnÉ,ݪTS " 4JLҶY,Vt:lgfmSMZfٶRB>sV6S&cZK$%-9\6HD+ɸ*i]= e)M2i.*v%TCt\vT+[EAdADҽiKIT6"lԀJ"v[;Savs=u2XMPj#r ۞dkm&$ !@tq 
٨m5$ih{TצMFAh[VJUFJwHhkM0f5Tc4@5mr$1qLUv]m.])f&W&ݐ YWWeFV26EDBebr_閦MI752hn:G!446duvfr8M[[MNVkiV Ig(=[w,MbEMtח秧O=wMR __>~z_ooלt4:WOO/[Ǐ_߼,I 8K$v&ȆO6ySW嵑l[_Z߿&rֿ+Ϳ#dvˑ̺lQ7XlR[ғ 4k,dX4.ey%l)Xm害`IfMη.uaFƻ-"|f]N&]"ic۷-V1lؾ[vn*5!>Ibܒn ٛ-"b-*%{{c ;E]l@e[8lLҋHB:]2,;.b,v۶NLbbg}s=ENz"Z۲hfplݪ?v#-lvnީLvznIbpNh.;%g6*mEiN.ri&s}?]$O~][8o!3]\u&VKew1yIw_KX1~+IFXԮFXsF>rV[ެ޹jR&mN|_N,I|bݖղޒ%$XfO/]%Mxݤɲlɇѻ{Ȍ})6|{Kdn|$lJܷ-mm˒K\$[kkź|TO.MLovm׭k [OR]o=hgE֖.2YӐӥs}7&_=g}7C\r;3Eز?C%ݰ._rɒs>"k^}Ry&t,l]㝟ƒfVKߤof,`&ϖseq{t|uߟ-[{?]c5Lz}mKZ$LWɣ=v/ gv]Re[u\n}ޗfckQ{k}rlVrl'¼nEY,_ll_n#5N0HO,%So6/w算=^-',ڻHI{f]g˚|HJe)!-Ff%nz'=\o:$ȳ Zo߽||,ӟ4ˑ~nYwyYy^o/zhvٲJx,߶En˭_Ub1U;n#c|e[7۝K:|4;r4Kuߩe}K2L]Ib"KoRAxV咟o\{}IٌY~~d76k6ۚSm`=+Drkݶl*ܷf6˦~_E5&/͚{vu.$dMreAY,k}AN^+ O&Km7ft`d l3|#CyDݳ|sYfRYdq\϶Hr0{Cќk:6.7 ļYn $Gfݒe.{{9",][ԷݧL$\`ٵߗ"$Y&n6D.ګo͊D-..crwRڲWm6؉*ej<Lc]l[~* d%D8]6jqal՝7ۙ'K4)VJl9[2qzV5"LN-fU- YP2ݸYm3Fv\nrȖ=lY[}[Ě0@Ej{[ 3lY ~[^/YEĶD6.LJ#<u[,Y I2IQ٢ lܩe4.".{I.Ŷr|:oul1r 9Y#klòK[ddʳ]yrI dA΂ ۤKf#%"$1,'k2^qY]&$dIvm溌pvg!F6SYvmHe٢!gH:$!˼~)vv릲dȆʚ{cN4rI0s%If%O}^%6HoشIf%Dypjؖ\\"[yM727l61,b!^1Vf"%cLql}l!,fR5ȍݚnlnu=wL3I]mۖlI]G||G׹dA;,jKܒmVx|.Vݜ}-: sf4.2e/Ml#D\fѓueFѸ,5}IvvgH$|_uL]$rȖ˖!\2Gح%V&2Y$# JDu͖ɁBt$ɲ]YmLfglcT~sgM"vui2bWmۖ\rض aweg[g0u|;"$A >]))*&e[%_&vzk \\£#.ɾon`m_''!~|Onə!myvE]EnyfN抻ƅ% qi}K]Mֆ[βz5E㗙ؔ5Im,/@m-7[M*g*,n/EfmcW _fLai3obuk1.blI~su,mbl_Ֆ}L ºﺌol̈$uױoȶlnĈѝ6EHFJۖY24q5smQwg쨘3[&ٲȆ6-w=)}s}y1ݴ}w oin3{s%OI6ܒˮ2t"d9vt$ݙ&MLv{mDBf6kdgoYgge_Ivq^/&擺,K:}'\j̐O{cHrq7Lr[6doH%"Y&K7l5mvwYd9K%ͮ_n;-re4]g&1Xoc=l6Ǣ׼"٥ֶ蝻T.5l"0>$h.?n w,>^4b.٘$ |I^y; ;2gY\ 1@,s ט O b%]mw[Cl.&m%,2ؗ/79ΒU67!c,d"?Ւ%I8rm$b&YbH-#!QH.45_sYKY#3n&= ]BF"-&n[n%%A\kd4 }:)$Y" 6sߔ#XF޽e9 pӿܲeʓHغ!v1rHwKKY7F֛YH44yqٲye>}m{1R咄M$˲!rԒ&NeY"7L$~6dHt\Zz뱆ȖuA_,s&[K"i},.!5! $dk 5idgo&"ŹuXfeO^\$؎uTf, tlpmۖ%,s9!ew}$dnp64Dm,m|n/6.I.-fMیl]gIDy3ԅ vVixIj/k新mK_Œ^R\nߒٖ-ys6fL\",/_Ye-cʒk2ce͖,O~cu;Ȧ[. Yoַ#8-S 7vpwgoe˒7Y-xs-5u%_,s#V8n|KRI"c$[%=/l r`mےI-{ۖ]d̲_v)dom2:m_fqI}v5vӭk^%X\rg6O&"ڭ^H2љH."M6YICn,^'鐷?v%UzΥ%w&;םƒ&t@L}\<;9پ9Ͷn9d-ܫ|o]OҶ4l͘1m_Ł2,IXnY$Y(݊F$F$Y6|Ng˝5,aiΝV$OM/|nN$LӕYuI,.BLLwI/ܑd|ˍ5rKXufk›ǬrL8e7W[cߊؒf˒0r]_C,T `?Ylo˃M*mq`Foc@Y[&-oeD"dn{4 X"IgH$!YFr"0mfy_:d"sn&\šnn2~۲ nM,n,rduY 8<xرIDڦQRQ( $Ap C$Mi6m6!{.{kG7$1a)@"`h+Ijv7i4*!VK؋4tvC"bӶ$KfldhHMh\iSM fFMN*DO" naFBvOMB#ljA"HD5AZM1Wowf$tCB%1h$b-i/ ȺBe6YhtGgwFC c'GBBdMIGڶCYV@S kWW0*dȎ^6Hgd2jl+#TKm21VJAՄjYN]DWcJ$Tin K4[&ѳ+#BT24INNdvTjiviBӥ LhWgeTL-'&mLi젦"VƓ6"vvNW:I%&;4$\\5kh1=ٱB,=zO&הM̭H)Pb6i R!J\jҜkH]9M - I;5Z+ m鶚+{ͶSc)MD$Εm `H[I:q:vLLNڴɭMաdRdie5nrqĜ8&$vE$mRBi)T v̺mәʴDɄ\ 质4N&jM]dO 6A(V*̮k"Qng:>tkrJKI[bM t$DI:m0׹n4 I 2IըJ\Xn\9ɬ8\iS&rfd+VcN-nl+-ĦMh @Dl:w[gvmP9\g3ҝl9\;׶{vkN3*[s%I_gm\zil۱-i&ngrNO6kvg{,glH29,e\s+ImvϭUIFUM"MmJۦjnm&F%t-mr27k\˶=2HԶjv^41bKZՒAZR,FMWdٕJ7tTz<ӰjIḳhhtI\;Hr$褕6\ʹgm2smn+i;AN^;j6GbͺllJ&ҝldrmlnіN{&+ vrdo͜[imr̽uRdZկ_Ohk{_yg?_Ï+ TLۤ<5!iz {G?~I/|oW>~n+ж_~?w}' `mQ9iͳ//_x٧束'@^ƿ^y_IpۄhK34#z۽(/_?x>y~w=~=ۿo~?|3_g?ӿw?׿Svڱvd]N7~3|G;I?[o7?s=~oo/O><n[rډP]I'1Vۻi#iWYifګ[z]+6M+IGUyЋ1f6m^7$R6&G4h" MŴnI6I\.ݦőKZj#ml[HGӭH9SZJ[&<$]NN5&V[ND[mtDDe32즽"ʹ'1!s{ε;Ia:jH=ِvi[ivΙG$ihH"NU$&dvg%¤':Nnd:^ӽVVf:Ӑt4 F&ʽM:͕KќJf!Е&MD۴W&IMڶwsM+IbN 3M#XZM \X2W*Cs "&8k餉K--5WV$[ i׌>9F魒I3kSLٓD/ҫWL5\=UsB[$ĥݶ"ƽDi$OLЩ'"UCZkvi#Sk5jWOn#i6 *LҴt\Imdj;F:WG$,RjM۪!H] U$iSڶyWz/>~v]i|7㏾O$8 _>'~t&JlgʞɄImt^gK_|'/䃇]>k_zw?}g?@w?{r= _gf7cҔIѴH+Ɨ^~us~/?w{{h!Ͻ_yn>i'PY[^se^)IF$`z׾o[hyko_O{zo?>z ԤN{ۿ!LIJUlt& ->^{sѻ~xѳϿW}yms%=~_>o~O3"̖6LfiFil SˊD6YɊfH$dҍ4צLDZg$mH:$M.i* i %MY14FҴ:m Ʊ6َ3]teIbզ{%sMڶFcI9ն[ۨTŴ)^@l5)Pj4F&>شi)I4f$;r(JumJHh*G6$$9ZiNMGemӨ S.ۨ,݈LS82 S6g$S$ѴI+ي&m$)ENd02i&M ;m[Iv+i]ަ͘$FRmjĤ;\=f;m"j횭Te"ҪLՒJWBdZ^4I:tv& 윈UmP)\. 
VBJ HfzvAWmiXSmMd7&emM$lmJdTLGt&&V[CnuL**M.so~'o>lϾ~Ws/>3yw@s=<<<%hHd05PmTϓOO^x׿+_}G?@<|}$ OvWgSMJj[FZo?yg럽 7^!O3'$O5zA]=ښ%˖%NjC IrDN(/8Hq c25i۶ddiK=}Zss]9:gutIکTtj QRm۹ή׿/_l]Ͼҫ_~o߹p֜dw6;׳uwfgaUg$9٩:oѤCUL$i?wK]z[uY¥+o_8[?yӳlv<7guvItdTcT&4Ռ2! 92K2QM: M; Y"Mf!C $YEюdFh[ 1`TvԢmTUHCd֤5JDhhшj[tF;`*ˆY RI hFuh6m Jд23k$К,iHHVkGDLu3fUf4tJ'M1c9Ftb3Ҷ @2$YI#] &UQК ՙCv0&e!LBDD%mkTiV: JY0jv,(R*2ΑDZFF:SC0c&RKjh9L (d5KcQUJ#AҎYM`&IbLmfB dΡBՔDttRækfIIiAөZ2"QhI4f&RFTt0%f XgM$A01hh NH9LN#md!XJ$3[>n?{i}ﭟ7wσË7|/k/޺Zw'Hڶ<~7;㷟|'U#chT!H-MPUw~/Fel]q}W+/x=|tzv|m۶fmA۶ h[vm4VL֤I!}G?y??n{Ƴ/oΝw[ -m:3I1њuv,IuQUP Gg?_<ڝm]yo67/]7_}_qN߮}J ZR-Ҥ9AT$#ѣg=uk|p$h{poN(eG;'0hdJTY 1J4fɌY h64RєBJʘ i Mێj32YLEBLKƚJ+H:J::$Q:YKH4eL3C%"m0)FM'*2HL)Tfu]D#3)NXCi1vRaF!˾&ڤZJsNfsY4&+SuTL!#3%Vmi# )0Y M4Ih6fL jSI$i02k#dHMHڌ6-6٤"4U$1SԶt@JKb45ȪE ,Q#cF'%E%5hf$14մmEj4K24Jk3m#"(fC")SR2ҦMtu&F#͠Y4!`MI fm:SAȆ.mDIjV0T;HIU::jJ[fѴH96E)7 s&Ӡ*IAfZ:mhD$#JMZ]IJ" C4ΒYIa&I%cjdfjPI V= 7n_:p}\>:w>7}t~/靏sNON l6ݟ<~pFp9Xu;;>鱗mύyvr|f3~_v=xp,zW?{n{t~sH`l6#t㳹Znܝ<2w' ѹf3w' жm 3iR!Ɛ\~KG붝gg_xow?y3O}.Idls2ڹNώ;9:rxfkzvd`9<ps]6]fwfsp]YK lÃeDu;;;٭ hnƒw'''+emM2p m79TҦSHB*J@e,>;`\:yG~Wcs/$$`$:uv;٭k`YFsݟ;YK6OCkzhO`Yߟ[Ѱ=x<綇#ǧg,Ge swg'gޯ~qll1BΎ6ƺfйwݜl6#ѹwu_DIMf3JAJ3Z%m9L"BЌ*l2TQ`F#ӌ&#)S!#1ӱ$F VFEQTh$Pb4&1gZB*5JQ)RK6Ttl$3Ks3#0d$"MbY~HUf)S*AtI"mfZDJC*IH 氶TZLm*c19%5&5G"њi#&Hg:Tb.1ic T HBJl B3Ijj)iGBD #iJ415MPM$#:I&0Đ 2Ai$#:҄!2hm4IeiMd$mk4J'K3$fi:YHIDCͦZ3vLc&!v)k"TF1B4 viU+誑$FIȔihҚaHH^DjK"ҎUDT R"фHCZQJ5*0!iT#M L5dȒu̬t`$ÜMI"l "b`F&6̌袩,c1ډ9:̳;o|o_8sK''?W?|O_GϿ_yGoGDҶ˅goG篾^}SWwO_+_=_K7o='ty|}'c/_|?~0/o?zɗw7>ѵ[KwoW|^o]yћ? /o|ቛ|ǥ9zK7o>|_sdddUhFƚ,Pُ̱@9<lXg|~.|o/wo'xsW=ۿ{볇8z?˷8,s'O΃}1/ O<{•8;/*מx~k7e|s`J+.>[O_9:<;;w~[z8w @"S$FGc$m;vUͧo?uԘgc3v1e{_~Wn_|46~w~w=#In?o}Gy'n>?g'><?_?2w}3˟ݸt“}|瞺ɽ{Ͼ}}*،t5MźI/z֭K=yoޞs\y7>{;˕S//=qђgow?: ~?~wp_r<{|}獏Nמw}#?ͮg^|څ Ow}߾do兯]~t·?z|/?qOog쳕q+_zg?q`3w|}|f%̹f$:$sK:՜dcnfMѥ-2F2H2#YYu)c!m;1X$cLN5fZ}ϒ$I93Ӯ2dLtjfe.UCbXHVg[s$mh`52Lڠmu4YhGd;mN5PsNdwv1d&IGư:5+id5n5S`tdk2dM##sj4Ȳɜ.6ƒ&նnviԐΙ6hifs*MfMb͘ܧ.I3בbMEd:s0kd Z+ccmVI:K$ff'됬c.]euӜlFM۵]2D:RsVqЊV4C1njf[Sf$akZ-{F6sZJ̤ɘsl&cRd4:9RΎtb4l4Gvm3e ss%ҚL0*$k6sf}Rhu,LF úLHts]2;"IȬZ5Mڮ6QQb~cdSXd\$2Ʋs\ǜkX@3".&H s?9ҘEt:RQ `Y70F,˜i>s&#-5Yuj$Hu!R&ݧkHttfMW;Ҏ!ɰt]2Y`VܸvhgG;xŃ87q{|}K7lgrrҹGˡ퍫Gn\zi{އs2% sf#t~n#MR]׹IF$ɲdgu~{KG1͗}?zrp^z }⥃Qy6Kn<\ᕫ߾{œgF*fFW؝<7vf-vu7;ܝulYZq _O̅X}/}f>핯o}r;z;wgĨlG79)@fsq~<~ n_GO\9zs_{O_޿^zի?뷾nnq>\x~ŧ}]ϛg>yӳG;rpul.x ɗ_ ǏkO=wʕg{'t7 7~̓GsG=~+=~=8xGG{撹tnƾ#:Ha\ƈ9C7#nQIYslҹ.u̱c ]au,9 f-s=smfcejXfѱt$:h,${FfIcXh)S͊h)te\F:ܨt]H9Zw] K᠃5c.Km¢:edfMC#t{s&ڬ;:23flȰfsάS$.tY`v1ѹhX}lֹA٬s$v뚙M4*Ʋf]cd7F͙em;%aNk钴g͒Mde׌414cĺgo͈dN:vKe}$lifHN3h13ac&Ȳgu,2tL"*iF:\ǜIlb")3'52m6&BR$hf:26;(4Ŝct4ZK2HDN_ǒ aa?!:t&:m:X*sٍ͚~΍u 2֩Fa&#sIi9:id!JOCm6:*$$i:t"!:阑cl:˒9g:QԹ$u~YmӘV#F1:սi9G33vK1t,jit0G &M<;o.YL " 2IfvaK@vHӒIAHh f0k/[UeY[f>sj=KI3ed; 2m#cy&;7stCMd:ґ$iΚY4Y9'H:#)ë9Gd,TLmlNgו)L۽Yv6m$횜>|6r}qy`wXp6Ͼxr;淾k?~q4o|O;{xlK_>zvٝsy_\^pxɓ~{L-m:go{o]>wz=}{߻;_sGzxWo䫻wGv.]xcs^pmo{z՚:If&9df.SI0Ʋd{eOW;cwc;/Ϳ㿺ɓqo?ŷotOo_;p;7 7Οs'^CgF?>|7~/;v܏8)ko/?o?~y:.|Ww߼q={O_'ϯ>Տg'^ۻ7xO~ϡ;_|=p _=1PJ$:wp.xo???/Z+o{N?şd};oΕ䫳?}t/~'}ڻo|9-c\~~ɽ'xp=ֻ߻}no9Ϻw7o]ޯ>~+ovl\W߼pk̒n1FSMF۶bֶm @V 9wxͫ~oUvm9g[0gvNvP\O?G߻y|>?W_\zO>{cmfUZm[ɘ#1 d/ .x{O7s.ׯ_rӇ¸֍r;_{uvfٜqս|`som^}wO҃o\쏳۹?&ڹ7~7g~Ǐ^/>?}o'7wϞs5'}?~Ӈ녯wµ+<~|xk{{?߾텎M@fi!ɨhLXl&M3dLGh]gudj՘tTt3֡1[͌Y\2Tj3YԘhd-3Yk T31SifJ+)054mmJK2d̤ɔ1t4sR]ih Sf:٨LPHZ4h2dSj)3`v&ktvtXIh$ 6֥kڵ)c:4d 1,mImhf-5Ji%a]#ivd f KSkH3d4LMY%svi$f׌423W]204ڦ먈,i0fu0QH:fY:f5FM3f(lqL КE"Kc6:F1H)2uL,f14)B4j, Ҍ6fLkgԘYf:ژ Rc":da-sΚ͘iZ̬J$:Yڊ,!31Ά0h;!v&:U1*CX$H3fڮgklEє#dmfciQ959%]Ø:3d4MgFm4fS!1֤hmۦ6H3usu4mG cm+R9 
aNYlHlX`f>ys޽]s+=ݣWx<'/^d'>qyp7GmѓG%뺙}{߽~[75_tO~tgn$2+_nyg9w~a_ѣ+W.<>:9[囗W.z'm3FƬ:Fd@ c͜Z +OW_~ܾ'?[_^x{1+W.?x~圣ʕ˻<;zq>-8ٝG'gѣ_; ;\Z\.߸~;O>{OWsK/_u_<{󏞝>}|x={/オ~qn_yy9;Y+p;WGˋ~=tϳgP2jfFь-`Y.ۿV16f;^=||ŋ{)*-Z[o_?7|i>]woε=.vpg=/=b>=^}/zpp{w_~GOf<ݓ߿rY[j<˿NuuXcޅ/޻ɫ_eoBsfūW9^''o~߽|уZ_<;I^Wx;{kv6^px}_l7{ H-c#3sbY)&MbhhPHhDieH[3Ck0fhҘCt\RX+tPm+$JP!)*Y4&$sFM;iҁ0d$9N&H REt RI#M,J #3 ÜT[d fLdJZbF 0*9i*Hev@g& #jR4)16JZD cX44I@IPkGMhTj5d͈t:̚3iU̅`dQ: ҂L$K 4TKM̘PY 93@5#c"D%%i:DU - A`Vf1ij6jTC"Ufm$R$HL-1Цک*34JI۩X6̈́Ivi ICf::ӁV؈V4!h@DffRm:u$ $3Xb5'1"*jMKZmI`ΘM"H<}zvgggx l7{/OG?n_>[]ɓy\tf3ηw͹d5rqَegoٌjzO>~8mv6''|~߾|[o]W]{w<&ڎуw _i5Kq`wO?yٹƕo]=w0yq~r|GnoJf$2H6@;e'ݿ|qo9Zw.^l_WO>;W^;}v{N~qڍK_ =~}zG/;2;;_]6c˲M @sŃe=zŋkWt㯎O϶v/,}=Nvp;/v,{n`oggܻxͬZг~vw~w7<郇M@Z3 h̘KRHl.~ݿ|zt__|[7o^;?8z{΍{.wl9w/^;\X-#6gG/?mv&MϿ/?{8to9wʅe}uΥW;s]דl0_>YOsxƅׯ9ctQ ʔb&Z&2hIڶ ^~|z=v꭯[߸Ǯ>+W^|>wӳ'''m_}ֵ߽o}gWOZ@iZŌ4]i*d,䫣ǟ>ƥo\;xޅg_{W/^8d̃_￱e;/vd'O?/>9Y|~ztn_~'>yAj=8{oׯ\;>{_fH?<]ړ/?uO_}y|vx`\67ny?~wᵃ/~|ܽ9F:6}'z_=O<;9{ٳw]uqgg|7/]va3o]_|t6iBF1"Tڤ7΍>];ہW޹f/cdn86;]9z;wNgw{Wo_v7g?|lscnle۝%:'Tefg;yzVٜ;mjYY,嫷~flܼ4Xi,e,N纮{vvw[;s,I@y?k'k?xO}՗=YO VFHHhH$c;,/_7?8{o^Ό.ۃ d>˯tZ/^yv__vT}wO۝vol6sjgE@ιimHh;kQ.L1^|z~: 7[;ǯ~/ڱYe 'tt_/?~gO~t''gBEF$BJ/Wo{x7o^}yуϟ-ف\8N+Ξ}}a1gO\?Wwݷx[o?{W_|闿h%cY6WMDzc]N_;ˎuQ`,݃Ýݽvmf:/~_"!JG;):TiT;k6f׶hmHBS٤iF!1Z0ѡc,ff42$LdEIPB@"mX)fJGF6F)&.t2$324262H2e6L*3tFHN:HǤi23)5ӒU4#д:30ے*Ma)6sdRbi1!֌h"mCQDF!M k5D2sRM0#aTtVa&1h'3Mۚ5FCꚮ]GG1kf65 35"- 5R4ڦ2ut*H۩1:$F#42bLV$ɒLji3 3FusʌvfGGi9)0VI4!iDȘs%ITZ41kmh41ZFC " $iXkHLD[thCG&fQmkMF3e:ui"ӠE3fhKHa3E+͘C3Q i~q:vpիn}UOPc{W>_>y=f/ݿɍo]xwv޼xgޣӳ}_9VuYb mƢٜ>7]q7.G_}qvhu_קw?9ꜫqv:/_ܿ__뷿{˧onv^{գݜihF0C2;߹?xxXv٘IeI9}v񳇹y7/q=zK7߾xJ%d$I$@ @;קslw7fww3X׳l]~o;WU9uYcnd$ #˘>>;7n߼zo^xky/LJCHM-SG(nwsx~~N:;=99{ݞ|r/O3h۹ftk7?xov69]BHl6Ŧg9O*nֱ$c͜Q##UF41+6$csp҅y"s_;wsspR09I2ƲDJH@$8[;||r~v|,cdLjKuF6f9~Nڿ7ϟ;qE^:]?o>{v'x5d߮;'_~|z7߻ֵ+o\76닇O~ٺNv1ƒeYhNNu΂$#I$&.?xrַn_kΕ ^ןИ3cfh#3*5IjTs1GfX$3謪u6hF2DulLeTfuҎ̨jgY,iG7<~cサw# N@Ѳ,*Ťd *9A>ت]:")R0!IyCyX\WI(3@H#c $B @-'@*+IFTQQq"t" `@ !6[%#rDrEeK#4"(AX8,#cS-Q#e '8]T3D",XdbIX%0E*81aL(Dl,S$$-V1"DRjF2ReD,"[6e)(2Jb!:H,NCJp)D$D d#!2aaY%m(#I`V@16A !76`D![8l L0 YA(! @QD"X$i,$¤Hl*H)J@ґI- "peP,B"Bv#l%$[IΜPՈAR -mlJ1""3@6N$H! BH'6E`m#TF%c҉%r-iL'u l$ҲmIBBr" AXk8a$DK^xd?|dj(4 \z5ϺӧLD[u<<ٿ*z7.-΋}E9:>gz =O&' Ih( !d(Mq7~|s/Ν]ͭfakUs1 _(+jiNv^zwk+㽓x8o۷߸3<ߏ|{:>^#I(\R((RUp RH?r{^\m6^XowL?,/ᤫmwzrr go9i7vF:zɒ㗯Y?z48=PV.6ma_䓍<{y8r:`p4eR Imwh˟Ņ;?q=C3!$H(e pZ%sV/ ]k=<''G'ñ.EԟO0RD5iךzO=ޟ˻s POGqx™3PA@T%-NP$@iV ,o1Ƴ:QVP6ԣ9~i.NUMi) ?l/Owҕs I`"!3:INP’ "yrxviʭs^ll`kgVI)?J$!IQƧG|O2\/w-.̯Ͼh<>8>36wGt5ssgdo`Z= EĠ7wrsg+WεS;Iq*R'IXP0qmH(D58p$ 8GeX"5IS`JJ.FgI;K bG%zQ W;q'B8JT5*KP:ɪ,RReՍF2 ddPeF *Δ-PEC)[:2M-)3],la)K'' "#z5DKpB*R e֨ʐ \pU)jQ6,I(8J3{fn-]b5"ENMHlE HVF]Dq5QdA # c:=z&$LX`H(R$g-Kvز-2Q"3]RBA;٪"DuTJE4 FV[#QΪPWcgedHQ2ȦdI`8BXE:Ȕp%23d!8J%Yv:H\TNi$HM15MBىT . v%P٠p(k45EJ!\" ҍLRj^ &R2)QV&*(.IQFMd!8l3Y(ՙt"RFb't$@Qg"әi#\bTC.Pj3U؊TZ FpDA%%JWA5Ι*FHR#7-E`c'N x_?^\x?[Yth[Xx +5O>;&ittѫ񝛫o^ |gx;t/յ77mί\%o>|loTmcN^~_X}4ͯvow6q7|tOjܺyl2=}}0;dyWzs͊V,QFR*Tdac6ضmc0 ҴMV.]r˷Vѣ^3n\{F'G[[vsUgi&G϶߿z[Ag/Ƭ˗i6 `+ڹ>N?l'o|J<}b/,Ӈ?A,ZZ 89883K+Wn_~dޅ;.GhW\~sOoҥ;wkmcѣg[㛗.k8zv5roy҅3'`01#PUJn6ՑJp!@m4m nGşܞӯ?l7Wx5,޷_XGpIWi.xw\> 4G/_<~:9z:M3i:T:.Eؒ @m{zb{%IG_o>|MO]zw9OO/~?<~>0`l 4s׶ϽNӽѬ.iY@?8{cx}pԱvϫv~|<"oje~sexaΙt3J #kC'r"r"VB8),[YEkvnZ2Lehfہ2B!T ԬmY*uY0iJqgLd 29$8,JT0`b6Q;;WPtʎHaSPXdp: )T1!bK)F S6"$2XF ajAY,(4",h2B'dAդL'E8Tvrc!)lngpŕ,iJATJخ Ց$YYuKEP&piTfJP*BH6# \.dwZ%MըQDȖj"K)3VJlVjj!"#d .ԈhҶ h@YvgrJm+CiBTƤ٪v$H-[B)cE42J@ig$i@ װ,$u*Y(.RM)vPTQ*mEز"23Tl FVh&H\m0@I6ժ4%"Mf! TC2+ E8θF$5\m&SI!vDJfnb@D! 
,Q*]dhSHّrSllbnnn崛On.]~Jim=9xӟ<.]hVR&קޟ1x`JSiחGW|хۣ.ŅG  B0W?\]?;p,@P'?͕h{,`ni׫O&IF㧯k5|7ہ&Abso|<*j Ԕ dG2AJlP_'|I E)m{_ƙ;Lџ_Zir>k2wNZ\h?:eAA\:۴m쎦RjJ_}ziyp+o}rt\|<<^^<8ګG+ʹ`"_>|yw׮~0KgzsM ^,^ifGdnmĉtɬ(%Hlvf֚3G̝k,A}u_?_GdMo0D=}ٸ{t0 >??{g/Ϟu ` "tz蛿yVo|ۣZ`p0*&-"0`6_?KQzťE͎7_l >|ƝvmkW]X1ֽ_?p?tY;wvemu!;(.WזG[[z2Β%H! J͍oIp(ŵ>_^OWg[_@R/6sWF'S,--Lf^`箽=o[Zl彭HHlHJa(A V&B"$S¤C!`I K(`q Bvi# % !3 l6SDXi 0Ƥ ,؀ $BI$V&0KәH v8%&,%%'A␃aGb CB&-90J]"Rv%;aSXTɄ&USU)cP$&(6@m "HRP#@p%Rr,ʴ)D vb [%!A 6,D. I( 6$ d ;ӑaX&mHՖA,4 pADBB!Q@`+FBDZ)EN@ K"t: F`clD YؒFD`aPMԐD  @lɈ",V&J!pʖdaPbDZR13Ui: :e)%%($,*- $N!q 5 p#v;S`P &!qƶP`PbXPm`\JcƒD@`m0 m@ T%Na$J0 - f[\yεkVWxxճ>zur[!Pn'SL6^l*FR[s7.^]]]ttzwlo{T`R- n{c{g 0Vy}+ kf|vރדi:lƫqGoxv`޼Tקi!N9@a,*cTKk}n6n={OXKB7<'?{;iMN^|?|as6ڶH1\vON=-ptx裳 dcl}BHTML$aX6p|n޽yҙ`WOy9}_Օp~Ç_8{S߽~ջg֏ލImFow}™w7?wg'[1` M*aBB *1vtɧ>?Kw޿^\g?y2Le2Xc Vր}l4մhA?o]Z;o}m,7W 9?v]Fh?Ƿ?zƵՕh|ŗYR4`wG-t^___x>/>o?ۉ\(–-LI:8DnlHHEN1B@0 e;m+@Y)02%RP!3$YX","tL E),%+$ +!lH9HBH%)')H@ ²*p"- 0gJb%lY ’H nD F2"aeq4"d l0N#ljJT*.i Bvƶmd$d#$l[Bu e [id9 #ɒAJ,`aY3M@ "l!R&%Kd@H CMD6"Iac A8eXXd0 Bƀedl9 r`! t(Jc @ȶp@N `I]$ccp###dv([e8*  q8HR8%d!;ddخ: Ψ1Di)$##I-lIF``qJITS B NM@9`p2HB2HVIgb P&Pʀ@vDZY9l ־qF )l+-'o=j׍'l:'fyqL4خ]7崷$< Ef&㽣ܹr?ڀ:MG,`ianp=-'2 .ͭ6.G0V^fݤY08ƯGiKskV=t2>?Ng?q˼'gBVҍlnq,D/ }yΕkgW~A .'.꦳Ϟ=zu6v氖~u` fɤ=32t2=:˽3ҋtNFLJG{ua~L9&؜-w|q 3p.edF0Xxc0O7N{C L(ћ9?jW--.z0vΓQfiqpn/ udxr0[,m:ˉګ3=5dwr8_r|~49wr1NN7 lvmwfPt:<:>:ܫsKgJJ`mq+]:8 -[Ly)$ٲ-)РE[ @`$ :928IKUsv{9mn޳|~@8~y|t}ݽ??o޾=8O?{0ޭǏϏ>?~_sק?Ӽ>U}_>~z}Y/O?חwWkO>=x:}9vΏ>>/wp͛kMŬ4Myzyӏ'}zݯWw*//O_U7W7oonO?<۟W}|on?/?xw_d><|o秏݇w8_wޝ._osI?oZ';[>~ÿ @ӇS.ׯfÇgW߼|{8ǟ~rܼ~W7w?{ݛ?xsj岽=YקzyOӾ}\]}W/7o^.ǧwoժӧ?~Yn~vt<'OC~7>_>?tuz<yO>_}w}wJ/7mqd̎VNvt;4٫Fv{&2:]Mk1tC&EݝDfchjh9[d Uimit]4X]Mi;[:TFWt7v&\$9_gGf%Kq`$3KY_#$JƘ:.Rtl5LZA%{m'3m}' #:/MHZ%NvgjRMFE6Tt=n'\D]vcIN6i۵t#3]tgZnlaEHfeb${Y{H'=N2vwNk[J7Nw9-UMk&O1kOEZS1dZ$&tj.61KɠE֚9,It5L&S Fk}JjUi2UIrđ}=wz9MvǴӾ\#ib=Mf(咽JL9mdDWz$htje'b(;v&[=^iZk8&etw4t!=%*rqٍL&={v;ZP͖Mҥ+;)TT8y}ӟە$mZ~?>_>z~z:./euuu}sw{w}sN {>p:|{t%~O_|iuu}sa_|x93h>~_|{8~xo//_tN|L߼{y~/z~zyw7wk$|jcf{-3O?_m(ӏˋV$kWW7wwwoh>~_~{u{xup:3\^>7}suuM>N﾿{8Nm}{;O?|7άO?~w|w{wor\.O>>}}3ëjfӧ?87wwnnOv:z|zz7}{jNӇO^^.|uu{tu~~7Nǟ~Woޝ&Wo޾=)ݤGԼ|}y3~E Lf|>_]W>_~rDZNvǶ뛶_^~Yi^^^~y\^t>\.ۻu>/{o5kWWW97d=:o?>/Ok~ë7$^.O?~|~z\^"竫ۻWW77qgֻ﾿{ryz/O>&su}}:^^?}y~{x.//?}c3㇟~|swWm?_o^&3?<~kWo:I߿/ncw iǞe'MN#>Ed᥶H^g۝tQvv Iу$1>9uwNnc#ae[g'-et4v aeݗD5ӝ6fgD/H;YvfBIZ=taV3I0:f٣{`5=JjNcjoٻ;cغ7dTL^b'MhHC}"{Ψ# ChvSQ9h3mb2v4"Ǒd9k&٥MwZYtI&GIP^ah͚=vvD#[w*!D]3=vMmmwΓYckjvZ At6IWr/ҴD;Iػ{LX9I[jd1+emLRZ$mL!DŮ]Yv̞ۮNvӵNM =U&m;rJd'ر.4$^4i}14GSd+MIMwR5M]AM$mi3:Cٍ$}’% uNdum`ݺS#rd c&LcRɞhujH1;)[{I3MZL]BdvF25mfw!333$ 2su}~]uZ3KIfooWWouo̬Y ou $uswZ+3Z޼{xMgNno>B233kft>~iͬ$Y{gif$$F4]i82syw߾k $Y+I@YW7۷ݻDffff$ۻkZ~8H\~/{fZ77g o߼Ntum1f7o^mL&7{g)ɬYw.2[)MLf֬lvvhڲKl-Udֺ~v5kJ$+ۇu}{{E̬ÛwHmbJtu}ZiJ0k]ޝo"Y3p=fZZw^]YkfP8rv8=FOI& IΧS[$ `57v#$*$WWקY$3X6I9F2Ih 3d޶%!3I@ Tâ;Z=d=ۣo=_:5+ Yw5H֬Yd77N$!|{~]dfe&ɬ/j:̫իR25k!q{wu}gt:$7wW׻̬tzx{#If֬HP搣4cM՝+$HWTRDE/ۙeUdLM:#vHҮ:hR;mti)ӌdwdfKv:m(١d˱ۄTVFt;I2cˮ$@,QT;h qldIXFN6]ۮ#2qjzڵ0푶mMU!M4GK;![ŞiٓcZ1VI$Im5kV[[4G鞖rXH'娽s5#mb t޹DF&5z4cO9N;aT+4ڴdwwٙnٳTKl٥t-anvdҦQd V0 leWi۴m051&huahlMW*M'McE$6$$# +@F[mZ$;+ήݽ9v4[ڸ$=vvt.le';=#Jkw,MhTNya4ĚݝĖ=sI-I%Hk84!a8=Ҷڐ)j>j۴Ѥ{Rnu4i.1mzFcN4KNwIZ3:AHBfe$Zkf$$I+ $֬$NHdf$@r س2+v;ivGץcf&9;$I$d3$f&3H N@`53$@Yk,dYI,@ G I.NTJ32 $!2 $JfHId$$IIdB3kc ҕΜ.rfjvfL[2hIM$3$@I Z$6 $aI=ݣIXk;̬039O@H233HDu:I$k$@$@5I3 033W@$YkQ'=ǎ[6]̱!Yk'N0,N/;&ͮF$w1IpDB[3;´l3*viIiY+!xYA+{2md4l&I$ծfLmFc6SРiWfŒrT9NCkwe6{t/*miI :aMI#M/Ne';uj&SS;4:rH>eY3;)I9mO3Q0+N+N*Ihw&34mvtՙ65iҽ]I7mDIL&;ڵS#m4V"v7yw6H4ز3-;Fiޒ%IBiWzj$ffMYM:I-m)]ޭ 
$]Hv4M.EcҴd&k}؍XSrԶ3ҸmEk-+6Lf.kKt:sIw){Vnwfnwkbvb{K Y/խIٵ;G[ HZ mkn춒6&HDnGsڌ˶w+!BN5J{vۑKnMLdMVvW9di]vbgCZMI[25Mw3K#dGsK$ $ $ زjiڣI;PI$ $I R;ٓN2h(@$@$IHٝD2u$]@HH@@@%H2͞I/cG31@$@$IH$$$Pm6vL#i% #]B';YS ۨ]04M 46eh4&{zIg;UVs'Hqd[N:nu&[:+lvutYʖNgLJG1KLGh Q!Z-twfoBMyiA[m鴢(}m 2h'Ȧ=ifwgݵdC8tJFmw1#Ւ Iͮ閘ά̮jvcCwsYݲVV֎dK2NGdjh4fV֎ڻv]!q.!ThGm쪄KLv3RmZ's$vI[U;ڻ${3)k&G4zXwidce1ǘ*HӴi*F&i&ѶhvL8ؑwd7nt4V [HpD$EeSD4#־衻tDTaIT {&U I` sNЭ.ɒ$m\*}[B/"[4[nID:l9"nmY'7K7߲-1"&.XlMtvNnRpj_'wɵ6YJK>wf`b kqɲ[n6c6غuKVjN.w1HߖmMl/Oْb'$Ɩ,j[bAoU%t{z{Y۶dI:f76]HN5Q)r-ѐe'_$gڻ̲Sxl;L/m;\/AR]Hfшc',w̺ 5-iˎXr.HYnvۙ,_. %GvR}᥺l>iNFгe-%7]\.l/ynmN#w-;br!ɸ6ELMnH6\2nO3ȗv4c8ϸIٱ$2m{9-_rl'N^lwٶf_-Ky[,ߒY>ܘ>} Œrkog6g"G,=qyIӠ"n3<]5D.eX/cTDf2r ӷ6n\[69OcIM KR%"oMV{ﲉusI,ȺpHϚU,[e wݲW_H^:[6$YZd/GЋ{5{IX{ə?%E_4ɖlbYzfrK2K0$.`=._H6? GvqMdXu^+}[#-SE4\,/9V"-.icV$JK{߮n'.gBzM˥Iؒݶ-_np%O~ [:,)/߻L’{ 9|xf4e%*YwO[a2Mt[*"[t5}߮todD_vr!rs.*x;IMX&',y5~mr^ܽ>{I,dBo8&^c ٵ{fxXInexwY~'k%d;Nާm3ܼ_#a+߾Dl5{/!%7;O;Id1KΖuj-|˥Kč]5%ݲU^Fvw]"D>f@I"ٺ7$m٧'bUP;{-H]&b%]]\2m_+. % l߾em!j]"5/g_bӷX <.H2Ib:/;ڲmfh:FE:B^VfqtKn}sd6&fn/*%rgƄDn7'TҦ^ݝۜBlRf]H:撐2WuYDv4R/z.l7iBd/z33^ [rkYgq%shE'^q;ӝZ-oLI-w屴[vc>(eMs*|ۈH&DFnsI4Ev|+̶DJZKf7'w[Ie,3YN{[Y~$%]s%}^6q;t\"&Xf z#w%o$7}_j;fۼ$f9ɽm.F9LeIBl1Dl݊dyno\%E]['ػSua3w'޲^|U"Al2lƾOO$.nۘqo=za̚-eI6}}z]#DM6sdri]&mH$i"K.}qgW'K^3Orz"f_&" t2Kuɒ2%4f}ltن%KTkV ᖭ1GK&JLOqz.$ٜh]/U6w{i,rȣnfٲd ֏4K#d ǔ-YriYcQ2pCd5ζ!ҩ.dR3Y$d*M0m(%"\Ŷė dEez!."`ڞ,m6mdI2lv}!-K<{V~eF7sa1~*$27K'66};S bg%Ј N&#K[~}*؈bn-og$ߥ%0[k0d2$[8w!bɐm''BtBfʶc5]r-bA,>K* wJY=^,X68L F%X2ܚc,$$%{S-&ۅ5͘niwN7;dLis[vRl1`!I-68"I5~H˖`g],tbYdH|O֮I$ev\|/'Dٌ ;絤R ^YYmos]-^wߟ4{l}.-;m߾~sdn濻s#WE-˒}yn9nfTFKK巬ۚߝلdOWkn$Geuu2\y<927K\_܆%=$}"}*Ş&wճoy[t/!:/[7boΗer]R7䛹߲/-*E lk;eW3'vIv}_|CW"%KaIUA~_ۥy_ܒ]Dn_^z;o{tY5'ݾx;H>q?Krηzm~n۾e&ͮÿYN'vғosK_{m,+_\|ڭ͚<ϽdѮ/Y2濽qf"Qo$+E%N2H66ߚ%N|ɲl]o5Kb\_7.7]L4}m_m]׏z[%Ku_.yWt/}T7?QˊgKz`{缸\'uĹd_.\noK `{Mnˤorą\&{KHf=I`K/uN6qޒll>o9:d?1ll.}!gKȒ%.5}ɮɰ9k"e$OK.׭JUzA5unI"nHw}|ss6|=4Ck$ؒ~~ ![]{_&^v˾meR""mwYXvǗ'>96ɷVrve~be75oo|q][7\m۳nel< ]-˭\N.}/=jwmhn&]luuoy~'v IۿGzfK]I߿%Y>JS\$7w_HrxeY"a%\`ٚ}wo 4rKw{zf3fb|/M qmL{l??m_}ROdNtfm[I%w򾻼˓N.&rlV^]/ngl^ϟZ9JUtwȉd"q^&k5[\v4; ޷2?Rb_c~v+m{ww/Vyq82_sLo ]:z>:BvS_O~vm1`0λfU43[[9Gs9@ksDg(tTsrnO>.ncL^dIo2dni'B:IM 41tm&)X@%=igNT0ZiQ"ی4&v3;tvjSFutTh[H+.Pt+3T:rKҤBӣ4&iouIUG-SAj tbRhUM*RUDcmlMr%V0FճF^ $rBn;¬$B_dM\Uۡ ib馫ioii!an*80liOWf664=m$cHKn 6ըjwimg.2 FU5W&M0ݖ稦$h-IKU]fΥIDB &ll0UR] !+մtӼiJlg"ʈtZ.9͘tmMXjznw4!$Ii/Waj63r%IvF`iB[U:hA'&Fl$$۴CnkcL7ۛ^TIFu3iihsTEKccMoxx8'zùOze$8~`҇-3M&&97}|x89ѱ4Ĝs?o} ///߿?ФI; pmNvL~>HWRҌdHͦi$Նf3 k*2jo6LiIdvTVEnJ"RqnjNT6JR;HA[I!itb0ZKTJJ6v`*FjVHd-Vڴiݝ9,r%B-tfVe1Mɑ4ӝj暉366L&.R HTv3z`-έHKҤQQ1A$*"&JCeC7T:LlfLIU"a.dZnGa,Hics5Ѵ$IϦk5hfUQP^)l j;Um:V&brUӤ)Mm[SW.SiQ!Mb6MRm* e*tFir=m΍.dL,WF*4Z"D+-6i'ȆtwۭT4 ĢMʊ&$V'I a"uLqLvF5ّTʑ[&ӌ$0H^iC&-ƆnZ͐˰FTS4"fO}Z3$v۽x6تiΝV7/_fN&ץxm"ѰIlM;3뗧O֜!Ùw/o_$uevډv?׿}-`f?DK`v᫗痧9I}y}pqTU4"!:ZUmФڶ#Qؓ$&ٶi;QTNUTƩEFgjFk\ 0@4[ӆDzTCMfDFݭ9)t&\W*l4V-mȕ{խmَ1&@l"شI n[TfKdd_Fe35Hi:I+0C^%фmi h Tۈ$hA܌vr0mLT+XkvLQe'6rvӶ5MBR %$6ikmМ@FUM a*tVwJGMZseQamI6әmFvZ4#m*'F*I5\sD4jN^ $ٸ6Yv224Tj մ(Vf&4Dm'&d"A*F71Mw"6m64*H(NL:mFǶzMVSs !n"p㻯/K{[B  (@%ThIhכ_K1Y47ϻlQ  P@ (@@Qɷoo^^n3N$D,[i;w_~ZU( B@ @ft~xeN$n5ۤ1Woo?/_jJ( P ՛pLtTg⚙ǧoy_?~޶P@(((J$\|7L([A*ȦPvQѴiWk$k*Hi4MmQl Y,MH;nN9&`+c&E'A-]0qlvCXe[iH&Ӫh֞AMSa4&VimLDc!!UYvvz!B&lI+KHr.i&F,mT)S'%F4RS$$&^\QMQJii5R`2H. ӶݨTTWШjØllfJMFh&K*Saei"IIv͔LBhw;1IJf+:jTJ .!E*M`T40T0H'i*IBRҎ(ˆLl&i);+fduhlve) VZU,Y詉*5\qIhl68ٝHФ'i!MZv:UDKL@i4l4[Z6]@Vw:icliNn1lI%Ԇp*ME^`$hmf:Mդ餽Jn/?O"-hKH)Q"@(iSP$IVJ@%"-LȴdR&TcC i4eP7߾{?~꫷3%Fnݹ>_O jiFASRJ! 
"ڴAl)(iHC MF"ZRiZ@ "T!DQ QE("E2R9۷{~9r#5v+=>?v_~y DTPA @6*PE$@ @&i)UMJ"V KDC!TTi,%s˛7}g'ѳΗ+tdp~ۗ_ZMDYmiK'Q6D*ҢJJRXBBiAR 5E e[4D_7?|=r\.{sw4ݯ?~}nH[RB#M$eL ;ejJtQG"O91 fۊF8ٶd6bיUFJc3MI4Ke-UrB;۷|ۯ믿~~.::&w]ןg)=,zU Ł=}%Y,(Aܝ t0+@@xZN$K1|˯Ϸo#_rwޟ__?l1.ب3 / g[,*8WYFak0\XU;L&!xPE0ǽNIJR++!j;MЀg>>~~׸gbCz_?g{eq2>kPw>踰 J0FRHc4{PVbg@fW p \&8VK83?o|qڲbWOn!t!8]rA)n2s-hh wPF!rP&an4`+1>^JJq$h4 ! Nj3P!/?~۷qnH칳u9Y>um7ƀq}Ľ3ĝb-thνSɞegϬ..3 Kxbo-qs-K΍O 45 KyΗw |;n w:=MMtqgqډ}.鳳y!Qe}knΙ0/"Yg"8{'p3\6m293< 8. +; Zig5mf;ev9of:#[[ƷܸZ_sPG;K;Wۦua>b7[(CɦD:>cF{w8[47vw^,a)wsv4kC8gi%;h3gkJ?oNL14n-KAu$wJ57yݥa6s&?3{g$,8;0geZ".td—3\e0,;òdzϾo^k{,f6u.ĝW4~-J$Y[=ͽ;#M,nHsY Y]M /E&ڷ~g?~@Gilzފ6S*cۀ& 婡K$Rw g $K.b;‰q" X9@E*%>..3]fXѯ:gd (q}<9 3=;]i`_vZdոNыE3sAa q-|ܪԒ]83_Tle.c#$h1#G*$f/# vZβJMw & b5'3Ie IЙv><*|f[owWm[l,2Ybكw_fU7s*VEATzcNY&:z9@0@9M3<|;dc!|ikxϯ^wZ/ 07HceH#eQ8Ͷ$36dKrbA N^0D e:Щu6}70>K2$g˹key>Xhqw5sI6v+C J3^ͳl3[pd`לy8EZ 8>\.dܯSbL0,n l5x0km\u„Z@'K_=:6U\ iN؝3rmmэ9tɍ$p(Ѧ5p<+}}mevϹ {&f2\7 s Nfxwav8.1eV;:M{qWBi!7tGڹs\2L۠ `9!7mʲمdh{WaHыԀ 6. =!' hia_ݓ&l83:Dl'/- M 쌻<1tmg5b`g_V!$- 9bUguMHZB kn 26 sYBveٸkkٌuq9 Lxhf1Sq"HBEIb&Sp]IJ뤠aG01*$ .v[<.@fk: cL'g[Ϯ, 3TȻlh5gXE!E'vt幮vmqr6XlgAQk OLC.R6uxи^xVcjQg|p +-k &9V\=q0z3B."`)!]ƎjihpiC#AkB,ccepy0FĬ\qDtB;@2 ⁉]G 1ebԠ У9PJ"U445`Ssc\FbцeUg0q4CJ() A"H]@~}Ip (Ҏƃ⼃ڵ4,@KwYV0:@|`0A*XCp}Wi t.&7 L1H'~tI\%C38 rR{K M1!)hTqNy݃'+#C6C.#bcNY!2Ji@ e';ENC ᆧ t8 1 u\?NʎT(J v&H>}q;M/8ޣP9>>σ r̅&JŋI!C25Q!h7ڿDq˜`d9vuM"!ML1B8@X qh9 d!kBԍSѿ@f~ԅj 5wU FFK{~q;t7;O_zRNhot-:l;~}ߋ Wݽ{D\K3`]FqqvIrT^O{Ӆ|U-f}xOp[-~~[2v"_^lPQڟ+S':tC۷q4_DMqގ{$@em=6 ?]q% ͊ʶpߐ=7K>nzz>? 5ɯ>}{{}}/;']{ٙ"cwۊ+/# &GwL]/ALqo꽾DX<;gn?@k~q(mgxh(XW׿_~AB}pjW??:zꓷ820x^!v9hg'; 9 ㋫ߏ8:F+v?&6r߀eJO.<]xg,ob9-O}͓xƸk텐޼}w3wA7UMƽ{_|ofr:oz˪;?7S ~an:Mb/Ӈ}z FeNo|q%9)@˛MйaG_z|؏~NnYl@uytܽ I8 @@mu+s~*p::û˾7vW?3ZMD9+ ~O)Z↯`?Lؓx|:7yF9yzcwsK"t6/d<ËvL'q0SBow &YX1Rƒcqh>|o'hсh޿}fC(~t 䔐\#{>96"=⮎Z߾۱zC8/$sk֌xnGCw~ȯU׋lß3+~'o}q{<{vٽ $ /xm)`G,'_AM[~ aa} E#G>f}jTWmegq%кv*D j31ϩ]pk`.=꺏 9v5|W_˗Br7wklY/%$)=.)#Dv&~ܪV?z@䧊6[WwN~cL^䩓'@ڦo<԰'2£)/BnXn YoCz܇D~\~g=*ϙTA[[G I "I9G=3h|G?^|Wgp>1}ܖһ#>m璈6’{|n8p;?s0G&l[>2Tpkn؝~{p€hӷEsNIOf1Mξ_~'CHV(px!8^~܆ĉbt ^ x)Lx]Qbz*l8.Z*U- P"gTq'Ht]t#>v h-;ѼDX.4 A[!% Ko 1 1_#.uP9mF_a/@&ynX| 8OU.I18v;_$pU)0aO*s'_E=@p—l}un>j~΃*'gzrN`A =n.r7Gr\"ɉ "p,KҹwrGwDI_tzCr%o!5H S!^ė1?;ˎ'FDb"J `K j^!MU>bÔUxpМ/Udɼ :z`e,ͪKIau`=>:O0f>/+'y 7Ӻm!Y$'P3*BJoK?ꎂa 1J D w01Fo4:F :L<R$0Ukqg_]HԝB P]vkJrGW`1}q)\wɫ5\DpaEGֿ 2\sz nD eHa <=qBr+켱 Pqw cFR E@S4}5 o.޺a{vCዩAbt 4|rg>Q~N"}ó a'a|g6 AI-`I) }s'N캃|)Ee'( !iPܱu+PDu}Swe2zbÓH48I+38 $ `X]LO; Z$I}ٽ.Wp-n)58ӊ, HE? /q}'` ܘL4$g A.  R { ''xUgi_?8`LU:FNMĨKj8TxQT-ճ )HNho㎤읿'EX!瑚WƔ;ȋ) 4?p`PT0dɉpB@dsy ۻ$@|Ao|W#Ь4d SL lK YzHpD|a @w_-*ٕuihJ,L%F08=]aDC!j3]fa 8;u03_^4+MAD#MdFg{O]|܎k]JI@_:*ä-`EL&u ֆ]娤#|łt[k]aJ )z #z1,`{Ū]/ycb>1#Ju TF= IO$T rk1"WEgU=*N$b;V!:IOf(\`ri'¨}AFЍ)$C}LJbϔ|įpy/XA-딓X KL^4.i8mu`5hɁTpp j {1 + .xP X ňKW?cqǻ6KTv= 9[=Lj3*;8Nb`O<5 GpT/Gюa)M&$@<1yQ&]#{w/_xEQp+4qx[^JVRNW."H}?M%xsf;d6WHִy|E 蠸ׅ\QkS f0:?9z(K\U uK*ˠ (Ypq^/ﻰ%e 8c '@G fAqEF|#n& O :Zy+P{$7֟0`^ߺ'Wǰm݈o yq:F _^C`e!1/E4g~{eozmAW#Am*ں"؏u@W 4B8@':ZHh:pooIIB(7´ro=v{^aN س{^t֌nrF@ $J"$/*fR 1B/N+txݩT^IIP|p`U9CTA%S".xA|i}L:T% A#M<ϡA.2M;<@);ywX"R$ < [$t('0(Vn P($ |")X*W6Apz^/"AR`upq$@QcP1D-%AP' w 8FbP yTq`-ہQ'` AP8aL}돆zQI%A>CмN@B^ ^ cb&";)C, ȧj PňQ$VAT \QtҙuapʷIIn{z$4<.ZS)FR 0QÒ՚ K:(?=|,޾01y@QX>_ * 4/9Y]aL8{vrEx0 `󐠰aq IElxW@IlP>K#(0:1ďvL> (/]TCwRBёpQ{wŪꦂ_NB\^¬0Óq#@: IqQxCDAWY4:Y"-T#T89^"G@M` :(,9[AvMDЩRt ÇTF"}QQCIAPG0~0bgzb.. ̶>u_YUOC,Bv@ $/|7:<68+äbj%p?kA}Gx<rƋwlbqbT h&oށuȡZ'XZַqZzrf%;;E' ~^ )ES>ƒFxcywpfҁ|ѵ +2;LDexZeH€: N5("; `v,WKO8OoԽd/qyQe~y{YBA%; 8w*ڼӷɎyv}Gӗ;,2}? 
{nJpS[;F~@ ')imcwww Qܝ{ݢQ}kGt1[BڇM Uݨ;4s"L|pS1Jb'&zPH Ջ2(YNe X$r-JWwlKjwڝ!]`lzF}ͤ#I@\ww>7rW^:lAW{|cynP=yb>/Lٖ^wUC _O,,%}@yX1vxX!'{]Ez'}Fт ԓc?<񋹀l"@}ݠM}}q%zx,|3c)}uҵWtȕrz;-ξZ_hDo|#̻aW_M,.F#f'"=Lf/'Scya`8侢Hѣ<,,GXB^Br(:Jb@8_߄N*ڤpl$`,QX;߷Ó}gWqA[ad ?wGg٠`w}_$c_}`W0>R>],Oh>f3`Kx5Gg!^}&(a0yd$`SDuq'2t-OB] `})RӤ}C!hsS!;SuA)B;( FtPvJ$e_D*8/I8Єa;;<7~u[O ^P'*eɴơjCa9Uvo%ԕG6+Rq$ux0)/\@_d:!)( }z (wT2T>ưtǰ/>у-Έ.0./?:`Yq4Gүa :N++;bGgɣnU;KvkE,N൵8 BvwsDIF<:ߪ_@Sap韀$ O@qg @os v:1:rp*2(7s2tG AT3"\s6}]px|6 n-T9|d|PTħcC`]M8FUe'ݡݿ{2F0D+V^€!|qP~Q0ߥI ϗvj=ƒONU$G;&@42LxBG&5+ChƸ6/ >8$|^S<8}AG'oߝ8哃&}׎Ҥuv%C}Oέ⁧qx<F47d^'60gpqӣ@NyHP<٢E5!1"(8*A[I 2 f#:x@0AYҳ4 ,~B<4 .t$8M/|& ,.L$ K R8_8 O8F0oh(E`񀆐PKPa{]y'j:7 LXђ\W]nuHL ǐ@^b=03 r25?¸Ht,X2<#ul, K#Y@cApH%A僜LwKeA'7l/'ٝ%B;r(Y"i! 6DlD@4@9P&AJky\ڢp84,#R^G"/X] IwbwEupTN:ñ{-$X?9IHZ7 ='{  Q&, y;9ێR/l*An_M\߁5LFkw?H988T-fXw(v_hS8 (hځqTj&Hbn=eUME< ǎ9Zn;7(.l.@pXA9bQqE2‡H(1,cX&B+Q4$5D xnaH0)je,*|ϯ DN aW(*d>/QW,ȠƩ %t//K/ 5'G@{=Z޹X[Cq&Ob:]mq A)GĢqλhtrrGcFFfgƋqW[Nv|$P - wY@]⋍pfii0 X05v&@@{T<Ӡ v λ^nS%#V rzcz4qR $8ɩT#zw8(;7m^]胏CTmw, EG<a"K!:BHE RD 0T_ _6-/l!}0ON9D'6MIEo~X.^.2:ElEXۅÙ @OpRֶ?-/"j3q Rw>'|N}Ɋ&l@ʕ-ZāJj_`IsϪV!=]!~ \p%=h|2{dǙl߉>Y;>_r|ʢQFwa @P3:L0<k@A4gIxOO@j$rGPc*4yň̛R_P($>]u+v 0A,J0 }ى; 8QEsrto|_J9}}^?w_tO"2_v[mx>] Jݟߗ;.# Sl_;.ϯkonvWs>wa]#{^,"zypWcwWL{:D%*:Wg{v=}BDYѺ8wǁc=@wT p cp:>ZG^#0Ddqx\>6ѧo9>;]~[u=>c0dOb3O~Oe5L=t. 2v^ tߚ%|gNF( ^e wV#cW63{쏉(}6o1 ;=V[zQzCG ^}i]>,ț_7=_Ob#wk7\tPNCI^wzz@7$v*ڽ>>!GGl'pc1TNȺLޝHEP8ֈp'bOG֡|uן(EcO]pSjdACَݿ=H/?zǯß@O"Gׁe߰;ί1P2d|׷n[ӻ;>]eC5}n{ x~dopw5;{^Г{]T]%9>@FwE`TbvƂ!@Xr({֋Ћ=I}G׫?:0YwpR0H;7`:/h⽕?x ;3O8Z=(}BA'u/9'z{\WߩOխ:}quy=S<>yr iPo{{C{̈kx0 ;h` aIy.veq ᭨b7덮}_Vrs Ё}}bqx)]~[Oo8ͿBN>vpߣJ~~x2bty{:~(cS_ N~.q}oE8C H¬کtw90|^tqJ\OA[RУB>M&(~ݣ3ޤP!?[dH:Ȟǒ2E`xBv$O Fp&(   y4wd'@8. H S}o)u#yF1v=g}A7*id"Q C C@ śhvٷvq[˻O_ʹ7s%v_7p6򏣌Ā{U4D#JtE}ߜQ)fGPW{Aq_[c;5Nn~U|"c15fYh]4[SA2G>7Y йY7XBj#L۫ZdϰCsG3dQw"." D.n~!ԟ ἐ" y䯮Gʇ)D kw*:'vڴ!1PF$!ȎP8DPǙA`r_əќ[.N ǏGv!P/~ΟQnAzhco!|>CQ̗G5|~c4{{䉣;UPDwM>! c34o\`~l/ $^~j?bwy'Gv 1 ;~?Kz}!) D!f35JDx8@:FDžPcE.=6ue)B~z}e-w->M0 KBN{7o!!x88js{{ ҇'g)=̈́>*Uzv DP0: J; z)U*KP H ]`Ly ~x$w9  xˋ/=4(D@sXD4ܹ|:`aGjK0~ n*,HQᒠZC8r` MEs\$B(Ɵ'Ďp:`Xы hw<9⹟U TGp7)$p/Bl燪px}`ExArZBLu&w@ז?8  nN$}&diFyV:q#'PQ TtI09;<X@+P ONOZ L&/ڮ79ʾ8Q~iwbp'@;{V,* 0 z#pQn@E[8 "ۊё/K|+>o=>.BD L8Đ7.0Ze2l~{d(5\E җT8>ɣ{02qǟ_"(ӣF"wb N߸ ;FC^(:!AO$"tlSwK` D>9VkU*Xx_%x`*.qZ'b 2#DĠꮮ+Gžz$n]Waݟ8\M" {>`e(cw `/,8oa!$_ (<|Aq; ̣># ΂ AFPwPs WaruD="pr!(KvE!@iQXdEo/.T›Q^TmMeuy0M!JF_vx<Ń2m!2]e$Wx3CRu.>LDr["S*sfY⃏ bQ ,fa-c,"B"hHt`1@APyR`jWiI@HP :e%_3FCFXtA qfDrA&TʣRH "Q`]a( H"H*NpǧSb,W"r!. ASsmTT AGddXM?GB0BU4ࠄSG T}if꡺" *@(E ;,Lyd' @`p'=hH˜! %2ԁg".B*^qaE!lWB\&7Zt rE,D,9` IRn+ LAC(s[f qT$zhh ѮGJ.@@cK.hTa2 Y$( ՠ$խkcB@Q^pMo>wjH8*aH,2xq@P 0XGбq-՗J.{A7" D/r kXvU" pADɊ%rEH!Jتb QŸ EJ9 @<,"!pU=sAri 4T`!&!< Kl;fecБFHn ³+0G hpn" RbH uJ.X20hXAalȪ $CoC!i83DKmAwà#| #j0Dв0"`t1~ "pyU쀅p+X#N"L\'-5a,fG(J@RFuK8xZJQR@0 e#yA .A,qqy%=2Dd$,`w"HMp, FĀqA`D i,;A>hY:O(~^䲣TCA- y1|A.kVogJhI,xCLvOP$IՀPXh%< !=`ax+p\N"$=LA,l&U/+=E!a"%- p坚m=0a9 v} ~vy,'c_&ء [7pv}M']"^v0)ŗwh] S@{ p ;0?`02lG-Cf I^GA\ Av9O!bQ!A,bWoiVpyT0#S& L^aPƒ/uo>;BO<+^w 9.;ǎr57a"OF_ {ݹ /c3;/Z]8 52@{E^..?5A'u.?Aut ;w{,_@< qd޵kK`8z!8*Cq avk n;oSv׍CpbYק|L_ ;dCϱgEg5#͇ZP_&??4߭Lڀ7qq8㣀_0`6`˾ec}c?>B.Ѿk=.k߽?_=o})}>Fw%ǝv\ퟹ]ez|T}\湿?ƭnjM}e5z!!o8G¹6UYt~58~9y}_Di\{ۯ3$=o}Ǘ|z?.){Q3b.n›~@ fgg}W-Z"Wfx;#{;}<=;h"-z^umz~1qO_s Oq}w~==;$(~{{Fotsl8>~~ks_x.H\_ggF0m'w Učd;ۆyߏ}o}>qO.YPf{\_[&:Cu5d^\a_w۹|?6kܷ0|kZWitcOv@>&I v/9-?;ߨ*O; V' s{#WSY~w4o=8Yw~L"41Kr0/o+.%ا)^}PGߡ찻?;v7Y7߹C"2Go~! 
649Dޏ_*N>Eϐvoy8r/S-$=ub_|T j49Eϯes oCs}ǷSGfA83n~gmwxY;1[ECc8qCB=R nFe=g~z;R!>Qًanwb) hN^l*=U.IN߆QϽ~!oW}k\q{kU}1MqΞyG" 8u0h"gr(9ۺtvտɹm~owha" yWO|Atk5~pIgAق{As?> ؘ}nnv,;؄7ϦIG6WF]_Rs.W\|Ƕ浠G}]>ÇJB ow}_m<_,bm ƾ8yG7cɂuʼ_O{mӿ'`CP5=(~;w ܏N$1xq?o;PddoܤwuOxyg1М3訷ocj{||Y?d]\e2 j\ "m؟}lK1/8vu~?dF ?oIGv_|##GWⓟ Lٳ_?ًnٝ0^J&﻽cn|#ï58yc[;$e7JEg a-8q>'})p(~B%7=qϽJ|!Ћ\}r=oǻg8i|}_]Tؘ9hwUTsH@*E7WK#<'.-Co,%Nh2#!XFQ=Tu"#f(PGO쫀y" r2Zz4'AxqcGuw=>KKBzĔvyFR’q^9]C5d^|.;#NA8 AjlMZ\!0d߼? '!=caHPk@qP*07' F/~"lg:0)P{ /7H2 ,y kJ &qC 2~Otr="i~ Q_g]C>2fuv=ʛ!Yguݝ%!, TY}~7"9{bk|Ӊ'.N RƐ/a@ٟ;^\:$o!CW)0&}ƒh$Vaiߥ@жg8B{L*TsNv.#rf˺;(H<&DzyH;0( G$@ MQ>tkBw]Wt'Ш)IK-AȀ Qd0o||S..b( \'P!;n_['\E_q jD`t!wm(>=$.Fnn{YD(B.: ^1xo6 |$'U/D\ 4^"Uc1hlEhA0/@ 4Ʉq!-DtPtcaGl#@5<SAunfAw*!exa_0N/8Mt?\7INq'3sR)^+08zIs2TP@$OL M"vj"a$*D0 *'PTi8 AZa2 P@0w,pGQHy/m8$4C! "3.;oS( 767TGyo'w kp񹑶A0rDX A:uu*-8F-@K`  P|1$LTّL2a}>nQp;@  :AAvAqEpN3. ;*}T I~Pu=pCv. pRrl`pRۂD?)Oơ42QLx%h8``=Np"q# SEwD P/A9E Lخy(NtC1˺{=ArB>6;eA םAqa/80-4Fx yIAM#er1<,ȔLu:C%tqԡ''H,# @?DGC }{;fEєnE00#'RAqj +D5>JEqPh*$="`6ϙw(%K·m"f @v eeݚR-8d?Wಎ "CA&#.!qbJ`SP^ ;D' 5Ap!мxQDD_;TE6] ZYIćwH ?L9v @TbH0CU4j00<@-Hom"ɅQH<۟O #K(j!2P|8 Dibp5I)NWMa28U! ֎"njdӁ5r18$[~Q UTqTK|owh >2> (3'A©(iXuC Α|XqN R \d &׼O@@$&.!=@B0e܍? K?=h+ #DMkK:k,(xi@|ᮠ1P>88<<8',[Ή!\wbe`rEbpGǁR09k0Xq@;Z?r(jJe6*8Zܔ,鋺:0?̈ydz:5Q"yʪ4;DxKB30 Q2A=Whacڳ Uw͉q;Ъ#ENurAep$A0v3'00 ,fUo<O@T`ybtH@SX:.| @(Dp%QYtE4К7 p8\<>*,;x/ZN5P!7aqVJ,p^8wh?01L@K Qf?`a"P(HSaP r5 3:C8TXQ``uMy@|_Ʌ-@04HfB S:h"+;0Χh39,/Pky.9 :h6`${ rv>p@)wϞ !f%Јqd'mS?^"xP$(><¢5S H`c"7@jLx aW@sd%w|Ҁ$y, KI#~*Up5(HF`Hi ;o`p#iソ*aZϳj qv!z(C42Tc:4ĤF ж11 Ҕ;KA`d`I ۜm`pDPVGqvV%e,)D@y)H9[U0 p!L!NȲKY0e@榛UD$pJ&%F,# 2+(H¶"HM1H3WDaƕ{'ӳPVÁ V1šC-v0+gn 28f3ݷ>)L w.D~?q<ӘωZ%tV̜ 4{&u0"NxSf Z}pQ9ǝ\X@*Bޖ8FvW"Y`7_g]dAa }gJVuú30A`Ɂ *ǙmzU73rl_*d<왚w|vAO[Kll8`sPSU-naȜ3d a;+ړ;>~~˺Ĺ8po!OUhs<\y,~lpqA9apf7 lOTM. }mwa.m{Kd'fξs[Xv##:t֝挝]=@9qe/Lss{7Bi.q-wiF7pGrӼx "{kwC:."jw_څ=SD pv€̮}ׇG|o,=94_Gocu̮5,+|?3Y[kf:5]YgzRpga~ 5; FClw qsV|(N; pTlyYB< .8΀ÛUy6PpoYmma%/N|!yU[\AkzNEDSwDߨ+Ծ NQ t}AB`uq ]|Lm-2a+K!o쇳^mn{rSJsBc߷ Xس=H&G10Ccvu8s1\OÀ3VLWp56â-3_sqL.|SL]1NYa3 Ri_{g9{ `*Aeu.-qf',e؝EU vWeqdزk@rC[a<@֧ i|=P/=Yooٲ\}FiHhEr1;Ue[k{28lEVESzّ`m5xj#Kbp4,X3쓷 (;itpq[<%iEʘs1]utoXD8eY^Avv -K"Uٴ3($6.cbI2LgXحN8Clgsa;l|>FTO(#~۬ 5rȰ[qEm4^I?cx9ͮ Q4SŠ-'7 Ca#)2EJvfcZ n珑M*{v60 ;,߈"1bUT h2 kU AUV3C(&ϞsK…s9̧K5׿XYifU-eAUsbV F]+, 7wfl&khRa1'c]APD,e2Hϻ"OӅ t, 0-l sa7bRQJjgaֆ7-qpf06CA% c$%}gZw XCù3i^0#ABj޲7jBd2^&\ Tmn}b&CPDbك0M;ܙa4  d͘}a'wl";`G0- S#:a!roaA Мs҆'./8R+ -fMH|#5\(Ȳt"%( 2[۫wtQ$Q YULl.P T"vD!#XcPZ%F Sd@EQL!@Kw cI` ("T&b $5[ G`Ii4c "Q Բ EH+HΠI4LRha$" #0 04F!BJd kMa cj0¬;V ). :BT6jSkXp] %2E `9 $ +1iwA fa(3a ` E0 (WRf!!Π(c BdL Vmm-2MBPJft猀$l&pEa4c-MPd68e2Hi*5V-P^8%Ӗ`P@I(XAHʈɱAA,4 giѝaBp10,XQЖY4tWMF) LtRA2 hI|ҎXq2,[ ք Qi%MХO0A,ƔkG"DPBNf@2Sd3ʢVNe ji @[Z &;&MA2DJfAHy((R0OF%CD"6b"f2PFur1 VE1rUF@IW* +69,.0KÈe"QPhk n%4 4JDRh2 b0C(Q159` ;&(m+IA\q}:TMBR,PD @C+;"!:(S`S(8b-S@bc0AY#$:2S`ˁ=0 E4]|*f&"E4q"vR@E0D% nDFpJE(8 %S I! Pd \( $"HDDQA2s$IPs qh L)sl[r1![aM$:³\Q0 "P@@Z'`k$I"9N#&;0hh0Fn& e;+ʀTlAag0ҰA}$j ^IbK6z6@FA eM@v(Vv" !ujF"ԋ C|c6 5Mj@kj!0-DS$ഌd+0Q'%T;HRqe*! ) E I43Q-M P L(F ` $\1f4A8@WVD`bC"BeU ֝$UlWPh` mZ%- SS4DPSQ;Ȑ;8Y.[Ơ a;KA `⪀6 $bLm!5Ș uSD I$ɑa# 2P6 T%S,@١(‚Ƶ*` ((%_: 6=4.. -ƃ^]!j 8Lk TL `ba)d1?3:_&&'y 4$5Wg"L=A޼ )dGȂeQj&r !M 2F1`F' | p(H~T€2H́'aWCė@ t!Y3T0̰;xt6='>HB;ׂJR:R>p&7. 
kfZ])1BT[zvS& |( \̐j8bcE0 (U;~(@RC$7H~q,Ӽ !׎!xy8V(e$A*"O/Rl`ʌyyC  "1D XbJD18#I@vK nIp87ѓh7 5cz:1 8Yc!Ut`&X˜Jc@ , ' BMC`'jQ臀j`u(HTahrp0,AQ(}$MY`Pd~ժgW`@w' r0p܃45 Gng-t@*,pE#$ "{l~]@[a d_ !P&7; yT01`%`42+yPP@ gW l,$rrX@00oWGc_ 4ND0pH$"YTO(XP #O; M6H!%qeMԑ GN,$#C#F 0h&stT% 8&)N7d0;R@Х`}-n29Gq)@M}vraVЋ%UJ.V*h#p&O B%{ IGi un;dǝL  C:E-,U~djx@8!@2({[u UК0n`_rİq~pqYA$tEP( q wnAi~kHXZpOw'GD’"BTɚ=\?ÂX*l7bC,v8, ,8I&\WIB!-@ԝ|@A.(RBRH$)8"(H ~,ʋ&Vf Ky]{bA&i )) L? ?Wj :9hV\'ŗm"Qdbw|?/)hkP,[,I <8xD fG Q ts'@ap 5qZWeGࠂ@h{UZ@pz"7LbGGGS/j@ ヘ@V$񥠄5?-8C!\4vs8!RD&"DH%H tFyY Xvqo4Hse?{"ktD-o.?/"o%a@R#Q( $A1AGQ&@qy1KWF Ĵ$-/I#-Y, sfpUQ>t8 pһAKa0l$fiQp6/BT o"NSKb İ8i4A@l=rȢ年@ R&J3;bE5NaGqAPdy`2hޏ}agI(rEL<28ZGvĪHQ PQ]2~\qӠpT`h̢ >L@ o$KRcj4_rOOa|p(;(e'#(> OqH(c*wsvƦ@"IE[(k~޽c2pOȳब?s\99G~~_rx?(&* ޷=\LR緎>"߾zmWw=1Jx c\ #E}T^֤?==v.gGwym1h4OSv?׃=!ǐ:x$Sp~]u,aoۄ()`?=e>&ݻ ǧ^ "㈠&K}^څʖ@te>v}Ŭ𻾸T3}gݼs)k_؍uB෿7V%w!!7Ⱦp1B 4AįAiw(ː7?1KwczGE 2UTV!eg#Qn{n?tgxbd-p uߵK@N>>wown~~A)~~Xp_;tY}{nry .zu/HynGtG|wƓ@wyb]Nk^AY`(|yp4>AY9vm)au{f\qG{<>}EziNOiafV6*HV( 0<< C}%O.B @Jfya᱐78#vL{Gۿŝ1;q{}ǍX??0vbԮ4vGZ[*,KO'Dc;ϻ'·Wg/)M)C=y9>cӺoj/!k8:yǂ&;C-VDY%G~~xnzwwy*|\z%L(*}{DKى\\>)#w ]ҙۿ.GCWy?hڍ>nK oo؁O NU ;I$G,;T",8yP\"l&t?2\pj] og I/FB3v> co3@yUGaWӒX;,:$o@v}eř<"P{6NՍc@p; f]´ؗb/wólgIM'о>|g yt̔}-=W=7PeºYq 3dIt :ŮOyhk(/CFRuq$\l;\A幢d!}{[j}nK>Xck!Kk_wd`w~bbț\N~`?LrPxx&mMaDx_!.|;.%<.Ӹ5 Hlrlx0 °Q'` ; i>R~z;v݁>_g _|>*O W@̕!vӇ%+ L~},8욈S4-8%ӆ~:pށ(`$õ4 80$IxQy4{69pm򽋄eL^0w!`~fSPb2jw:8 ̈́L6@i2":Mxe=y$r(LInfɩ[E7D|<N@ L3(Jt5O  ( av-<Ə無;PjYpJ`tqP1-D|ա/=e:>S<8BE:P208Q 3:k}wK AZ#W24ヘi@i5t9ByAud.w :Xg"vQITPAgu]4%t:2'@ўT&7< Ty:R½/Aʃ-KOK[#"sMe7f "]k>_]~+ N2Y(@L*M,)85r݁bYM}` Q@Q](過.L->@w`.(3h'|v>y%?0j"y2 ChW  %zuxz ^R#kC˾$8iv&N<TtL 1Y,rԪNX_$r?+ x^tt TdmgU˝2tyJ+h ?`'8YiwOV+f@2Ԟ U JkOeb-0cIwo*o*Z" ;iWNdDB +j*X} +spa9Q1qWqlR pWALP62d4-d=w__ '"(ra0T=Dj"T|+رS7;[;7@^-wMM0\"s̀+a68 ۗkPR{vz6ؿw瘱Jgw vpE@ x5l4 }?&)ɹ@\fnn"v?hAAEIllUWD'븻xpF!i7˾诓K@uXД }Xxvr\wI6<::^$PH.2b}zDE<6qx~9X۹Ľe?.noB%d-#)9%ܽ3V(H H#wFn$DP2HyE 2ΐ)a _?k0#<A.9 9G~»}u']O). xg}Yǂ8h88 _W؜h)N,k ͞/w]Q}.Ȁ Nq b1lڷ ݦAr 3]r]n:Su4HiF8Zg9S|RuP: ~l^ž؛Sд&";iu:/E QHc9!]wv l9. 
gc"/=c4P~lop:(=)2|%cL"q* V" 9$c4H>'TL}KDaC1è K6f]x:C8<y<*Nx0mW}юQ)AR>:GQzۍ;'dfǝ< Pk]ӧ>/L:G0-'Y05P"ս'ǂ[M{[@Y 7ˋ'Oa oYYz |+&bpB 3)0qhZ0`J˄leUD1%,>{zj:>` `GsS8\#(}'B1DSH rrW%G]{iO4(nJ@!>#ޗTECFPA:9@k}7'cQyN›)`"I(q1w,.Б8AXK@~Z!8LTԏQh:L3@2:CEY;LO"@8A<|@(0DyF`#ւ(F!A"p U"opL  ]Fglqf{1hrvÚM`j%% Qu<1T0x ("}I+ĢS@0G a<8cDM3B pň<kf$ӅtTД 54!Y_ MaC^X`*P&/( Ҭ T|mVFYamXpA*4TڝȠXLEZ0DD+@wȀ*C]H$!&XP%Ii8> @)$22(L@2MB_8K ݵAQA(EP oEfsH(*u|ml/'] #(dmx{//"Xn6xQ!)>4KwYAE4~*ɗV -mRHw:#Dp!쌝Ɂ!à:*l)RR=ɌHPE_:#y nMp- (@@߭;SYUα@k'PCNqdǐa0Dy<}Ijd(\D)8х̽wuC7POGq9Mx@}dܕdщk:+QӃ =ITB$;1 3@ċ4?!;Ʊ^v+ 0hɇf@6=>,$# " AߓוaG'hиql`::$ۍGP)F$ (ʫ; %AǎV]Qe7N }/1%`yc^o eIl0ftIj@${E(} 9;yPK h'anZKMt9dɰ}6C0X%G%$<(ISv; ¹urHVbX^k cq1JTOT k>" u”uOF]<2,afd'$I$Gl^su F-U Bf(* c+xҢ":hgBb;oQ*'kJar]$2|wXꗂ]\軨K/  si ($[0#!W7=$ ,$kT<iѕ1ˁD}wOT] 0uWwE"T6Θ<9fsa4 0*70ZeEayq`Jp{_q[ZSX6|u!D&O7&Zщe) ?ɑe8yN!Qk_Y9$8{qL?|PpCn߾ϫj*xC 7 Cg( ]({]~nqHxv{0-]WڄO mV G|]i0|pOś{tm3Xzى`I`4u8>UpH؟# 7Gpx!{w}칋6#4'4ڿ=x$!5z/D0|TqĒ;HeQ6OOwWsѣpq :\ZvLw=.]!$k`ŸGQ ޝLHQۇJ&μpɺ+vD'nK\Ap&}@CP8nzA>}kD݇!$7>6՗>l}|}{ AA.qַ#  v3>x\dr_Γ`tRvyx_(U._@ӝ; G ˆNσtsS= =;-`It_LviA 8H/9k$~NX6o{b^<7#o[@eC[}sAڳyC:\4w]}p]>mmL(%!]H8nXa>46s$=ޒV}]vo~O_:'@3#<}w$!}v/On[l\U ~<*u[g^*4x_)IupOlk?:Y~rqCizݿrs] '\?@WXFQp{GL롴5&ìX*)PʇupC y;%̻uW^-qnLs>ªV[oNU芏u}$z!T._nܶ~t5: NEYf- O^wО# #`aѕ>wۄq5#})$a `@r0#R\.`gg"ACQ_}uI @t!_ YZ]'pĬB'p)Y!8 `R\w9zBD2I73H4.h7XaBY A*i ?$)4@A"H?\ M`yMAA\E`"pg9Qm_8 ǟ@Аd0WTCȐ:k PFک @2xy`ZFʼnd1uEDxdž/AY*ГAGx<2eU0A:*8A3C83 !pwݜZ]+ rtDzIhq]W`L F]rvw_@vg=: Ax PNaA8UnAquo|].49 =[ ѷH[zq@#&1ؿXU<*ImaM㏐0aiw$6pGx#.X@(J(Z߯c\=@q:3oc-L8o>&{N:"lBg;̏)w͟|2 }ywG;(:>N(; r̟eha`$-,?*2yĥℯۅ!}rzȍ.jG }=>.@6\T+.hDp[A'1UCq̀v`A#]HԸbInM}6>:3>Z`̓݅MݽÉ ϮJZw^@cq [c퀏#, gRvU^-|;y 5#N ꛊAƑw穴,QTL\?E ԄF=6$ _%zh~a¹ۍX,yGSF_q" Wk`skRAb]&Zx!w9: `qwwJGvGG0$O+Hrw ST&i`aY~H_$ C$8&h4q}= NUۗ{7Ȯ&xEbjy!cyQOG;S3?>$“ T10Wzci3;y&0ܧ<0ãUqy &WfʦߤGpj ?,1]yn! >兰3R 27oO{h8>9MN1{>QA*=ue \G72:8iPDqݓl|cդ$,Wv]މˎӰ?2|%OO?ܐ8 N0QSvn h"]sdŝO4#yWK>peOX'o|C6]&|=Ӌ@NR=^mEyn*BV?`&'<O u624c;߻o2NJ- `AT\}!r7q\~A?[@ߛO/ľǂ?<.J=}||z7 0\-ğDXs{&}7LL ~X n'pv@&^cwk.^⻼qB} ^qOp)6P9:xcsw3$Eضꠁhk/BΣx[I? Z(|= ٛ1w]<;ώ!,?>io_ Kܷgրd}kyC$G.|Swvp{w\#Nحi bwn̋ A}[h{x uvwEnwwߘ(,~1KR h܀)]}ejҮ~MnϮ:[\}@'Nkq-^叽e„9uv.7}n8uh迋JD|#I82AoΨ$ג:}ngw1p7xy|~dc\ ^?^DPPݦAT9r9n߫]yty!'$-G@%l- nOGΒx)_+C+}ܻ_xbA>J ,9dø{X a|6Q?hzGNwN㝂La]3[ pʐB;,E8~*i$au3v0T5 u#>w}v@\Fqr'a[ϋ;'衴dS;:/PDg #*r.зF/>;0 dY7L}Wlpin>GrVt·Ż~-VNBAmFHn^F;@\QB(9I("z$W5nuǡn0$H!^['}}v}*:GTXtZ@Z H`.2KK{eT=G7N|a9E|az5>(e;*p0DQ6scY*5;Ö& .$ZT}1vp$Rjo(I-&HJ$v:y Q@ۛƨ@B$mE@ ⢣LIFJ PX4✢w2$ D(Ft ] SA$T(s\R@렍9|I3%F!LAv)@kQpH!ww[ŁN:֤v;Q AG$!e'߱ ɁDGTiL "^8G& λS="K7׷<-98``@: "xv[2,A)d㥔7 O {wX,KӠ}# ";8KXJ@倄B:nP "7)y7L "A@\DaB|q5@?$s{Ӏ耊ޘ zߤ@D^㊫ly"8 TJl62 (>!!twwE+};q`cg)M)H;:4d奢Lj"RF{@ IСIQt^-@I]N4OKt'l`gpE @F^wthXxr @|w}LeyWjYr` ۇ%_^֦!Br`T'9 @%zp8P #L n`I',& SOSHsठ8n t r8A-鰸1l%Leh[2H}7@/ (p߂FdmGHEh /)ўE(Ґ{GpBymls|! q?0R QwSbApd2/ (-.A:?[FLQP šPuw8D  @M.TWRR_DDҔZG׺KDEGq7r $t>/OYO @c ց'ƍoG$Ly*'.0Hи\tbs]QAR0qC%;9+^#;,Z0e]}$fQ0 J ʰͦDD013Kfl~գӉjL:^\0&荓" S&?L.XNĶ>f'퓺 ~&w+!M($AEE ݤ8@D1!Eǩv"̸ ]h+B}a8"jU|ٍy"3T +QDN^'Wu%".c zP^oꃯ TR'.@1?F:0 hO $eD)j|7#fQp;QqAyxOR%$e̬JNtg5. L\Ϻa*%5O@ UOO1 `6P!(.M"qLu=0IU`":?&F-($YK70woG-tYQQ%SY|dawNm͟PP,P |<(8PB7 vDae'Қ8'zXRt)A飮s4AXt}㗠Ar 1:0䋡0Po'#3o}Z,wvk(QHн%G󸻆FtT\\p8 r7C`uH`$?)*ŇoUSbzXLg\`_x'/D06 K$s%Q! E!BWPU`S|#в0,(EI0w09+bp\x)Knw HCQx!ՎmGE2O:LUYHjo.aC5| HeQj? 
_lP6{}~~?;v?7O/׾կ~/ȔIJatNMʔh&HkPW0fniM5ofdcl6؞->y}y$ (S`46mƉf2@TZ@ 6:fKxSb&^F"lNd۶†lsre*l-ZXs>0a:PԒ;|F@=&ŭ܌(֣ h3IGmq2 y[j،׸>l{f QRAAj1ܶF&& JʶǦGz*Cš׶ܽv$>>xEl&QU %FֆѱmK{sr4QloNmG+4Lz٦L-lZfSW*pli 6O @HV۲FPE`XQJ,l dG6Ċ6B,k,m*0kw!5oCQlLfl iJa*b}G{oo?ꋿ+?׿o%K/OWo_>o_ş׿}{/O|>s_7|'r@~oo_oW/~/{}G;o/{_??ʷ_Z??O''ޟG?:??䜜! PA@nulu;;3LlY+]W]2E!@`BNN8ϻo}3x'ōw׿÷^}qO?./y~o]Nw]?_|聋Ǿgn~7/[ŕ##羻/{[ݼ \\~z׺~qE0if6 6Pvhf0e,,UaDl38g;C6`@'̑4-46% 1 kHdf#'J,m@Jkj['Gъ' J#i m 8l S1ld ШTXF6ƱhH*ʠM6Z2`h6T+9@#X6R%Զ!ĂpZfl6T#bM3'hl9%# f9HpLL3 l+` i4: 0Eƶ62JȔP6'R`ÂT԰ 戜[²9la6K;U `F*Bm3lreE cJ!FȹEE 1)(pm4"H(- L,'Ff2lЙ1hsʖ*0̙ -@繨24[ES!۲q*lFjV45Tl*3ٔKj9:(KLKu- L32Gb0Vm`F64;($ذq@͢Q,8V9[Ei$m*%` am 1(BF( % `JbNa23DvV0FsYQd[pRئ**#Հ$cS1};q7_W>\}7?.x~/<{_};p˗{ǛO>z[]޺}gw{Wq=>^8/o^+_7xq塏<į?z9Oz[?ڷ^Kw^̧}=ܺyz}]c;]z֍?ȏo_nÿ~{zwn^xׯ\n//~'=g?g?Gr;O<7^~Wo>G?r#^v5c_gv޺uWp~|_{'}<_޼wOzS[]z{^;@!-sB15SU2"d:Ii`SGNQ3& m(fĆ5 Kjb-a4Z 01$ j rȁЦ@X@t+G";WFG-;OcT&46@ْ̙PZv$%Tmc0vZ:@HUfFl", C(#֐*M8"'4er .`0LVl mN@䘤-Sd5 [PsBbDʰ@٬2N U`csё!#%50J,i9)lG0G98w E`@H-ɜ5vnFK$؎Ҁ26ʖl E f LXB&v*00+fD1e`T \ e!jFl8L*(b5P )@lQ, qB MTb*p &̤Xlbs6![ԤLfqE4UT֦6b1J0kbsI8hiIQ5166*44MMm  0 P̠#HXBƥ.`(Xk3sTMIa,l0$06FՀm8L9d"\QbVfth:O|ɏ}~{G?~:>x]^y᧿xyǞ~O^ût}~>swO}𞻺|@W?~{o[ݸtO?O>W^{kw>Go|[?n˛/ۻxG~ͷ~{}>~㙇:~?.ij=۟o~o[+=|ϓ0V@c0t *22`jsԶ 1Ќ8`p@)lˎ9m:4m ah0QUbԘhmmlb̚3Q 퀆bjFk kaFT"aSې)L)[4cF0 BBm@QsbLSmv.ic;7KD휋i$jb 1l Ye0S Z95Ũaaa6rlVق`:m;+D6+ɦQv8f fNXR38f#PfJf1k2@&qqnb&$E33$cTV6 2 b(bTcSZ3YP:[`:dmV G1i܂SmH,YA3f6+6aĘQ8 ؆ #fFQ6 :l[r'f-(̦ fC]!VbM(6 m8*6%#ellè`f2,L;wkF` K[yFR3ZѶ0kh+2h`FM!SƈMF؂l4(a؎)[bm۶ ʶqȴ5p@at!1i-)a$f MZCG4Dubc4'4 JaӲaXl LTZl۪M;,FM̐v5v=O>ˋ+W~닏<ꏿw/6\_{_၇z?ѭ'ws_uo?{t|z㝗}ןKs_?_K<鸸7ï?{+o|޸9WWwourg?q/^%.'??= kwo//q ܿ'}_[􍷿 O}OՓO??}]?oO}7/?uԇ?˿{@g_ܸ8|ן/~{<=獟ÿko]|Wه>|A9ڶ8aU#(m9c9m@֚hᜀ(lm :̐fmvPbۙ`D2RmۨmlsH 3$3K٥f` [lp088 HYƶM6ٶ%ՑKLMXaXhb0e-lf!Yg87 K-*lqT43iنY2[-ۀm(lؠPe 2h[eN%saV4S8ҴmVh#6 ;Tְ 6:a9eb33U6ۤvBlCes?ʋW._GﳿWz_/;/~}噇?;n;{ou }?cWnKg>̽wǮz&; 埾.}쩻}=s^ڕ;҇>+/o?<ޙ^oSw?o+O?}[W_~B~o{/<=G>7??^qw>=`md bDJŤc9Qt:Chyc9mh:-!ԡK*uvvXiDsG+֖Yc]asIT`vF΍P8X1 9J,3 ufee ƉjPauN lF*.yj۲8UtHN):"igŹʰ2mX2#u l\9 t*ZΎq9G9.YT[*M6;Ֆ&[Xmf8XG8Zml1jbؔcr8J9lf:mmƑ2١j]8qmYfЉi:aK r(1Zpf.G] R̚b6T;7jq;6-3Bǀѡ,l4[UcyYCNsVMal%QaXmK dǒN:h;@8ʴA2$c<,vTi9ӱCs訐\&̎Rmijg2<9:p2:4;ud:Jِm1Li8::.aQjeڔ#s%:vnjsFF8F5ۜ:898\V.g:8 =̼z~;' /`KnFb[Em $vI41 ~8!0al(6G-1j;SbZ5, ƴ-aձ&s!X{vζE5FusnZmjw4 Wc,6U#W4(;35ih;׎E"U[2;ض쪭%VsUݻ %i˽p7w2M#e4[lNqDǤ[`&fSu֝m'v;,0SHmdTyF /߽}8O??yzO?~/~G/=o__7_}?{_؇woao~x{<裧??~{շ?嫟#yy>coǫpzw>'?ŗp~|'?ٷ|o~׋WЇ߿ǟGǿg?G|֬`a[۪ Gv# lf}Nw:0>?[:>u )dv,ZhA@l]ۙ Xz8u[Dv:1Vɝ,!4ʥ1 n Yfⴲvi;mI7)X@mfʽΑKCsҩUZf DыXX除h;lwLeצcl{<6RHղ;nUubLuNk,ZFmmmkg$ e3&vv[;PWuTGM%t2cjڽN5&X;6yvmg zZŤSVmжKT[wKl l۬쐙ieF$ܻ䮌U4%֦:1N+wpqvgS]QVbyӒ]9GN*mMT6f]=h)^8%hlS3.Ybʴm v{u\SA ]|vvllg%Q9ew3TcWg5IF`mk:sc1mZfdmS#(զrҙͶ)SM!r䔚نmNF5iU㈵줌\Z޳taSSKiۺ ejF p<#!<sԉ-U94h.Y0ʴcb%mv.LzkmZv|=xY2P~o~?/ꗿo__ٿ|G~λͷ9}Ϟۯ=Nj_#_| |gO\`mv7 `wxq^ ˟|~՛>|շO/O~G|zWهӏw^⣏?˗_} 3pL6`Ċݻ)v1fh %@j`sfǦjC-FRsݛ{Jg9u[uekɊ܌#mƚPmvN938d*d`-lab٦b \,cP{'jV6@{=.ku(l-K3Jvm {jbulC4r){9$ZlDUژUt=+1@g۹lFc*9VlSf6){c|4 4ImNro.V̎v/N16[evoTkVu)d vg͑^S3Ԧm]kQ{!9Š`ֱ02gwlv;ds[5%mPƵ,Ѩ9mewTh2 l Avl`E75c[s[[Ɂ :Bgt0"fl\+lچT0XF)aS۪ >'6X*fm[tZ9fu`N%1\aVeԣkMɽ]m28fʬmkQuWitswɣؖr:(3( dc6jmdwG&:'U6';mdl5&vѴfbr5NRQ#lTmT]n4p:&VY* 8ݙ Mg93:f6bVX]m98 r4ԙ dy^H,mqqɬ3۳T64 ޽~~s'y t0<~Op`vyvNNOz?~_p߼wW_>9wf3 BZKi׉E,wm4Y@#YKSJ6H# ɘT[c[ ́j v+FZ% cQ6a3-̄ӽcR64Svftl56F-vق!aԌ \r`v-`Al6Al&1etb`X1Aw;Q# ՝b6V$ UfZE1m#fs! -vf46Y@jۨbf6Ҧ Tu!Ț$mV30MEs;ǪY"\+)ltQLfC3$T&]԰e$hemHAbk,%&"pҌShI2mmԵ0. 6lFJb5Ml `2c2 Sl[UAkΪEa21V(vUC*EY5Ϫj bl2L iw5p#Hf&1H.ř4r َ"ؔXm7@0%5"r"6hH"mمIZJ!nLJY1LkM%!] 
F$cՉ5F X`$b^'X600⬅)blH#` S`_ǟ?߼pqNo>^|E >7~:O:G >:p߿{Nj'w`>{z{?7 _y<wy`ͽ?|x 1hQ36&d6fw,tcb FFb[]*+ `ۑ K̆Pcdccf=e1M *2U`aEv/2ۊÈ"[ 0h  F)2HLpGL j@`cb@rbfB33XXr wh ] V f+@`(![ `&j ) Q6t g2ӭ06!ȘUiH!)FRl(cM\րM iFf*ض-ͱ9%dceƴ93af#CHL@pGL`ڨؚe"fk`6fVaPh4X` [hJf42Ä0lEa`E-M9ظikZ-gfZUQ#hfARVek3Z'eŒ' Lf@1[ &6ֱY1 KEHlZlF$]lQYf132YGApgS `l`hHYNcҢjwf&(c6hF&d`dL`"c;`c:YbF08U><vy >_۫>ɋ?9yS/ݗ??|=~ˏ?~O/ᛯy??z;ػ߼ǟ>߿oo/=}o/>髇w(=6@mvVhDlh,@mY# 0,ba+L U3SH9(wmŢX Mh&6 :h٦H`[1%iVU``afP&0U6$l`RsIڄ Laf#1sÅ `R ӡ qĮutt={O&L $!!,,RjY^*O1=Y`ybyP&$2 Yf=MoYUpW-LMACNf.mL) ʆ bF#bČh` -c%qTYʙibf`(4m¦f:Զ{hm0,bln eC`idfAllMd k&avc,Rb3lZǻ'Ѵ !e :!!FHe4@6YʒlH0 @#f` fa1lP͐,8 d7&ML \$BL1vr Y Au7fi M23QMZKqȮ(Ym)f͚AeȶT X ",Qm㐶91-&$ 9u"͔149YUfU٘%0+f֊a p,k6+(6?3_ȧ^~{}}|^xp>KCw^?~'{/Ǟz=w/~?}*|虗_~c}iƏ߼ǧO<SO?_OWx;p{8=y?O}?#wy?/}ԋO3O}c<sNSi?揾z~_‡}BszGxy ?OWĿKyc{o~g>?_y_y^/?o|WO|{䩏~3'_zxwS}k|wW='_~?}Ͼ蟾6ŏo/?˿~W?S~ƫ~g_‹{񩇷k_m>S?K?_Wƛ[΋O}g_|w7G?ӟ[G~/{^x^[?/'o<3/^z;o}o/~{~O_OG+3 v񝯾Ͼڬ\}D8l u)is,ƺv`= R9^mLpf9]*mLfS]s5a%fʴxoXӖ9]WtsLq'#4ebIc;pTx\=Chvxx.=ڃNfhԘq7u::=Εv6m=L5v[Q{&ٚV3mÉ^#gH{X,g S^=nG%Jܫ#hSe{:l=rx8Hem9gװҴ=2ܩ3!llVǁ-k:O fжn灻:aTls.vt%lꦭ1YV;Si{m7u9]`X9n*q6ږ6P#-f8R]:L;g1  sOgjns_ޏ_|?^x~7W_'vy{?O'?xw;kg˿~k~o|O/>wo|_ky__ܯO?ɟ~}WG|?/3=׿ow_̟ŗ~S/'?7㷾_}|̻}'>7_/g_Oo?|aW~_Ÿg~__Wo?'/qa6aQ25fL'fj6]-%`VLcBv6S„s s6۶0HvdNƶG7M0:3fSǮ lNmvV, ̶sl4ǑatͽᚁH]wNP V5i-֑6lC6TڨRNHta7)؆Ľ ضvll p&YɶeؤژT:YE@ݭNcВa9@1֔i`P@ؒ7U)jt#]Cd$1qn2ntSU6`3kNǶȶTmؐ;.*ڲ2%m6,dw]Yj0v3v4v\ڌ*͠xٴ6]Um$I)C b7`swM۪ڶRfU`¶dΖ #۴m #lb6L:GCakSdwfb6讬6PlwVl!0b-1quFVmL9ζh[v-]qhٔvlK.p r[tr\ԶæMێF toU+eMe;@;K{Wa$hݩaw~?x/8_~/>}{o}?'>~zͯA>=}w^?G_;?|?~o_~?O7W~<}g>wƫ '~v0139&H!wuT*lXz.*B؞xf9@|g'|dϷ_޽y7^~??}8vo?x/x|O^_o{?7~z5>gW[|gWo>'?o_߾z_~˟|7}o_釟}O>zx}o?|o@O>O7_} /_|GŇ/o~|?~믾y/'g/>xqo/~>~/~^꧿ (-P9vIm- waellMl"&ijyiܫ4kr6MjSldq2l(ylK&{& :I l0Lgv- fM娫CsYCJw5: EB!bv1yuOU=+ QѰ;Km:㤘ʽUILuXsZbCm갹t+fD(ò9lI'6ܝuOl֩lh0mJaRޝ3at!T&FHc!#%ŕd`1ùd6֥0贻A:0ݨtawC4Q`]KvaژY]:6&ȴqLQScwNa9: rN=GLYmieb$]lX  l< cnm`bwJbhFRg6{oE`͐nvR64A(؆ ;ݘ6v'Ac6+`!Akf5&+1e$dF{' l9u03-!aA 4(342fҤl{VdwжؠatW[4LmIte&ڦk@lu6XCC`cwqbИeTl[B%w#enC0n0P5 ИEUA 0lmT bd7h Ьfw{&Ƕ*,h--&-V0([lw!C` B6b,%4 U&[ĶF2l6MFG\R H4l єUB]l1M\i4f J6T6vblf!b(!mA(rd6l dpڄ ض*4baq` 31(&DH#auﴄ̆L)bAlWBeіqЌ 5ec1"(ʵ[qL&]2l6% Kal:f`LVQmHP[Z\kb L$B@$jmYT" !3 SE-+2kfls@1Tʬ@XX1b`ky 2HHmI9l2#3lsfي@iRd+ F"ݢ tmcDh16DAb$c4wnк0Q3:fkieyh3jmFQbJژ5tB)4)i5Nfl(LlHH4CXنActs`d$1c.8& 2X ͭЀ%6@لfKm Fڵt8)P#YPCBTšmیAj(4fC ʹReVp Ftea`FS-61l6 "@l "@JfS[J0 U4I*NKvl lj lDۆ]4A``BBcP` ¶`AMI`6 *lh[$e&RYĢ6;3Um7f f(̡V0 h0 5 H2c5*K@6 ̀lpҀ͆l@lM$FdUb6lh Ʀ:gV Rc+"FmS11 5 U %PSj` cc¶2 ul6Pm0HTjk6͌al0j -LRb@0`HlF.0̴ ۊ"&[B ` A 06Y-ԬV&mlm3 $`PlEaӀ1ı(4f "f@ Ueنl6`8(Nb C3f\UƬ&8M(hޛF`fl {{0c-F*˶6(]f'͘5T]f,5!l;mkhmc]@N]=fuw12.%j=7@Qub!K$*.#k2׉ ۱YUmD*2lдv%۶vDiQmsS2a8ZIg TA032ْ9A #}Zv͘%c\k'LձZ\H0CtɌ:شi[Sk jM!ۭcfhC w+6VMissZ̆Qf:UZmka زꞸS8'Af@IԱ;U6ʚmfXښ TmÄشvbGUnͤPIK.9Ţ3m7Qa:5ڶŪ* a8l۽u4;`6k\KaR"w2K+5$l0Í-QYF-]٢t fZe \ 8Fm˪+cG8U lJoYkcxz{edm~W7I-y{re4m@om F[=:4G4 mw06tT޹d!wٶ>+cCr65[(AsX2/[[kfg\t6ޯfȺM7VvMyPRT7S1x )av6uwlZj)[-{~wձ*v=$ޣ:7C=]E"}le mj0j36TfK]&PmoZ|5RL`N6`h[T0F!6Z5QmYňl`CaӨ1TIi[[/rAmw+03S6`EX6Mm gIc ޺`SDX,Z`Eڛl]SP7˞*˖b"ɬKS yre-#eͶj hۦo[&WˮQ'ak66u/L- 3f,] V1{+"Z뮁B1L 64 0naR0`4P؞T1fEԘUٙ 6D g16ފܤQ2adR*62el]ٲQdAoҔ7mӻx̖ĚQh6`oN5o'UBmk[Z2ڢe ]=fն*O͚afl^6, S1X*EoO5**)Ûrlb1fmV[LVۻf׾Yl5)YIG'\EZe6uy9g0q̬ %l Hc^@*Va.2̦Ҩ&[l5$j koM1fRjs3lA@ͼ@V@%@aPUh@fc-HȶuQKc#f` 6]CA5[鄚قy4# (b3SaL Xef 3[cׇXJkrm&u%P23zm#!ېdk{mbQ̖e 6R l[ضQŶ-T lm(-`!6,1Ll %kAfPN0yBc¹6 [1D1hD HlcFL$le+$FcM4f fR * a!NjҲ[%g ##6XMXO&@6&y0&; E3[D`&d.uc&*\,LĴ+EMɀ4l<[m 4xWd)ٌT r`PiyS1gec-%3D2jXكJh ImUA16c%el 3\)W XlhJCJ{͖*34l6rsۼS@F!ٔyN406AD,,_? 
3Tf@ "lL2hdpJ, B6[l, ƀ Q-ґ& &H@T"6ƀmYD30mCdZ"K0 TL Բ zY3[6‚<:N1=hEaH{7`ama$m`" c@4azD ¼m;Bȳ̠ $04fhl,kSRŢ4 (03 06@J6H$D@lAj T 6Fd3$@`MeRLӘ1dA+ `X "0 lc2 -cPl36px,2QƘ0Ja 0Fm `@@"fCC HČL&  `lL`i0@[0 0RJl @["8HJ lllLVlMmlYd(d746*  70ֆa606lFU2 X̰1fTl &- 4Ͱ056Ha6[a`Զ)'(ڲmFH[F~?77@?V-&ai&Y+[ 6PϊX&YaV!lsX-fʨʢ3H*f.`Ʀ&ðX0A-L#=ݓJd޺`93 B-fSQ%ӈe"GX}2m%-Gi6Z4`,@ l,P-- Xy{h6ʘ J@mlI)`e>foj@HRJg!q3%+Fx"H6k(2omeP c;FllQb 6ƹ@ 0/STb2Rfs IR6eZfIm.@$Jۊeb] `K ulm^kULm% V0oE>mfL,ֈRjm\m6A6`[MI/3T `1%sojkm0B6 ٚٲAʠ 0\x++ F^ 2^ڠ E& @&fHU5e6MlVֶ]dmbd0R,no`H4@f"hR4A(lk 1/K.[ ۊ eq6ےbج&i˨boCSJO~G]|//~}l8E@AUU66JA봮ei!JU%cpSJ)^FEQ-c"J Xci(ZmZQ"6M)ujmm{PY֞lˠ6mM4V Y ٌVƂ!k 2f۳mtRmyooclEbc24Vo (eW'&]Ͷ;UU,m^,hIƛM2Bcs0dTJf3ִD<VU )C+4Hl3Ʋ(6ٲ$d@AU%cSE)QϖijLHJMkoReR "0f(=ѤV"l-P+r1Ybc{۶h[J`i f6qi56-Zfq:A)lUwUƼz y6i[걍ِAFJAlmkQ)XX@4X@X&`u566kk1ۀ`+"mI[P [m*Hz6MiRUec M*YRՀ??ͦ ~Ͽo?tu:*2dvrlCUfta[ٺX +عW :0P,-O$ 9VMvڶ,SۆFa:LUWu(RݱJ{vgT۞U ≍.̕"hdr]7O͔*|<U5MhUFcS*AUuL2JS2o;j]V!MWB`Rt*.My?5??_=|?g?_>u:miի, $ųBT4oDU\ր];ɺ۔mwƪvyoU1TJmLO").Iak˓ "(bZjQN7AG 34YMqtc U*]iݧ6U'-VksHFxHնGL[*Hm$a_~[ZEkYVu=BRǵ^w-Yi*iZ!޻j&<+inVЦJljF,[(Np.m8^\wW HjTܣIWVUirbZlm=JŝHUʹd$zWcstm꒼jɊl=']R1%Re}y])U nsY[ VRnDU L,yx2{DUk,l(zu}ۖ$l[I$4 1mK7Ie>#LMZ' Lv;׬ "UVw2o/qmnwWNj tB*^zRUYu굸ckkۋN0RR*ƴQGƛksUuG`0kHNbµP[Tټݧ :kYjf*I&r]}6۳@Ky Ve&W`6l5ڮmDF[mfOÃ,7<>lL[,W a}>fLgn[`(;{RݝFyQƫK4_kd[!6/qc*7c4жU[b{Fc&^+6 &f20omo53*g{\wYq{jc$d6VM6$ 1`03TlGLf[W06ֵw=% F䄍O f]+qc^]نul4Ub`jVa[)cSwޛ0kݪ٫Kk|թm ڈ786`raCWmek{\4O,iImL*lbiDQlX̨6{D۸޶4fg|\ 0 FcHÔjffͦQf[Ua]Qʣ-dьUǗ֐þ몠>2 oXWm̓f,)[F+Ly{u¶\Uܽ7 k693Wea3JyF=!VmWmo67O,mSe3ecL<UuM1c4L>?/?|5{xZe\h7 U I{{X T6X`-I2l-5ӨR<6 ( SmҊ٫,Y3YFm$M( Faà5) ۫lmiU6&#,ϦkHL?_omZa 6L[ ZU:ֶF)жVbofZL1 &yv ʶJUkVM-@NN`۳$TtThf(HfM6MĘ(&թتFjF ` 2lc*m16 ȆAL B2. m3Wd[Y--bo>*-h6 Li&վC:f*BLLYL hƨxr$m[S& Քg:,mv˼|6 Òeh&m3ͬ#(fl{m!31+a3Uef[I' `}L,;)m@e۶M-U53fYY[fizs hĚgTl `Im{uC͒1<PmmB,֝@N3CK4]mX3۪ tlHC`%ݠh6˶˛(]k`2c)ΞkZp K6dVŶMXQ1V%V(%3b,Lp6 tyڦٹDޘ0ۺ6T JY2rl mdB[ 3x3ζeR6V fݖfY٤#TCMC &n6L=20.Y0f!!{$AYJ*nPaVlcw cR̒Bcֺ5&ZILa[2+6۰r&mD%e)ْjQMFpY"[kf Lc5IS0l!h-Q-eQlkFbHȨX(V#6X&*X[kclfYl$(fPaZeX2Ӯ6-[y­ vfM0d]Y͛eiV-6Q"#B]38:0orXXcoݵΰYrmc&'ĖbTm6!cl{1֬*.[ [xe+m̼y:m>a-ml2H01zƣ*Mx<%Q.63XZed6qI7xk)޴NBUoܚ0< fWm?ΰ_Mrk=z ASXͶjبQDTFSs`AuW6md&V5Dٺzd l\ifjjvuqڴj+"mEmՐ6{ǍQ*`S64{CW3ۖr4 0llyhlXwyƶunrZ@f O<#Wl[:ܴZc1M6`dˮv{/P83V6qݶˆ>-FkXZ"ž<CQLiBۢ7oyct҂ԽʘnsFJ6G-PZwٛvumá]em-J ̶(czI7m1{wI2b}ڻmBۣ\R{6Ux[z~wf,۲ K{F I񀷹W۱6tڛzՒxUX<ۛw݀fZ0hULs;gq|޼UlOg!#s)y߈Ze4-}V6Imֻ޶:x!mOw4uIU mҶ.ccmWdh86`ŘeO_fv}LV񗖙aKټ b䲝C\Mmsj1Z B2<=66aYm-amŖma-8klQo5FT$: dvfFf!x#HmM# iyʶmU! 
[binary figure data omitted; recoverable SVG text: axis labels "x", "y", "z" and panel titles "Advanced indexing", "Advanced interpolation"]
python-xarray-2026.01.0/doc/_static/logos/0000775000175000017500000000000015136607163020361 5ustar alastairalastair
python-xarray-2026.01.0/doc/_static/logos/Xarray_Logo_FullColor_InverseRGB_Final.png0000664000175000017500000012407315136607163030444 0ustar alastairalastair
[binary image data omitted for the doc/_static assets: Xarray_Logo_FullColor_InverseRGB_Final.png, Xarray_Logo_RGB_Final.png, Xarray_Logo_FullColor_InverseRGB_Final.svg, Xarray_Icon_Final.svg, Xarray_Icon_Final.png, Xarray_Logo_RGB_Final.svg, numfocus_logo.png]
python-xarray-2026.01.0/doc/gallery/0000775000175000017500000000000015136607163017247 5ustar alastairalastair
python-xarray-2026.01.0/doc/gallery/plot_cartopy_facetgrid.py0000664000175000017500000000240615136607163024352 0ustar alastairalastair
"""
==================================
Multiple plots and map projections
==================================

Control the map projection parameters on multiple axes

This example illustrates how to plot multiple maps and control their extent
and aspect ratio.

For more details see `this discussion`_ on github.

.. _this discussion: https://github.com/pydata/xarray/issues/1397#issuecomment-299190567
"""

import cartopy.crs as ccrs
import matplotlib.pyplot as plt

import xarray as xr

# Load the data
ds = xr.tutorial.load_dataset("air_temperature")
air = ds.air.isel(time=[0, 724]) - 273.15

# This is the map projection we want to plot *onto*
map_proj = ccrs.LambertConformal(central_longitude=-95, central_latitude=45)

p = air.plot(
    transform=ccrs.PlateCarree(),  # the data's projection
    col="time",
    col_wrap=1,  # multiplot settings
    aspect=ds.sizes["lon"] / ds.sizes["lat"],  # for a sensible figsize
    subplot_kws={"projection": map_proj},  # the plot's projection
)

# We have to set the map's options on all four axes
for ax in p.axes.flat:
    ax.coastlines()
    ax.set_extent([-160, -30, 5, 75])
    # Without this aspect attributes the maps will look chaotic and the
    # "extent" attribute above will be ignored
    ax.set_aspect("equal")

plt.show()
python-xarray-2026.01.0/doc/gallery/plot_control_colorbar.py0000664000175000017500000000152115136607163024221 0ustar alastairalastair
"""
===========================
Control the plot's colorbar
===========================

Use ``cbar_kwargs`` keyword to specify the number of ticks.
The ``spacing`` kwarg can be used to draw proportional ticks.
"""

import matplotlib.pyplot as plt

import xarray as xr

# Load the data
air_temp = xr.tutorial.load_dataset("air_temperature")
air2d = air_temp.air.isel(time=500)

# Prepare the figure
f, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(14, 4))

# Irregular levels to illustrate the use of a proportional colorbar
levels = [245, 250, 255, 260, 265, 270, 275, 280, 285, 290, 310, 340]

# Plot data
air2d.plot(ax=ax1, levels=levels)
air2d.plot(ax=ax2, levels=levels, cbar_kwargs={"ticks": levels})
air2d.plot(
    ax=ax3, levels=levels, cbar_kwargs={"ticks": levels, "spacing": "proportional"}
)

# Show plots
plt.tight_layout()
plt.show()
python-xarray-2026.01.0/doc/gallery/README.txt0000664000175000017500000000003615136607163020744 0ustar alastairalastair
.. _recipes:

Gallery
=======
python-xarray-2026.01.0/doc/gallery/plot_colorbar_center.py0000664000175000017500000000201315136607163024016 0ustar alastairalastair
"""
==================
Centered colormaps
==================

xarray's automatic colormaps choice
"""

import matplotlib.pyplot as plt

import xarray as xr

# Load the data
ds = xr.tutorial.load_dataset("air_temperature")
air = ds.air.isel(time=0)

f, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(8, 6))

# The first plot (in kelvins) chooses "viridis" and uses the data's min/max
air.plot(ax=ax1, cbar_kwargs={"label": "K"})
ax1.set_title("Kelvins: default")
ax2.set_xlabel("")

# The second plot (in celsius) now chooses "BuRd" and centers min/max around 0
airc = air - 273.15
airc.plot(ax=ax2, cbar_kwargs={"label": "°C"})
ax2.set_title("Celsius: default")
ax2.set_xlabel("")
ax2.set_ylabel("")

# The center doesn't have to be 0
air.plot(ax=ax3, center=273.15, cbar_kwargs={"label": "K"})
ax3.set_title("Kelvins: center=273.15")

# Or it can be ignored
airc.plot(ax=ax4, center=False, cbar_kwargs={"label": "°C"})
ax4.set_title("Celsius: center=False")
ax4.set_ylabel("")

# Make it nice
plt.tight_layout()
plt.show()
python-xarray-2026.01.0/doc/gallery/plot_lines_from_2d.py0000664000175000017500000000161415136607163023403 0ustar alastairalastair
"""
==================================
Multiple lines from a 2d DataArray
==================================

Use :py:func:`xarray.plot.line` on a 2d DataArray to plot selections as
multiple lines.

See :ref:`plotting.multiplelines` for more details.
"""

import matplotlib.pyplot as plt

import xarray as xr

# Load the data
ds = xr.tutorial.load_dataset("air_temperature")
air = ds.air - 273.15  # to celsius

# Prepare the figure
f, (ax1, ax2) = plt.subplots(1, 2, figsize=(8, 4), sharey=True)

# Selected latitude indices
isel_lats = [10, 15, 20]

# Temperature vs longitude plot - illustrates the "hue" kwarg
air.isel(time=0, lat=isel_lats).plot.line(ax=ax1, hue="lat")
ax1.set_ylabel("°C")

# Temperature vs time plot - illustrates the "x" and "add_legend" kwargs
air.isel(lon=30, lat=isel_lats).plot.line(ax=ax2, x="time", add_legend=False)
ax2.set_ylabel("")

# Show
plt.tight_layout()
plt.show()
python-xarray-2026.01.0/doc/get-help/0000775000175000017500000000000015136607163017315 5ustar alastairalastair
python-xarray-2026.01.0/doc/get-help/socials.rst0000664000175000017500000000051115136607163021501 0ustar alastairalastair
.. _socials:

Social Media
============

Xarray is active on several social media platforms. We use these platforms to
share updates and connect with the user community.

- `Discord `__
- `Bluesky `__
- `Twitter(X) `__
python-xarray-2026.01.0/doc/get-help/faq.rst0000664000175000017500000005177615136607163020634 0ustar alastairalastair
.. _faq:

Frequently Asked Questions
==========================

.. jupyter-execute::
    :hide-code:

    import numpy as np
    import pandas as pd
    import xarray as xr

    np.random.seed(123456)

Your documentation keeps mentioning pandas. What is pandas?
-----------------------------------------------------------

pandas_ is a very popular data analysis package in Python with wide usage in
many fields. Our API is heavily inspired by pandas — this is why there are so
many references to pandas.

.. _pandas: https://pandas.pydata.org

Do I need to know pandas to use xarray?
---------------------------------------

No! Our API is heavily inspired by pandas so while knowing pandas will let you
become productive more quickly, knowledge of pandas is not necessary to use
xarray.
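If you are already comfortable with pandas, it may help to know that the two
libraries convert directly into one another. A minimal sketch of such a round
trip follows; the data and names here are purely illustrative:

.. code:: python

    import pandas as pd

    import xarray as xr

    # a labelled 1-D pandas object
    series = pd.Series([10, 20, 30], index=pd.Index([1, 2, 3], name="x"))

    # convert it to an xarray.DataArray and back again
    arr = xr.DataArray.from_series(series)
    roundtripped = arr.to_series()

    # the same pattern works for tabular data
    ds = xr.Dataset.from_dataframe(series.to_frame(name="temperature"))
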
Should I use xarray instead of pandas?
--------------------------------------

It's not an either/or choice! xarray provides robust support for converting
back and forth between the tabular data-structures of pandas and its own
multi-dimensional data-structures. That said, you should only bother with
xarray if some aspect of data is fundamentally multi-dimensional. If your data
is unstructured or one-dimensional, pandas is usually the right choice: it has
better performance for common operations such as ``groupby`` and you'll find
far more usage examples online.

Why is pandas not enough?
-------------------------

pandas is a fantastic library for analysis of low-dimensional labelled data -
if it can be sensibly described as "rows and columns", pandas is probably the
right choice. However, sometimes we want to use higher dimensional arrays
(`ndim > 2`), or arrays for which the order of dimensions (e.g., columns vs
rows) shouldn't really matter. For example, the images of a movie can be
natively represented as an array with four dimensions: time, row, column and
color.

pandas has historically supported N-dimensional panels, but deprecated them in
version 0.20 in favor of xarray data structures. There are now built-in methods
on both sides to convert between pandas and xarray, allowing for more focused
development effort. Xarray objects have a much richer model of dimensionality -
if you were using Panels:

- You need to create a new factory type for each dimensionality.
- You can't do math between NDPanels with different dimensionality.
- Each dimension in an NDPanel has a name (e.g., 'labels', 'items',
  'major_axis', etc.) but the dimension names refer to order, not their
  meaning. You can't specify an operation as to be applied along the "time"
  axis.
- You often have to manually convert collections of pandas arrays
  (Series, DataFrames, etc) to have the same number of dimensions. In
  contrast, this sort of data structure fits very naturally in an xarray
  ``Dataset``.

You can :ref:`read about switching from Panels to xarray here `.
pandas gets a lot of things right, but many science, engineering and complex
analytics use cases need fully multi-dimensional data structures.

How do xarray data structures differ from those found in pandas?
----------------------------------------------------------------

The main distinguishing feature of xarray's ``DataArray`` over labeled arrays
in pandas is that dimensions can have names (e.g., "time", "latitude",
"longitude"). Names are much easier to keep track of than axis numbers, and
xarray uses dimension names for indexing, aggregation and broadcasting. Not
only can you write ``x.sel(time='2000-01-01')`` and ``x.mean(dim='time')``, but
operations like ``x - x.mean(dim='time')`` always work, no matter the order of
the "time" dimension. You never need to reshape arrays (e.g., with
``np.newaxis``) to align them for arithmetic operations in xarray.

Why don't aggregations return Python scalars?
---------------------------------------------

Xarray tries hard to be self-consistent: operations on a ``DataArray`` (resp.
``Dataset``) return another ``DataArray`` (resp. ``Dataset``) object. In
particular, operations returning scalar values (e.g. indexing or aggregations
like ``mean`` or ``sum`` applied to all axes) will also return xarray objects.

Unfortunately, this means we sometimes have to explicitly cast our results from
xarray when using them in other libraries. As an illustration, the following
code fragment
.. jupyter-execute::

    arr = xr.DataArray([1, 2, 3])
    pd.Series({"x": arr[0], "mean": arr.mean(), "std": arr.std()})

does not yield the pandas DataFrame we expected. We need to specify the type
conversion ourselves:

.. jupyter-execute::

    pd.Series({"x": arr[0], "mean": arr.mean(), "std": arr.std()}, dtype=float)

Alternatively, we could use the ``item`` method or the ``float`` constructor to
convert values one at a time

.. jupyter-execute::

    pd.Series({"x": arr[0].item(), "mean": float(arr.mean())})

.. _approach to metadata:

What is your approach to metadata?
----------------------------------

We are firm believers in the power of labeled data! In addition to dimensions
and coordinates, xarray supports arbitrary metadata in the form of global
(Dataset) and variable specific (DataArray) attributes (``attrs``).

Automatic interpretation of labels is powerful but also reduces flexibility.
With xarray, we draw a firm line between labels that the library understands
(``dims`` and ``coords``) and labels for users and user code (``attrs``). For
example, we do not automatically interpret and enforce units or
`CF conventions`_. (An exception is serialization to and from netCDF files.)

.. _CF conventions: https://cfconventions.org/latest.html

An implication of this choice is that we do not propagate ``attrs`` through
most operations unless explicitly flagged (some methods have a ``keep_attrs``
option, and there is a global flag, accessible with
:py:func:`xarray.set_options`, for setting this to be always True or False).
Similarly, xarray does not check for conflicts between ``attrs`` when combining
arrays and datasets, unless explicitly requested with the option
``compat='identical'``. The guiding principle is that metadata should not be
allowed to get in the way.

In general xarray uses the capabilities of the backends for reading and writing
attributes. That has some implications on roundtripping. One example for such
inconsistency is that size-1 lists will roundtrip as single element (for
netcdf4 backends).

What other netCDF related Python libraries should I know about?
----------------------------------------------------------------

`netCDF4-python`__ provides a lower level interface for working with netCDF and
OpenDAP datasets in Python. We use netCDF4-python internally in xarray, and
have contributed a number of improvements and fixes upstream. Xarray does not
yet support all of netCDF4-python's features, such as modifying files on-disk.

__ https://unidata.github.io/netcdf4-python/

Iris_ (supported by the UK Met office) provides similar tools for in-memory
manipulation of labeled arrays, aimed specifically at weather and climate data
needs. Indeed, the Iris :py:class:`~iris.cube.Cube` was direct inspiration for
xarray's :py:class:`~xarray.DataArray`. Xarray and Iris take very different
approaches to handling metadata: Iris strictly interprets `CF conventions`_.
Iris particularly shines at mapping, thanks to its integration with Cartopy_.

.. _Iris: https://scitools-iris.readthedocs.io/en/stable/
.. _Cartopy: https://cartopy.readthedocs.io/stable/

We think the design decisions we have made for xarray (namely, basing it on
pandas) make it a faster and more flexible data analysis tool. That said, Iris
has some great domain specific functionality, and there are dedicated methods
for converting back and forth between xarray and Iris. See
:ref:`Reading and Writing Iris data ` for more details.

What other projects leverage xarray?
------------------------------------

See section :ref:`ecosystem`.
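Returning to the Iris interoperability mentioned above, a minimal round-trip
sketch looks like the following. It assumes the optional ``iris`` dependency is
installed, and the array contents and names are purely illustrative:

.. code:: python

    import numpy as np

    import xarray as xr

    # a small DataArray with named dimensions (made-up data)
    arr = xr.DataArray(np.zeros((2, 3)), dims=("time", "space"), name="example")

    # convert to an iris.cube.Cube and back again
    cube = arr.to_iris()
    roundtripped = xr.DataArray.from_iris(cube)
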
How do I open format X file as an xarray dataset?
--------------------------------------------------

To open a format X file in xarray, you need to know the `format of the data `_ you want to read. If the format is supported, you can use the appropriate function provided by xarray. The following table lists the functions used for different file formats in xarray, as well as links to other packages that can be used:

.. csv-table::
   :header: "File Format", "Open via", "Related Packages"
   :widths: 15, 45, 15

   "NetCDF (.nc, .nc4, .cdf)", "``open_dataset()`` OR ``open_mfdataset()``", "`netCDF4 `_, `cdms2 `_"
   "HDF5 (.h5, .hdf5)", "``open_dataset()`` OR ``open_mfdataset()``", "`h5py `_, `pytables `_"
   "GRIB (.grb, .grib)", "``open_dataset()``", "`cfgrib `_, `pygrib `_"
   "CSV (.csv)", "``pandas.read_csv()`` + ``Dataset.from_dataframe()``", "`pandas`_, `dask `_"
   "Zarr (.zarr)", "``open_dataset()`` OR ``open_mfdataset()``", "`zarr `_, `dask `_"

.. _pandas: https://pandas.pydata.org

If you are unable to open a file in xarray:

- Check that you have all necessary dependencies installed, including any optional dependencies (like scipy, h5netcdf, cfgrib, etc. as mentioned below) that may be required for your specific use case.

- If all necessary dependencies are installed but the file still cannot be opened, check whether a specialized backend is available for the file format you are working with. You can consult the xarray documentation or the documentation for the file format to determine if a specialized backend is required, and if so, how to install and use it with xarray.

- If the file format is not supported by xarray or any of its available backends, you may need to use a different library or tool to work with the file. Consult the documentation for the file format to determine which tools are recommended for working with it.

Xarray chooses a default engine to read files, usually determined by the file extension or type. If you don't specify the engine, xarray will try to guess it based on the file extension or type, and may fall back to a different engine if it cannot determine the correct one. It is therefore good practice to specify the engine explicitly, especially when working with complex data formats or non-standard file extensions, to ensure that the correct backend is used.

:py:func:`xarray.backends.list_engines` returns a dictionary of the available engines and their ``BackendEntrypoint`` objects. You can use the ``engine`` argument to specify the backend when calling ``open_dataset()`` or other reading functions in xarray, as shown in the per-format examples below.
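For instance, you can quickly check which engines are available in your environment (a minimal sketch; the exact set of engines depends on which optional libraries you have installed):

.. code:: python

    import xarray as xr

    # Mapping of engine names (e.g. "netcdf4", "h5netcdf", "zarr") to their
    # BackendEntrypoint objects; contents depend on the installed libraries.
    engines = xr.backends.list_engines()
    print(sorted(engines))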
NetCDF
~~~~~~

If you are reading a netCDF file with a ".nc" extension, the default engine is ``netcdf4``. However, if you have files with non-standard extensions, or the file format is ambiguous, specify the engine explicitly to ensure that the correct backend is used. Use :py:func:`~xarray.open_dataset` to open a NetCDF file and return an xarray Dataset object.

.. code:: python

    import xarray as xr

    # use xarray to open the file and return an xarray.Dataset object using the netcdf4 engine
    ds = xr.open_dataset("/path/to/my/file.nc", engine="netcdf4")

    # Print the Dataset object
    print(ds)

    # use xarray to open the file and return an xarray.Dataset object using the scipy engine
    ds = xr.open_dataset("/path/to/my/file.nc", engine="scipy")

We recommend installing ``scipy`` via conda::

    conda install scipy

HDF5
~~~~

Use :py:func:`~xarray.open_dataset` to open an HDF5 file and return an xarray Dataset object. You should specify the ``engine`` keyword argument when reading HDF5 files with xarray, as there are multiple backends that can be used to read HDF5 files, and xarray may not always be able to automatically detect the correct one based on the file extension or file format.

To read HDF5 files with xarray, you can use the :py:func:`~xarray.open_dataset` function with the ``h5netcdf`` backend, as follows:

.. code:: python

    import xarray as xr

    # Open the HDF5 file as an xarray Dataset
    ds = xr.open_dataset("path/to/hdf5/file.hdf5", engine="h5netcdf")

    # Print the Dataset object
    print(ds)

We recommend installing the ``h5netcdf`` library via conda::

    conda install -c conda-forge h5netcdf

If you want to use the ``netCDF4`` backend to read a file with a ".h5" extension (which is typically associated with the HDF5 file format), you can specify the engine argument as follows:

.. code:: python

    ds = xr.open_dataset("path/to/file.h5", engine="netcdf4")

GRIB
~~~~

You should specify the ``engine`` keyword argument when reading GRIB files with xarray, as there are multiple backends that can be used to read GRIB files, and xarray may not always be able to automatically detect the correct one based on the file extension or file format. Use the :py:func:`~xarray.open_dataset` function with the ``cfgrib`` engine to open a GRIB file as an xarray Dataset.

.. code:: python

    import xarray as xr

    # define the path to your GRIB file and the engine you want to use, then call
    # ``open_dataset()`` to open the file with the specified engine and return an xarray.Dataset object
    ds = xr.open_dataset("path/to/your/file.grib", engine="cfgrib")

    # Print the Dataset object
    print(ds)

We recommend installing ``cfgrib`` via conda::

    conda install -c conda-forge cfgrib

CSV
~~~

Xarray does not include a CSV engine of its own; by default, CSV files are read with the ``pandas`` library and then converted to an xarray object. In general, pandas' default parser is sufficient for most use cases. If you are working with very large CSV files, or need processing that pandas does not support, you may want to use a different tool such as dask. To load a CSV file into xarray, read it into a pandas DataFrame and convert it to a Dataset as follows:

.. code:: python

    import xarray as xr
    import pandas as pd

    # Load the CSV file into a pandas DataFrame using pandas' "c" parser engine
    df = pd.read_csv("your_file.csv", engine="c")

    # Convert the pandas DataFrame to an xarray.Dataset
    ds = xr.Dataset.from_dataframe(df)

    # Print the resulting xarray Dataset
    print(ds)

Zarr
~~~~

When opening a Zarr dataset with xarray, the ``engine`` is automatically detected based on the file extension or the type of input provided. If the dataset is stored in a directory with a ".zarr" extension, xarray will automatically use the "zarr" engine. To read Zarr stores with xarray, use the :py:func:`~xarray.open_dataset` function and specify the path to the Zarr store as follows:

.. code:: python

    import xarray as xr

    # use xarray to open the file and return an xarray.Dataset object using the zarr engine
    ds = xr.open_dataset("path/to/your/file.zarr", engine="zarr")

    # Print the Dataset object
    print(ds)

We recommend installing ``zarr`` via conda::

    conda install -c conda-forge zarr

There may be situations where you need to specify the engine manually using the ``engine`` keyword argument, for example if a Zarr store is saved in a directory that does not end in ".zarr"; in that case, pass ``engine="zarr"`` explicitly when opening the dataset.

Some packages may have additional functionality beyond what is shown here. You can refer to the documentation for each package for more information.

How does xarray handle missing values?
--------------------------------------

**Xarray represents missing values with NaN** (``np.nan``).

- ``np.nan`` is used to represent missing values in labeled arrays and datasets. It is a commonly used standard for representing missing or undefined numerical data in scientific computing, and is a constant in NumPy meaning "Not a Number".

- Most of xarray's computation methods are designed to handle missing values appropriately. For example, when computing reductions such as a sum or mean over arrays that contain missing values, xarray will by default skip the missing values and only operate on the valid data. This makes it easy to work with data that may contain missing or undefined values without having to worry about handling them explicitly.

- Many of xarray's `aggregation methods `_, such as ``sum()``, ``mean()``, ``min()``, ``max()``, and others, have a ``skipna`` argument that controls whether missing values (represented by NaN) should be skipped (``True``) or included in the calculation (``False``). By default, ``skipna`` is set to ``True``, so missing values are ignored when computing the result. However, you can set ``skipna`` to ``False`` if you want missing values to propagate into the result, as shown in the short example after this list.

- When `plotting `_ an xarray dataset or array that contains missing values, xarray will simply leave the missing values as blank spaces in the plot.

- We have a set of `methods `_ for manipulating missing and filling values.
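As a quick illustration of the ``skipna`` behaviour described above, here is a minimal sketch with toy data:

.. code:: python

    import numpy as np
    import xarray as xr

    da = xr.DataArray([1.0, np.nan, 3.0], dims="x")

    # NaN values are skipped by default
    print(da.mean().item())  # 2.0

    # With skipna=False the NaN propagates into the result
    print(da.mean(skipna=False).item())  # nan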
How should I cite xarray?
-------------------------

If you are using xarray and would like to cite it in an academic publication, we would certainly appreciate it. We recommend two citations.

1. At a minimum, we recommend citing the xarray overview journal article, published in the Journal of Open Research Software.

   - Hoyer, S. & Hamman, J., (2017). xarray: N-D labeled Arrays and Datasets in Python. Journal of Open Research Software. 5(1), p.10. DOI: https://doi.org/10.5334/jors.148

   Here’s an example of a BibTeX entry::

       @article{hoyer2017xarray,
         title     = {xarray: {N-D} labeled arrays and datasets in {Python}},
         author    = {Hoyer, S. and J. Hamman},
         journal   = {Journal of Open Research Software},
         volume    = {5},
         number    = {1},
         year      = {2017},
         publisher = {Ubiquity Press},
         doi       = {10.5334/jors.148},
         url       = {https://doi.org/10.5334/jors.148}
       }

2. You may also want to cite a specific version of the xarray package.
We provide a `Zenodo citation and DOI `_ for this purpose: .. image:: https://zenodo.org/badge/doi/10.5281/zenodo.598201.svg :target: https://doi.org/10.5281/zenodo.598201 An example BibTeX entry:: @misc{xarray_v0_8_0, author = {Stephan Hoyer and Clark Fitzgerald and Joe Hamman and others}, title = {xarray: v0.8.0}, month = aug, year = 2016, doi = {10.5281/zenodo.59499}, url = {https://doi.org/10.5281/zenodo.59499} } .. _api-stability: How stable is Xarray's API? --------------------------- Xarray tries very hard to maintain backwards compatibility in our :ref:`api` between released versions. Whilst we do occasionally make breaking changes in order to improve the library, we `signpost changes `_ with ``DeprecationWarnings`` for many releases in advance. (An exception is bugs - whose behaviour we try to fix as soon as we notice them.) Our `test-driven development practices `_ helps to ensure any accidental regressions are caught. This philosophy applies to everything in the `public API `_. .. _public-api: What parts of xarray are considered public API? ----------------------------------------------- As a rule, only functions/methods documented in our :ref:`api` are considered part of xarray's public API. Everything else (in particular, everything in ``xarray.core`` that is not also exposed in the top level ``xarray`` namespace) is considered a private implementation detail that may change at any time. Objects that exist to facilitate xarray's fluent interface on ``DataArray`` and ``Dataset`` objects are a special case. For convenience, we document them in the API docs, but only their methods and the ``DataArray``/``Dataset`` methods/properties to construct them (e.g., ``.plot()``, ``.groupby()``, ``.str``) are considered public API. Constructors and other details of the internal classes used to implemented them (i.e., ``xarray.plot.plotting._PlotMethods``, ``xarray.core.groupby.DataArrayGroupBy``, ``xarray.core.accessor_str.StringAccessor``) are not. python-xarray-2026.01.0/doc/get-help/help-diagram.rst0000664000175000017500000001072515136607163022406 0ustar alastairalastairGetting Help ============ Navigating the wealth of resources available for Xarray can be overwhelming. We've created this flow chart to help guide you towards the best way to get help, depending on what you're working towards. Also be sure to check out our :ref:`faq`. and :ref:`howdoi` pages for solutions to common questions. A major strength of Xarray is in the user community. Sometimes you might not yet have a concrete question but would simply like to connect with other Xarray users. We have a few accounts on different social platforms for that! :ref:`socials`. We look forward to hearing from you! Help Flowchart -------------- .. _comment: mermaid Flowcharg "link" text gets secondary color background, SVG icon fill gets primary color .. raw:: html .. mermaid:: :config: {"theme":"base","themeVariables":{"fontSize":"20px","primaryColor":"#fff","primaryTextColor":"#fff","primaryBorderColor":"#59c7d6","lineColor":"#e28126","secondaryColor":"#767985"}} :alt: Flowchart illustrating the different ways to access help using or contributing to Xarray. flowchart TD intro[Welcome to Xarray! 
How can we help?]:::quesNodefmt usage([fa:fa-chalkboard-user Xarray Tutorial fab:fa-readme Xarray Docs fab:fa-stack-overflow Stack Exchange fab:fa-google Ask Google fa:fa-robot Ask AI ChatBot]):::ansNodefmt extensions([Extension docs: fab:fa-readme Dask fab:fa-readme Rioxarray]):::ansNodefmt help([fab:fa-github Xarray Discussions fab:fa-discord Xarray Discord fa:fa-globe Pangeo Discourse]):::ansNodefmt bug([Let us know: fab:fa-github Xarray Issues]):::ansNodefmt contrib([fa:fa-book-open Xarray Contributor's Guide]):::ansNodefmt pr([fab:fa-github Pull Request]):::ansNodefmt dev([fab:fa-github Add PR Comment fa:fa-users Attend Developer's Meeting ]):::ansNodefmt report[Thanks for letting us know!]:::quesNodefmt merged[fa:fa-hands-clapping Thanks for contributing to Xarray!]:::quesNodefmt intro -->|How do I use Xarray?| usage usage -->|"With extensions (like Dask, Rioxarray, etc.)"| extensions usage -->|I still have questions or could use some guidance | help intro -->|I think I found a bug| bug bug contrib bug -->|I just wanted to tell you| report bug<-->|I'd like to fix the bug!| contrib pr -->|my PR was approved| merged intro -->|I wish Xarray could...| bug pr <-->|my PR is quiet| dev contrib -->pr classDef quesNodefmt font-size:20pt,fill:#0e4666,stroke:#59c7d6,stroke-width:3 classDef ansNodefmt font-size:18pt,fill:#4a4a4a,stroke:#17afb4,stroke-width:3 linkStyle default font-size:16pt,stroke-width:4 Flowchart links --------------- - `Xarray Tutorials `__ - `Xarray Docs `__ - `Stack Exchange `__ - `Xarray Discussions `__ - `Xarray Discord `__ - `Xarray Office Hours `__ - `Pangeo Discourse `__ - `Xarray Issues `__ - :ref:`contributing` - :ref:`developers-meeting` .. toctree:: :maxdepth: 1 :hidden: faq howdoi socials python-xarray-2026.01.0/doc/get-help/howdoi.rst0000664000175000017500000001103415136607163021337 0ustar alastairalastair.. currentmodule:: xarray .. _howdoi: How do I ... ============ .. list-table:: :header-rows: 1 :widths: 40 60 * - How do I... 
- Solution * - add a DataArray to my dataset as a new variable - ``my_dataset[varname] = my_dataArray`` or :py:meth:`Dataset.assign` (see also :ref:`dictionary_like_methods`) * - add variables from other datasets to my dataset - :py:meth:`Dataset.merge` * - add a new dimension and/or coordinate - :py:meth:`DataArray.expand_dims`, :py:meth:`Dataset.expand_dims` * - add a new coordinate variable - :py:meth:`DataArray.assign_coords` * - change a data variable to a coordinate variable - :py:meth:`Dataset.set_coords` * - change the order of dimensions - :py:meth:`DataArray.transpose`, :py:meth:`Dataset.transpose` * - reshape dimensions - :py:meth:`DataArray.stack`, :py:meth:`Dataset.stack`, :py:meth:`Dataset.coarsen.construct`, :py:meth:`DataArray.coarsen.construct` * - remove a variable from my object - :py:meth:`Dataset.drop_vars`, :py:meth:`DataArray.drop_vars` * - remove dimensions of length 1 or 0 - :py:meth:`DataArray.squeeze`, :py:meth:`Dataset.squeeze` * - remove all variables with a particular dimension - :py:meth:`Dataset.drop_dims` * - convert non-dimension coordinates to data variables or remove them - :py:meth:`DataArray.reset_coords`, :py:meth:`Dataset.reset_coords` * - rename a variable, dimension or coordinate - :py:meth:`Dataset.rename`, :py:meth:`DataArray.rename`, :py:meth:`Dataset.rename_vars`, :py:meth:`Dataset.rename_dims`, * - convert a DataArray to Dataset or vice versa - :py:meth:`DataArray.to_dataset`, :py:meth:`Dataset.to_dataarray`, :py:meth:`Dataset.to_stacked_array`, :py:meth:`DataArray.to_unstacked_dataset` * - extract variables that have certain attributes - :py:meth:`Dataset.filter_by_attrs` * - extract the underlying array (e.g. NumPy or Dask arrays) - :py:attr:`DataArray.data` * - convert to and extract the underlying NumPy array - :py:attr:`DataArray.to_numpy` * - convert to a pandas DataFrame - :py:attr:`Dataset.to_dataframe` * - sort values - :py:attr:`Dataset.sortby` * - find out if my xarray object is wrapping a Dask Array - :py:func:`dask.is_dask_collection` * - know how much memory my object requires - :py:attr:`DataArray.nbytes`, :py:attr:`Dataset.nbytes` * - Get axis number for a dimension - :py:meth:`DataArray.get_axis_num` * - convert a possibly irregularly sampled timeseries to a regularly sampled timeseries - :py:meth:`DataArray.resample`, :py:meth:`Dataset.resample` (see :ref:`resampling` for more) * - apply a function on all data variables in a Dataset - :py:meth:`Dataset.map` * - write xarray objects with complex values to a netCDF file - :py:func:`Dataset.to_netcdf`, :py:func:`DataArray.to_netcdf` specifying ``engine="h5netcdf"`` or :py:func:`Dataset.to_netcdf`, :py:func:`DataArray.to_netcdf` specifying ``engine="netCDF4", auto_complex=True`` * - make xarray objects look like other xarray objects - :py:func:`~xarray.ones_like`, :py:func:`~xarray.zeros_like`, :py:func:`~xarray.full_like`, :py:meth:`Dataset.reindex_like`, :py:meth:`Dataset.interp_like`, :py:meth:`Dataset.broadcast_like`, :py:meth:`DataArray.reindex_like`, :py:meth:`DataArray.interp_like`, :py:meth:`DataArray.broadcast_like` * - Make sure my datasets have values at the same coordinate locations - ``xr.align(dataset_1, dataset_2, join="exact")`` * - replace NaNs with other values - :py:meth:`Dataset.fillna`, :py:meth:`Dataset.ffill`, :py:meth:`Dataset.bfill`, :py:meth:`Dataset.interpolate_na`, :py:meth:`DataArray.fillna`, :py:meth:`DataArray.ffill`, :py:meth:`DataArray.bfill`, :py:meth:`DataArray.interpolate_na` * - extract the year, month, day or similar from a DataArray of time 
values - ``obj.dt.month`` for example where ``obj`` is a :py:class:`~xarray.DataArray` containing ``datetime64`` or ``cftime`` values. See :ref:`dt_accessor` for more. * - round off time values to a specified frequency - ``obj.dt.ceil``, ``obj.dt.floor``, ``obj.dt.round``. See :ref:`dt_accessor` for more. * - make a mask that is ``True`` where an object contains any of the values in an array - :py:meth:`Dataset.isin`, :py:meth:`DataArray.isin` * - Index using a boolean mask - :py:meth:`Dataset.query`, :py:meth:`DataArray.query`, :py:meth:`Dataset.where`, :py:meth:`DataArray.where` * - preserve ``attrs`` during (most) xarray operations - ``xr.set_options(keep_attrs=True)`` python-xarray-2026.01.0/doc/combined.json0000664000175000017500000000170415136607163020265 0ustar alastairalastair{ "version": 1, "refs": { ".zgroup": "{\"zarr_format\":2}", "foo/.zarray": "{\"chunks\":[4,5],\"compressor\":null,\"dtype\":\"", "logoWidth": 14, "labelColor": "#4a4a4a", "color": "#0e4666" } python-xarray-2026.01.0/doc/_templates/0000775000175000017500000000000015136607163017745 5ustar alastairalastairpython-xarray-2026.01.0/doc/_templates/autosummary/0000775000175000017500000000000015136607163022333 5ustar alastairalastairpython-xarray-2026.01.0/doc/_templates/autosummary/accessor_attribute.rst0000664000175000017500000000024015136607163026746 0ustar alastairalastair{{ fullname }} {{ underline }} .. currentmodule:: {{ module.split('.')[0] }} .. autoaccessorattribute:: {{ (module.split('.')[1:] + [objname]) | join('.') }} python-xarray-2026.01.0/doc/_templates/autosummary/accessor_callable.rst0000664000175000017500000000025015136607163026503 0ustar alastairalastair{{ fullname }} {{ underline }} .. currentmodule:: {{ module.split('.')[0] }} .. autoaccessorcallable:: {{ (module.split('.')[1:] + [objname]) | join('.') }}.__call__ python-xarray-2026.01.0/doc/_templates/autosummary/accessor.rst0000664000175000017500000000022715136607163024670 0ustar alastairalastair{{ fullname }} {{ underline }} .. currentmodule:: {{ module.split('.')[0] }} .. autoaccessor:: {{ (module.split('.')[1:] + [objname]) | join('.') }} python-xarray-2026.01.0/doc/_templates/autosummary/accessor_method.rst0000664000175000017500000000023515136607163026227 0ustar alastairalastair{{ fullname }} {{ underline }} .. currentmodule:: {{ module.split('.')[0] }} .. 
autoaccessormethod:: {{ (module.split('.')[1:] + [objname]) | join('.') }} python-xarray-2026.01.0/doc/videos.yml0000664000175000017500000000465715136607163017640 0ustar alastairalastair- title: "Xdev Python Tutorial Seminar Series 2022 Thinking with Xarray : High-level computation patterns" src: '' authors: - Deepak Cherian - title: "Xdev Python Tutorial Seminar Series 2021 seminar introducing xarray (2 of 2)" src: '' authors: - Anderson Banihirwe - title: "Xdev Python Tutorial Seminar Series 2021 seminar introducing xarray (1 of 2)" src: '' authors: - Anderson Banihirwe - title: "Xarray's 2020 virtual tutorial" src: '' authors: - Anderson Banihirwe - Deepak Cherian - Martin Durant - title: "Xarray's Tutorial presented at the 2020 SciPy Conference" src: ' ' authors: - Joe Hamman - Deepak Cherian - Ryan Abernathey - Stephan Hoyer - title: "Scipy 2015 talk introducing xarray to a general audience" src: '' authors: - Stephan Hoyer - title: " 2015 Unidata Users Workshop talk and tutorial with (`with answers`_) introducing xarray to users familiar with netCDF" src: '' authors: - Stephan Hoyer python-xarray-2026.01.0/doc/gallery.yml0000664000175000017500000000346415136607163020001 0ustar alastairalastairnotebooks-examples: - title: Toy weather data path: examples/weather-data.html thumbnail: _static/thumbnails/toy-weather-data.png - title: Calculating Seasonal Averages from Timeseries of Monthly Means path: examples/monthly-means.html thumbnail: _static/thumbnails/monthly-means.png - title: Compare weighted and unweighted mean temperature path: examples/area_weighted_temperature.html thumbnail: _static/thumbnails/area_weighted_temperature.png - title: Working with Multidimensional Coordinates path: examples/multidimensional-coords.html thumbnail: _static/thumbnails/multidimensional-coords.png - title: Visualization Gallery path: examples/visualization_gallery.html thumbnail: _static/thumbnails/visualization_gallery.png - title: GRIB Data Example path: examples/ERA5-GRIB-example.html thumbnail: _static/thumbnails/ERA5-GRIB-example.png - title: Applying unvectorized functions with apply_ufunc path: examples/apply_ufunc_vectorize_1d.html thumbnail: _static/logos/Xarray_Logo_RGB_Final.svg external-examples: - title: Managing raster data with rioxarray path: https://corteva.github.io/rioxarray/stable/examples/examples.html thumbnail: _static/logos/Xarray_Logo_RGB_Final.svg - title: Xarray and dask on the cloud with Pangeo path: https://gallery.pangeo.io/ thumbnail: https://avatars.githubusercontent.com/u/60833341?s=200&v=4 - title: Xarray with Dask Arrays path: https://examples.dask.org/xarray.html_ thumbnail: _static/logos/Xarray_Logo_RGB_Final.svg - title: Project Pythia Foundations Book path: https://foundations.projectpythia.org/core/xarray.html thumbnail: https://raw.githubusercontent.com/ProjectPythia/projectpythia.github.io/main/portal/_static/images/logos/pythia_logo-blue-btext-twocolor.svg python-xarray-2026.01.0/doc/whats-new.rst0000664000175000017500000161444015136607163020271 0ustar alastairalastair.. currentmodule:: xarray .. _whats-new: What's New ========== .. _whats-new.2026.01.0: v2026.01.0 (Jan 28, 2026) ------------------------- This release includes an improved DataTree HTML representation with collapsible groups and automatic truncation, easier selection on coordinates without explicit indexes, pandas 3 compatibility, and various bug fixes and performance improvements. Thanks to the 25 contributors to this release: Barron H. Henderson, Christine P. 
Chai, DHRUVA KUMAR KAUSHAL, David Bold, Davis Bennett, Deepak Cherian, Dhruva Kumar Kaushal, Florian Knappers, Ian Hunt-Isaak, Jacob Tomlinson, Joshua Gould, Julia Signell, Justus Magin, Lucas Colley, Mark Harfouche, Matthew, Maximilian Roos, Nick Hodgskin, Sakshee_D, Sam Levang, Samay Mehar, Simon Høxbro Hansen, Spencer Clark, Stephan Hoyer and knappersfy New Features ~~~~~~~~~~~~ - Improved :py:class:`DataTree` HTML representation: groups are now collapsible with item counts shown in labels, large trees are automatically truncated using ``display_max_children`` and ``display_max_html_elements`` options, and the Indexes section is now displayed (matching the text repr) (:pull:`10816`). By `Stephan Hoyer `_. - :py:meth:`Dataset.set_xindex` and :py:meth:`DataArray.set_xindex` automatically replace any existing index being set instead of erroring or needing needing to call :py:meth:`drop_indexes` first (:pull:`11008`). By `Ian Hunt-Isaak `_. - Calling :py:meth:`Dataset.sel` or :py:meth:`DataArray.sel` on a 1-dimensional coordinate without an index will now automatically create a temporary :py:class:`~xarray.indexes.PandasIndex` to perform the selection (:issue:`9703`, :pull:`11029`). By `Ian Hunt-Isaak `_. - The minimum supported version of ``h5netcdf`` is now 1.4. Version 1.4.0 brings improved alignment between h5netcdf and libnetcdf4 in the storage of complex numbers (:pull:`11068`). By `Mark Harfouche `_. - :py:func:`set_options` now supports an ``arithmetic_compat`` option which determines how non-index coordinates of the same name are compared for potential conflicts when performing binary operations. The default for it is ``arithmetic_compat='minimal'`` which matches the existing behaviour (:pull:`10943`). By `Matthew Willson `_. - Better ordering of coordinates when displaying xarray objects (:pull:`11091`). By `Ian Hunt-Isaak `_, `Julia Signell `_. - Use ``np.dtypes.StringDType`` when reading Zarr string variables (:pull:`11097`). By `Julia Signell `_. Breaking Changes ~~~~~~~~~~~~~~~~ - Change the default value for ``chunk`` in :py:func:`open_zarr` to ``_default`` and remove special mapping of ``"auto"`` to ``{}`` or ``None`` in :py:func:`open_zarr`. If ``chunks`` is not set, the default behavior is the same as before. Explicitly setting ``chunks="auto"`` will match the behavior of ``chunks="auto"`` in :py:func:`open_dataset` with ``engine="zarr"`` (:issue:`11002`, :pull:`11010`). By `Julia Signell `_. - :py:meth:`Dataset.identical`, :py:meth:`DataArray.identical`, and :py:func:`testing.assert_identical` now compare indexes. Two objects with identical data but different indexes will no longer be considered identical (:issue:`11033`, :pull:`11035`). By `Ian Hunt-Isaak `_. Bug Fixes ~~~~~~~~~ - Ensure that ``keep_attrs='drop'`` and ``keep_attrs=False`` remove attrs from result, even when there is only one xarray object given to :py:func:`apply_ufunc` (:issue:`10982`, :pull:`10997`). By `Julia Signell `_. - :py:meth:`~xarray.indexes.RangeIndex.equals` now uses floating point error tolerant ``np.isclose`` by default to handle accumulated floating point errors from slicing operations. Use ``exact=True`` for exact comparison (:pull:`11035`). By `Ian Hunt-Isaak `_. - Ensure the :py:class:`~xarray.groupers.SeasonResampler` preserves the datetime unit of the underlying time index when resampling (:issue:`11048`, :pull:`11049`). By `Spencer Clark `_. 
- Partially support pandas 3 default string indexes by coercing ``pd.StringDtype`` to ``np.dtypes.StringDType`` in ``PandasIndexingAdapter`` (:issue:`11098`, :pull:`11102`). By `Julia Signell `_. - :py:meth:`Dataset.eval` now works with more than 2 dimensions (:pull:`11064`). By `Maximilian Roos `_. - Fix :py:func:`where` for ``cupy.array`` inputs (:pull:`11026`). By `Simon Høxbro Hansen `_. - Fix :py:meth:`CombinedLock.locked` to correctly call the underlying lock's ``locked()`` method (:issue:`10843`, :pull:`11022`). By `Samay Mehar `_. - Fix :py:meth:`DatasetGroupBy.map` when grouping by more than one variable (:pull:`11005`). By `Joshua Gould `_. - Fix indexing bugs in :py:class:`~xarray.indexes.CoordinateTransformIndex` (:pull:`10980`). By `Deepak Cherian `_. - Ensure the netCDF4 backend locks files while closing to prevent race conditions (:pull:`10788`). By `David Bold `_. - Improve error message when scipy is missing for :py:class:`~xarray.indexes.NDPointIndex` (:pull:`11085`). By `Sakshee_D `_. Documentation ~~~~~~~~~~~~~ - Better description of ``keep_attrs`` option on :py:func:`xarray.where` docstring (:issue:`10982`, :pull:`10997`). By `Julia Signell `_. - Document how :py:func:`xarray.dot` interacts with coordinates (:pull:`10958`). By `Dhruva Kumar Kaushal `_. - Improve ``rolling`` window documentation (:pull:`11094`). By `Barron H. Henderson `_. - Improve ``combine_nested`` and ``combine_by_coords`` docstrings (:pull:`11080`). By `Julia Signell `_. Performance ~~~~~~~~~~~ - Add a fastpath to the backend plugin system for standard engines (:issue:`10178`, :pull:`10937`). By `Sam Levang `_. - Optimize :py:class:`~xarray.coding.variables.CFMaskCoder` decoder (:pull:`11105`). By `Deepak Cherian `_. Internal Changes ~~~~~~~~~~~~~~~~ - Update contributing instructions with note on pixi version (:pull:`11108`). By `Nick Hodgskin `_. .. _whats-new.2025.12.0: v2025.12.0 (Dec 5, 2025) ------------------------ This release rolls back the default engine for HTTP urls, adds support for :py:class:`DataTree` objects in ``combine_nested`` and contains numerous bug fixes. Thanks to the 16 contributors to this release: Benoit Bovy, Christine P. Chai, Deepak Cherian, Dhruva Kumar Kaushal, Ian Hunt-Isaak, Ilan Gold, Illviljan, Julia Signell, Justus Magin, Lars Buntemeyer, Maximilian Roos, Miguel Jimenez, Nick Hodgskin, Richard Berg, Spencer Clark and Stephan Hoyer New Features ~~~~~~~~~~~~ - Improved ``pydap`` backend behavior and performance when using :py:func:`open_dataset`, :py:func:`open_datatree` when downloading dap4 (opendap) dimensions data (:issue:`10628`, :pull:`10629`). In addition ``checksums=True|False`` is added as optional argument to be passed to ``pydap`` backend. By `Miguel Jimenez-Urias `_. - :py:func:`combine_nested` now supports :py:class:`DataTree` objects (:pull:`10849`). By `Stephan Hoyer `_. Bug Fixes ~~~~~~~~~ - When assigning an indexed coordinate to a data variable or coordinate, coerce it from ``IndexVariable`` to ``Variable`` (:issue:`9859`, :issue:`10829`, :pull:`10909`). By `Julia Signell `_. - The NetCDF4 backend will now claim to be able to read any URL except for one that contains the substring zarr. This restores backward compatibility after :pull:`10804` broke workflows that relied on ``xr.open_dataset("http://...")`` (:pull:`10931`). By `Ian Hunt-Isaak `_. - Always normalize slices when indexing ``LazilyIndexedArray`` instances (:issue:`10941`, :pull:`10948`). By `Justus Magin `_. 
- Avoid casting custom indexes in ``Dataset.drop_attrs`` (:pull:`10961`) By `Justus Magin `_. - Support decoding unsigned integers to ``np.timedelta64``. By `Deepak Cherian `_. - Properly handle internal type promotion and ``NA`` objects for extension arrays (:pull:`10423`). By `Ilan Gold `_. Documentation ~~~~~~~~~~~~~ - Added section on the `limitations of cftime arithmetic `_ (:pull:`10653`). By `Lars Buntemeyer `_. Internal Changes ~~~~~~~~~~~~~~~~ - Change the development workflow to use ``pixi`` (:issue:`10732`, :pull:`10888`). By `Nick Nodgskin `_. .. _whats-new.2025.11.0: v2025.11.0 (Nov 17, 2025) ------------------------- This release changes the default for ``keep_attrs`` such that attributes are preserved by default, adds support for :py:class:`DataTree` in top-level functions, and contains several memory and performance improvements as well as a number of bug fixes. Thanks to the 21 contributors to this release: Aled Owen, Charles Turner, Christine P. Chai, David Huard, Deepak Cherian, Gregorio L. Trevisan, Ian Hunt-Isaak, Ilan Gold, Illviljan, Jan Meischner, Jemma Jeffree, Jonas Lundholm Bertelsen, Justus Magin, Kai Mühlbauer, Kristian Bodolai, Lukas Riedel, Max Jones, Maximilian Roos, Niclas Rieger, Stephan Hoyer and William Andrea New Features ~~~~~~~~~~~~ - :py:func:`merge` and :py:func:`concat` now support :py:class:`DataTree` objects (:issue:`9790`, :issue:`9778`). By `Stephan Hoyer `_. - The ``h5netcdf`` engine has support for pseudo ``NETCDF4_CLASSIC`` files, meaning variables and attributes are cast to supported types. Note that the saved files won't be recognized as genuine ``NETCDF4_CLASSIC`` files until ``h5netcdf`` adds support with version 1.7.0 (:issue:`10676`, :pull:`10686`). By `David Huard `_. - Support comparing :py:class:`DataTree` objects with :py:func:`testing.assert_allclose` (:pull:`10887`). By `Justus Magin `_. - Add support for ``chunks="auto"`` for cftime datasets (:issue:`9834`, :pull:`10527`). By `Charles Turner `_. Breaking Changes ~~~~~~~~~~~~~~~~ - All xarray operations now preserve attributes by default (:issue:`3891`, :issue:`2582`). Previously, operations would drop attributes unless explicitly told to preserve them via ``keep_attrs=True``. Additionally, when attributes are preserved in binary operations, they now combine attributes from both operands using ``drop_conflicts`` (keeping matching attributes, dropping conflicts), instead of keeping only the left operand's attributes. **What changed:** .. code-block:: python # Before (xarray <2025.11.0): data = xr.DataArray([1, 2, 3], attrs={"units": "meters", "long_name": "height"}) result = data.mean() result.attrs # {} - Attributes lost! # After (xarray ≥2025.09.1): data = xr.DataArray([1, 2, 3], attrs={"units": "meters", "long_name": "height"}) result = data.mean() result.attrs # {"units": "meters", "long_name": "height"} - Attributes preserved! **Affected operations include:** *Computational operations:* - Reductions: ``mean()``, ``sum()``, ``std()``, ``var()``, ``min()``, ``max()``, ``median()``, ``quantile()``, etc. - Rolling windows: ``rolling().mean()``, ``rolling().sum()``, etc. - Groupby: ``groupby().mean()``, ``groupby().sum()``, etc. - Resampling: ``resample().mean()``, etc. - Weighted: ``weighted().mean()``, ``weighted().sum()``, etc. 
- ``apply_ufunc()`` and NumPy universal functions *Binary operations:* - Arithmetic: ``+``, ``-``, ``*``, ``/``, ``**``, ``//``, ``%`` (combines attributes using ``drop_conflicts``) - Comparisons: ``<``, ``>``, ``==``, ``!=``, ``<=``, ``>=`` (combines attributes using ``drop_conflicts``) - With scalars: ``data * 2``, ``10 - data`` (preserves data's attributes) *Data manipulation:* - Missing data: ``fillna()``, ``dropna()``, ``interpolate_na()``, ``ffill()``, ``bfill()`` - Indexing/selection: ``isel()``, ``sel()``, ``where()``, ``clip()`` - Alignment: ``interp()``, ``reindex()``, ``align()`` - Transformations: ``map()``, ``pipe()``, ``assign()``, ``assign_coords()`` - Shape operations: ``expand_dims()``, ``squeeze()``, ``transpose()``, ``stack()``, ``unstack()`` **Binary operations - combines attributes with** ``drop_conflicts``: .. code-block:: python a = xr.DataArray([1, 2], attrs={"units": "m", "source": "sensor_a"}) b = xr.DataArray([3, 4], attrs={"units": "m", "source": "sensor_b"}) (a + b).attrs # {"units": "m"} - Matching values kept, conflicts dropped (b + a).attrs # {"units": "m"} - Order doesn't matter for drop_conflicts **How to restore previous behavior:** 1. **Globally for your entire script:** .. code-block:: python import xarray as xr xr.set_options(keep_attrs=False) # Affects all subsequent operations 2. **For specific operations:** .. code-block:: python result = data.mean(dim="time", keep_attrs=False) 3. **For code blocks:** .. code-block:: python with xr.set_options(keep_attrs=False): # All operations in this block drop attrs result = data1 + data2 4. **Remove attributes after operations:** .. code-block:: python result = data.mean().drop_attrs() By `Maximilian Roos `_. Bug Fixes ~~~~~~~~~ - Fix h5netcdf backend for format=None, use same rule as netcdf4 backend (:pull:`10859`). By `Kai Mühlbauer `_. - ``netcdf4`` and ``pydap`` backends now use stricter URL detection to avoid incorrectly claiming remote URLs. The ``pydap`` backend now only claims URLs with explicit DAP protocol indicators (``dap2://`` or ``dap4://`` schemes, or ``/dap2/`` or ``/dap4/`` in the URL path). This prevents both backends from claiming remote Zarr stores and other non-DAP URLs without an explicit ``engine=`` argument (:pull:`10804`). By `Ian Hunt-Isaak `_. - Fix indexing with empty arrays for scipy & h5netcdf backends which now resolves to empty slices (:issue:`10867`, :pull:`10870`). By `Kai Mühlbauer `_ - Fix error handling issue in ``decode_cf_variables`` when decoding fails - the exception is now re-raised correctly, with a note added about the variable name that caused the error (:issue:`10873`, :pull:`10886`). By `Jonas L. Bertelsen `_. - Fix ``equivalent`` for numpy scalar nan comparison (:issue:`10833`, :pull:`10838`). By `Maximilian Roos `_. - Support non-``DataArray`` outputs in :py:meth:`Dataset.map` (:issue:`10835`, :pull:`10839`). By `Maximilian Roos `_. - Support ``drop_sel`` on ``MultiIndex`` objects (:issue:`10862`, :pull:`10863`). By `Aled Owen `_. Performance ~~~~~~~~~~~ - Speedup and reduce memory usage of :py:func:`concat`. Magnitude of improvement scales with size of the concatenation dimension (:issue:`10864`, :pull:`10866`). By `Deepak Cherian `_. - Speedup and reduce memory usage when coarsening along multiple dimensions (:pull:`10921`) By `Deepak Cherian `_. .. _whats-new.2025.10.1: v2025.10.1 (Oct 7, 2025) ------------------------ This release reverts a breaking change to Xarray's preferred netCDF backend. 
Breaking changes ~~~~~~~~~~~~~~~~ - Xarray's default engine for reading/writing netCDF files has been reverted to prefer netCDF4 over h5netcdf over scipy, which was the default before v2025.09.1. This change had larger implications for the ecosystem than we anticipated. We are still considering changing the default in the future, but will be a bit more careful about the implications. See :issue:`10657` and linked issues for discussion. The behavior can still be customized, e.g., with ``xr.set_options(netcdf_engine_order=['h5netcdf', 'netcdf4', 'scipy'])``. By `Stephan Hoyer `_. New features ~~~~~~~~~~~~ - Coordinates are ordered to match dims when displaying Xarray objects. (:pull:`10778`). By `Julia Signell `_. Bug fixes ~~~~~~~~~ - Fix error raised when writing scalar variables to Zarr with ``region={}`` (:pull:`10796`). By `Stephan Hoyer `_. .. _whats-new.2025.09.1: v2025.09.1 (Sep 29, 2025) ------------------------- This release contains improvements to netCDF IO and the :py:func:`DataTree.from_dict` constructor, as well as a variety of bug fixes. In particular, the default netCDF backend has switched from netCDF4 to h5netcdf, which is typically faster. Thanks to the 17 contributors to this release: Claude, Deepak Cherian, Dimitri Papadopoulos Orfanos, Dylan H. Morris, Emmanuel Mathot, Ian Hunt-Isaak, Joren Hammudoglu, Julia Signell, Justus Magin, Maximilian Roos, Nick Hodgskin, Spencer Clark, Stephan Hoyer, Tom Nicholas, gronniger, joseph nowak and pierre-manchon New Features ~~~~~~~~~~~~ - :py:func:`DataTree.from_dict` now supports passing in ``DataArray`` and nested dictionary values, and has a ``coords`` argument for specifying coordinates as ``DataArray`` objects (:pull:`10658`). - ``engine='netcdf4'`` now supports reading and writing in-memory netCDF files. All of Xarray's netCDF backends now support in-memory reads and writes (:pull:`10624`). By `Stephan Hoyer `_. Breaking changes ~~~~~~~~~~~~~~~~ - :py:meth:`Dataset.update` now returns ``None``, instead of the updated dataset. This completes the deprecation cycle started in version 0.17. The method still updates the dataset in-place. (:issue:`10167`) By `Maximilian Roos `_. - The default ``engine`` when reading/writing netCDF files is now h5netcdf or scipy, which are typically faster than the prior default of netCDF4-python. You can control this default behavior explicitly via the new ``netcdf_engine_order`` parameter in :py:func:`~xarray.set_options`, e.g., ``xr.set_options(netcdf_engine_order=['netcdf4', 'scipy', 'h5netcdf'])`` to restore the prior defaults (:issue:`10657`). By `Stephan Hoyer `_. - The HTML reprs for :py:class:`DataArray`, :py:class:`Dataset` and :py:class:`DataTree` have been tweaked to hide empty sections, consistent with the text reprs. The ``DataTree`` HTML repr also now automatically expands sub-groups (:pull:`10785`). By `Stephan Hoyer `_. - Zarr stores written with Xarray now consistently use a default Zarr fill value of ``NaN`` for float variables, for both Zarr v2 and v3 (:issue:`10646``). All other dtypes still use the Zarr default ``fill_value`` of zero. To customize, explicitly set encoding in :py:meth:`~Dataset.to_zarr`, e.g., ``encoding=dict.fromkey(ds.data_vars, {'fill_value': 0})``. By `Stephan Hoyer `_. Deprecations ~~~~~~~~~~~~ Bug fixes ~~~~~~~~~ - Xarray objects opened from file-like objects with ``engine='h5netcdf'`` can now be pickled, as long as the underlying file-like object also supports pickle (:issue:`10712`). By `Stephan Hoyer `_. 
- Closing Xarray objects opened from file-like objects with ```engine='scipy'`` no longer closes the underlying file, consistent with the h5netcdf backend (:pull:`10624`). By `Stephan Hoyer `_. - Fix the ``align_chunks`` parameter on the :py:meth:`~xarray.Dataset.to_zarr` method, it was not being passed to the underlying :py:meth:`~xarray.backends.api` method (:issue:`10501`, :pull:`10516`). - Fix error when encoding an empty :py:class:`numpy.datetime64` array (:issue:`10722`, :pull:`10723`). By `Spencer Clark `_. - Propagate coordinate attrs in :py:meth:`xarray.Dataset.map` (:issue:`9317`, :pull:`10602`). - Fix error from ``to_netcdf(..., compute=False)`` when using Dask Distributed (:issue:`10725`). By `Stephan Hoyer `_. - Propagation coordinate attrs in :py:meth:`xarray.Dataset.map` (:issue:`9317`, :pull:`10602`). By `Justus Magin `_. - Allow ``combine_attrs="drop_conflicts"`` to handle objects with ``__eq__`` methods that return non-bool values (e.g., numpy arrays) without raising ``ValueError`` (:pull:`10726`). By `Maximilian Roos `_. Documentation ~~~~~~~~~~~~~ - Fixed Zarr encoding documentation with consistent examples and added comprehensive coverage of dimension and coordinate encoding differences between Zarr V2 and V3 formats. The documentation shows what users will see when accessing Zarr files with raw zarr-python, and explains the relationship between ``_ARRAY_DIMENSIONS`` (Zarr V2), ``dimension_names`` metadata (Zarr V3), and CF ``coordinates`` attributes. (:pull:`10720`) By `Emmanuel Mathot `_. Internal Changes ~~~~~~~~~~~~~~~~ - Refactor structure of ``backends`` module to separate code for reading data from code for writing data (:pull:`10771`). By `Tom Nicholas `_. - All test files now have full mypy type checking enabled (``check_untyped_defs = true``), improving type safety and making the test suite a better reference for type annotations. (:pull:`10768`) By `Maximilian Roos `_. .. _whats-new.2025.09.0: v2025.09.0 (Sep 2, 2025) ------------------------ This release brings a number of small improvements and fixes, especially related to writing DataTree objects and netCDF files to disk. Thanks to the 13 contributors to this release: Benoit Bovy, DHRUVA KUMAR KAUSHAL, Deepak Cherian, Dhruva Kumar Kaushal, Giacomo Caria, Ian Hunt-Isaak, Illviljan, Justus Magin, Kai Mühlbauer, Ruth Comer, Spencer Clark, Stephan Hoyer and Tom Nicholas New Features ~~~~~~~~~~~~ - Support rechunking by :py:class:`~xarray.groupers.SeasonResampler` for seasonal data analysis (:issue:`10425`, :pull:`10519`). By `Dhruva Kumar Kaushal `_. - Add convenience methods to :py:class:`~xarray.Coordinates` (:pull:`10318`) By `Justus Magin `_. - Added :py:func:`load_datatree` for loading ``DataTree`` objects into memory from disk. It has the same relationship to :py:func:`open_datatree`, as :py:func:`load_dataset` has to :py:func:`open_dataset`. By `Stephan Hoyer `_. - ``compute=False`` is now supported by :py:meth:`DataTree.to_netcdf` and :py:meth:`DataTree.to_zarr`. By `Stephan Hoyer `_. - ``open_dataset`` will now correctly infer a path ending in ``.zarr/`` as zarr By `Ian Hunt-Isaak `_. Breaking changes ~~~~~~~~~~~~~~~~ - Following pandas 3.0 (`pandas-dev/pandas#61985 `_), ``Day`` is no longer considered a ``Tick``-like frequency. Therefore non-``None`` values of ``offset`` and non-``"start_day"`` values of ``origin`` will have no effect when resampling to a daily frequency for objects indexed by a :py:class:`xarray.CFTimeIndex`. 
As in `pandas-dev/pandas#62101 `_ warnings will be emitted if non default values are provided in this context (:issue:`10640`, :pull:`10650`). By `Spencer Clark `_. - The default backend ``engine`` used by :py:meth:`Dataset.to_netcdf` and :py:meth:`DataTree.to_netcdf` is now chosen consistently with :py:func:`open_dataset` and :py:func:`open_datatree`, using whichever netCDF libraries are available and valid, and preferring netCDF4 to h5netcdf to scipy (:issue:`10654`). This will change the default backend in some edge cases (e.g., from scipy to netCDF4 when writing to a file-like object or bytes). To override these new defaults, set ``engine`` explicitly. By `Stephan Hoyer `_. - The return value of :py:meth:`Dataset.to_netcdf` without ``path`` is now a ``memoryview`` object instead of ``bytes`` (:pull:`10656`). This removes an unnecessary memory copy and ensures consistency when using either ``engine="scipy"`` or ``engine="h5netcdf"``. If you need a bytes object, simply wrap the return value of ``to_netcdf()`` with ``bytes()``. By `Stephan Hoyer `_. Bug fixes ~~~~~~~~~ - Fix contour plots not normalizing the colors correctly when using for example logarithmic norms. (:issue:`10551`, :pull:`10565`) By `Jimmy Westling `_. - Fix distribution of ``auto_complex`` keyword argument for open_datatree (:issue:`10631`, :pull:`10632`). By `Kai Mühlbauer `_. - Warn instead of raise in case of misconfiguration of ``unlimited_dims`` originating from dataset.encoding, to prevent breaking users workflows (:issue:`10647`, :pull:`10648`). By `Kai Mühlbauer `_. - :py:meth:`DataTree.to_netcdf` and :py:meth:`DataTree.to_zarr` now avoid redundant computation of Dask arrays with cross-group dependencies (:issue:`10637`). By `Stephan Hoyer `_. - :py:meth:`DataTree.to_netcdf` had h5netcdf hard-coded as default (:issue:`10654`). By `Stephan Hoyer `_. Internal Changes ~~~~~~~~~~~~~~~~ - Run ``TestNetCDF4Data`` as ``TestNetCDF4DataTree`` through ``open_datatree`` (:pull:`10632`). By `Kai Mühlbauer `_. .. _whats-new.2025.08.0: v2025.08.0 (Aug 14, 2025) ------------------------- This release brings the ability to load xarray objects asynchronously, write netCDF as bytes, fixes a number of bugs, and starts an important deprecation cycle for changing the default values of keyword arguments for various xarray combining functions. Thanks to the 24 contributors to this release: Alfonso Ladino, Brigitta Sipőcz, Claude, Deepak Cherian, Dimitri Papadopoulos Orfanos, Eric Jansen, Ian Hunt-Isaak, Ilan Gold, Illviljan, Julia Signell, Justus Magin, Kai Mühlbauer, Mathias Hauser, Matthew, Michael Niklas, Miguel Jimenez, Nick Hodgskin, Pratiman, Scott Staniewicz, Spencer Clark, Stephan Hoyer, Tom Nicholas, Yang Yang and jemmajeffree New Features ~~~~~~~~~~~~ - Added :py:meth:`DataTree.prune` method to remove empty nodes while preserving tree structure. Useful for cleaning up DataTree after time-based filtering operations (:issue:`10590`, :pull:`10598`). By `Alfonso Ladino `_. - Added new asynchronous loading methods :py:meth:`Dataset.load_async`, :py:meth:`DataArray.load_async`, :py:meth:`Variable.load_async`. Note that users are expected to limit concurrency themselves - xarray does not internally limit concurrency in any way. (:issue:`10326`, :pull:`10327`) By `Tom Nicholas `_. - :py:meth:`DataTree.to_netcdf` can now write to a file-like object, or return bytes if called without a filepath. (:issue:`10570`) By `Matthew Willson `_. - Added exception handling for invalid files in :py:func:`open_mfdataset`. 
(:issue:`6736`) By `Pratiman Patel `_. Breaking changes ~~~~~~~~~~~~~~~~ - When writing to NetCDF files with groups, Xarray no longer redefines dimensions that have the same size in parent groups (:issue:`10241`). This conforms with `CF Conventions for group scrope `_ but may require adjustments for code that consumes NetCDF files produced by Xarray. By `Stephan Hoyer `_. Deprecations ~~~~~~~~~~~~ - Start a deprecation cycle for changing the default keyword arguments to :py:func:`concat`, :py:func:`merge`, :py:func:`combine_nested`, :py:func:`combine_by_coords`, and :py:func:`open_mfdataset`. Emits a :py:class:`FutureWarning` when using old defaults and new defaults would result in different behavior. Adds an option: ``use_new_combine_kwarg_defaults`` to opt in to new defaults immediately. New values are: - ``data_vars``: None which means ``all`` when concatenating along a new dimension, and ``"minimal"`` when concatenating along an existing dimension - ``coords``: "minimal" - ``compat``: "override" - ``join``: "exact" (:issue:`8778`, :issue:`1385`, :pull:`10062`). By `Julia Signell `_. Bug fixes ~~~~~~~~~ - Fix Pydap Datatree backend testing. Testing now compares elements of (unordered) two sets (before, lists) (:pull:`10525`). By `Miguel Jimenez-Urias `_. - Fix ``KeyError`` when passing a ``dim`` argument different from the default to ``convert_calendar`` (:pull:`10544`). By `Eric Jansen `_. - Fix transpose of boolean arrays read from disk. (:issue:`10536`) By `Deepak Cherian `_. - Fix detection of the ``h5netcdf`` backend. Xarray now selects ``h5netcdf`` if the default ``netCDF4`` engine is not available (:issue:`10401`, :pull:`10557`). By `Scott Staniewicz `_. - Fix :py:func:`merge` to prevent altering original object depending on join value (:pull:`10596`) By `Julia Signell `_. - Ensure ``unlimited_dims`` passed to :py:meth:`xarray.DataArray.to_netcdf`, :py:meth:`xarray.Dataset.to_netcdf` or :py:meth:`xarray.DataTree.to_netcdf` only contains dimensions present in the object; raise ``ValueError`` otherwise (:issue:`10549`, :pull:`10608`). By `Kai Mühlbauer `_. Documentation ~~~~~~~~~~~~~ - Clarify lazy behaviour and eager loading for ``chunks=None`` in :py:func:`~xarray.open_dataset`, :py:func:`~xarray.open_dataarray`, :py:func:`~xarray.open_datatree`, :py:func:`~xarray.open_groups` and :py:func:`~xarray.open_zarr` (:issue:`10612`, :pull:`10627`). By `Kai Mühlbauer `_. Performance ~~~~~~~~~~~ - Speed up non-numeric scalars when calling :py:meth:`Dataset.interp`. (:issue:`10054`, :pull:`10554`) By `Jimmy Westling `_. .. _whats-new.2025.07.1: v2025.07.1 (Jul 09, 2025) ------------------------- This release brings a lot of improvements to flexible indexes functionality, including new classes to ease building of new indexes with custom coordinate transforms (:py:class:`indexes.CoordinateTransformIndex`) and tree-like index structures (:py:class:`indexes.NDPointIndex`). See a `new gallery `_ showing off the possibilities enabled by flexible indexes. Thanks to the 7 contributors to this release: Benoit Bovy, Deepak Cherian, Dhruva Kumar Kaushal, Dimitri Papadopoulos Orfanos, Illviljan, Justus Magin and Tom Nicholas New Features ~~~~~~~~~~~~ - New :py:class:`xarray.indexes.NDPointIndex`, which by default uses :py:class:`scipy.spatial.KDTree` under the hood for the selection of irregular, n-dimensional data (:pull:`10478`). By `Benoit Bovy `_. - Allow skipping the creation of default indexes when opening datasets (:pull:`8051`). By `Benoit Bovy `_ and `Justus Magin `_. 
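For orientation, here is a minimal sketch of how the new :py:class:`~xarray.indexes.NDPointIndex` can be attached to two-dimensional coordinates with :py:meth:`Dataset.set_xindex`; the coordinate and variable names are purely illustrative, and ``scipy`` must be installed for the default KD-tree backend:

.. code:: python

    import numpy as np
    import xarray as xr
    from xarray.indexes import NDPointIndex

    # toy curvilinear grid with two-dimensional coordinate variables
    yy, xx = np.meshgrid(np.linspace(0.0, 1.0, 4), np.linspace(0.0, 1.0, 5), indexing="ij")
    ds = xr.Dataset(
        {"temperature": (("y", "x"), np.random.rand(4, 5))},
        coords={"yc": (("y", "x"), yy), "xc": (("y", "x"), xx)},
    )

    # build a KD-tree backed index over both coordinates, then select the
    # nearest grid point to an arbitrary query location
    ds = ds.set_xindex(("xc", "yc"), NDPointIndex)
    print(ds.sel(xc=0.51, yc=0.26, method="nearest"))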
Bug fixes ~~~~~~~~~ - :py:meth:`Dataset.set_xindex` now raises a helpful error when a custom index creates extra variables that don't match the provided coordinate names, instead of silently ignoring them. The error message suggests using the factory method pattern with :py:meth:`xarray.Coordinates.from_xindex` and :py:meth:`Dataset.assign_coords` for advanced use cases (:issue:`10499`, :pull:`10503`). By `Dhruva Kumar Kaushal `_. Documentation ~~~~~~~~~~~~~ - A `new gallery `_ showing off the possibilities enabled by flexible indexes. Internal Changes ~~~~~~~~~~~~~~~~ - Refactored the ``PandasIndexingAdapter`` and ``CoordinateTransformIndexingAdapter`` internal indexing classes. Coordinate variables that wrap a :py:class:`pandas.RangeIndex`, a :py:class:`pandas.MultiIndex` or a :py:class:`xarray.indexes.CoordinateTransform` are now displayed as lazy variables in the Xarray data reprs (:pull:`10355`). By `Benoit Bovy `_. .. _whats-new.2025.07.0: v2025.07.0 (Jul 3, 2025) ------------------------ This release extends xarray's support for custom index classes, restores support for reading netCDF3 files with SciPy, updates minimum dependencies, and fixes a number of bugs. Thanks to the 17 contributors to this release: Bas Nijholt, Benoit Bovy, Deepak Cherian, Dhruva Kumar Kaushal, Dimitri Papadopoulos Orfanos, Ian Hunt-Isaak, Kai Mühlbauer, Mathias Hauser, Maximilian Roos, Miguel Jimenez, Nick Hodgskin, Scott Henderson, Shuhao Cao, Spencer Clark, Stephan Hoyer, Tom Nicholas and Zsolt Cserna New Features ~~~~~~~~~~~~ - Expose :py:class:`~xarray.indexes.RangeIndex`, and :py:class:`~xarray.indexes.CoordinateTransformIndex` as public api under the ``xarray.indexes`` namespace. By `Deepak Cherian `_. - Support zarr-python's new ``.supports_consolidated_metadata`` store property (:pull:`10457``). by `Tom Nicholas `_. - Better error messages when encoding data to be written to disk fails (:pull:`10464`). By `Stephan Hoyer `_ Breaking changes ~~~~~~~~~~~~~~~~ The minimum versions of some dependencies were changed (:issue:`10417`, :pull:`10438`): By `Dhruva Kumar Kaushal `_. .. list-table:: :header-rows: 1 :widths: 30 20 20 * - Dependency - Old Version - New Version * - Python - 3.10 - 3.11 * - array-api-strict - 1.0 - 1.1 * - boto3 - 1.29 - 1.34 * - bottleneck - 1.3 - 1.4 * - cartopy - 0.22 - 0.23 * - dask-core - 2023.11 - 2024.6 * - distributed - 2023.11 - 2024.6 * - flox - 0.7 - 0.9 * - h5py - 3.8 - 3.11 * - hdf5 - 1.12 - 1.14 * - iris - 3.7 - 3.9 * - lxml - 4.9 - 5.1 * - matplotlib-base - 3.7 - 3.8 * - numba - 0.57 - 0.60 * - numbagg - 0.6 - 0.8 * - numpy - 1.24 - 1.26 * - packaging - 23.2 - 24.1 * - pandas - 2.1 - 2.2 * - pint - 0.22 - 0.24 * - pydap - N/A - 3.5 * - scipy - 1.11 - 1.13 * - sparse - 0.14 - 0.15 * - typing_extensions - 4.8 - Removed * - zarr - 2.16 - 2.18 Bug fixes ~~~~~~~~~ - Fix Pydap test_cmp_local_file for numpy 2.3.0 changes, 1. do always return arrays for all versions and 2. skip astype(str) for numpy >= 2.3.0 for expected data. (:pull:`10421`) By `Kai Mühlbauer `_. - Fix the SciPy backend for netCDF3 files . (:issue:`8909`, :pull:`10376`) By `Deepak Cherian `_. - Check and fix character array string dimension names, issue warnings as needed (:issue:`6352`, :pull:`10395`). By `Kai Mühlbauer `_. - Fix the error message of :py:func:`testing.assert_equal` when two different :py:class:`DataTree` objects are passed (:pull:`10440`). By `Mathias Hauser `_. - Fix :py:func:`testing.assert_equal` with ``check_dim_order=False`` for :py:class:`DataTree` objects (:pull:`10442`). 
By `Mathias Hauser `_. - Fix Pydap backend testing. Now test forces string arrays to dtype "S" (pydap converts them to unicode type by default). Removes conditional to numpy version. (:issue:`10261`, :pull:`10482`) By `Miguel Jimenez-Urias `_. - Fix attribute overwriting bug when decoding encoded :py:class:`numpy.timedelta64` values from disk with a dtype attribute (:issue:`10468`, :pull:`10469`). By `Spencer Clark `_. - Fix default ``"_FillValue"`` dtype coercion bug when encoding :py:class:`numpy.timedelta64` values to an on-disk format that only supports 32-bit integers (:issue:`10466`, :pull:`10469`). By `Spencer Clark `_. Internal Changes ~~~~~~~~~~~~~~~~ - Forward variable name down to coders for AbstractWritableDataStore.encode_variable and subclasses. (:pull:`10395`). By `Kai Mühlbauer `_. .. _whats-new.2025.06.1: v2025.06.1 (Jun 11, 2025) ------------------------- This is quick bugfix release to remove an unintended dependency on ``typing_extensions``. Thanks to the 4 contributors to this release: Alex Merose, Deepak Cherian, Ilan Gold and Simon Perkins Bug fixes ~~~~~~~~~ - Remove dependency on ``typing_extensions`` (:pull:`10413`). By `Simon Perkins `_. .. _whats-new.2025.06.0: v2025.06.0 (Jun 10, 2025) ------------------------- This release brings HTML reprs to the documentation, fixes to flexible Xarray indexes, performance optimizations, more ergonomic seasonal grouping and resampling with new :py:class:`~xarray.groupers.SeasonGrouper` and :py:class:`~xarray.groupers.SeasonResampler` objects, and bugfixes. Thanks to the 33 contributors to this release: Andrecho, Antoine Gibek, Benoit Bovy, Brian Michell, Christine P. Chai, David Huard, Davis Bennett, Deepak Cherian, Dimitri Papadopoulos Orfanos, Elliott Sales de Andrade, Erik, Erik Månsson, Giacomo Caria, Ilan Gold, Illviljan, Jesse Rusak, Jonathan Neuhauser, Justus Magin, Kai Mühlbauer, Kimoon Han, Konstantin Ntokas, Mark Harfouche, Michael Niklas, Nick Hodgskin, Niko Sirmpilatze, Pascal Bourgault, Scott Henderson, Simon Perkins, Spencer Clark, Tom Vo, Trevor James Smith, joseph nowak and micguerr-bopen New Features ~~~~~~~~~~~~ - Switch docs to jupyter-execute sphinx extension for HTML reprs. (:issue:`3893`, :pull:`10383`) By `Scott Henderson `_. - Allow an Xarray index that uses multiple dimensions checking equality with another index for only a subset of those dimensions (i.e., ignoring the dimensions that are excluded from alignment). (:issue:`10243`, :pull:`10293`) By `Benoit Bovy `_. - New :py:class:`~xarray.groupers.SeasonGrouper` and :py:class:`~xarray.groupers.SeasonResampler` objects for ergonomic seasonal aggregation. See the docs on :ref:`seasonal_grouping` or `blog post `_ for more. By `Deepak Cherian `_. - Data corruption issues arising from misaligned Dask and Zarr chunks can now be prevented using the new ``align_chunks`` parameter in :py:meth:`~xarray.DataArray.to_zarr`. This option automatically rechunk the Dask array to align it with the Zarr storage chunks. For now, it is disabled by default, but this could change on the future. (:issue:`9914`, :pull:`10336`) By `Joseph Nowak `_. Documentation ~~~~~~~~~~~~~ - HTML reprs! By `Scott Henderson `_. Bug fixes ~~~~~~~~~ - Fix :py:class:`~xarray.groupers.BinGrouper` when ``labels`` is not specified (:issue:`10284`). By `Deepak Cherian `_. - Allow accessing arbitrary attributes on Pandas ExtensionArrays. By `Deepak Cherian `_. 
- Fix coding empty (zero-size) timedelta64 arrays, ``units`` taking precedence when encoding, fallback to default values when decoding (:issue:`10310`, :pull:`10313`). By `Kai Mühlbauer `_. - Use dtype from intermediate sum instead of source dtype or "int" for casting of count when calculating mean in rolling for correct operations (preserve float dtypes, correct mean of bool arrays) (:issue:`10340`, :pull:`10341`). By `Kai Mühlbauer `_. - Improve the html ``repr`` of Xarray objects (dark mode, icons and variable attribute / data dropdown sections). (:pull:`10353`, :pull:`10354`) By `Benoit Bovy `_. - Raise an error when attempting to encode :py:class:`numpy.datetime64` values prior to the Gregorian calendar reform date of 1582-10-15 with a ``"standard"`` or ``"gregorian"`` calendar. Previously we would warn and encode these as :py:class:`cftime.DatetimeGregorian` objects, but it is not clear that this is the user's intent, since this implicitly converts the calendar of the datetimes from ``"proleptic_gregorian"`` to ``"gregorian"`` and prevents round-tripping them as :py:class:`numpy.datetime64` values (:pull:`10352`). By `Spencer Clark `_. - Avoid unsafe casts from float to unsigned int in CFMaskCoder (:issue:`9815`, :pull:`9964`). By ` Elliott Sales de Andrade `_. Performance ~~~~~~~~~~~ - Lazily indexed arrays now use less memory to store keys by avoiding copies in :py:class:`~xarray.indexing.VectorizedIndexer` and :py:class:`~xarray.indexing.OuterIndexer` (:issue:`10316`). By `Jesse Rusak `_. - Fix performance regression in interp where more data was loaded than was necessary. (:issue:`10287`). By `Deepak Cherian `_. - Speed up encoding of :py:class:`cftime.datetime` objects by roughly a factor of three (:pull:`8324`). By `Antoine Gibek `_. .. _whats-new.2025.04.0: v2025.04.0 (Apr 29, 2025) ------------------------- This release brings bug fixes, better support for extension arrays including returning a :py:class:`pandas.IntervalArray` from ``groupby_bins``, and performance improvements. Thanks to the 24 contributors to this release: Alban Farchi, Andrecho, Benoit Bovy, Deepak Cherian, Dimitri Papadopoulos Orfanos, Florian Jetter, Giacomo Caria, Ilan Gold, Illviljan, Joren Hammudoglu, Julia Signell, Kai Muehlbauer, Kai Mühlbauer, Mathias Hauser, Mattia Almansi, Michael Sumner, Miguel Jimenez, Nick Hodgskin (🦎 Vecko), Pascal Bourgault, Philip Chmielowiec, Scott Henderson, Spencer Clark, Stephan Hoyer and Tom Nicholas New Features ~~~~~~~~~~~~ - By default xarray now encodes :py:class:`numpy.timedelta64` values by converting to :py:class:`numpy.int64` values and storing ``"dtype"`` and ``"units"`` attributes consistent with the dtype of the in-memory :py:class:`numpy.timedelta64` values, e.g. ``"timedelta64[s]"`` and ``"seconds"`` for second-resolution timedeltas. These values will always be decoded to timedeltas without a warning moving forward. Timedeltas encoded via the previous approach can still be roundtripped exactly, but in the future will not be decoded by default (:issue:`1621`, :issue:`10099`, :pull:`10101`). By `Spencer Clark `_. - Added `scipy-stubs `_ to the ``xarray[types]`` dependencies. By `Joren Hammudoglu `_. - Added a :mod:`xarray.typing` module to expose selected public types for use in downstream libraries and static type checking. (:issue:`10179`, :pull:`10215`). By `Michele Guerreri `_. - Improved compatibility with OPeNDAP DAP4 data model for backend engine ``pydap``. This includes ``datatree`` support, and removing slashes from dimension names. 
By `Miguel Jimenez-Urias `_. - Allow assigning index coordinates with non-array dimension(s) in a :py:class:`DataArray` by overriding :py:meth:`Index.should_add_coord_to_array`. For example, this enables support for CF boundaries coordinate (e.g., ``time(time)`` and ``time_bnds(time, nbnd)``) in a DataArray (:pull:`10137`). By `Benoit Bovy `_. - Improved support pandas categorical extension as indices (i.e., :py:class:`pandas.IntervalIndex`). (:issue:`9661`, :pull:`9671`) By `Ilan Gold `_. - Improved checks and errors raised when trying to align objects with conflicting indexes. It is now possible to align objects each with multiple indexes sharing common dimension(s). (:issue:`7695`, :pull:`10251`) By `Benoit Bovy `_. Breaking changes ~~~~~~~~~~~~~~~~ - The minimum versions of some dependencies were changed ===================== ========= ======= Package Old New ===================== ========= ======= pydap 3.4 3.5.0 ===================== ========= ======= - Reductions with ``groupby_bins`` or those that involve :py:class:`xarray.groupers.BinGrouper` now return objects indexed by :py:meth:`pandas.IntervalArray` objects, instead of numpy object arrays containing tuples. This change enables interval-aware indexing of such Xarray objects. (:pull:`9671`). By `Ilan Gold `_. - Remove ``PandasExtensionArrayIndex`` from :py:attr:`xarray.Variable.data` when the attribute is a :py:class:`pandas.api.extensions.ExtensionArray` (:pull:`10263`). By `Ilan Gold `_. - The html and text ``repr`` for ``DataTree`` are now truncated. Up to 6 children are displayed for each node -- the first 3 and the last 3 children -- with a ``...`` between them. The number of children to include in the display is configurable via options. For instance use ``set_options(display_max_children=8)`` to display 8 children rather than the default 6. (:pull:`10139`) By `Julia Signell `_. Deprecations ~~~~~~~~~~~~ - The deprecation cycle for the ``eagerly_compute_group`` kwarg to ``groupby`` and ``groupby_bins`` is now complete. By `Deepak Cherian `_. Bug fixes ~~~~~~~~~ - :py:meth:`~xarray.Dataset.to_stacked_array` now uses dimensions in order of appearance. This fixes the issue where using :py:meth:`~xarray.Dataset.transpose` before :py:meth:`~xarray.Dataset.to_stacked_array` had no effect. (Mentioned in :issue:`9921`) - Enable ``keep_attrs`` in ``DatasetView.map`` relevant for :py:func:`map_over_datasets` (:pull:`10219`) By `Mathias Hauser `_. - Variables with no temporal dimension are left untouched by :py:meth:`~xarray.Dataset.convert_calendar`. (:issue:`10266`, :pull:`10268`) By `Pascal Bourgault `_. - Enable ``chunk_key_encoding`` in :py:meth:`~xarray.Dataset.to_zarr` for Zarr v2 Datasets (:pull:`10274`) By `BrianMichell `_. Documentation ~~~~~~~~~~~~~ - Fix references to core classes in docs (:issue:`10195`, :pull:`10207`). By `Mattia Almansi `_. - Fix references to point to updated pydap documentation (:pull:`10182`). By `Miguel Jimenez-Urias `_. - Switch to `pydata-sphinx-theme `_ from `sphinx-book-theme `_ (:pull:`8708`). By `Scott Henderson `_. - Add a dedicated 'Complex Numbers' sections to the User Guide (:issue:`10213`, :pull:`10235`). By `Andre Wendlinger `_. Internal Changes ~~~~~~~~~~~~~~~~ - Avoid stacking when grouping by a chunked array. This can be a large performance improvement. By `Deepak Cherian `_. - The implementation of ``Variable.set_dims`` has changed to use array indexing syntax instead of ``np.broadcast_to`` to perform dimension expansions where all new dimensions have a size of 1. 
This should improve compatibility with duck arrays that do not support broadcasting (:issue:`9462`, :pull:`10277`). By `Mark Harfouche `_. .. _whats-new.2025.03.1: v2025.03.1 (Mar 30, 2025) ------------------------- This release brings the ability to specify ``fill_value`` and ``write_empty_chunks`` for Zarr V3 stores, and a few bug fixes. Thanks to the 10 contributors to this release: Andrecho, Deepak Cherian, Ian Hunt-Isaak, Karl Krauth, Mathias Hauser, Maximilian Roos, Nick Hodgskin (🦎 Vecko), Spencer Clark, Tom Nicholas and wpbonelli. New Features ~~~~~~~~~~~~ - Allow setting a ``fill_value`` for Zarr format 3 arrays. Specify ``fill_value`` in ``encoding`` as usual. (:issue:`10064`). By `Deepak Cherian `_. - Added :py:class:`indexes.RangeIndex` as an alternative, memory saving Xarray index representing a 1-dimensional bounded interval with evenly spaced floating values (:issue:`8473`, :pull:`10076`). By `Benoit Bovy `_. Breaking changes ~~~~~~~~~~~~~~~~ - Explicitly forbid appending a :py:class:`~xarray.DataTree` to zarr using :py:meth:`~xarray.DataTree.to_zarr` with ``append_dim``, because the expected behaviour is currently undefined. (:issue:`9858`, :pull:`10156`) By `Tom Nicholas `_. Bug fixes ~~~~~~~~~ - Update the parameters of :py:meth:`~xarray.DataArray.to_zarr` to match :py:meth:`~xarray.Dataset.to_zarr`. This fixes the issue where using the ``zarr_version`` parameter would raise a deprecation warning telling the user to use a non-existent ``zarr_format`` parameter instead. (:issue:`10163`, :pull:`10164`) By `Karl Krauth `_. - :py:meth:`DataTree.sel` and :py:meth:`DataTree.isel` display the path of the first failed node again (:pull:`10154`). By `Mathias Hauser `_. - Fix grouped and resampled ``first``, ``last`` with datetimes (:issue:`10169`, :pull:`10173`) By `Deepak Cherian `_. - FacetGrid plots now include units in their axis labels when available (:issue:`10184`, :pull:`10185`) By `Andre Wendlinger `_. .. _whats-new.2025.03.0: v2025.03.0 (Mar 20, 2025) ------------------------- This release brings tested support for Python 3.13, support for reading Zarr V3 datasets into a :py:class:`~xarray.DataTree`, significant improvements to datetime & timedelta encoding/decoding, and improvements to the :py:class:`~xarray.DataTree` API; in addition to the usual bug fixes and other improvements. Thanks to the 26 contributors to this release: Alfonso Ladino, Benoit Bovy, Chuck Daniels, Deepak Cherian, Eni, Florian Jetter, Ian Hunt-Isaak, Jan, Joe Hamman, Josh Kihm, Julia Signell, Justus Magin, Kai Mühlbauer, Kobe Vandelanotte, Mathias Hauser, Max Jones, Maximilian Roos, Oliver Watt-Meyer, Sam Levang, Sander van Rijn, Spencer Clark, Stephan Hoyer, Tom Nicholas, Tom White, Vecko and maddogghoek New Features ~~~~~~~~~~~~ - Added :py:meth:`tutorial.open_datatree` and :py:meth:`tutorial.load_datatree` By `Eni Awowale `_. - Added :py:meth:`DataTree.filter_like` to conveniently restructure a DataTree like another DataTree (:issue:`10096`, :pull:`10097`). By `Kobe Vandelanotte `_. - Added :py:meth:`Coordinates.from_xindex` as convenience for creating a new :py:class:`Coordinates` object directly from an existing Xarray index object if the latter supports it (:pull:`10000`) By `Benoit Bovy `_. - Allow kwargs in :py:meth:`DataTree.map_over_datasets` and :py:func:`map_over_datasets` (:issue:`10009`, :pull:`10012`). By `Kai Mühlbauer `_. - support python 3.13 (no free-threading) (:issue:`9664`, :pull:`9681`) By `Justus Magin `_. 
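A minimal sketch of the :py:meth:`Coordinates.from_xindex` convenience constructor added in this release, paired here with a plain :py:class:`~xarray.indexes.PandasIndex`; the index values and dimension name are arbitrary placeholders:

.. code-block:: python

    import pandas as pd
    import xarray as xr
    from xarray.indexes import PandasIndex

    # build the index first, then derive coordinate variables from it
    index = PandasIndex(pd.Index([10.0, 20.0, 30.0]), dim="x")
    coords = xr.Coordinates.from_xindex(index)

    # the resulting "x" coordinate is backed by the index object
    ds = xr.Dataset(coords=coords)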
- Added experimental support for coordinate transforms (not ready for public use yet!) (:pull:`9543`) By `Benoit Bovy `_. - Similar to our :py:class:`numpy.datetime64` encoding path, automatically modify the units when an integer dtype is specified during eager cftime encoding, but the specified units would not allow for an exact round trip (:pull:`9498`). By `Spencer Clark `_. - Support reading to `GPU memory with Zarr `_ (:pull:`10078`). By `Deepak Cherian `_. Performance ~~~~~~~~~~~ - :py:meth:`DatasetGroupBy.first` and :py:meth:`DatasetGroupBy.last` can now use ``flox`` if available. (:issue:`9647`) By `Deepak Cherian `_. Breaking changes ~~~~~~~~~~~~~~~~ - Rolled back code that would attempt to catch integer overflow when encoding times with small integer dtypes (:issue:`8542`), since it was inconsistent with xarray's handling of standard integers, and interfered with encoding times with small integer dtypes and missing values (:pull:`9498`). By `Spencer Clark `_. - Warn instead of raise if phony_dims are detected when using h5netcdf-backend and ``phony_dims=None`` (:issue:`10049`, :pull:`10058`) By `Kai Mühlbauer `_. Deprecations ~~~~~~~~~~~~ - Deprecate :py:func:`~xarray.cftime_range` in favor of :py:func:`~xarray.date_range` with ``use_cftime=True`` (:issue:`9886`, :pull:`10024`). By `Josh Kihm `_. - Move from phony_dims=None to phony_dims="access" for h5netcdf-backend(:issue:`10049`, :pull:`10058`) By `Kai Mühlbauer `_. Bug fixes ~~~~~~~~~ - Fix ``open_datatree`` incompatibilities with Zarr-Python V3 and refactor ``TestZarrDatatreeIO`` accordingly (:issue:`9960`, :pull:`10020`). By `Alfonso Ladino-Rincon `_. - Default to resolution-dependent optimal integer encoding units when saving chunked non-nanosecond :py:class:`numpy.datetime64` or :py:class:`numpy.timedelta64` arrays to disk. Previously units of "nanoseconds" were chosen by default, which are optimal for nanosecond-resolution times, but not for times with coarser resolution. By `Spencer Clark `_ (:pull:`10017`). - Use mean of min/max years as offset in calculation of datetime64 mean (:issue:`10019`, :pull:`10035`). By `Kai Mühlbauer `_. - Fix ``DataArray().drop_attrs(deep=False)`` and add support for attrs to ``DataArray()._replace()``. (:issue:`10027`, :pull:`10030`). By `Jan Haacker `_. - Fix bug preventing encoding times with missing values with small integer dtype (:issue:`9134`, :pull:`9498`). By `Spencer Clark `_. - More robustly raise an error when lazily encoding times and an integer dtype is specified with units that do not allow for an exact round trip (:pull:`9498`). By `Spencer Clark `_. - Prevent false resolution change warnings from being emitted when decoding timedeltas encoded with floating point values, and make it clearer how to silence this warning message in the case that it is rightfully emitted (:issue:`10071`, :pull:`10072`). By `Spencer Clark `_. - Fix ``isel`` for multi-coordinate Xarray indexes (:issue:`10063`, :pull:`10066`). By `Benoit Bovy `_. - Fix dask tokenization when opening each node in :py:func:`xarray.open_datatree` (:issue:`10098`, :pull:`10100`). By `Sam Levang `_. - Improve handling of dtype and NaT when encoding/decoding masked and packaged datetimes and timedeltas (:issue:`8957`, :pull:`10050`). By `Kai Mühlbauer `_. Documentation ~~~~~~~~~~~~~ - Better expose the :py:class:`Coordinates` class in API reference (:pull:`10000`) By `Benoit Bovy `_. .. 
_whats-new.2025.01.2: v2025.01.2 (Jan 31, 2025) ------------------------- This release brings non-nanosecond datetime and timedelta resolution to xarray, sharded reading in zarr, suggestion of correct names when trying to access non-existent data variables, and bug fixes! Thanks to the 16 contributors to this release: Deepak Cherian, Elliott Sales de Andrade, Jacob Prince-Bieker, Jimmy Westling, Joe Hamman, Joseph Nowak, Justus Magin, Kai Mühlbauer, Mattia Almansi, Michael Niklas, Roelof Rietbroek, Salaheddine EL FARISSI, Sam Levang, Spencer Clark, Stephan Hoyer and Tom Nicholas In the last couple of releases xarray has been prepared for allowing non-nanosecond datetime and timedelta resolution. The code had to be changed and adapted in numerous places, affecting especially the test suite. The documentation has been updated accordingly and a new internal chapter on :ref:`internals.timecoding` has been added. To make the transition as smooth as possible this is designed to be fully backwards compatible, keeping the current default of ``'ns'`` resolution on decoding. To opt into decoding to other resolutions (``'us'``, ``'ms'`` or ``'s'``) an instance of the newly public :py:class:`coders.CFDatetimeCoder` class can be passed through the ``decode_times`` keyword argument (see also :ref:`internals.default_timeunit`): .. code-block:: python coder = xr.coders.CFDatetimeCoder(time_unit="s") ds = xr.open_dataset(filename, decode_times=coder) Similar control of the resolution of decoded timedeltas can be achieved through passing a :py:class:`coders.CFTimedeltaCoder` instance to the ``decode_timedelta`` keyword argument: .. code-block:: python coder = xr.coders.CFTimedeltaCoder(time_unit="s") ds = xr.open_dataset(filename, decode_timedelta=coder) though by default timedeltas will be decoded to the same ``time_unit`` as datetimes. There might be slight changes when encoding/decoding times as some warning and error messages have been removed or rewritten. Xarray will now also allow non-nanosecond datetimes (with ``'us'``, ``'ms'`` or ``'s'`` resolution) when creating DataArrays from scratch, picking the lowest possible resolution: .. code:: python xr.DataArray(data=[np.datetime64("2000-01-01", "D")], dims=("time",)) In a future release the current default of ``'ns'`` resolution on decoding will eventually be deprecated. New Features ~~~~~~~~~~~~ - Relax nanosecond resolution restriction in CF time coding and permit :py:class:`numpy.datetime64` or :py:class:`numpy.timedelta64` dtype arrays with ``"s"``, ``"ms"``, ``"us"``, or ``"ns"`` resolution throughout xarray (:issue:`7493`, :pull:`9618`, :pull:`9977`, :pull:`9966`, :pull:`9999`). By `Kai Mühlbauer `_ and `Spencer Clark `_. - Enable the ``compute=False`` option in :py:meth:`DataTree.to_zarr`. (:pull:`9958`). By `Sam Levang `_. - Improve the error message raised when no key matches the available variables in a dataset. (:pull:`9943`) By `Jimmy Westling `_. - Added a ``time_unit`` argument to :py:meth:`CFTimeIndex.to_datetimeindex`. Note that in a future version of xarray, :py:meth:`CFTimeIndex.to_datetimeindex` will return a microsecond-resolution :py:class:`pandas.DatetimeIndex` instead of a nanosecond-resolution :py:class:`pandas.DatetimeIndex` (:pull:`9965`). By `Spencer Clark `_ and `Kai Mühlbauer `_. - Adds shards to the list of valid_encodings in the zarr backend, so that sharded Zarr V3s can be written (:issue:`9947`, :pull:`9948`).
By `Jacob Prince_Bieker `_ Deprecations ~~~~~~~~~~~~ - In a future version of xarray decoding of variables into :py:class:`numpy.timedelta64` values will be disabled by default. To silence warnings associated with this, set ``decode_timedelta`` to ``True``, ``False``, or a :py:class:`coders.CFTimedeltaCoder` instance when opening data (:issue:`1621`, :pull:`9966`). By `Spencer Clark `_. Bug fixes ~~~~~~~~~ - Fix :py:meth:`DataArray.ffill`, :py:meth:`DataArray.bfill`, :py:meth:`Dataset.ffill` and :py:meth:`Dataset.bfill` when the limit is bigger than the chunksize (:issue:`9939`). By `Joseph Nowak `_. - Fix issues related to Pandas v3 ("us" vs. "ns" for python datetime, copy on write) and handling of 0d-numpy arrays in datetime/timedelta decoding (:pull:`9953`). By `Kai Mühlbauer `_. - Remove dask-expr from CI runs, add "pyarrow" dask dependency to windows CI runs, fix related tests (:issue:`9962`, :pull:`9971`). By `Kai Mühlbauer `_. - Use zarr-fixture to prevent thread leakage errors (:pull:`9967`). By `Kai Mühlbauer `_. - Fix weighted ``polyfit`` for arrays with more than two dimensions (:issue:`9972`, :pull:`9974`). By `Mattia Almansi `_. - Preserve order of variables in :py:func:`xarray.combine_by_coords` (:issue:`8828`, :pull:`9070`). By `Kai Mühlbauer `_. - Cast ``numpy`` scalars to arrays in :py:meth:`NamedArray.from_arrays` (:issue:`10005`, :pull:`10008`) By `Justus Magin `_. Documentation ~~~~~~~~~~~~~ - A chapter on :ref:`internals.timecoding` is added to the internal section (:pull:`9618`). By `Kai Mühlbauer `_. - Clarified xarray's policy on API stability in the FAQ. (:issue:`9854`, :pull:`9855`) By `Tom Nicholas `_. Internal Changes ~~~~~~~~~~~~~~~~ - Updated time coding tests to assert exact equality rather than equality with a tolerance, since xarray's minimum supported version of cftime is greater than 1.2.1 (:pull:`9961`). By `Spencer Clark `_. .. _whats-new.2025.01.1: v2025.01.1 (Jan 9, 2025) ------------------------ This is a quick release to bring compatibility with the Zarr V3 release. It also includes an update to the time decoding infrastructure as a step toward `enabling non-nanosecond datetime support `_! New Features ~~~~~~~~~~~~ - Split out :py:class:`coders.CFDatetimeCoder` as public API in ``xr.coders``, make ``decode_times`` keyword argument consume :py:class:`coders.CFDatetimeCoder` (:pull:`9901`). By `Kai Mühlbauer `_. Deprecations ~~~~~~~~~~~~ - Time decoding related kwarg ``use_cftime`` is deprecated. Use keyword argument ``decode_times=CFDatetimeCoder(use_cftime=True)`` in :py:func:`~xarray.open_dataset`, :py:func:`~xarray.open_dataarray`, :py:func:`~xarray.open_datatree`, :py:func:`~xarray.open_groups`, :py:func:`~xarray.open_zarr` and :py:func:`~xarray.decode_cf` instead (:pull:`9901`). By `Kai Mühlbauer `_. .. _whats-new.2025.01.0: v.2025.01.0 (Jan 3, 2025) ------------------------- This release brings much improved read performance with Zarr arrays (without consolidated metadata), better support for additional array types, as well as bugfixes and performance improvements. 
Thanks to the 20 contributors to this release: Bruce Merry, Davis Bennett, Deepak Cherian, Dimitri Papadopoulos Orfanos, Florian Jetter, Illviljan, Janukan Sivajeyan, Justus Magin, Kai Germaschewski, Kai Mühlbauer, Max Jones, Maximilian Roos, Michael Niklas, Patrick Peglar, Sam Levang, Scott Huberty, Spencer Clark, Stephan Hoyer, Tom Nicholas and Vecko New Features ~~~~~~~~~~~~ - Improve the error message raised when using chunked-array methods if no chunk manager is available or if the requested chunk manager is missing (:pull:`9676`) By `Justus Magin `_. (:pull:`9676`) - Better support wrapping additional array types (e.g. ``cupy`` or ``jax``) by calling generalized duck array operations throughout more xarray methods. (:issue:`7848`, :pull:`9798`). By `Sam Levang `_. - Better performance for reading Zarr arrays in the ``ZarrStore`` class by caching the state of Zarr storage and avoiding redundant IO operations. By default, ``ZarrStore`` stores a snapshot of names and metadata of the in-scope Zarr arrays; this cache is then used when iterating over those Zarr arrays, which avoids IO operations and thereby reduces latency. (:issue:`9853`, :pull:`9861`). By `Davis Bennett `_. - Add ``unit`` - keyword argument to :py:func:`date_range` and ``microsecond`` parsing to iso8601-parser (:pull:`9885`). By `Kai Mühlbauer `_. Breaking changes ~~~~~~~~~~~~~~~~ - Methods including ``dropna``, ``rank``, ``idxmax``, ``idxmin`` require non-dimension arguments to be passed as keyword arguments. The previous behavior, which allowed ``.idxmax('foo', 'all')`` was too easily confused with ``'all'`` being a dimension. The updated equivalent is ``.idxmax('foo', how='all')``. The previous behavior was deprecated in v2023.10.0. By `Maximilian Roos `_. Deprecations ~~~~~~~~~~~~ - Finalize deprecation of ``closed`` parameters of :py:func:`cftime_range` and :py:func:`date_range` (:pull:`9882`). By `Kai Mühlbauer `_. Performance ~~~~~~~~~~~ - Better preservation of chunksizes in :py:meth:`Dataset.idxmin` and :py:meth:`Dataset.idxmax` (:issue:`9425`, :pull:`9800`). By `Deepak Cherian `_. - Much better implementation of vectorized interpolation for dask arrays (:pull:`9881`). By `Deepak Cherian `_. Bug fixes ~~~~~~~~~ - Fix type annotations for ``get_axis_num``. (:issue:`9822`, :pull:`9827`). By `Bruce Merry `_. - Fix unintended load on datasets when calling :py:meth:`DataArray.plot.scatter` (:pull:`9818`). By `Jimmy Westling `_. - Fix interpolation when non-numeric coordinate variables are present (:issue:`8099`, :issue:`9839`). By `Deepak Cherian `_. Internal Changes ~~~~~~~~~~~~~~~~ - Move non-CF related ``ensure_dtype_not_object`` from conventions to backends (:pull:`9828`). By `Kai Mühlbauer `_. - Move handling of scalar datetimes into ``_possibly_convert_objects`` within ``as_compatible_data``. This is consistent with how lists of these objects will be converted (:pull:`9900`). By `Kai Mühlbauer `_. - Move ISO-8601 parser from coding.cftimeindex to coding.times to make it available there (prevents circular import), add capability to parse negative and/or five-digit years (:pull:`9899`). By `Kai Mühlbauer `_. - Refactor of time coding to prepare for relaxing nanosecond restriction (:pull:`9906`). By `Kai Mühlbauer `_. .. 
_whats-new.2024.11.0: v.2024.11.0 (Nov 22, 2024) -------------------------- This release brings better support for wrapping JAX arrays and Astropy Quantity objects, :py:meth:`DataTree.persist`, algorithmic improvements to many methods with dask (:py:meth:`Dataset.polyfit`, :py:meth:`Dataset.ffill`, :py:meth:`Dataset.bfill`, rolling reductions), and bug fixes. Thanks to the 22 contributors to this release: Benoit Bovy, Deepak Cherian, Dimitri Papadopoulos Orfanos, Holly Mandel, James Bourbeau, Joe Hamman, Justus Magin, Kai Mühlbauer, Lukas Trippe, Mathias Hauser, Maximilian Roos, Michael Niklas, Pascal Bourgault, Patrick Hoefler, Sam Levang, Sarah Charlotte Johnson, Scott Huberty, Stephan Hoyer, Tom Nicholas, Virgile Andreani, joseph nowak and tvo New Features ~~~~~~~~~~~~ - Added :py:meth:`DataTree.persist` method (:issue:`9675`, :pull:`9682`). By `Sam Levang `_. - Added ``write_inherited_coords`` option to :py:meth:`DataTree.to_netcdf` and :py:meth:`DataTree.to_zarr` (:pull:`9677`). By `Stephan Hoyer `_. - Support lazy grouping by dask arrays, and allow specifying ordered groups with ``UniqueGrouper(labels=["a", "b", "c"])`` (:issue:`2852`, :issue:`757`). By `Deepak Cherian `_. - Add new ``automatic_rechunk`` kwarg to :py:meth:`DataArrayRolling.construct` and :py:meth:`DatasetRolling.construct`. This is only useful on ``dask>=2024.11.0`` (:issue:`9550`). By `Deepak Cherian `_. - Optimize ffill, bfill with dask when limit is specified (:pull:`9771`). By `Joseph Nowak `_, and `Patrick Hoefler `_. - Allow wrapping ``np.ndarray`` subclasses, e.g. ``astropy.units.Quantity`` (:issue:`9704`, :pull:`9760`). By `Sam Levang `_ and `Tien Vo `_. - Optimize :py:meth:`DataArray.polyfit` and :py:meth:`Dataset.polyfit` with dask, when used with arrays with more than two dimensions. (:issue:`5629`). By `Deepak Cherian `_. - Support for directly opening remote files as string paths (for example, ``s3://bucket/data.nc``) with ``fsspec`` when using the ``h5netcdf`` engine (:issue:`9723`, :pull:`9797`). By `James Bourbeau `_. - Re-implement the :py:mod:`ufuncs` module, which now dynamically dispatches to the underlying array's backend. Provides better support for certain wrapped array types like ``jax.numpy.ndarray``. (:issue:`7848`, :pull:`9776`). By `Sam Levang `_. - Speed up loading of large zarr stores using dask arrays. (:issue:`8902`) By `Deepak Cherian `_. Breaking Changes ~~~~~~~~~~~~~~~~ - The minimum versions of some dependencies were changed ===================== ========= ======= Package Old New ===================== ========= ======= boto3 1.28 1.29 dask-core 2023.9 2023.11 distributed 2023.9 2023.11 h5netcdf 1.2 1.3 numbagg 0.2.1 0.6 typing_extensions 4.7 4.8 ===================== ========= ======= Deprecations ~~~~~~~~~~~~ - Grouping by a chunked array (e.g. dask or cubed) currently eagerly loads that variable in to memory. This behaviour is deprecated. If eager loading was intended, please load such arrays manually using ``.load()`` or ``.compute()``. Else pass ``eagerly_compute_group=False``, and provide expected group labels using the ``labels`` kwarg to a grouper object such as :py:class:`grouper.UniqueGrouper` or :py:class:`grouper.BinGrouper`. Bug fixes ~~~~~~~~~ - Fix inadvertent deep-copying of child data in DataTree (:issue:`9683`, :pull:`9684`). By `Stephan Hoyer `_. - Avoid including parent groups when writing DataTree subgroups to Zarr or netCDF (:pull:`9682`). By `Stephan Hoyer `_. 
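A minimal sketch of specifying ordered group labels with :py:class:`~xarray.groupers.UniqueGrouper`, as described in the New Features and Deprecations entries above; the tiny in-memory dataset is a placeholder for the (possibly dask-backed) data those entries target:

.. code-block:: python

    import xarray as xr
    from xarray.groupers import UniqueGrouper

    ds = xr.Dataset(
        {"value": ("points", [1.0, 2.0, 3.0, 4.0])},
        coords={"station": ("points", ["a", "b", "a", "c"])},
    )

    # the explicit ``labels`` fix the group order; for chunked group variables
    # the Deprecations entry above also suggests passing eagerly_compute_group=False
    result = ds.groupby(station=UniqueGrouper(labels=["a", "b", "c"])).mean()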
- Fix regression in the interoperability of :py:meth:`DataArray.polyfit` and :py:meth:`xr.polyval` for date-time coordinates. (:pull:`9691`). By `Pascal Bourgault `_. - Fix CF decoding of ``grid_mapping`` to allow all possible formats, add tests (:issue:`9761`, :pull:`9765`). By `Kai Mühlbauer `_. - Add ``User-Agent`` to request-headers when retrieving tutorial data (:issue:`9774`, :pull:`9782`) By `Kai Mühlbauer `_. Documentation ~~~~~~~~~~~~~ - Mention attribute peculiarities in docs/docstrings (:issue:`4798`, :pull:`9700`). By `Kai Mühlbauer `_. Internal Changes ~~~~~~~~~~~~~~~~ - ``persist`` methods now route through the :py:class:`xr.namedarray.parallelcompat.ChunkManagerEntrypoint` (:pull:`9682`). By `Sam Levang `_. .. _whats-new.2024.10.0: v2024.10.0 (Oct 24th, 2024) --------------------------- This release brings official support for ``xarray.DataTree``, and compatibility with zarr-python v3! Aside from these two huge features, it also improves support for vectorised interpolation and fixes various bugs. Thanks to the 31 contributors to this release: Alfonso Ladino, DWesl, Deepak Cherian, Eni, Etienne Schalk, Holly Mandel, Ilan Gold, Illviljan, Joe Hamman, Justus Magin, Kai Mühlbauer, Karl Krauth, Mark Harfouche, Martey Dodoo, Matt Savoie, Maximilian Roos, Patrick Hoefler, Peter Hill, Renat Sibgatulin, Ryan Abernathey, Spencer Clark, Stephan Hoyer, Tom Augspurger, Tom Nicholas, Vecko, Virgile Andreani, Yvonne Fröhlich, carschandler, joseph nowak, mgunyho and owenlittlejohns New Features ~~~~~~~~~~~~ - ``DataTree`` related functionality is now exposed in the main ``xarray`` public API. This includes: ``xarray.DataTree``, ``xarray.open_datatree``, ``xarray.open_groups``, ``xarray.map_over_datasets``, ``xarray.group_subtrees``, ``xarray.register_datatree_accessor`` and ``xarray.testing.assert_isomorphic``. By `Owen Littlejohns `_, `Eni Awowale `_, `Matt Savoie `_, `Stephan Hoyer `_, `Tom Nicholas `_, `Justus Magin `_, and `Alfonso Ladino `_. - A migration guide for users of the prototype `xarray-contrib/datatree repository `_ has been added, and can be found in the ``DATATREE_MIGRATION_GUIDE.md`` file in the repository root. By `Tom Nicholas `_. - Support for Zarr-Python 3 (:issue:`95515`, :pull:`9552`). By `Tom Augspurger `_, `Ryan Abernathey `_ and `Joe Hamman `_. - Added zarr backends for :py:func:`open_groups` (:issue:`9430`, :pull:`9469`). By `Eni Awowale `_. - Added support for vectorized interpolation using additional interpolators from the ``scipy.interpolate`` module (:issue:`9049`, :pull:`9526`). By `Holly Mandel `_. - Implement handling of complex numbers (netcdf4/h5netcdf) and enums (h5netcdf) (:issue:`9246`, :issue:`3297`, :pull:`9509`). By `Kai Mühlbauer `_. - Fix passing missing arguments to when opening hdf5 and netCDF4 datatrees (:issue:`9427`, :pull:`9428`). By `Alfonso Ladino `_. Bug fixes ~~~~~~~~~ - Make illegal path-like variable names when constructing a DataTree from a Dataset (:issue:`9339`, :pull:`9378`) By `Etienne Schalk `_. - Work around `upstream pandas issue `_ to ensure that we can decode times encoded with small integer dtype values (e.g. ``np.int32``) in environments with NumPy 2.0 or greater without needing to fall back to cftime (:pull:`9518`). By `Spencer Clark `_. - Fix bug when encoding times with missing values as floats in the case when the non-missing times could in theory be encoded with integers (:issue:`9488`, :pull:`9497`). By `Spencer Clark `_. - Fix a few bugs affecting groupby reductions with ``flox``. 
(:issue:`8090`, :issue:`9398`, :issue:`9648`). - Fix a few bugs affecting groupby reductions with ``flox``. (:issue:`8090`, :issue:`9398`). By `Deepak Cherian `_. - Fix the safe_chunks validation option on the to_zarr method (:issue:`5511`, :pull:`9559`). By `Joseph Nowak `_. - Fix binning by multiple variables where some bins have no observations. (:issue:`9630`). By `Deepak Cherian `_. - Fix issue where polyfit wouldn't handle non-dimension coordinates. (:issue:`4375`, :pull:`9369`) By `Karl Krauth `_. Documentation ~~~~~~~~~~~~~ - Migrate documentation for ``datatree`` into main ``xarray`` documentation (:pull:`9033`). For information on previous ``datatree`` releases, please see: `datatree's historical release notes `_. By `Owen Littlejohns `_, `Matt Savoie `_, and `Tom Nicholas `_. Internal Changes ~~~~~~~~~~~~~~~~ .. _whats-new.2024.09.0: v2024.09.0 (Sept 11, 2024) -------------------------- This release drops support for Python 3.9, and adds support for grouping by :ref:`multiple arrays `, while providing numerous performance improvements and bug fixes. Thanks to the 33 contributors to this release: Alfonso Ladino, Andrew Scherer, Anurag Nayak, David Hoese, Deepak Cherian, Diogo Teles Sant'Anna, Dom, Elliott Sales de Andrade, Eni, Holly Mandel, Illviljan, Jack Kelly, Julius Busecke, Justus Magin, Kai Mühlbauer, Manish Kumar Gupta, Matt Savoie, Maximilian Roos, Michele Claus, Miguel Jimenez, Niclas Rieger, Pascal Bourgault, Philip Chmielowiec, Spencer Clark, Stephan Hoyer, Tao Xin, Tiago Sanona, TimothyCera-NOAA, Tom Nicholas, Tom White, Virgile Andreani, oliverhiggs and tiago New Features ~~~~~~~~~~~~ - Add :py:attr:`~core.accessor_dt.DatetimeAccessor.days_in_year` and :py:attr:`~core.accessor_dt.DatetimeAccessor.decimal_year` to the ``DatetimeAccessor`` on ``xr.DataArray``. (:pull:`9105`). By `Pascal Bourgault `_. Performance ~~~~~~~~~~~ - Make chunk manager an option in ``set_options`` (:pull:`9362`). By `Tom White `_. - Support for :ref:`grouping by multiple variables `. This is quite new, so please check your results and report bugs. Binary operations after grouping by multiple arrays are not supported yet. (:issue:`1056`, :issue:`9332`, :issue:`324`, :pull:`9372`). By `Deepak Cherian `_. - Allow data variable specific ``constant_values`` in the dataset ``pad`` function (:pull:`9353`). By `Tiago Sanona `_. - Speed up grouping by avoiding deep-copy of non-dimension coordinates (:issue:`9426`, :pull:`9393`) By `Deepak Cherian `_. Breaking changes ~~~~~~~~~~~~~~~~ - Support for ``python 3.9`` has been dropped (:pull:`8937`) - The minimum versions of some dependencies were changed ===================== ========= ======= Package Old New ===================== ========= ======= boto3 1.26 1.28 cartopy 0.21 0.22 dask-core 2023.4 2023.9 distributed 2023.4 2023.9 h5netcdf 1.1 1.2 iris 3.4 3.7 numba 0.56 0.57 numpy 1.23 1.24 pandas 2.0 2.1 scipy 1.10 1.11 typing_extensions 4.5 4.7 zarr 2.14 2.16 ===================== ========= ======= Bug fixes ~~~~~~~~~ - Fix bug with rechunking to a frequency when some periods contain no data (:issue:`9360`). By `Deepak Cherian `_. - Fix bug causing ``DataTree.from_dict`` to be sensitive to insertion order (:issue:`9276`, :pull:`9292`). By `Tom Nicholas `_. - Fix resampling error with monthly, quarterly, or yearly frequencies with cftime when the time bins straddle the date "0001-01-01". For example, this can happen in certain circumstances when the time coordinate contains the date "0001-01-01". 
(:issue:`9108`, :pull:`9116`) By `Spencer Clark `_ and `Deepak Cherian `_. - Fix issue with passing parameters to ZarrStore.open_store when opening datatree in zarr format (:issue:`9376`, :pull:`9377`). By `Alfonso Ladino `_. - Fix deprecation warning that was raised when calling ``np.array`` on an ``xr.DataArray`` in NumPy 2.0 (:issue:`9312`, :pull:`9393`) By `Andrew Scherer `_. - Fix passing missing arguments when opening hdf5 and netCDF4 datatrees (:issue:`9427`, :pull:`9428`). By `Alfonso Ladino `_. - Fix support for using ``pandas.DateOffset``, ``pandas.Timedelta``, and ``datetime.timedelta`` objects as ``resample`` frequencies (:issue:`9408`, :pull:`9413`). By `Oliver Higgs `_. Internal Changes ~~~~~~~~~~~~~~~~ - Re-enable testing ``pydap`` backend with ``numpy>=2`` (:pull:`9391`). By `Miguel Jimenez `_. .. _whats-new.2024.07.0: v2024.07.0 (Jul 30, 2024) ------------------------- This release extends the API for groupby operations with various `grouper objects `_, and includes improvements to the documentation and numerous bugfixes. Thanks to the 22 contributors to this release: Alfonso Ladino, ChrisCleaner, David Hoese, Deepak Cherian, Dieter Werthmüller, Illviljan, Jessica Scheick, Joel Jaeschke, Justus Magin, K. Arthur Endsley, Kai Mühlbauer, Mark Harfouche, Martin Raspaud, Mathijs Verhaegh, Maximilian Roos, Michael Niklas, Michał Górny, Moritz Schreiber, Pontus Lurcock, Spencer Clark, Stephan Hoyer and Tom Nicholas New Features ~~~~~~~~~~~~ - Use fastpath when grouping by both monotonically increasing and decreasing variables in :py:class:`GroupBy` (:issue:`6220`, :pull:`7427`). By `Joel Jaeschke `_. - Introduce new :py:class:`groupers.UniqueGrouper`, :py:class:`groupers.BinGrouper`, and :py:class:`groupers.TimeResampler` objects as a step towards supporting grouping by multiple variables. See the `docs `_ and the `grouper design doc `_ for more. (:issue:`6610`, :pull:`8840`). By `Deepak Cherian `_. - Allow rechunking to a frequency using ``Dataset.chunk(time=TimeResampler("YE"))`` syntax. (:issue:`7559`, :pull:`9109`) Such rechunking allows many time domain analyses to be executed in an embarrassingly parallel fashion. By `Deepak Cherian `_. - Allow per-variable specification of ``mask_and_scale``, ``decode_times``, ``decode_timedelta``, ``use_cftime`` and ``concat_characters`` params in :py:func:`~xarray.open_dataset` (:pull:`9218`). By `Mathijs Verhaegh `_. - Allow chunking for arrays with duplicated dimension names (:issue:`8759`, :pull:`9099`). By `Martin Raspaud `_. - Extract the source URL from fsspec objects (:issue:`9142`, :pull:`8923`). By `Justus Magin `_. - Add :py:meth:`DataArray.drop_attrs` & :py:meth:`Dataset.drop_attrs` methods, to return an object without ``attrs``. A ``deep`` parameter controls whether variables' ``attrs`` are also dropped. By `Maximilian Roos `_. (:pull:`8288`) - Added :py:func:`open_groups` for h5netcdf and netCDF4 backends (:issue:`9137`, :pull:`9243`). By `Eni Awowale `_. Breaking changes ~~~~~~~~~~~~~~~~ - The ``base`` and ``loffset`` parameters to :py:meth:`Dataset.resample` and :py:meth:`DataArray.resample` are now removed. These parameters have been deprecated since v2023.03.0. Using the ``origin`` or ``offset`` parameters is recommended as a replacement for using the ``base`` parameter, and using time offset arithmetic is recommended as a replacement for using the ``loffset`` parameter. (:pull:`9233`) By `Deepak Cherian `_. - The ``squeeze`` kwarg to ``groupby`` is now ignored.
This has been the source of some quite confusing behaviour and has been deprecated since v2024.01.0. ``groupby`` behavior is now always consistent with the existing ``.groupby(..., squeeze=False)`` behavior. No errors will be raised if ``squeeze=False``. (:pull:`9280`) By `Deepak Cherian `_. Bug fixes ~~~~~~~~~ - Fix scatter plot broadcasting unnecessarily. (:issue:`9129`, :pull:`9206`) By `Jimmy Westling `_. - Don't convert custom indexes to ``pandas`` indexes when computing a diff (:pull:`9157`) By `Justus Magin `_. - Make :py:func:`testing.assert_allclose` work with numpy 2.0 (:issue:`9165`, :pull:`9166`). By `Pontus Lurcock `_. - Allow diffing objects with array attributes on variables (:issue:`9153`, :pull:`9169`). By `Justus Magin `_. - ``numpy>=2`` compatibility in the ``netcdf4`` backend (:pull:`9136`). By `Justus Magin `_ and `Kai Mühlbauer `_. - Promote floating-point numeric datetimes before decoding (:issue:`9179`, :pull:`9182`). By `Justus Magin `_. - Address regression introduced in :pull:`9002` that prevented objects returned by :py:meth:`DataArray.convert_calendar` to be indexed by a time index in certain circumstances (:issue:`9138`, :pull:`9192`). By `Mark Harfouche `_ and `Spencer Clark `_. - Fix static typing of tolerance arguments by allowing ``str`` type (:issue:`8892`, :pull:`9194`). By `Michael Niklas `_. - Dark themes are now properly detected for ``html[data-theme=dark]``-tags (:pull:`9200`). By `Dieter Werthmüller `_. - Reductions no longer fail for ``np.complex_`` dtype arrays when numbagg is installed. (:pull:`9210`) By `Maximilian Roos `_. Documentation ~~~~~~~~~~~~~ - Adds intro to backend section of docs, including a flow-chart to navigate types of backends (:pull:`9175`). By `Jessica Scheick `_. - Adds a flow-chart diagram to help users navigate help resources (:discussion:`8990`, :pull:`9147`). By `Jessica Scheick `_. - Improvements to Zarr & chunking docs (:pull:`9139`, :pull:`9140`, :pull:`9132`) By `Maximilian Roos `_. - Fix copybutton for multi line examples and double digit ipython cell numbers (:pull:`9264`). By `Moritz Schreiber `_. Internal Changes ~~~~~~~~~~~~~~~~ - Enable typing checks of pandas (:pull:`9213`). By `Michael Niklas `_. .. _whats-new.2024.06.0: v2024.06.0 (Jun 13, 2024) ------------------------- This release brings various performance optimizations and compatibility with the upcoming numpy 2.0 release. Thanks to the 22 contributors to this release: Alfonso Ladino, David Hoese, Deepak Cherian, Eni Awowale, Ilan Gold, Jessica Scheick, Joe Hamman, Justus Magin, Kai Mühlbauer, Mark Harfouche, Mathias Hauser, Matt Savoie, Maximilian Roos, Mike Thramann, Nicolas Karasiak, Owen Littlejohns, Paul Ockenfuß, Philippe THOMY, Scott Henderson, Spencer Clark, Stephan Hoyer and Tom Nicholas Performance ~~~~~~~~~~~ - Small optimization to the netCDF4 and h5netcdf backends (:issue:`9058`, :pull:`9067`). By `Deepak Cherian `_. - Small optimizations to help reduce indexing speed of datasets (:pull:`9002`). By `Mark Harfouche `_. - Performance improvement in ``open_datatree`` method for Zarr, netCDF4 and h5netcdf backends (:issue:`8994`, :pull:`9014`). By `Alfonso Ladino `_. Bug fixes ~~~~~~~~~ - Preserve conversion of timezone-aware pandas Datetime arrays to numpy object arrays (:issue:`9026`, :pull:`9042`). By `Ilan Gold `_. - :py:meth:`DataArrayResample.interpolate` and :py:meth:`DatasetResample.interpolate` method now support arbitrary kwargs such as ``order`` for polynomial interpolation (:issue:`8762`). By `Nicolas Karasiak `_. 
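A minimal sketch of the :py:meth:`DataArrayResample.interpolate` fix just above, forwarding ``order`` to the polynomial interpolator (requires scipy; the data and frequencies are arbitrary placeholders):

.. code-block:: python

    import numpy as np
    import pandas as pd
    import xarray as xr

    da = xr.DataArray(
        np.arange(5.0),
        dims="time",
        coords={"time": pd.date_range("2000-01-01", periods=5, freq="D")},
    )

    # upsample to 6-hourly values; ``order`` is now passed through to the interpolator
    upsampled = da.resample(time="6h").interpolate("polynomial", order=3)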
Documentation ~~~~~~~~~~~~~ - Add link to CF Conventions on packed data and sentence on type determination in the I/O user guide (:issue:`9041`, :pull:`9045`). By `Kai Mühlbauer `_. Internal Changes ~~~~~~~~~~~~~~~~ - Migrates remainder of ``io.py`` to ``xarray/core/datatree_io.py`` and ``TreeAttrAccessMixin`` into ``xarray/core/common.py`` (:pull:`9011`). By `Owen Littlejohns `_ and `Tom Nicholas `_. - Compatibility with numpy 2 (:issue:`8844`, :pull:`8854`, :pull:`8946`). By `Justus Magin `_ and `Stephan Hoyer `_. .. _whats-new.2024.05.0: v2024.05.0 (May 12, 2024) ------------------------- This release brings support for pandas ExtensionArray objects, optimizations when reading Zarr, the ability to concatenate datasets without pandas indexes, more compatibility fixes for the upcoming numpy 2.0, and the migration of most of the xarray-datatree project code into xarray ``main``! Thanks to the 18 contributors to this release: Aimilios Tsouvelekakis, Andrey Akinshin, Deepak Cherian, Eni Awowale, Ilan Gold, Illviljan, Justus Magin, Mark Harfouche, Matt Savoie, Maximilian Roos, Noah C. Benson, Pascal Bourgault, Ray Bell, Spencer Clark, Tom Nicholas, ignamv, owenlittlejohns, and saschahofmann. New Features ~~~~~~~~~~~~ - New "random" method for converting to and from 360_day calendars (:pull:`8603`). By `Pascal Bourgault `_. - Xarray now makes a best attempt not to coerce :py:class:`pandas.api.extensions.ExtensionArray` to a numpy array by supporting 1D ``ExtensionArray`` objects internally where possible. Thus, :py:class:`Dataset` objects initialized with a ``pd.Categorical``, for example, will retain the object. However, one cannot do operations that are not possible on the ``ExtensionArray`` then, such as broadcasting. (:issue:`5287`, :issue:`8463`, :pull:`8723`) By `Ilan Gold `_. - :py:func:`testing.assert_allclose` / :py:func:`testing.assert_equal` now accept a new argument ``check_dims="transpose"``, controlling whether a transposed array is considered equal. (:issue:`5733`, :pull:`8991`) By `Ignacio Martinez Vazquez `_. - Added the option to avoid automatically creating 1D pandas indexes in :py:meth:`Dataset.expand_dims()`, by passing the new kwarg ``create_index_for_new_dim=False``. (:pull:`8960`) By `Tom Nicholas `_. - Avoid automatically re-creating 1D pandas indexes in :py:func:`concat()`. Also added option to avoid creating 1D indexes for new dimension coordinates by passing the new kwarg ``create_index_for_new_dim=False``. (:issue:`8871`, :pull:`8872`) By `Tom Nicholas `_. Breaking changes ~~~~~~~~~~~~~~~~ - The PyNIO backend has been deleted (:issue:`4491`, :pull:`7301`). By `Deepak Cherian `_. - The minimum versions of some dependencies were changed, in particular our minimum supported pandas version is now Pandas 2. ===================== ========= ======= Package Old New ===================== ========= ======= dask-core 2022.12 2023.4 distributed 2022.12 2023.4 h5py 3.7 3.8 matplotlib-base 3.6 3.7 packaging 22.0 23.1 pandas 1.5 2.0 pydap 3.3 3.4 sparse 0.13 0.14 typing_extensions 4.4 4.5 zarr 2.13 2.14 ===================== ========= ======= Bug fixes ~~~~~~~~~ - Following `an upstream bug fix `_ to :py:func:`pandas.date_range`, date ranges produced by :py:func:`xarray.cftime_range` with negative frequencies will now fall fully within the bounds of the provided start and end dates (:pull:`8999`). By `Spencer Clark `_. 
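A minimal sketch of the ``create_index_for_new_dim`` option listed under New Features above; the variable names and values are arbitrary placeholders:

.. code-block:: python

    import xarray as xr

    ds = xr.Dataset({"temperature": ("x", [10.0, 12.0, 14.0])})

    # add a "time" dimension of length 1 without building a pandas index for it;
    # "time" becomes a coordinate but does not appear in ``expanded.xindexes``
    expanded = ds.expand_dims(time=[0], create_index_for_new_dim=False)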
Internal Changes ~~~~~~~~~~~~~~~~ - Enforces failures on CI when tests raise warnings from within xarray (:pull:`8974`) By `Maximilian Roos `_ - Migrates ``formatting_html`` functionality for ``DataTree`` into ``xarray/core`` (:pull:`8930`) By `Eni Awowale `_, `Julia Signell `_ and `Tom Nicholas `_. - Migrates ``datatree_mapping`` functionality into ``xarray/core`` (:pull:`8948`) By `Matt Savoie `_ `Owen Littlejohns `_ and `Tom Nicholas `_. - Migrates ``extensions``, ``formatting`` and ``datatree_render`` functionality for ``DataTree`` into ``xarray/core``. Also migrates ``testing`` functionality into ``xarray/testing/assertions`` for ``DataTree``. (:pull:`8967`) By `Owen Littlejohns `_ and `Tom Nicholas `_. - Migrates ``ops.py`` functionality into ``xarray/core/datatree_ops.py`` (:pull:`8976`) By `Matt Savoie `_ and `Tom Nicholas `_. - Migrates ``iterator`` functionality into ``xarray/core`` (:pull:`8879`) By `Owen Littlejohns `_, `Matt Savoie `_ and `Tom Nicholas `_. - ``transpose``, ``set_dims``, ``stack`` & ``unstack`` now use a ``dim`` kwarg rather than ``dims`` or ``dimensions``. This is the final change to make xarray methods consistent with their use of ``dim``. Using the existing kwarg will raise a warning. By `Maximilian Roos `_ .. _whats-new.2024.03.0: v2024.03.0 (Mar 29, 2024) ------------------------- This release brings performance improvements for grouped and resampled quantile calculations, CF decoding improvements, minor optimizations to distributed Zarr writes, and compatibility fixes for Numpy 2.0 and Pandas 3.0. Thanks to the 18 contributors to this release: Anderson Banihirwe, Christoph Hasse, Deepak Cherian, Etienne Schalk, Justus Magin, Kai Mühlbauer, Kevin Schwarzwald, Mark Harfouche, Martin, Matt Savoie, Maximilian Roos, Ray Bell, Roberto Chang, Spencer Clark, Tom Nicholas, crusaderky, owenlittlejohns, saschahofmann New Features ~~~~~~~~~~~~ - Partial writes to existing chunks with ``region`` or ``append_dim`` will now raise an error (unless ``safe_chunks=False``); previously an error would only be raised on new variables. (:pull:`8459`, :issue:`8371`, :issue:`8882`) By `Maximilian Roos `_. - Grouped and resampling quantile calculations now use the vectorized algorithm in ``flox>=0.9.4`` if present. By `Deepak Cherian `_. - Do not broadcast in arithmetic operations when global option ``arithmetic_broadcast=False`` (:issue:`6806`, :pull:`8784`). By `Etienne Schalk `_ and `Deepak Cherian `_. - Add the ``.oindex`` property to Explicitly Indexed Arrays for orthogonal indexing functionality. (:issue:`8238`, :pull:`8750`) By `Anderson Banihirwe `_. - Add the ``.vindex`` property to Explicitly Indexed Arrays for vectorized indexing functionality. (:issue:`8238`, :pull:`8780`) By `Anderson Banihirwe `_. - Expand use of ``.oindex`` and ``.vindex`` properties. (:pull:`8790`) By `Anderson Banihirwe `_ and `Deepak Cherian `_. - Allow creating :py:class:`xr.Coordinates` objects with no indexes (:pull:`8711`) By `Benoit Bovy `_ and `Tom Nicholas `_. - Enable plotting of ``datetime.dates``. (:issue:`8866`, :pull:`8873`) By `Sascha Hofmann `_. Breaking changes ~~~~~~~~~~~~~~~~ - Don't allow overwriting index variables with ``to_zarr`` region writes. (:issue:`8589`, :pull:`8876`). By `Deepak Cherian `_. Bug fixes ~~~~~~~~~ - The default ``freq`` parameter in :py:meth:`xr.date_range` and :py:meth:`xr.cftime_range` is set to ``'D'`` only if ``periods``, ``start``, or ``end`` are ``None`` (:issue:`8770`, :pull:`8774`). By `Roberto Chang `_. 
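A minimal sketch of the new default-``freq`` behaviour described in the bug fix just above, assuming the usual pandas rule that ``freq=None`` with ``start``, ``end`` and ``periods`` all given produces evenly spaced dates:

.. code-block:: python

    import xarray as xr

    # ``freq`` is no longer forced to "D" when start, end and periods are all
    # provided, so the four dates are spaced evenly across the year instead
    times = xr.date_range(start="2000-01-01", end="2000-12-31", periods=4)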
- Ensure that non-nanosecond precision :py:class:`numpy.datetime64` and :py:class:`numpy.timedelta64` values are cast to nanosecond precision values when used in :py:meth:`DataArray.expand_dims` and ::py:meth:`Dataset.expand_dims` (:pull:`8781`). By `Spencer Clark `_. - CF conform handling of ``_FillValue``/``missing_value`` and ``dtype`` in ``CFMaskCoder``/``CFScaleOffsetCoder`` (:issue:`2304`, :issue:`5597`, :issue:`7691`, :pull:`8713`, see also discussion in :pull:`7654`). By `Kai Mühlbauer `_. - Do not cast ``_FillValue``/``missing_value`` in ``CFMaskCoder`` if ``_Unsigned`` is provided (:issue:`8844`, :pull:`8852`). - Adapt handling of copy keyword argument for numpy >= 2.0dev (:issue:`8844`, :pull:`8851`, :pull:`8865`). By `Kai Mühlbauer `_. - Import trapz/trapezoid depending on numpy version (:issue:`8844`, :pull:`8865`). By `Kai Mühlbauer `_. - Warn and return bytes undecoded in case of UnicodeDecodeError in h5netcdf-backend (:issue:`5563`, :pull:`8874`). By `Kai Mühlbauer `_. - Fix bug incorrectly disallowing creation of a dataset with a multidimensional coordinate variable with the same name as one of its dims. (:issue:`8884`, :pull:`8886`) By `Tom Nicholas `_. Internal Changes ~~~~~~~~~~~~~~~~ - Migrates ``treenode`` functionality into ``xarray/core`` (:pull:`8757`) By `Matt Savoie `_ and `Tom Nicholas `_. - Migrates ``datatree`` functionality into ``xarray/core``. (:pull:`8789`) By `Owen Littlejohns `_, `Matt Savoie `_ and `Tom Nicholas `_. .. _whats-new.2024.02.0: v2024.02.0 (Feb 19, 2024) ------------------------- This release brings size information to the text ``repr``, changes to the accepted frequency strings, and various bug fixes. Thanks to our 12 contributors: Anderson Banihirwe, Deepak Cherian, Eivind Jahren, Etienne Schalk, Justus Magin, Marco Wolsza, Mathias Hauser, Matt Savoie, Maximilian Roos, Rambaud Pierrick, Tom Nicholas New Features ~~~~~~~~~~~~ - Added a simple ``nbytes`` representation in DataArrays and Dataset ``repr``. (:issue:`8690`, :pull:`8702`). By `Etienne Schalk `_. - Allow negative frequency strings (e.g. ``"-1YE"``). These strings are for example used in :py:func:`date_range`, and :py:func:`cftime_range` (:pull:`8651`). By `Mathias Hauser `_. - Add :py:meth:`NamedArray.expand_dims`, :py:meth:`NamedArray.permute_dims` and :py:meth:`NamedArray.broadcast_to` (:pull:`8380`) By `Anderson Banihirwe `_. - Xarray now defers to `flox's heuristics `_ to set the default ``method`` for groupby problems. This only applies to ``flox>=0.9``. By `Deepak Cherian `_. - All ``quantile`` methods (e.g. :py:meth:`DataArray.quantile`) now use ``numbagg`` for the calculation of nanquantiles (i.e., ``skipna=True``) if it is installed. This is currently limited to the linear interpolation method (`method='linear'`). (:issue:`7377`, :pull:`8684`) By `Marco Wolsza `_. Breaking changes ~~~~~~~~~~~~~~~~ - :py:func:`infer_freq` always returns the frequency strings as defined in pandas 2.2 (:issue:`8612`, :pull:`8627`). By `Mathias Hauser `_. Deprecations ~~~~~~~~~~~~ - The ``dt.weekday_name`` parameter wasn't functional on modern pandas versions and has been removed. (:issue:`8610`, :pull:`8664`) By `Sam Coleman `_. Bug fixes ~~~~~~~~~ - Fixed a regression that prevented multi-index level coordinates being serialized after resetting or dropping the multi-index (:issue:`8628`, :pull:`8672`). By `Benoit Bovy `_. - Fix bug with broadcasting when wrapping array API-compliant classes. (:issue:`8665`, :pull:`8669`) By `Tom Nicholas `_. 
- Ensure :py:meth:`DataArray.unstack` works when wrapping array API-compliant classes. (:issue:`8666`, :pull:`8668`) By `Tom Nicholas `_. - Fix negative slicing of Zarr arrays without dask installed. (:issue:`8252`) By `Deepak Cherian `_. - Preserve chunks when writing time-like variables to zarr by enabling lazy CF encoding of time-like variables (:issue:`7132`, :issue:`8230`, :issue:`8432`, :pull:`8575`). By `Spencer Clark `_ and `Mattia Almansi `_. - Preserve chunks when writing time-like variables to zarr by enabling their lazy encoding (:issue:`7132`, :issue:`8230`, :issue:`8432`, :pull:`8253`, :pull:`8575`; see also discussion in :pull:`8253`). By `Spencer Clark `_ and `Mattia Almansi `_. - Raise an informative error if dtype encoding of time-like variables would lead to integer overflow or unsafe conversion from floating point to integer values (:issue:`8542`, :pull:`8575`). By `Spencer Clark `_. - Raise an error when unstacking a MultiIndex that has duplicates as this would lead to silent data loss (:issue:`7104`, :pull:`8737`). By `Mathias Hauser `_. Documentation ~~~~~~~~~~~~~ - Fix ``variables`` arg typo in ``Dataset.sortby()`` docstring (:issue:`8663`, :pull:`8670`) By `Tom Vo `_. - Fixed documentation where the use of the depreciated pandas frequency string prevented the documentation from being built. (:pull:`8638`) By `Sam Coleman `_. Internal Changes ~~~~~~~~~~~~~~~~ - ``DataArray.dt`` now raises an ``AttributeError`` rather than a ``TypeError`` when the data isn't datetime-like. (:issue:`8718`, :pull:`8724`) By `Maximilian Roos `_. - Move ``parallelcompat`` and ``chunk managers`` modules from ``xarray/core`` to ``xarray/namedarray``. (:pull:`8319`) By `Tom Nicholas `_ and `Anderson Banihirwe `_. - Imports ``datatree`` repository and history into internal location. (:pull:`8688`) By `Matt Savoie `_, `Justus Magin `_ and `Tom Nicholas `_. - Adds :py:func:`open_datatree` into ``xarray/backends`` (:pull:`8697`) By `Matt Savoie `_ and `Tom Nicholas `_. - Refactor :py:meth:`xarray.core.indexing.DaskIndexingAdapter.__getitem__` to remove an unnecessary rewrite of the indexer key (:issue:`8377`, :pull:`8758`) By `Anderson Banihirwe `_. .. _whats-new.2024.01.1: v2024.01.1 (23 Jan, 2024) ------------------------- This release is to fix a bug with the rendering of the documentation, but it also includes changes to the handling of pandas frequency strings. Breaking changes ~~~~~~~~~~~~~~~~ - Following pandas, :py:meth:`infer_freq` will return ``"YE"``, instead of ``"Y"`` (formerly ``"A"``). This is to be consistent with the deprecation of the latter frequency string in pandas 2.2. This is a follow up to :pull:`8415` (:issue:`8612`, :pull:`8642`). By `Mathias Hauser `_. Deprecations ~~~~~~~~~~~~ - Following pandas, the frequency string ``"Y"`` (formerly ``"A"``) is deprecated in favor of ``"YE"``. These strings are used, for example, in :py:func:`date_range`, :py:func:`cftime_range`, :py:meth:`DataArray.resample`, and :py:meth:`Dataset.resample` among others (:issue:`8612`, :pull:`8629`). By `Mathias Hauser `_. Documentation ~~~~~~~~~~~~~ - Pin ``sphinx-book-theme`` to ``1.0.1`` to fix a rendering issue with the sidebar in the docs. (:issue:`8619`, :pull:`8632`) By `Tom Nicholas `_. .. _whats-new.2024.01.0: v2024.01.0 (17 Jan, 2024) ------------------------- This release brings support for weights in correlation and covariance functions, a new ``DataArray.cumulative`` aggregation, improvements to ``xr.map_blocks``, an update to our minimum dependencies, and various bugfixes. 
Thanks to our 17 contributors to this release: Abel Aoun, Deepak Cherian, Illviljan, Johan Mathe, Justus Magin, Kai Mühlbauer, Llorenç Lledó, Mark Harfouche, Markel, Mathias Hauser, Maximilian Roos, Michael Niklas, Niclas Rieger, Sébastien Celles, Tom Nicholas, Trinh Quoc Anh, and crusaderky. New Features ~~~~~~~~~~~~ - :py:meth:`xr.cov` and :py:meth:`xr.corr` now support using weights (:issue:`8527`, :pull:`7392`). By `Llorenç Lledó `_. - Accept the compression arguments new in netCDF 1.6.0 in the netCDF4 backend. See `netCDF4 documentation `_ for details. Note that some new compression filters needs plugins to be installed which may not be available in all netCDF distributions. By `Markel García-Díez `_. (:issue:`6929`, :pull:`7551`) - Add :py:meth:`DataArray.cumulative` & :py:meth:`Dataset.cumulative` to compute cumulative aggregations, such as ``sum``, along a dimension — for example ``da.cumulative('time').sum()``. This is similar to pandas' ``.expanding``, and mostly equivalent to ``.cumsum`` methods, or to :py:meth:`DataArray.rolling` with a window length equal to the dimension size. By `Maximilian Roos `_. (:pull:`8512`) - Decode/Encode netCDF4 enums and store the enum definition in dataarrays' dtype metadata. If multiple variables share the same enum in netCDF4, each dataarray will have its own enum definition in their respective dtype metadata. By `Abel Aoun `_. (:issue:`8144`, :pull:`8147`) Breaking changes ~~~~~~~~~~~~~~~~ - The minimum versions of some dependencies were changed (:pull:`8586`): ===================== ========= ======== Package Old New ===================== ========= ======== cartopy 0.20 0.21 dask-core 2022.7 2022.12 distributed 2022.7 2022.12 flox 0.5 0.7 iris 3.2 3.4 matplotlib-base 3.5 3.6 numpy 1.22 1.23 numba 0.55 0.56 packaging 21.3 22.0 seaborn 0.11 0.12 scipy 1.8 1.10 typing_extensions 4.3 4.4 zarr 2.12 2.13 ===================== ========= ======== Deprecations ~~~~~~~~~~~~ - The ``squeeze`` kwarg to GroupBy is now deprecated. (:issue:`2157`, :pull:`8507`) By `Deepak Cherian `_. Bug fixes ~~~~~~~~~ - Support non-string hashable dimensions in :py:class:`xarray.DataArray` (:issue:`8546`, :pull:`8559`). By `Michael Niklas `_. - Reverse index output of bottleneck's rolling move_argmax/move_argmin functions (:issue:`8541`, :pull:`8552`). By `Kai Mühlbauer `_. - Vendor ``SerializableLock`` from dask and use as default lock for netcdf4 backends (:issue:`8442`, :pull:`8571`). By `Kai Mühlbauer `_. - Add tests and fixes for empty :py:class:`CFTimeIndex`, including broken html repr (:issue:`7298`, :pull:`8600`). By `Mathias Hauser `_. Internal Changes ~~~~~~~~~~~~~~~~ - The implementation of :py:func:`map_blocks` has changed to minimize graph size and duplication of data. This should be a strict improvement even though the graphs are not always embarrassingly parallel any more. Please open an issue if you spot a regression. (:pull:`8412`, :issue:`8409`). By `Deepak Cherian `_. - Remove null values before plotting. (:pull:`8535`). By `Jimmy Westling `_. - Redirect cumulative reduction functions internally through the :py:class:`ChunkManagerEntryPoint`, potentially allowing :py:meth:`~xarray.DataArray.ffill` and :py:meth:`~xarray.DataArray.bfill` to use non-dask chunked array types. (:pull:`8019`) By `Tom Nicholas `_. .. 
_whats-new.2023.12.0: v2023.12.0 (2023 Dec 08) ------------------------ This release brings new `hypothesis `_ strategies for testing, significantly faster rolling aggregations as well as ``ffill`` and ``bfill`` with ``numbagg``, a new :py:meth:`Dataset.eval` method, and improvements to reading and writing Zarr arrays (including a new ``"a-"`` mode). Thanks to our 16 contributors: Anderson Banihirwe, Ben Mares, Carl Andersson, Deepak Cherian, Doug Latornell, Gregorio L. Trevisan, Illviljan, Jens Hedegaard Nielsen, Justus Magin, Mathias Hauser, Max Jones, Maximilian Roos, Michael Niklas, Patrick Hoefler, Ryan Abernathey, Tom Nicholas New Features ~~~~~~~~~~~~ - Added hypothesis strategies for generating :py:class:`xarray.Variable` objects containing arbitrary data, useful for parametrizing downstream tests. Accessible under :py:mod:`testing.strategies`, and documented in a new page on testing in the User Guide. (:issue:`6911`, :pull:`8404`) By `Tom Nicholas `_. - :py:meth:`rolling` uses `numbagg `_ for most of its computations by default. Numbagg is up to 5x faster than bottleneck where parallelization is possible. Where parallelization isn't possible — for example a 1D array — it's about the same speed as bottleneck, and 2-5x faster than pandas' default functions. (:pull:`8493`). numbagg is an optional dependency, so requires installing separately. - Use a concise format when plotting datetime arrays. (:pull:`8449`). By `Jimmy Westling `_. - Avoid overwriting unchanged existing coordinate variables when appending with :py:meth:`Dataset.to_zarr` by setting ``mode='a-'``. By `Ryan Abernathey `_ and `Deepak Cherian `_. - :py:meth:`~xarray.DataArray.rank` now operates on dask-backed arrays, assuming the core dim has exactly one chunk. (:pull:`8475`). By `Maximilian Roos `_. - Add a :py:meth:`Dataset.eval` method, similar to the pandas' method of the same name. (:pull:`7163`). This is currently marked as experimental and doesn't yet support the ``numexpr`` engine. - :py:meth:`Dataset.drop_vars` & :py:meth:`DataArray.drop_vars` allow passing a callable, similar to :py:meth:`Dataset.where` & :py:meth:`Dataset.sortby` & others. (:pull:`8511`). By `Maximilian Roos `_. Breaking changes ~~~~~~~~~~~~~~~~ - Explicitly warn when creating xarray objects with repeated dimension names. Such objects will also now raise when :py:meth:`DataArray.get_axis_num` is called, which means many functions will raise. This latter change is technically a breaking change, but whilst allowed, this behaviour was never actually supported! (:issue:`3731`, :pull:`8491`) By `Tom Nicholas `_. Deprecations ~~~~~~~~~~~~ - As part of an effort to standardize the API, we're renaming the ``dims`` keyword arg to ``dim`` for the minority of functions which current use ``dims``. This started with :py:func:`xarray.dot` & :py:meth:`DataArray.dot` and we'll gradually roll this out across all functions. The warnings are currently ``PendingDeprecationWarning``, which are silenced by default. We'll convert these to ``DeprecationWarning`` in a future release. By `Maximilian Roos `_. - Raise a ``FutureWarning`` warning that the type of :py:meth:`Dataset.dims` will be changed from a mapping of dimension names to lengths to a set of dimension names. This is to increase consistency with :py:meth:`DataArray.dims`. To access a mapping of dimension names to lengths please use :py:meth:`Dataset.sizes`. The same change also applies to ``DatasetGroupBy.dims``. (:issue:`8496`, :pull:`8500`) By `Tom Nicholas `_. 
- :py:meth:`Dataset.drop` & :py:meth:`DataArray.drop` are now deprecated, since pending deprecation for several years. :py:meth:`DataArray.drop_sel` & :py:meth:`DataArray.drop_var` replace them for labels & variables respectively. (:pull:`8497`) By `Maximilian Roos `_. Bug fixes ~~~~~~~~~ - Fix dtype inference for ``pd.CategoricalIndex`` when categories are backed by a ``pd.ExtensionDtype`` (:pull:`8481`) - Fix writing a variable that requires transposing when not writing to a region (:pull:`8484`) By `Maximilian Roos `_. - Static typing of ``p0`` and ``bounds`` arguments of :py:func:`xarray.DataArray.curvefit` and :py:func:`xarray.Dataset.curvefit` was changed to ``Mapping`` (:pull:`8502`). By `Michael Niklas `_. - Fix typing of :py:func:`xarray.DataArray.to_netcdf` and :py:func:`xarray.Dataset.to_netcdf` when ``compute`` is evaluated to bool instead of a Literal (:pull:`8268`). By `Jens Hedegaard Nielsen `_. Documentation ~~~~~~~~~~~~~ - Added illustration of updating the time coordinate values of a resampled dataset using time offset arithmetic. This is the recommended technique to replace the use of the deprecated ``loffset`` parameter in ``resample`` (:pull:`8479`). By `Doug Latornell `_. - Improved error message when attempting to get a variable which doesn't exist from a Dataset. (:pull:`8474`) By `Maximilian Roos `_. - Fix default value of ``combine_attrs`` in :py:func:`xarray.combine_by_coords` (:pull:`8471`) By `Gregorio L. Trevisan `_. Internal Changes ~~~~~~~~~~~~~~~~ - :py:meth:`DataArray.bfill` & :py:meth:`DataArray.ffill` now use numbagg `_ by default, which is up to 5x faster where parallelization is possible. (:pull:`8339`) By `Maximilian Roos `_. - Update mypy version to 1.7 (:issue:`8448`, :pull:`8501`). By `Michael Niklas `_. .. _whats-new.2023.11.0: v2023.11.0 (Nov 16, 2023) ------------------------- .. tip:: `This is our 10th year anniversary release! `_ Thank you for your love and support. This release brings the ability to use ``opt_einsum`` for :py:func:`xarray.dot` by default, support for auto-detecting ``region`` when writing partial datasets to Zarr, and the use of h5py drivers with ``h5netcdf``. Thanks to the 19 contributors to this release: Aman Bagrecha, Anderson Banihirwe, Ben Mares, Deepak Cherian, Dimitri Papadopoulos Orfanos, Ezequiel Cimadevilla Alvarez, Illviljan, Justus Magin, Katelyn FitzGerald, Kai Muehlbauer, Martin Durant, Maximilian Roos, Metamess, Sam Levang, Spencer Clark, Tom Nicholas, mgunyho, templiert New Features ~~~~~~~~~~~~ - Use `opt_einsum `_ for :py:func:`xarray.dot` by default if installed. By `Deepak Cherian `_. (:issue:`7764`, :pull:`8373`). - Add ``DataArray.dt.total_seconds()`` method to match the Pandas API. (:pull:`8435`). By `Ben Mares `_. - Allow passing ``region="auto"`` in :py:meth:`Dataset.to_zarr` to automatically infer the region to write in the original store. Also implement automatic transpose when dimension order does not match the original store. (:issue:`7702`, :issue:`8421`, :pull:`8434`). By `Sam Levang `_. - Allow the usage of h5py drivers (eg: ros3) via h5netcdf (:pull:`8360`). By `Ezequiel Cimadevilla `_. - Enable VLEN string fill_values, preserve VLEN string dtypes (:issue:`1647`, :issue:`7652`, :issue:`7868`, :pull:`7869`). By `Kai Mühlbauer `_. Breaking changes ~~~~~~~~~~~~~~~~ - drop support for `cdms2 `_. Please use `xcdat `_ instead (:pull:`8441`). By `Justus Magin `_. 
- Following pandas, :py:meth:`infer_freq` will return ``"Y"``, ``"YS"``, ``"QE"``, ``"ME"``, ``"h"``, ``"min"``, ``"s"``, ``"ms"``, ``"us"``, or ``"ns"`` instead of ``"A"``, ``"AS"``, ``"Q"``, ``"M"``, ``"H"``, ``"T"``, ``"S"``, ``"L"``, ``"U"``, or ``"N"``. This is to be consistent with the deprecation of the latter frequency strings (:issue:`8394`, :pull:`8415`). By `Spencer Clark `_. - Bump minimum tested pint version to ``>=0.22``. By `Deepak Cherian `_. - Minimum supported versions for the following packages have changed: ``h5py >=3.7``, ``h5netcdf>=1.1``. By `Kai Mühlbauer `_. Deprecations ~~~~~~~~~~~~ - The PseudoNetCDF backend has been removed. By `Deepak Cherian `_. - Supplying dimension-ordered sequences to :py:meth:`DataArray.chunk` & :py:meth:`Dataset.chunk` is deprecated in favor of supplying a dictionary of dimensions, or a single ``int`` or ``"auto"`` argument covering all dimensions. Xarray favors using dimensions names rather than positions, and this was one place in the API where dimension positions were used. (:pull:`8341`) By `Maximilian Roos `_. - Following pandas, the frequency strings ``"A"``, ``"AS"``, ``"Q"``, ``"M"``, ``"H"``, ``"T"``, ``"S"``, ``"L"``, ``"U"``, and ``"N"`` are deprecated in favor of ``"Y"``, ``"YS"``, ``"QE"``, ``"ME"``, ``"h"``, ``"min"``, ``"s"``, ``"ms"``, ``"us"``, and ``"ns"``, respectively. These strings are used, for example, in :py:func:`date_range`, :py:func:`cftime_range`, :py:meth:`DataArray.resample`, and :py:meth:`Dataset.resample` among others (:issue:`8394`, :pull:`8415`). By `Spencer Clark `_. - Rename :py:meth:`Dataset.to_array` to :py:meth:`Dataset.to_dataarray` for consistency with :py:meth:`DataArray.to_dataset` & :py:func:`open_dataarray` functions. This is a "soft" deprecation — the existing methods work and don't raise any warnings, given the relatively small benefits of the change. By `Maximilian Roos `_. - Finally remove ``keep_attrs`` kwarg from :py:meth:`DataArray.resample` and :py:meth:`Dataset.resample`. These were deprecated a long time ago. By `Deepak Cherian `_. Bug fixes ~~~~~~~~~ - Port `bug fix from pandas `_ to eliminate the adjustment of resample bin edges in the case that the resampling frequency has units of days and is greater than one day (e.g. ``"2D"``, ``"3D"`` etc.) and the ``closed`` argument is set to ``"right"`` to xarray's implementation of resample for data indexed by a :py:class:`CFTimeIndex` (:pull:`8393`). By `Spencer Clark `_. - Fix to once again support date offset strings as input to the loffset parameter of resample and test this functionality (:pull:`8422`, :issue:`8399`). By `Katelyn FitzGerald `_. - Fix a bug where :py:meth:`DataArray.to_dataset` silently drops a variable if a coordinate with the same name already exists (:pull:`8433`, :issue:`7823`). By `András Gunyhó `_. - Fix for :py:meth:`DataArray.to_zarr` & :py:meth:`Dataset.to_zarr` to close the created zarr store when passing a path with ``.zip`` extension (:pull:`8425`). By `Carl Andersson `_. Documentation ~~~~~~~~~~~~~ - Small updates to documentation on distributed writes: See :ref:`io.zarr.appending` to Zarr. By `Deepak Cherian `_. .. _whats-new.2023.10.1: v2023.10.1 (19 Oct, 2023) ------------------------- This release updates our minimum numpy version in ``pyproject.toml`` to 1.22, consistent with our documentation below. .. 
_whats-new.2023.10.0: v2023.10.0 (19 Oct, 2023) ------------------------- This release brings performance enhancements to reading Zarr datasets, the ability to use `numbagg `_ for reductions, an expansion in API for ``rolling_exp``, fixes two regressions with datetime decoding, and many other bugfixes and improvements. Groupby reductions will also use ``numbagg`` if ``flox>=0.8.1`` and ``numbagg`` are both installed. Thanks to our 13 contributors: Anderson Banihirwe, Bart Schilperoort, Deepak Cherian, Illviljan, Kai Mühlbauer, Mathias Hauser, Maximilian Roos, Michael Niklas, Pieter Eendebak, Simon Høxbro Hansen, Spencer Clark, Tom White, olimcc New Features ~~~~~~~~~~~~ - Support high-performance reductions with `numbagg `_. This is enabled by default if ``numbagg`` is installed. By `Deepak Cherian `_. (:pull:`8316`) - Add ``corr``, ``cov``, ``std`` & ``var`` to ``.rolling_exp``. By `Maximilian Roos `_. (:pull:`8307`) - :py:meth:`DataArray.where` & :py:meth:`Dataset.where` accept a callable for the ``other`` parameter, passing the object as the only argument. Previously, this was only valid for the ``cond`` parameter. (:issue:`8255`) By `Maximilian Roos `_. - ``.rolling_exp`` functions can now take a ``min_weight`` parameter, to only output values when there are sufficient recent non-nan values. ``numbagg>=0.3.1`` is required. (:pull:`8285`) By `Maximilian Roos `_. - :py:meth:`DataArray.sortby` & :py:meth:`Dataset.sortby` accept a callable for the ``variables`` parameter, passing the object as the only argument. By `Maximilian Roos `_. - ``.rolling_exp`` functions can now operate on dask-backed arrays, assuming the core dim has exactly one chunk. (:pull:`8284`). By `Maximilian Roos `_. Breaking changes ~~~~~~~~~~~~~~~~ - Made more arguments keyword-only (e.g. ``keep_attrs``, ``skipna``) for many :py:class:`xarray.DataArray` and :py:class:`xarray.Dataset` methods (:pull:`6403`). By `Mathias Hauser `_. - :py:meth:`Dataset.to_zarr` & :py:meth:`DataArray.to_zarr` require keyword arguments after the initial 7 positional arguments. By `Maximilian Roos `_. Deprecations ~~~~~~~~~~~~ - Rename :py:meth:`Dataset.reset_encoding` & :py:meth:`DataArray.reset_encoding` to :py:meth:`Dataset.drop_encoding` & :py:meth:`DataArray.drop_encoding` for consistency with other ``drop`` & ``reset`` methods — ``drop`` generally removes something, while ``reset`` generally resets to some default or standard value. (:pull:`8287`, :issue:`8259`) By `Maximilian Roos `_. Bug fixes ~~~~~~~~~ - :py:meth:`DataArray.rename` & :py:meth:`Dataset.rename` would emit a warning when the operation was a no-op. (:issue:`8266`) By `Simon Hansen `_. - Fixed a regression introduced in the previous release checking time-like units when encoding/decoding masked data (:issue:`8269`, :pull:`8277`). By `Kai Mühlbauer `_. - Fix datetime encoding precision loss regression introduced in the previous release for datetimes encoded with units requiring floating point values, and a reference date not equal to the first value of the datetime array (:issue:`8271`, :pull:`8272`). By `Spencer Clark `_. - Fix excess metadata requests when using a Zarr store. Prior to this, metadata was re-read every time data was retrieved from the array, now metadata is retrieved only once when they array is initialized. (:issue:`8290`, :pull:`8297`). By `Oliver McCormack `_. - Fix to_zarr ending in a ReadOnlyError when consolidated metadata was used and the write_empty_chunks was provided. (:issue:`8323`, :pull:`8326`) By `Matthijs Amesz `_. 
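As a brief illustration of the callable arguments added to ``where`` and ``sortby`` in the New Features above, here is a minimal sketch (the data is invented for the example; in both cases the callable receives the object itself, as described in the entries):

.. code-block:: python

    import numpy as np
    import xarray as xr

    da = xr.DataArray(np.arange(6.0), dims="x")

    # ``other`` may now be a callable that receives the object itself
    capped = da.where(da < 3, other=lambda obj: obj * 0)

    ds = xr.Dataset({"a": ("x", [3, 1, 2])})

    # ``variables`` may now be a callable that receives the object itself
    sorted_ds = ds.sortby(lambda obj: obj["a"])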
Documentation ~~~~~~~~~~~~~ - Added page on the interoperability of xarray objects. (:pull:`7992`) By `Tom Nicholas `_. - Added xarray-regrid to the list of xarray related projects (:pull:`8272`). By `Bart Schilperoort `_. Internal Changes ~~~~~~~~~~~~~~~~ - More improvements to support the Python `array API standard `_ by using duck array ops in more places in the codebase. (:pull:`8267`) By `Tom White `_. .. _whats-new.2023.09.0: v2023.09.0 (Sep 26, 2023) ------------------------- This release continues work on the new :py:class:`xarray.Coordinates` object, allows providing ``preferred_chunks`` when reading from netcdf files, enables :py:func:`xarray.apply_ufunc` to handle missing core dimensions and fixes several bugs. Thanks to the 24 contributors to this release: Alexander Fischer, Amrest Chinkamol, Benoit Bovy, Darsh Ranjan, Deepak Cherian, Gianfranco Costamagna, Gregorio L. Trevisan, Illviljan, Joe Hamman, JR, Justus Magin, Kai Mühlbauer, Kian-Meng Ang, Kyle Sunden, Martin Raspaud, Mathias Hauser, Mattia Almansi, Maximilian Roos, András Gunyhó, Michael Niklas, Richard Kleijn, Riulinchen, Tom Nicholas and Wiktor Kraśnicki. We welcome the following new contributors to Xarray!: Alexander Fischer, Amrest Chinkamol, Darsh Ranjan, Gianfranco Costamagna, Gregorio L. Trevisan, Kian-Meng Ang, Riulinchen and Wiktor Kraśnicki. New Features ~~~~~~~~~~~~ - Added the :py:meth:`Coordinates.assign` method that can be used to combine different collections of coordinates prior to assigning them to a Dataset or DataArray at once (:pull:`8102`). By `Benoît Bovy `_. - Provide ``preferred_chunks`` for data read from netcdf files (:issue:`1440`, :pull:`7948`). By `Martin Raspaud `_. - Added ``on_missing_core_dims`` to :py:meth:`apply_ufunc` to allow for copying or dropping a :py:class:`Dataset`'s variables with missing core dimensions (:pull:`8138`). By `Maximilian Roos `_. Breaking changes ~~~~~~~~~~~~~~~~ - The :py:class:`Coordinates` constructor now creates a (pandas) index by default for each dimension coordinate. To keep the previous behavior (no index created), pass an empty dictionary to ``indexes``. The constructor now also extracts and adds the indexes from another :py:class:`Coordinates` object passed via ``coords`` (:pull:`8107`). By `Benoît Bovy `_. - Static typing of ``xlim`` and ``ylim`` arguments in plotting functions now must be ``tuple[float, float]`` to align with matplotlib requirements. (:issue:`7802`, :pull:`8030`). By `Michael Niklas `_. Deprecations ~~~~~~~~~~~~ - Deprecate passing a :py:class:`pandas.MultiIndex` object directly to the :py:class:`Dataset` and :py:class:`DataArray` constructors as well as to :py:meth:`Dataset.assign` and :py:meth:`Dataset.assign_coords`. A new Xarray :py:class:`Coordinates` object has to be created first using :py:meth:`Coordinates.from_pandas_multiindex` (:pull:`8094`). By `Benoît Bovy `_. Bug fixes ~~~~~~~~~ - Improved static typing of reduction methods (:pull:`6746`). By `Richard Kleijn `_. - Fix bug where empty attrs would generate inconsistent tokens (:issue:`6970`, :pull:`8101`). By `Mattia Almansi `_. - Improved handling of multi-coordinate indexes when updating coordinates, including bug fixes (and improved warnings for deprecated features) for pandas multi-indexes (:pull:`8094`). By `Benoît Bovy `_. - Fixed a bug in :py:func:`merge` with ``compat='minimal'`` where the coordinate names were not updated properly internally (:issue:`7405`, :issue:`7588`, :pull:`8104`). By `Benoît Bovy `_.
- Fix bug where :py:class:`DataArray` instances on the right-hand side of :py:meth:`DataArray.__setitem__` lose dimension names (:issue:`7030`, :pull:`8067`). By `Darsh Ranjan `_. - Return ``float64`` in presence of ``NaT`` in :py:class:`~core.accessor_dt.DatetimeAccessor` and special case ``NaT`` handling in :py:meth:`~core.accessor_dt.DatetimeAccessor.isocalendar` (:issue:`7928`, :pull:`8084`). By `Kai Mühlbauer `_. - Fix :py:meth:`~computation.rolling.DatasetRolling.construct` with stride on Datasets without indexes. (:issue:`7021`, :pull:`7578`). By `Amrest Chinkamol `_ and `Michael Niklas `_. - Calling plot with kwargs ``col``, ``row`` or ``hue`` no longer squeezes dimensions passed via these arguments (:issue:`7552`, :pull:`8174`). By `Wiktor Kraśnicki `_. - Fixed a bug where casting from ``float`` to ``int64`` (undefined for ``NaN``) led to varying issues (:issue:`7817`, :issue:`7942`, :issue:`7790`, :issue:`6191`, :issue:`7096`, :issue:`1064`, :pull:`7827`). By `Kai Mühlbauer `_. - Fixed a bug where inaccurate ``coordinates`` silently failed to decode variable (:issue:`1809`, :pull:`8195`). By `Kai Mühlbauer `_ - ``.rolling_exp`` functions no longer mistakenly lose non-dimensioned coords (:issue:`6528`, :pull:`8114`). By `Maximilian Roos `_. - In the event that user-provided datetime64/timedelta64 units and integer dtype encoding parameters conflict with each other, override the units to preserve an integer dtype for most faithful serialization to disk (:issue:`1064`, :pull:`8201`). By `Kai Mühlbauer `_. - Static typing of dunder ops methods (like :py:meth:`DataArray.__eq__`) has been fixed. Remaining issues are upstream problems (:issue:`7780`, :pull:`8204`). By `Michael Niklas `_. - Fix type annotation for ``center`` argument of plotting methods (like :py:meth:`xarray.plot.dataarray_plot.pcolormesh`) (:pull:`8261`). By `Pieter Eendebak `_. Documentation ~~~~~~~~~~~~~ - Make documentation of :py:meth:`DataArray.where` clearer (:issue:`7767`, :pull:`7955`). By `Riulinchen `_. Internal Changes ~~~~~~~~~~~~~~~~ - Many error messages related to invalid dimensions or coordinates now always show the list of valid dims/coords (:pull:`8079`). By `András Gunyhó `_. - Refactor of encoding and decoding times/timedeltas to preserve nanosecond resolution in arrays that contain missing values (:pull:`7827`). By `Kai Mühlbauer `_. - Transition ``.rolling_exp`` functions to use ``.apply_ufunc`` internally rather than ``.reduce``, as the start of a broader effort to move non-reducing functions away from ```.reduce``, (:pull:`8114`). By `Maximilian Roos `_. - Test range of fill_value's in test_interpolate_pd_compat (:issue:`8146`, :pull:`8189`). By `Kai Mühlbauer `_. .. _whats-new.2023.08.0: v2023.08.0 (Aug 18, 2023) ------------------------- This release brings changes to minimum dependencies, allows reading of datasets where a dimension name is associated with a multidimensional variable (e.g. finite volume ocean model output), and introduces a new :py:class:`xarray.Coordinates` object. Thanks to the 16 contributors to this release: Anderson Banihirwe, Articoking, Benoit Bovy, Deepak Cherian, Harshitha, Ian Carroll, Joe Hamman, Justus Magin, Peter Hill, Rachel Wegener, Riley Kuttruff, Thomas Nicholas, Tom Nicholas, ilgast, quantsnus, vallirep Announcements ~~~~~~~~~~~~~ The :py:class:`xarray.Variable` class is being refactored out to a new project title 'namedarray'. See the `design doc `_ for more details. 
Reach out to us on this [discussion topic](https://github.com/pydata/xarray/discussions/8080) if you have any thoughts. New Features ~~~~~~~~~~~~ - :py:class:`Coordinates` can now be constructed independently of any Dataset or DataArray (it is also returned by the :py:attr:`Dataset.coords` and :py:attr:`DataArray.coords` properties). ``Coordinates`` objects are useful for passing both coordinate variables and indexes to new Dataset / DataArray objects, e.g., via their constructor or via :py:meth:`Dataset.assign_coords`. We may also wrap coordinate variables in a ``Coordinates`` object in order to skip the automatic creation of (pandas) indexes for dimension coordinates. The :py:class:`Coordinates.from_pandas_multiindex` constructor may be used to create coordinates directly from a :py:class:`pandas.MultiIndex` object (it is preferred over passing it directly as coordinate data, which may be deprecated soon). Like Dataset and DataArray objects, ``Coordinates`` objects may now be used in :py:func:`align` and :py:func:`merge`. (:issue:`6392`, :pull:`7368`). By `Benoît Bovy `_. - Visually group together coordinates with the same indexes in the index section of the text repr (:pull:`7225`). By `Justus Magin `_. - Allow creating Xarray objects where a multidimensional variable shares its name with a dimension. Examples include output from finite volume models like FVCOM. (:issue:`2233`, :pull:`7989`) By `Deepak Cherian `_ and `Benoit Bovy `_. - When outputting :py:class:`Dataset` objects as Zarr via :py:meth:`Dataset.to_zarr`, user can now specify that chunks that will contain no valid data will not be written. Originally, this could be done by specifying ``"write_empty_chunks": True`` in the ``encoding`` parameter; however, this setting would not carry over when appending new data to an existing dataset. (:issue:`8009`) Requires ``zarr>=2.11``. Breaking changes ~~~~~~~~~~~~~~~~ - The minimum versions of some dependencies were changed (:pull:`8022`): ===================== ========= ======== Package Old New ===================== ========= ======== boto3 1.20 1.24 cftime 1.5 1.6 dask-core 2022.1 2022.7 distributed 2022.1 2022.7 hfnetcdf 0.13 1.0 iris 3.1 3.2 lxml 4.7 4.9 netcdf4 1.5.7 1.6.0 numpy 1.21 1.22 pint 0.18 0.19 pydap 3.2 3.3 rasterio 1.2 1.3 scipy 1.7 1.8 toolz 0.11 0.12 typing_extensions 4.0 4.3 zarr 2.10 2.12 numbagg 0.1 0.2.1 ===================== ========= ======== Documentation ~~~~~~~~~~~~~ - Added page on the internal design of xarray objects. (:pull:`7991`) By `Tom Nicholas `_. - Added examples to docstrings of :py:meth:`Dataset.assign_attrs`, :py:meth:`Dataset.broadcast_equals`, :py:meth:`Dataset.equals`, :py:meth:`Dataset.identical`, :py:meth:`Dataset.expand_dims`, :py:meth:`Dataset.drop_vars` (:issue:`6793`, :pull:`7937`) By `Harshitha `_. - Add docstrings for the :py:class:`Index` base class and add some documentation on how to create custom, Xarray-compatible indexes (:pull:`6975`) By `Benoît Bovy `_. - Added a page clarifying the role of Xarray core team members. (:pull:`7999`) By `Tom Nicholas `_. - Fixed broken links in "See also" section of :py:meth:`Dataset.count` (:issue:`8055`, :pull:`8057`) By `Articoking `_. - Extended the glossary by adding terms Aligning, Broadcasting, Merging, Concatenating, Combining, lazy, labeled, serialization, indexing (:issue:`3355`, :pull:`7732`) By `Harshitha `_. Internal Changes ~~~~~~~~~~~~~~~~ - :py:func:`as_variable` now consistently includes the variable name in any exceptions raised. (:pull:`7995`). 
By `Peter Hill `_ - :py:func:`encode_dataset_coordinates` now sorts coordinates automatically assigned to ``coordinates`` attributes during serialization (:issue:`8026`, :pull:`8034`). By `Ian Carroll `_. .. _whats-new.2023.07.0: v2023.07.0 (July 17, 2023) -------------------------- This release brings improvements to the documentation on wrapping numpy-like arrays, improved docstrings, and bug fixes. Deprecations ~~~~~~~~~~~~ - ``hue_style`` is being deprecated for scatter plots. (:issue:`7907`, :pull:`7925`). By `Jimmy Westling `_. Bug fixes ~~~~~~~~~ - Ensure no forward slashes in variable and dimension names for HDF5-based engines. (:issue:`7943`, :pull:`7953`) By `Kai Mühlbauer `_. Documentation ~~~~~~~~~~~~~ - Added examples to docstrings of :py:meth:`Dataset.assign_attrs`, :py:meth:`Dataset.broadcast_equals`, :py:meth:`Dataset.equals`, :py:meth:`Dataset.identical`, :py:meth:`Dataset.expand_dims`, :py:meth:`Dataset.drop_vars` (:issue:`6793`, :pull:`7937`) By `Harshitha `_. - Added page on wrapping chunked numpy-like arrays as alternatives to dask arrays. (:pull:`7951`) By `Tom Nicholas `_. - Expanded the page on wrapping numpy-like "duck" arrays. (:pull:`7911`) By `Tom Nicholas `_. - Added examples to docstrings of :py:meth:`Dataset.isel`, :py:meth:`Dataset.reduce`, :py:meth:`Dataset.argmin`, :py:meth:`Dataset.argmax` (:issue:`6793`, :pull:`7881`) By `Harshitha `_. Internal Changes ~~~~~~~~~~~~~~~~ - Allow chunked non-dask arrays (i.e. Cubed arrays) in groupby operations. (:pull:`7941`) By `Tom Nicholas `_. .. _whats-new.2023.06.0: v2023.06.0 (June 21, 2023) -------------------------- This release adds features to ``curvefit``, improves the performance of concatenation, and fixes various bugs. Thanks to our 13 contributors to this release: Anderson Banihirwe, Deepak Cherian, dependabot[bot], Illviljan, Juniper Tyree, Justus Magin, Martin Fleischmann, Mattia Almansi, mgunyho, Rutger van Haasteren, Thomas Nicholas, Tom Nicholas, Tom White. New Features ~~~~~~~~~~~~ - Added support for multidimensional initial guess and bounds in :py:meth:`DataArray.curvefit` (:issue:`7768`, :pull:`7821`). By `András Gunyhó `_. - Add an ``errors`` option to :py:meth:`Dataset.curvefit` that allows returning NaN for the parameters and covariances of failed fits, rather than failing the whole series of fits (:issue:`6317`, :pull:`7891`); see the short sketch after the Bug fixes below. By `Dominik Stańczak `_ and `András Gunyhó `_. Breaking changes ~~~~~~~~~~~~~~~~ Deprecations ~~~~~~~~~~~~ - Deprecate the `cdms2 `_ conversion methods (:pull:`7876`) By `Justus Magin `_. Performance ~~~~~~~~~~~ - Improve concatenation performance (:issue:`7833`, :pull:`7824`). By `Jimmy Westling `_. Bug fixes ~~~~~~~~~ - Fix bug where weighted ``polyfit`` was changing the original object (:issue:`5644`, :pull:`7900`). By `Mattia Almansi `_. - Don't call ``CachingFileManager.__del__`` on interpreter shutdown (:issue:`7814`, :pull:`7880`). By `Justus Magin `_. - Preserve vlen dtype for empty string arrays (:issue:`7328`, :pull:`7862`). By `Tom White `_ and `Kai Mühlbauer `_. - Ensure dtype of reindex result matches dtype of the original DataArray (:issue:`7299`, :pull:`7917`) By `Anderson Banihirwe `_. - Fix bug where a zero-length zarr ``chunk_store`` was ignored as if it was ``None`` (:pull:`7923`) By `Juniper Tyree `_.
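The sketch referenced above: a minimal, illustrative example of the new ``curvefit`` ``errors`` option (the linear model and data are invented for the example, and it assumes the ``errors`` option accepts ``"ignore"`` as described in :pull:`7891`):

.. code-block:: python

    import numpy as np
    import xarray as xr

    def linear(x, a, b):
        return a * x + b

    x = np.linspace(0.0, 1.0, 20)
    da = xr.DataArray(2.5 * x + 1.0, coords={"x": x}, dims="x")

    # failed fits yield NaN parameters/covariances instead of raising
    fit = da.curvefit(coords="x", func=linear, errors="ignore")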
Documentation ~~~~~~~~~~~~~ Internal Changes ~~~~~~~~~~~~~~~~ - Minor improvements to support of the python `array api standard `_, internally using the function ``xp.astype()`` instead of the method ``arr.astype()``, as the latter is not in the standard. (:pull:`7847`) By `Tom Nicholas `_. - Xarray now uploads nightly wheels to https://pypi.anaconda.org/scientific-python-nightly-wheels/simple/ (:issue:`7863`, :pull:`7865`). By `Martin Fleischmann `_. - Stop uploading development wheels to TestPyPI (:pull:`7889`) By `Justus Magin `_. - Added an exception catch for ``AttributeError`` along with ``ImportError`` when duck typing the dynamic imports in pycompat.py. This catches some name collisions between packages. (:issue:`7870`, :pull:`7874`) .. _whats-new.2023.05.0: v2023.05.0 (May 18, 2023) ------------------------- This release adds some new methods and operators, updates our deprecation policy for python versions, fixes some bugs with groupby, and introduces experimental support for alternative chunked parallel array computation backends via a new plugin system! **Note:** If you are using a locally-installed development version of xarray then pulling the changes from this release may require you to re-install. This avoids an error where xarray cannot detect dask via the new entrypoints system introduced in :pull:`7019`. See :issue:`7856` for details. Thanks to our 14 contributors: Alan Brammer, crusaderky, David Stansby, dcherian, Deeksha, Deepak Cherian, Illviljan, James McCreight, Joe Hamman, Justus Magin, Kyle Sunden, Max Hollmann, mgunyho, and Tom Nicholas New Features ~~~~~~~~~~~~ - Added new method :py:meth:`DataArray.to_dask_dataframe`, convert a dataarray into a dask dataframe (:issue:`7409`). By `Deeksha `_. - Add support for lshift and rshift binary operators (``<<``, ``>>``) on :py:class:`xr.DataArray` of type :py:class:`int` (:issue:`7727` , :pull:`7741`). By `Alan Brammer `_. - Keyword argument ``data='array'`` to both :py:meth:`xarray.Dataset.to_dict` and :py:meth:`xarray.DataArray.to_dict` will now return data as the underlying array type. Python lists are returned for ``data='list'`` or ``data=True``. Supplying ``data=False`` only returns the schema without data. ``encoding=True`` returns the encoding dictionary for the underlying variable also. (:issue:`1599`, :pull:`7739`) . By `James McCreight `_. Breaking changes ~~~~~~~~~~~~~~~~ - adjust the deprecation policy for python to once again align with NEP-29 (:issue:`7765`, :pull:`7793`) By `Justus Magin `_. Performance ~~~~~~~~~~~ - Optimize ``.dt `` accessor performance with ``CFTimeIndex``. (:pull:`7796`) By `Deepak Cherian `_. Bug fixes ~~~~~~~~~ - Fix ``as_compatible_data`` for masked float arrays, now always creates a copy when mask is present (:issue:`2377`, :pull:`7788`). By `Max Hollmann `_. - Fix groupby binary ops when grouped array is subset relative to other. (:issue:`7797`). By `Deepak Cherian `_. - Fix groupby sum, prod for all-NaN groups with ``flox``. (:issue:`7808`). By `Deepak Cherian `_. Internal Changes ~~~~~~~~~~~~~~~~ - Experimental support for wrapping chunked array libraries other than dask. A new ABC is defined - :py:class:`xr.namedarray.parallelcompat.ChunkManagerEntrypoint` - which can be subclassed and then registered by alternative chunked array implementations. (:issue:`6807`, :pull:`7019`) By `Tom Nicholas `_. .. 
_whats-new.2023.04.2: v2023.04.2 (April 20, 2023) --------------------------- This is a patch release to fix a bug with binning (:issue:`7766`) Bug fixes ~~~~~~~~~ - Fix binning when ``labels`` is specified. (:issue:`7766`). By `Deepak Cherian `_. Documentation ~~~~~~~~~~~~~ - Added examples to docstrings for :py:meth:`xarray.core.accessor_str.StringAccessor` methods. (:pull:`7669`) . By `Mary Gathoni `_. .. _whats-new.2023.04.1: v2023.04.1 (April 18, 2023) --------------------------- This is a patch release to fix a bug with binning (:issue:`7759`) Bug fixes ~~~~~~~~~ - Fix binning by unsorted arrays. (:issue:`7759`) .. _whats-new.2023.04.0: v2023.04.0 (April 14, 2023) --------------------------- This release includes support for pandas v2, allows refreshing of backend engines in a session, and removes deprecated backends for ``rasterio`` and ``cfgrib``. Thanks to our 19 contributors: Chinemere, Tom Coleman, Deepak Cherian, Harshitha, Illviljan, Jessica Scheick, Joe Hamman, Justus Magin, Kai Mühlbauer, Kwonil-Kim, Mary Gathoni, Michael Niklas, Pierre, Scott Henderson, Shreyal Gupta, Spencer Clark, mccloskey, nishtha981, veenstrajelmer We welcome the following new contributors to Xarray!: Mary Gathoni, Harshitha, veenstrajelmer, Chinemere, nishtha981, Shreyal Gupta, Kwonil-Kim, mccloskey. New Features ~~~~~~~~~~~~ - New methods to reset an objects encoding (:py:meth:`Dataset.reset_encoding`, :py:meth:`DataArray.reset_encoding`). (:issue:`7686`, :pull:`7689`). By `Joe Hamman `_. - Allow refreshing backend engines with :py:meth:`xarray.backends.refresh_engines` (:issue:`7478`, :pull:`7523`). By `Michael Niklas `_. - Added ability to save ``DataArray`` objects directly to Zarr using :py:meth:`~xarray.DataArray.to_zarr`. (:issue:`7692`, :pull:`7693`) . By `Joe Hamman `_. Breaking changes ~~~~~~~~~~~~~~~~ - Remove deprecated rasterio backend in favor of rioxarray (:pull:`7392`). By `Scott Henderson `_. Deprecations ~~~~~~~~~~~~ Performance ~~~~~~~~~~~ - Optimize alignment with ``join="exact", copy=False`` by avoiding copies. (:pull:`7736`) By `Deepak Cherian `_. - Avoid unnecessary copies of ``CFTimeIndex``. (:pull:`7735`) By `Deepak Cherian `_. Bug fixes ~~~~~~~~~ - Fix :py:meth:`xr.polyval` with non-system standard integer coeffs (:pull:`7619`). By `Shreyal Gupta `_ and `Michael Niklas `_. - Improve error message when trying to open a file which you do not have permission to read (:issue:`6523`, :pull:`7629`). By `Thomas Coleman `_. - Proper plotting when passing :py:class:`~matplotlib.colors.BoundaryNorm` type argument in :py:meth:`DataArray.plot`. (:issue:`4061`, :issue:`7014`,:pull:`7553`) By `Jelmer Veenstra `_. - Ensure the formatting of time encoding reference dates outside the range of nanosecond-precision datetimes remains the same under pandas version 2.0.0 (:issue:`7420`, :pull:`7441`). By `Justus Magin `_ and `Spencer Clark `_. - Various ``dtype`` related fixes needed to support ``pandas>=2.0`` (:pull:`7724`) By `Justus Magin `_. - Preserve boolean dtype within encoding (:issue:`7652`, :pull:`7720`). By `Kai Mühlbauer `_ Documentation ~~~~~~~~~~~~~ - Update FAQ page on how do I open format X file as an xarray dataset? (:issue:`1285`, :pull:`7638`) using :py:func:`~xarray.open_dataset` By `Harshitha `_ , `Tom Nicholas `_. Internal Changes ~~~~~~~~~~~~~~~~ - Don't assume that arrays read from disk will be Numpy arrays. This is a step toward enabling reads from a Zarr store using the `Kvikio `_ or `TensorStore `_ libraries. (:pull:`6874`). By `Deepak Cherian `_. 
- Remove internal support for reading GRIB files through the ``cfgrib`` backend. ``cfgrib`` now uses the external backend interface, so no existing code should break. By `Deepak Cherian `_. - Implement CF coding functions in ``VariableCoders`` (:pull:`7719`). By `Kai Mühlbauer `_ - Added a config.yml file with messages for the welcome bot when a Github user creates their first ever issue or pull request or has their first PR merged. (:issue:`7685`, :pull:`7685`) By `Nishtha P `_. - Ensure that only nanosecond-precision :py:class:`pd.Timestamp` objects continue to be used internally under pandas version 2.0.0. This is mainly to ease the transition to this latest version of pandas. It should be relaxed when addressing :issue:`7493`. By `Spencer Clark `_ (:issue:`7707`, :pull:`7731`). .. _whats-new.2023.03.0: v2023.03.0 (March 22, 2023) --------------------------- This release brings many bug fixes and some new features. The maximum pandas version is pinned to ``<2`` until we can support the new pandas datetime types. Thanks to our 19 contributors: Abel Aoun, Alex Goodman, Deepak Cherian, Illviljan, Jody Klymak, Joe Hamman, Justus Magin, Mary Gathoni, Mathias Hauser, Mattia Almansi, Mick, Oriol Abril-Pla, Patrick Hoefler, Paul Ockenfuß, Pierre, Shreyal Gupta, Spencer Clark, Tom Nicholas, Tom Vo New Features ~~~~~~~~~~~~ - :py:meth:`xr.cov` and :py:meth:`xr.corr` now support complex valued arrays (:issue:`7340`, :pull:`7392`). By `Michael Niklas `_. - Allow indexing along unindexed dimensions with dask arrays (:issue:`2511`, :issue:`4276`, :issue:`4663`, :pull:`5873`). By `Abel Aoun `_ and `Deepak Cherian `_. - Support dask arrays in ``first`` and ``last`` reductions. By `Deepak Cherian `_. - Improved performance in ``open_dataset`` for datasets with large object arrays (:issue:`7484`, :pull:`7494`). By `Alex Goodman `_ and `Deepak Cherian `_. Breaking changes ~~~~~~~~~~~~~~~~ Deprecations ~~~~~~~~~~~~ - Following pandas, the ``base`` and ``loffset`` parameters of :py:meth:`xr.DataArray.resample` and :py:meth:`xr.Dataset.resample` have been deprecated and will be removed in a future version of xarray. Using the ``origin`` or ``offset`` parameters is recommended as a replacement for using the ``base`` parameter and using time offset arithmetic is recommended as a replacement for using the ``loffset`` parameter (:pull:`8459`). By `Spencer Clark `_. Bug fixes ~~~~~~~~~ - Improve the error message in :py:meth:`Dataset.drop_vars` to state which variables can't be dropped. (:pull:`7518`) By `Tom Nicholas `_. - Require explicitly defining optional dimensions such as hue and markersize for scatter plots. (:issue:`7314`, :pull:`7277`). By `Jimmy Westling `_. - Fix matplotlib raising a UserWarning when plotting a scatter plot with an unfilled marker (:issue:`7313`, :pull:`7318`). By `Jimmy Westling `_. - Fix issue with ``max_gap`` in ``interpolate_na``, when applied to multidimensional arrays. (:issue:`7597`, :pull:`7598`). By `Paul Ockenfuß `_. - Fix :py:meth:`DataArray.plot.pcolormesh` which now works if one of the coordinates has str dtype (:issue:`6775`, :pull:`7612`). By `Michael Niklas `_. Documentation ~~~~~~~~~~~~~ - Clarify language in contributor's guide (:issue:`7495`, :pull:`7595`) By `Tom Nicholas `_. Internal Changes ~~~~~~~~~~~~~~~~ - Pin pandas to ``<2``. By `Deepak Cherian `_. ..
_whats-new.2023.02.0: v2023.02.0 (Feb 7, 2023) ------------------------ This release brings a major upgrade to :py:func:`xarray.concat`, many bug fixes, and a bump in supported dependency versions. Thanks to our 11 contributors: Aron Gergely, Deepak Cherian, Illviljan, James Bourbeau, Joe Hamman, Justus Magin, Hauke Schulz, Kai Mühlbauer, Ken Mankoff, Spencer Clark, Tom Nicholas. Breaking changes ~~~~~~~~~~~~~~~~ - Support for ``python 3.8`` has been dropped and the minimum versions of some dependencies were changed (:pull:`7461`): ===================== ========= ======== Package Old New ===================== ========= ======== python 3.8 3.9 numpy 1.20 1.21 pandas 1.3 1.4 dask 2021.11 2022.1 distributed 2021.11 2022.1 h5netcdf 0.11 0.13 lxml 4.6 4.7 numba 5.4 5.5 ===================== ========= ======== Deprecations ~~~~~~~~~~~~ - Following pandas, the ``closed`` parameters of :py:func:`cftime_range` and :py:func:`date_range` are deprecated in favor of the ``inclusive`` parameters, and will be removed in a future version of xarray (:issue:`6985`:, :pull:`7373`). By `Spencer Clark `_. Bug fixes ~~~~~~~~~ - :py:func:`xarray.concat` can now concatenate variables present in some datasets but not others (:issue:`508`, :pull:`7400`). By `Kai Mühlbauer `_ and `Scott Chamberlin `_. - Handle ``keep_attrs`` option in binary operators of :py:meth:`Dataset` (:issue:`7390`, :pull:`7391`). By `Aron Gergely `_. - Improve error message when using dask in :py:func:`apply_ufunc` with ``output_sizes`` not supplied. (:pull:`7509`) By `Tom Nicholas `_. - :py:func:`xarray.Dataset.to_zarr` now drops variable encodings that have been added by xarray during reading a dataset. (:issue:`7129`, :pull:`7500`). By `Hauke Schulz `_. Documentation ~~~~~~~~~~~~~ - Mention the `flox package `_ in GroupBy documentation and docstrings. By `Deepak Cherian `_. .. _whats-new.2023.01.0: v2023.01.0 (Jan 17, 2023) ------------------------- This release includes a number of bug fixes. Thanks to the 14 contributors to this release: Aron Gergely, Benoit Bovy, Deepak Cherian, Ian Carroll, Illviljan, Joe Hamman, Justus Magin, Mark Harfouche, Matthew Roeschke, Paige Martin, Pierre, Sam Levang, Tom White, stefank0. Breaking changes ~~~~~~~~~~~~~~~~ - :py:meth:`CFTimeIndex.get_loc` has removed the ``method`` and ``tolerance`` keyword arguments. Use ``.get_indexer([key], method=..., tolerance=...)`` instead (:pull:`7361`). By `Matthew Roeschke `_. Bug fixes ~~~~~~~~~ - Avoid in-memory broadcasting when converting to a dask dataframe using ``.to_dask_dataframe.`` (:issue:`6811`, :pull:`7472`). By `Jimmy Westling `_. - Accessing the property ``.nbytes`` of a DataArray, or Variable no longer accidentally triggers loading the variable into memory. - Allow numpy-only objects in :py:func:`where` when ``keep_attrs=True`` (:issue:`7362`, :pull:`7364`). By `Sam Levang `_. - add a ``keep_attrs`` parameter to :py:meth:`Dataset.pad`, :py:meth:`DataArray.pad`, and :py:meth:`Variable.pad` (:pull:`7267`). By `Justus Magin `_. - Fixed performance regression in alignment between indexed and non-indexed objects of the same shape (:pull:`7382`). By `Benoît Bovy `_. - Preserve original dtype on accessing MultiIndex levels (:issue:`7250`, :pull:`7393`). By `Ian Carroll `_. Internal Changes ~~~~~~~~~~~~~~~~ - Add the pre-commit hook ``absolufy-imports`` to convert relative xarray imports to absolute imports (:pull:`7204`, :pull:`7370`). By `Jimmy Westling `_. .. 
_whats-new.2022.12.0: v2022.12.0 (2022 Dec 2) ----------------------- This release includes a number of bug fixes and experimental support for Zarr V3. Thanks to the 16 contributors to this release: Deepak Cherian, Francesco Zanetta, Gregory Lee, Illviljan, Joe Hamman, Justus Magin, Luke Conibear, Mark Harfouche, Mathias Hauser, Mick, Mike Taves, Sam Levang, Spencer Clark, Tom Nicholas, Wei Ji, templiert New Features ~~~~~~~~~~~~ - Enable using ``offset`` and ``origin`` arguments in :py:meth:`DataArray.resample` and :py:meth:`Dataset.resample` (:issue:`7266`, :pull:`7284`). By `Spencer Clark `_. - Add experimental support for Zarr's in-progress V3 specification. (:pull:`6475`). By `Gregory Lee `_ and `Joe Hamman `_. Breaking changes ~~~~~~~~~~~~~~~~ - The minimum versions of some dependencies were changed (:pull:`7300`): ========================== ========= ======== Package Old New ========================== ========= ======== boto 1.18 1.20 cartopy 0.19 0.20 distributed 2021.09 2021.11 dask 2021.09 2021.11 h5py 3.1 3.6 hdf5 1.10 1.12 matplotlib-base 3.4 3.5 nc-time-axis 1.3 1.4 netcdf4 1.5.3 1.5.7 packaging 20.3 21.3 pint 0.17 0.18 pseudonetcdf 3.1 3.2 typing_extensions 3.10 4.0 ========================== ========= ======== Deprecations ~~~~~~~~~~~~ - The PyNIO backend has been deprecated (:issue:`4491`, :pull:`7301`). By `Joe Hamman `_. Bug fixes ~~~~~~~~~ - Fix handling of coordinate attributes in :py:func:`where`. (:issue:`7220`, :pull:`7229`) By `Sam Levang `_. - Import ``nc_time_axis`` when needed (:issue:`7275`, :pull:`7276`). By `Michael Niklas `_. - Fix static typing of :py:meth:`xr.polyval` (:issue:`7312`, :pull:`7315`). By `Michael Niklas `_. - Fix multiple reads on fsspec S3 files by resetting file pointer to 0 when reading file streams (:issue:`6813`, :pull:`7304`). By `David Hoese `_ and `Wei Ji Leong `_. - Fix :py:meth:`Dataset.assign_coords` resetting all dimension coordinates to default (pandas) index (:issue:`7346`, :pull:`7347`). By `Benoît Bovy `_. Documentation ~~~~~~~~~~~~~ - Add example of reading and writing individual groups to a single netCDF file to I/O docs page. (:pull:`7338`) By `Tom Nicholas `_. Internal Changes ~~~~~~~~~~~~~~~~ .. _whats-new.2022.11.0: v2022.11.0 (Nov 4, 2022) ------------------------ This release brings a number of bugfixes and documentation improvements. Both text and HTML reprs now have a new "Indexes" section, which we expect will help with development of new Index objects. This release also features more support for the Python Array API. Many thanks to the 16 contributors to this release: Daniel Goman, Deepak Cherian, Illviljan, Jessica Scheick, Justus Magin, Mark Harfouche, Maximilian Roos, Mick, Patrick Naylor, Pierre, Spencer Clark, Stephan Hoyer, Tom Nicholas, Tom White New Features ~~~~~~~~~~~~ - Add static typing to plot accessors (:issue:`6949`, :pull:`7052`). By `Michael Niklas `_. - Display the indexes in a new section of the text and HTML reprs (:pull:`6795`, :pull:`7183`, :pull:`7185`) By `Justus Magin `_ and `Benoît Bovy `_. - Added methods :py:meth:`DataArrayGroupBy.cumprod` and :py:meth:`DatasetGroupBy.cumprod`. (:pull:`5816`) By `Patrick Naylor `_ Breaking changes ~~~~~~~~~~~~~~~~ - ``repr(ds)`` may not show the same result because it doesn't load small, lazy data anymore. Use ``ds.head().load()`` when wanting to see just a sample of the data. (:issue:`6722`, :pull:`7203`). By `Jimmy Westling `_. - Many arguments of plotmethods have been made keyword-only. 
- ``xarray.plot.plot`` module renamed to ``xarray.plot.dataarray_plot`` to prevent shadowing of the ``plot`` method. (:issue:`6949`, :pull:`7052`). By `Michael Niklas `_. Deprecations ~~~~~~~~~~~~ - Positional arguments for all plot methods have been deprecated (:issue:`6949`, :pull:`7052`). By `Michael Niklas `_. - ``xarray.plot.FacetGrid.axes`` has been renamed to ``xarray.plot.FacetGrid.axs`` because it's not clear if ``axes`` refers to single or multiple ``Axes`` instances. This aligns with ``matplotlib.pyplot.subplots``. (:pull:`7194`) By `Jimmy Westling `_. Bug fixes ~~~~~~~~~ - Explicitly opening a file multiple times (e.g., after modifying it on disk) now reopens the file from scratch for h5netcdf and scipy netCDF backends, rather than reusing a cached version (:issue:`4240`, :issue:`4862`). By `Stephan Hoyer `_. - Fixed bug where :py:meth:`Dataset.coarsen.construct` would demote non-dimension coordinates to variables. (:pull:`7233`) By `Tom Nicholas `_. - Raise a TypeError when trying to plot empty data (:issue:`7156`, :pull:`7228`). By `Michael Niklas `_. Documentation ~~~~~~~~~~~~~ - Improves overall documentation around available backends, including adding docstrings for :py:func:`xarray.backends.list_engines` Add :py:meth:`__str__` to surface the new :py:class:`BackendEntrypoint` ``description`` and ``url`` attributes. (:issue:`6577`, :pull:`7000`) By `Jessica Scheick `_. - Created docstring examples for :py:meth:`DataArray.cumsum`, :py:meth:`DataArray.cumprod`, :py:meth:`Dataset.cumsum`, :py:meth:`Dataset.cumprod`, :py:meth:`DatasetGroupBy.cumsum`, :py:meth:`DataArrayGroupBy.cumsum`. (:issue:`5816`, :pull:`7152`) By `Patrick Naylor `_ - Add example of using :py:meth:`DataArray.coarsen.construct` to User Guide. (:pull:`7192`) By `Tom Nicholas `_. - Rename ``axes`` to ``axs`` in plotting to align with ``matplotlib.pyplot.subplots``. (:pull:`7194`) By `Jimmy Westling `_. - Add documentation of specific BackendEntrypoints (:pull:`7200`). By `Michael Niklas `_. - Add examples to docstring for :py:meth:`DataArray.drop_vars`, :py:meth:`DataArray.reindex_like`, :py:meth:`DataArray.interp_like`. (:issue:`6793`, :pull:`7123`) By `Daniel Goman `_. Internal Changes ~~~~~~~~~~~~~~~~ - Doctests fail on any warnings (:pull:`7166`) By `Maximilian Roos `_. - Improve import time by lazy loading ``dask.distributed`` (:pull:`7172`). - Explicitly specify ``longdouble=False`` in :py:func:`cftime.date2num` when encoding times to preserve existing behavior and prevent future errors when it is eventually set to ``True`` by default in cftime (:pull:`7171`). By `Spencer Clark `_. - Improved import time by lazily importing backend modules, matplotlib, dask.array and flox. (:issue:`6726`, :pull:`7179`) By `Michael Niklas `_. - Emit a warning under the development version of pandas when we convert non-nanosecond precision datetime or timedelta values to nanosecond precision. This was required in the past, because pandas previously was not compatible with non-nanosecond precision values. However pandas is currently working towards removing this restriction. When things stabilize in pandas we will likely consider relaxing this behavior in xarray as well (:issue:`7175`, :pull:`7201`). By `Spencer Clark `_. .. _whats-new.2022.10.0: v2022.10.0 (Oct 14 2022) ------------------------ This release brings numerous bugfixes, a change in minimum supported versions, and a new scatter plot method for DataArrays. 
Many thanks to 11 contributors to this release: Anderson Banihirwe, Benoit Bovy, Dan Adriaansen, Illviljan, Justus Magin, Lukas Bindreiter, Mick, Patrick Naylor, Spencer Clark, Thomas Nicholas New Features ~~~~~~~~~~~~ - Add scatter plot for datarrays. Scatter plots now also supports 3d plots with the z argument. (:pull:`6778`) By `Jimmy Westling `_. - Include the variable name in the error message when CF decoding fails to allow for easier identification of problematic variables (:issue:`7145`, :pull:`7147`). By `Spencer Clark `_. Breaking changes ~~~~~~~~~~~~~~~~ - The minimum versions of some dependencies were changed: ========================== ========= ======== Package Old New ========================== ========= ======== cftime 1.4 1.5 distributed 2021.08 2021.09 dask 2021.08 2021.09 iris 2.4 3.1 nc-time-axis 1.2 1.3 numba 0.53 0.54 numpy 1.19 1.20 pandas 1.2 1.3 packaging 20.0 21.0 scipy 1.6 1.7 sparse 0.12 0.13 typing_extensions 3.7 3.10 zarr 2.8 2.10 ========================== ========= ======== Bug fixes ~~~~~~~~~ - Remove nested function from :py:func:`open_mfdataset` to allow Dataset objects to be pickled. (:issue:`7109`, :pull:`7116`) By `Daniel Adriaansen `_. - Support for recursively defined Arrays. Fixes repr and deepcopy. (:issue:`7111`, :pull:`7112`) By `Michael Niklas `_. - Fixed :py:meth:`Dataset.transpose` to raise a more informative error. (:issue:`6502`, :pull:`7120`) By `Patrick Naylor `_ - Fix groupby on a multi-index level coordinate and fix :py:meth:`DataArray.to_index` for multi-index levels (convert to single index). (:issue:`6836`, :pull:`7105`) By `Benoît Bovy `_. - Support for open_dataset backends that return datasets containing multi-indexes (:issue:`7139`, :pull:`7150`) By `Lukas Bindreiter `_. .. _whats-new.2022.09.0: v2022.09.0 (September 30, 2022) ------------------------------- This release brings a large number of bugfixes and documentation improvements, as well as an external interface for setting custom indexes! Many thanks to our 40 contributors: Anderson Banihirwe, Andrew Ronald Friedman, Bane Sullivan, Benoit Bovy, ColemanTom, Deepak Cherian, Dimitri Papadopoulos Orfanos, Emma Marshall, Fabian Hofmann, Francesco Nattino, ghislainp, Graham Inggs, Hauke Schulz, Illviljan, James Bourbeau, Jody Klymak, Julia Signell, Justus Magin, Keewis, Ken Mankoff, Luke Conibear, Mathias Hauser, Max Jones, mgunyho, Michael Delgado, Mick, Mike Taves, Oliver Lopez, Patrick Naylor, Paul Hockett, Pierre Manchon, Ray Bell, Riley Brady, Sam Levang, Spencer Clark, Stefaan Lippens, Tom Nicholas, Tom White, Travis A. O'Brien, and Zachary Moon. New Features ~~~~~~~~~~~~ - Add :py:meth:`Dataset.set_xindex` and :py:meth:`Dataset.drop_indexes` and their DataArray counterpart for setting and dropping pandas or custom indexes given a set of arbitrary coordinates. (:pull:`6971`) By `Benoît Bovy `_ and `Justus Magin `_. - Enable taking the mean of dask-backed :py:class:`cftime.datetime` arrays (:pull:`6556`, :pull:`6940`). By `Deepak Cherian `_ and `Spencer Clark `_. Bug fixes ~~~~~~~~~ - Allow reading netcdf files where the 'units' attribute is a number. (:pull:`7085`) By `Ghislain Picard `_. - Allow decoding of 0 sized datetimes. (:issue:`1329`, :pull:`6882`) By `Deepak Cherian `_. - Make sure DataArray.name is always a string when used as label for plotting. (:issue:`6826`, :pull:`6832`) By `Jimmy Westling `_. - :py:attr:`DataArray.nbytes` now uses the ``nbytes`` property of the underlying array if available. (:pull:`6797`) By `Max Jones `_. 
- Rely on the array backend for string formatting. (:pull:`6823`). By `Jimmy Westling `_. - Fix incompatibility with numpy 1.20. (:issue:`6818`, :pull:`6821`) By `Michael Niklas `_. - Fix side effects on index coordinate metadata after aligning objects. (:issue:`6852`, :pull:`6857`) By `Benoît Bovy `_. - Make FacetGrid.set_titles send kwargs correctly using ``handle.update(kwargs)``. (:issue:`6839`, :pull:`6843`) By `Oliver Lopez `_. - Fix bug where index variables would be changed inplace. (:issue:`6931`, :pull:`6938`) By `Michael Niklas `_. - Allow taking the mean over non-time dimensions of datasets containing dask-backed cftime arrays. (:issue:`5897`, :pull:`6950`) By `Spencer Clark `_. - Harmonize returned multi-indexed indexes when applying ``concat`` along new dimension. (:issue:`6881`, :pull:`6889`) By `Fabian Hofmann `_. - Fix step plots with ``hue`` arg. (:pull:`6944`) By `András Gunyhó `_. - Avoid use of random numbers in ``test_weighted.test_weighted_operations_nonequal_coords``. (:issue:`6504`, :pull:`6961`) By `Luke Conibear `_. - Fix multiple regression issues with :py:meth:`Dataset.set_index` and :py:meth:`Dataset.reset_index`. (:pull:`6992`) By `Benoît Bovy `_. - Raise a ``UserWarning`` when renaming a coordinate or a dimension creates a non-indexed dimension coordinate, and suggest the user creating an index either with ``swap_dims`` or ``set_index``. (:issue:`6607`, :pull:`6999`) By `Benoît Bovy `_. - Use ``keep_attrs=True`` in grouping and resampling operations by default. (:issue:`7012`) This means :py:attr:`Dataset.attrs` and :py:attr:`DataArray.attrs` are now preserved by default. By `Deepak Cherian `_. - ``Dataset.encoding['source']`` now exists when reading from a Path object. (:issue:`5888`, :pull:`6974`) By `Thomas Coleman `_. - Better dtype consistency for ``rolling.mean()``. (:issue:`7062`, :pull:`7063`) By `Sam Levang `_. - Allow writing NetCDF files including only dimensionless variables using the distributed or multiprocessing scheduler. (:issue:`7013`, :pull:`7040`) By `Francesco Nattino `_. - Fix deepcopy of attrs and encoding of DataArrays and Variables. (:issue:`2835`, :pull:`7089`) By `Michael Niklas `_. - Fix bug where subplot_kwargs were not working when plotting with figsize, size or aspect. (:issue:`7078`, :pull:`7080`) By `Michael Niklas `_. Documentation ~~~~~~~~~~~~~ - Update merge docstrings. (:issue:`6935`, :pull:`7033`) By `Zach Moon `_. - Raise a more informative error when trying to open a non-existent zarr store. (:issue:`6484`, :pull:`7060`) By `Sam Levang `_. - Added examples to docstrings for :py:meth:`DataArray.expand_dims`, :py:meth:`DataArray.drop_duplicates`, :py:meth:`DataArray.reset_coords`, :py:meth:`DataArray.equals`, :py:meth:`DataArray.identical`, :py:meth:`DataArray.broadcast_equals`, :py:meth:`DataArray.bfill`, :py:meth:`DataArray.ffill`, :py:meth:`DataArray.fillna`, :py:meth:`DataArray.dropna`, :py:meth:`DataArray.drop_isel`, :py:meth:`DataArray.drop_sel`, :py:meth:`DataArray.head`, :py:meth:`DataArray.tail`. (:issue:`5816`, :pull:`7088`) By `Patrick Naylor `_. - Add missing docstrings to various array properties. (:pull:`7090`) By `Tom Nicholas `_. Internal Changes ~~~~~~~~~~~~~~~~ - Added test for DataArray attrs deepcopy recursion/nested attrs. (:issue:`2835`, :pull:`7086`) By `Paul hockett `_. .. 
_whats-new.2022.06.0: v2022.06.0 (July 21, 2022) -------------------------- This release brings a number of bug fixes and improvements, most notably a major internal refactor of the indexing functionality, the use of `flox`_ in ``groupby`` operations, and experimental support for the new Python `Array API standard `_. It also stops testing support for the abandoned PyNIO. Much effort has been made to preserve backwards compatibility as part of the indexing refactor. We are aware of one `unfixed issue `_. Please also see the `whats-new.2022.06.0rc0`_ for a full list of changes. Many thanks to our 18 contributors: Bane Sullivan, Deepak Cherian, Dimitri Papadopoulos Orfanos, Emma Marshall, Hauke Schulz, Illviljan, Julia Signell, Justus Magin, Keewis, Mathias Hauser, Michael Delgado, Mick, Pierre Manchon, Ray Bell, Spencer Clark, Stefaan Lippens, Tom White, Travis A. O'Brien, New Features ~~~~~~~~~~~~ - Add :py:attr:`Dataset.dtypes`, :py:attr:`core.coordinates.DatasetCoordinates.dtypes`, :py:attr:`core.coordinates.DataArrayCoordinates.dtypes` properties: Mapping from variable names to dtypes. (:pull:`6706`) By `Michael Niklas `_. - Initial typing support for :py:meth:`groupby`, :py:meth:`rolling`, :py:meth:`rolling_exp`, :py:meth:`coarsen`, :py:meth:`weighted`, :py:meth:`resample`, (:pull:`6702`) By `Michael Niklas `_. - Experimental support for wrapping any array type that conforms to the python `array api standard `_. (:pull:`6804`) By `Tom White `_. - Allow string formatting of scalar DataArrays. (:pull:`5981`) By `fmaussion `_. Bug fixes ~~~~~~~~~ - :py:meth:`save_mfdataset` now passes ``**kwargs`` on to :py:meth:`Dataset.to_netcdf`, allowing the ``encoding`` and ``unlimited_dims`` options with :py:meth:`save_mfdataset`. (:issue:`6684`) By `Travis A. O'Brien `_. - Fix backend support of pydap versions <3.3.0 (:issue:`6648`, :pull:`6656`). By `Hauke Schulz `_. - :py:meth:`Dataset.where` with ``drop=True`` now behaves correctly with mixed dimensions. (:issue:`6227`, :pull:`6690`) By `Michael Niklas `_. - Accommodate newly raised ``OutOfBoundsTimedelta`` error in the development version of pandas when decoding times outside the range that can be represented with nanosecond-precision values (:issue:`6716`, :pull:`6717`). By `Spencer Clark `_. - :py:meth:`open_dataset` with dask and ``~`` in the path now resolves the home directory instead of raising an error. (:issue:`6707`, :pull:`6710`) By `Michael Niklas `_. - :py:meth:`DataArrayRolling.__iter__` with ``center=True`` now works correctly. (:issue:`6739`, :pull:`6744`) By `Michael Niklas `_. Internal Changes ~~~~~~~~~~~~~~~~ - ``xarray.core.groupby``, ``xarray.core.rolling``, ``xarray.core.rolling_exp``, ``xarray.core.weighted`` and ``xarray.core.resample`` modules are no longer imported by default. (:pull:`6702`) .. _whats-new.2022.06.0rc0: v2022.06.0rc0 (9 June 2022) --------------------------- This pre-release brings a number of bug fixes and improvements, most notably a major internal refactor of the indexing functionality and the use of `flox`_ in ``groupby`` operations. It also stops testing support for the abandoned PyNIO. 
Install it using ::

    mamba create -n python=3.10 xarray
    python -m pip install --pre --upgrade --no-deps xarray

Many thanks to the 39 contributors: Abel Soares Siqueira, Alex Santana, Anderson Banihirwe, Benoit Bovy, Blair Bonnett, Brewster Malevich, brynjarmorka, Charles Stern, Christian Jauvin, Deepak Cherian, Emma Marshall, Fabien Maussion, Greg Behm, Guelate Seyo, Illviljan, Joe Hamman, Joseph K Aicher, Justus Magin, Kevin Paul, Louis Stenger, Mathias Hauser, Mattia Almansi, Maximilian Roos, Michael Bauer, Michael Delgado, Mick, ngam, Oleh Khoma, Oriol Abril-Pla, Philippe Blain, PLSeuJ, Sam Levang, Spencer Clark, Stan West, Thomas Nicholas, Thomas Vogt, Tom White, Xianxiang Li

Known Regressions
~~~~~~~~~~~~~~~~~

- ``reset_coords(drop=True)`` does not create indexes (:issue:`6607`)

New Features
~~~~~~~~~~~~

- The ``zarr`` backend is now able to read NCZarr. By `Mattia Almansi `_.
- Add a weighted ``quantile`` method to :py:class:`~computation.weighted.DatasetWeighted` and :py:class:`~computation.weighted.DataArrayWeighted` (:pull:`6059`). By `Christian Jauvin `_ and `David Huard `_.
- Add a ``create_index=True`` parameter to :py:meth:`Dataset.stack` and :py:meth:`DataArray.stack` so that the creation of multi-indexes is optional (:pull:`5692`). By `Benoît Bovy `_.
- Multi-index levels are now accessible through their own, regular coordinates instead of virtual coordinates (:pull:`5692`). By `Benoît Bovy `_.
- Add a ``display_values_threshold`` option to control the total number of array elements which trigger summarization rather than full repr in (numpy) array detailed views of the html repr (:pull:`6400`). By `Benoît Bovy `_.
- Allow passing chunks in ``kwargs`` form to :py:meth:`Dataset.chunk`, :py:meth:`DataArray.chunk`, and :py:meth:`Variable.chunk`. (:pull:`6471`) By `Tom Nicholas `_.
- Add :py:meth:`core.groupby.DatasetGroupBy.cumsum` and :py:meth:`core.groupby.DataArrayGroupBy.cumsum`. By `Vladislav Skripniuk `_ and `Deepak Cherian `_. (:pull:`3147`, :pull:`6525`, :issue:`3141`)
- Expose the ``inline_array`` kwarg from :py:func:`dask.array.from_array` in :py:func:`open_dataset`, :py:meth:`Dataset.chunk`, :py:meth:`DataArray.chunk`, and :py:meth:`Variable.chunk`. (:pull:`6471`) By `Tom Nicholas `_.
- :py:func:`polyval` now supports :py:class:`Dataset` and :py:class:`DataArray` args of any shape, is faster and requires less memory. (:pull:`6548`) By `Michael Niklas `_.
- Improved overall typing.
- :py:meth:`Dataset.to_dict` and :py:meth:`DataArray.to_dict` may now optionally include encoding attributes. (:pull:`6635`) By `Joe Hamman `_.
- Upload development versions to `TestPyPI `_. By `Justus Magin `_.

Breaking changes
~~~~~~~~~~~~~~~~

- PyNIO support is now untested. The minimum versions of some dependencies were changed:

  =============== ===== ======
  Package         Old   New
  =============== ===== ======
  cftime          1.2   1.4
  dask            2.30  2021.4
  distributed     2.30  2021.4
  h5netcdf        0.8   0.11
  matplotlib-base 3.3   3.4
  numba           0.51  0.53
  numpy           1.18  1.19
  pandas          1.1   1.2
  pint            0.16  0.17
  rasterio        1.1   1.2
  scipy           1.5   1.6
  sparse          0.11  0.12
  zarr            2.5   2.8
  =============== ===== ======

- The Dataset and DataArray ``rename`` methods do not implicitly add or drop indexes. (:pull:`5692`). By `Benoît Bovy `_.
- Many arguments like ``keep_attrs``, ``axis``, and ``skipna`` are now keyword only for all reduction operations like ``.mean``.
By `Deepak Cherian `_, `Jimmy Westling `_. - Xarray's ufuncs have been removed, now that they can be replaced by numpy's ufuncs in all supported versions of numpy. By `Maximilian Roos `_. - :py:meth:`xr.polyval` now uses the ``coord`` argument directly instead of its index coordinate. (:pull:`6548`) By `Michael Niklas `_. Bug fixes ~~~~~~~~~ - :py:meth:`Dataset.to_zarr` now allows to write all attribute types supported by ``zarr-python``. By `Mattia Almansi `_. - Set ``skipna=None`` for all ``quantile`` methods (e.g. :py:meth:`Dataset.quantile`) and ensure it skips missing values for float dtypes (consistent with other methods). This should not change the behavior (:pull:`6303`). By `Mathias Hauser `_. - Many bugs fixed by the explicit indexes refactor, mainly related to multi-index (virtual) coordinates. See the corresponding pull-request on GitHub for more details. (:pull:`5692`). By `Benoît Bovy `_. - Fixed "unhashable type" error trying to read NetCDF file with variable having its 'units' attribute not ``str`` (e.g. ``numpy.ndarray``) (:issue:`6368`). By `Oleh Khoma `_. - Omit warning about specified dask chunks separating chunks on disk when the underlying array is empty (e.g., because of an empty dimension) (:issue:`6401`). By `Joseph K Aicher `_. - Fixed the poor html repr performance on large multi-indexes (:pull:`6400`). By `Benoît Bovy `_. - Allow fancy indexing of duck dask arrays along multiple dimensions. (:pull:`6414`) By `Justus Magin `_. - In the API for backends, support dimensions that express their preferred chunk sizes as a tuple of integers. (:issue:`6333`, :pull:`6334`) By `Stan West `_. - Fix bug in :py:func:`where` when passing non-xarray objects with ``keep_attrs=True``. (:issue:`6444`, :pull:`6461`) By `Sam Levang `_. - Allow passing both ``other`` and ``drop=True`` arguments to :py:meth:`DataArray.where` and :py:meth:`Dataset.where` (:pull:`6466`, :pull:`6467`). By `Michael Delgado `_. - Ensure dtype encoding attributes are not added or modified on variables that contain datetime-like values prior to being passed to :py:func:`xarray.conventions.decode_cf_variable` (:issue:`6453`, :pull:`6489`). By `Spencer Clark `_. - Dark themes are now properly detected in Furo-themed Sphinx documents (:issue:`6500`, :pull:`6501`). By `Kevin Paul `_. - :py:meth:`Dataset.isel`, :py:meth:`DataArray.isel` with ``drop=True`` works as intended with scalar :py:class:`DataArray` indexers. (:issue:`6554`, :pull:`6579`) By `Michael Niklas `_. - Fixed silent overflow issue when decoding times encoded with 32-bit and below unsigned integer data types (:issue:`6589`, :pull:`6598`). By `Spencer Clark `_. - Fixed ``.chunks`` loading lazy data (:issue:`6538`). By `Deepak Cherian `_. Documentation ~~~~~~~~~~~~~ - Revise the documentation for developers on specifying a backend's preferred chunk sizes. In particular, correct the syntax and replace lists with tuples in the examples. (:issue:`6333`, :pull:`6334`) By `Stan West `_. - Mention that :py:meth:`DataArray.rename` can rename coordinates. (:issue:`5458`, :pull:`6665`) By `Michael Niklas `_. - Added examples to :py:meth:`Dataset.thin` and :py:meth:`DataArray.thin` By `Emma Marshall `_. Performance ~~~~~~~~~~~ - GroupBy binary operations are now vectorized. Previously this involved looping over all groups. (:issue:`5804`, :pull:`6160`) By `Deepak Cherian `_. - Substantially improved GroupBy operations using `flox `_. This is auto-enabled when ``flox`` is installed. Use ``xr.set_options(use_flox=False)`` to use the old algorithm. 
(:issue:`4473`, :issue:`4498`, :issue:`659`, :issue:`2237`, :pull:`271`). By `Deepak Cherian `_, `Anderson Banihirwe `_, `Jimmy Westling `_. Internal Changes ~~~~~~~~~~~~~~~~ - Many internal changes due to the explicit indexes refactor. See the corresponding pull-request on GitHub for more details. (:pull:`5692`). By `Benoît Bovy `_. .. _whats-new.2022.03.0: v2022.03.0 (2 March 2022) ------------------------- This release brings a number of small improvements, as well as a move to `calendar versioning `_ (:issue:`6176`). Many thanks to the 16 contributors to the v2022.02.0 release! Aaron Spring, Alan D. Snow, Anderson Banihirwe, crusaderky, Illviljan, Joe Hamman, Jonas Gliß, Lukas Pilz, Martin Bergemann, Mathias Hauser, Maximilian Roos, Romain Caneill, Stan West, Stijn Van Hoey, Tobias Kölling, and Tom Nicholas. New Features ~~~~~~~~~~~~ - Enabled multiplying tick offsets by floats. Allows ``float`` ``n`` in :py:meth:`CFTimeIndex.shift` if ``shift_freq`` is between ``Day`` and ``Microsecond``. (:issue:`6134`, :pull:`6135`). By `Aaron Spring `_. - Enable providing more keyword arguments to the ``pydap`` backend when reading OpenDAP datasets (:issue:`6274`). By `Jonas Gliß `_. - Allow :py:meth:`DataArray.drop_duplicates` to drop duplicates along multiple dimensions at once, and add :py:meth:`Dataset.drop_duplicates`. (:pull:`6307`) By `Tom Nicholas `_. Breaking changes ~~~~~~~~~~~~~~~~ - Renamed the ``interpolation`` keyword of all ``quantile`` methods (e.g. :py:meth:`DataArray.quantile`) to ``method`` for consistency with numpy v1.22.0 (:pull:`6108`). By `Mathias Hauser `_. Deprecations ~~~~~~~~~~~~ Bug fixes ~~~~~~~~~ - Variables which are chunked using dask in larger (but aligned) chunks than the target zarr chunk size can now be stored using ``to_zarr()`` (:pull:`6258`) By `Tobias Kölling `_. - Multi-file datasets containing encoded :py:class:`cftime.datetime` objects can be read in parallel again (:issue:`6226`, :pull:`6249`, :pull:`6305`). By `Martin Bergemann `_ and `Stan West `_. Documentation ~~~~~~~~~~~~~ - Delete files of datasets saved to disk while building the documentation and enable building on Windows via ``sphinx-build`` (:pull:`6237`). By `Stan West `_. Internal Changes ~~~~~~~~~~~~~~~~ .. _whats-new.0.21.1: v0.21.1 (31 January 2022) ------------------------- This is a bugfix release to resolve (:issue:`6216`, :pull:`6207`). Bug fixes ~~~~~~~~~ - Add ``packaging`` as a dependency to Xarray (:issue:`6216`, :pull:`6207`). By `Sebastian Weigand `_ and `Joe Hamman `_. .. _whats-new.0.21.0: v0.21.0 (27 January 2022) ------------------------- Many thanks to the 20 contributors to the v0.21.0 release! Abel Aoun, Anderson Banihirwe, Ant Gib, Chris Roat, Cindy Chiao, Deepak Cherian, Dominik Stańczak, Fabian Hofmann, Illviljan, Jody Klymak, Joseph K Aicher, Mark Harfouche, Mathias Hauser, Matthew Roeschke, Maximilian Roos, Michael Delgado, Pascal Bourgault, Pierre, Ray Bell, Romain Caneill, Tim Heap, Tom Nicholas, Zeb Nicholls, joseph nowak, keewis. New Features ~~~~~~~~~~~~ - New top-level function :py:func:`cross`. (:issue:`3279`, :pull:`5365`). By `Jimmy Westling `_. - ``keep_attrs`` support for :py:func:`where` (:issue:`4141`, :issue:`4682`, :pull:`4687`). By `Justus Magin `_. - Enable the limit option for dask array in the following methods :py:meth:`DataArray.ffill`, :py:meth:`DataArray.bfill`, :py:meth:`Dataset.ffill` and :py:meth:`Dataset.bfill` (:issue:`6112`) By `Joseph Nowak `_. 
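A minimal sketch of the new top-level :py:func:`cross` function mentioned above (the input vectors are illustrative only)::

    import xarray as xr

    a = xr.DataArray([1.0, 2.0, 3.0], dims="cartesian")
    b = xr.DataArray([4.0, 5.0, 6.0], dims="cartesian")
    # cross product taken along the size-3 "cartesian" dimension -> [-3., 6., -3.]
    c = xr.cross(a, b, dim="cartesian")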
Breaking changes ~~~~~~~~~~~~~~~~ - Rely on matplotlib's default datetime converters instead of pandas' (:issue:`6102`, :pull:`6109`). By `Jimmy Westling `_. - Improve repr readability when there are a large number of dimensions in datasets or dataarrays by wrapping the text once the maximum display width has been exceeded. (:issue:`5546`, :pull:`5662`) By `Jimmy Westling `_. Deprecations ~~~~~~~~~~~~ - Removed the lock kwarg from the zarr and pydap backends, completing the deprecation cycle started in :issue:`5256`. By `Tom Nicholas `_. - Support for ``python 3.7`` has been dropped. (:pull:`5892`) By `Jimmy Westling `_. Bug fixes ~~~~~~~~~ - Preserve chunks when creating a :py:class:`DataArray` from another :py:class:`DataArray` (:pull:`5984`). By `Fabian Hofmann `_. - Properly support :py:meth:`DataArray.ffill`, :py:meth:`DataArray.bfill`, :py:meth:`Dataset.ffill` and :py:meth:`Dataset.bfill` along chunked dimensions (:issue:`6112`). By `Joseph Nowak `_. - Subclasses of ``byte`` and ``str`` (e.g. ``np.str_`` and ``np.bytes_``) will now serialise to disk rather than raising a ``ValueError: unsupported dtype for netCDF4 variable: object`` as they did previously (:pull:`5264`). By `Zeb Nicholls `_. - Fix applying function with non-xarray arguments using :py:func:`xr.map_blocks`. By `Cindy Chiao `_. - No longer raise an error for an all-nan-but-one argument to :py:meth:`DataArray.interpolate_na` when using ``method='nearest'`` (:issue:`5994`, :pull:`6144`). By `Michael Delgado `_. - `dt.season `_ can now handle NaN and NaT. (:pull:`5876`). By `Pierre Loicq `_. - Determination of zarr chunks handles empty lists for encoding chunks or variable chunks that occurs in certain circumstances (:pull:`5526`). By `Chris Roat `_. Internal Changes ~~~~~~~~~~~~~~~~ - Replace ``distutils.version`` with ``packaging.version`` (:issue:`6092`). By `Mathias Hauser `_. - Removed internal checks for ``pd.Panel`` (:issue:`6145`). By `Matthew Roeschke `_. - Add ``pyupgrade`` pre-commit hook (:pull:`6152`). By `Maximilian Roos `_. .. _whats-new.0.20.2: v0.20.2 (9 December 2021) ------------------------- This is a bugfix release to resolve (:issue:`3391`, :issue:`5715`). It also includes performance improvements in unstacking to a ``sparse`` array and a number of documentation improvements. Many thanks to the 20 contributors: Aaron Spring, Alexandre Poux, Deepak Cherian, Enrico Minack, Fabien Maussion, Giacomo Caria, Gijom, Guillaume Maze, Illviljan, Joe Hamman, Joseph Hardin, Kai Mühlbauer, Matt Henderson, Maximilian Roos, Michael Delgado, Robert Gieseke, Sebastian Weigand and Stephan Hoyer. Breaking changes ~~~~~~~~~~~~~~~~ - Use complex nan when interpolating complex values out of bounds by default (instead of real nan) (:pull:`6019`). By `Alexandre Poux `_. Performance ~~~~~~~~~~~ - Significantly faster unstacking to a ``sparse`` array. :pull:`5577` By `Deepak Cherian `_. Bug fixes ~~~~~~~~~ - :py:func:`xr.map_blocks` and :py:func:`xr.corr` now work when dask is not installed (:issue:`3391`, :issue:`5715`, :pull:`5731`). By `Gijom `_. - Fix plot.line crash for data of shape ``(1, N)`` in _title_for_slice on format_item (:pull:`5948`). By `Sebastian Weigand `_. - Fix a regression in the removal of duplicate backend entrypoints (:issue:`5944`, :pull:`5959`) By `Kai Mühlbauer `_. - Fix an issue that datasets from being saved when time variables with units that ``cftime`` can parse but pandas can not were present (:pull:`6049`). By `Tim Heap `_. 
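As a brief illustration of the faster unstacking to a ``sparse`` array noted under "Performance" above, a minimal sketch (the final line assumes the optional ``sparse`` package is installed)::

    import numpy as np
    import xarray as xr

    da = xr.DataArray(
        np.arange(6.0).reshape(2, 3),
        dims=("letter", "number"),
        coords={"letter": ["a", "b"], "number": [1, 2, 3]},
    )
    stacked = da.stack(z=("letter", "number"))
    dense = stacked.unstack("z")                   # numpy-backed result
    as_sparse = stacked.unstack("z", sparse=True)  # sparse-backed result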
Documentation ~~~~~~~~~~~~~ - Better examples in docstrings for groupby and resampling reductions (:pull:`5871`). By `Deepak Cherian `_, `Maximilian Roos `_, `Jimmy Westling `_ . - Add list-like possibility for tolerance parameter in the reindex functions. By `Antoine Gibek `_, Internal Changes ~~~~~~~~~~~~~~~~ - Use ``importlib`` to replace functionality of ``pkg_resources`` in backend plugins tests. (:pull:`5959`). By `Kai Mühlbauer `_. .. _whats-new.0.20.1: v0.20.1 (5 November 2021) ------------------------- This is a bugfix release to fix :issue:`5930`. Bug fixes ~~~~~~~~~ - Fix a regression in the detection of the backend entrypoints (:issue:`5930`, :pull:`5931`) By `Justus Magin `_. Documentation ~~~~~~~~~~~~~ - Significant improvements to :ref:`api`. By `Deepak Cherian `_. .. _whats-new.0.20.0: v0.20.0 (1 November 2021) ------------------------- This release brings improved support for pint arrays, methods for weighted standard deviation, variance, and sum of squares, the option to disable the use of the bottleneck library, significantly improved performance of unstack, as well as many bugfixes and internal changes. Many thanks to the 40 contributors to this release!: Aaron Spring, Akio Taniguchi, Alan D. Snow, arfy slowy, Benoit Bovy, Christian Jauvin, crusaderky, Deepak Cherian, Giacomo Caria, Illviljan, James Bourbeau, Joe Hamman, Joseph K Aicher, Julien Herzen, Kai Mühlbauer, keewis, lusewell, Martin K. Scherer, Mathias Hauser, Max Grover, Maxime Liquet, Maximilian Roos, Mike Taves, Nathan Lis, pmav99, Pushkar Kopparla, Ray Bell, Rio McMahon, Scott Staniewicz, Spencer Clark, Stefan Bender, Taher Chegini, Thomas Nicholas, Tomas Chor, Tom Augspurger, Victor Negîrneac, Zachary Blackwood, Zachary Moon, and Zeb Nicholls. New Features ~~~~~~~~~~~~ - Add ``std``, ``var``, ``sum_of_squares`` to :py:class:`~computation.weighted.DatasetWeighted` and :py:class:`~computation.weighted.DataArrayWeighted`. By `Christian Jauvin `_. - Added a :py:func:`get_options` method to xarray's root namespace (:issue:`5698`, :pull:`5716`) By `Pushkar Kopparla `_. - Xarray now does a better job rendering variable names that are long LaTeX sequences when plotting (:issue:`5681`, :pull:`5682`). By `Tomas Chor `_. - Add an option (``"use_bottleneck"``) to disable the use of ``bottleneck`` using :py:func:`set_options` (:pull:`5560`) By `Justus Magin `_. - Added ``**kwargs`` argument to :py:meth:`open_rasterio` to access overviews (:issue:`3269`). By `Pushkar Kopparla `_. - Added ``storage_options`` argument to :py:meth:`to_zarr` (:issue:`5601`, :pull:`5615`). By `Ray Bell `_, `Zachary Blackwood `_ and `Nathan Lis `_. - Added calendar utilities :py:func:`DataArray.convert_calendar`, :py:func:`DataArray.interp_calendar`, :py:func:`date_range`, :py:func:`date_range_like` and :py:attr:`DataArray.dt.calendar` (:issue:`5155`, :pull:`5233`). By `Pascal Bourgault `_. - Histogram plots are set with a title displaying the scalar coords if any, similarly to the other plots (:issue:`5791`, :pull:`5792`). By `Maxime Liquet `_. - Slice plots display the coords units in the same way as x/y/colorbar labels (:pull:`5847`). By `Victor Negîrneac `_. - Added a new :py:attr:`Dataset.chunksizes`, :py:attr:`DataArray.chunksizes`, and :py:attr:`Variable.chunksizes` property, which will always return a mapping from dimension names to chunking pattern along that dimension, regardless of whether the object is a Dataset, DataArray, or Variable. (:issue:`5846`, :pull:`5900`) By `Tom Nicholas `_. 
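A small sketch of the new ``chunksizes`` properties described above (assumes ``dask`` is installed)::

    import numpy as np
    import xarray as xr

    ds = xr.Dataset({"a": (("x", "y"), np.zeros((4, 6)))}).chunk({"x": 2, "y": 3})
    print(ds.chunksizes)       # mapping of dimension name to chunk sizes, e.g. "x": (2, 2)
    print(ds["a"].chunksizes)  # the same kind of mapping, obtained from the DataArray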
Breaking changes ~~~~~~~~~~~~~~~~ - The minimum versions of some dependencies were changed: =============== ====== ==== Package Old New =============== ====== ==== cftime 1.1 1.2 dask 2.15 2.30 distributed 2.15 2.30 lxml 4.5 4.6 matplotlib-base 3.2 3.3 numba 0.49 0.51 numpy 1.17 1.18 pandas 1.0 1.1 pint 0.15 0.16 scipy 1.4 1.5 seaborn 0.10 0.11 sparse 0.8 0.11 toolz 0.10 0.11 zarr 2.4 2.5 =============== ====== ==== - The ``__repr__`` of a :py:class:`xarray.Dataset`'s ``coords`` and ``data_vars`` ignore ``xarray.set_option(display_max_rows=...)`` and show the full output when called directly as, e.g., ``ds.data_vars`` or ``print(ds.data_vars)`` (:issue:`5545`, :pull:`5580`). By `Stefan Bender `_. Deprecations ~~~~~~~~~~~~ - Deprecate :py:func:`open_rasterio` (:issue:`4697`, :pull:`5808`). By `Alan Snow `_. - Set the default argument for ``roll_coords`` to ``False`` for :py:meth:`DataArray.roll` and :py:meth:`Dataset.roll`. (:pull:`5653`) By `Tom Nicholas `_. - :py:meth:`xarray.open_mfdataset` will now error instead of warn when a value for ``concat_dim`` is passed alongside ``combine='by_coords'``. By `Tom Nicholas `_. Bug fixes ~~~~~~~~~ - Fix ZeroDivisionError from saving dask array with empty dimension (:issue:`5741`). By `Joseph K Aicher `_. - Fixed performance bug where ``cftime`` import attempted within various core operations if ``cftime`` not installed (:pull:`5640`). By `Luke Sewell `_ - Fixed bug when combining named DataArrays using :py:func:`combine_by_coords`. (:pull:`5834`). By `Tom Nicholas `_. - When a custom engine was used in :py:func:`~xarray.open_dataset` the engine wasn't initialized properly, causing missing argument errors or inconsistent method signatures. (:pull:`5684`) By `Jimmy Westling `_. - Numbers are properly formatted in a plot's title (:issue:`5788`, :pull:`5789`). By `Maxime Liquet `_. - Faceted plots will no longer raise a ``pint.UnitStrippedWarning`` when a ``pint.Quantity`` array is plotted, and will correctly display the units of the data in the colorbar (if there is one) (:pull:`5886`). By `Tom Nicholas `_. - With backends, check for path-like objects rather than ``pathlib.Path`` type, use ``os.fspath`` (:pull:`5879`). By `Mike Taves `_. - ``open_mfdataset()`` now accepts a single ``pathlib.Path`` object (:issue:`5881`). By `Panos Mavrogiorgos `_. - Improved performance of :py:meth:`Dataset.unstack` (:pull:`5906`). By `Tom Augspurger `_. Documentation ~~~~~~~~~~~~~ - Users are instructed to try ``use_cftime=True`` if a ``TypeError`` occurs when combining datasets and one of the types involved is a subclass of ``cftime.datetime`` (:pull:`5776`). By `Zeb Nicholls `_. - A clearer error is now raised if a user attempts to assign a Dataset to a single key of another Dataset. (:pull:`5839`) By `Tom Nicholas `_. Internal Changes ~~~~~~~~~~~~~~~~ - Explicit indexes refactor: avoid ``len(index)`` in ``map_blocks`` (:pull:`5670`). By `Deepak Cherian `_. - Explicit indexes refactor: decouple ``xarray.Index``` from ``xarray.Variable`` (:pull:`5636`). By `Benoit Bovy `_. - Fix ``Mapping`` argument typing to allow mypy to pass on ``str`` keys (:pull:`5690`). By `Maximilian Roos `_. - Annotate many of our tests, and fix some of the resulting typing errors. This will also mean our typing annotations are tested as part of CI. (:pull:`5728`). By `Maximilian Roos `_. - Improve the performance of reprs for large datasets or dataarrays. (:pull:`5661`) By `Jimmy Westling `_. - Use isort's ``float_to_top`` config. (:pull:`5695`). By `Maximilian Roos `_. 
- Remove use of the deprecated ``kind`` argument in :py:meth:`pandas.Index.get_slice_bound` inside :py:class:`xarray.CFTimeIndex` tests (:pull:`5723`). By `Spencer Clark `_. - Refactor ``xarray.core.duck_array_ops`` to no longer special-case dispatching to dask versions of functions when acting on dask arrays, instead relying numpy and dask's adherence to NEP-18 to dispatch automatically. (:pull:`5571`) By `Tom Nicholas `_. - Add an ASV benchmark CI and improve performance of the benchmarks (:pull:`5796`) By `Jimmy Westling `_. - Use ``importlib`` to replace functionality of ``pkg_resources`` such as version setting and loading of resources. (:pull:`5845`). By `Martin K. Scherer `_. .. _whats-new.0.19.0: v0.19.0 (23 July 2021) ---------------------- This release brings improvements to plotting of categorical data, the ability to specify how attributes are combined in xarray operations, a new high-level :py:func:`unify_chunks` function, as well as various deprecations, bug fixes, and minor improvements. Many thanks to the 29 contributors to this release!: Andrew Williams, Augustus, Aureliana Barghini, Benoit Bovy, crusaderky, Deepak Cherian, ellesmith88, Elliott Sales de Andrade, Giacomo Caria, github-actions[bot], Illviljan, Joeperdefloep, joooeey, Julia Kent, Julius Busecke, keewis, Mathias Hauser, Matthias Göbel, Mattia Almansi, Maximilian Roos, Peter Andreas Entschev, Ray Bell, Sander, Santiago Soler, Sebastian, Spencer Clark, Stephan Hoyer, Thomas Hirtz, Thomas Nicholas. New Features ~~~~~~~~~~~~ - Allow passing argument ``missing_dims`` to :py:meth:`Variable.transpose` and :py:meth:`Dataset.transpose` (:issue:`5550`, :pull:`5586`) By `Giacomo Caria `_. - Allow passing a dictionary as coords to a :py:class:`DataArray` (:issue:`5527`, reverts :pull:`1539`, which had deprecated this due to python's inconsistent ordering in earlier versions). By `Sander van Rijn `_. - Added :py:meth:`Dataset.coarsen.construct`, :py:meth:`DataArray.coarsen.construct` (:issue:`5454`, :pull:`5475`). By `Deepak Cherian `_. - Xarray now uses consolidated metadata by default when writing and reading Zarr stores (:issue:`5251`). By `Stephan Hoyer `_. - New top-level function :py:func:`unify_chunks`. By `Mattia Almansi `_. - Allow assigning values to a subset of a dataset using positional or label-based indexing (:issue:`3015`, :pull:`5362`). By `Matthias Göbel `_. - Attempting to reduce a weighted object over missing dimensions now raises an error (:pull:`5362`). By `Mattia Almansi `_. - Add ``.sum`` to :py:meth:`~xarray.DataArray.rolling_exp` and :py:meth:`~xarray.Dataset.rolling_exp` for exponentially weighted rolling sums. These require numbagg 0.2.1; (:pull:`5178`). By `Maximilian Roos `_. - :py:func:`xarray.cov` and :py:func:`xarray.corr` now lazily check for missing values if inputs are dask arrays (:issue:`4804`, :pull:`5284`). By `Andrew Williams `_. - Attempting to ``concat`` list of elements that are not all ``Dataset`` or all ``DataArray`` now raises an error (:issue:`5051`, :pull:`5425`). By `Thomas Hirtz `_. - allow passing a function to ``combine_attrs`` (:pull:`4896`). By `Justus Magin `_. - Allow plotting categorical data (:pull:`5464`). By `Jimmy Westling `_. - Allow removal of the coordinate attribute ``coordinates`` on variables by setting ``.attrs['coordinates']= None`` (:issue:`5510`). By `Elle Smith `_. - Added :py:meth:`DataArray.to_numpy`, :py:meth:`DataArray.as_numpy`, and :py:meth:`Dataset.as_numpy`. (:pull:`5568`). By `Tom Nicholas `_. 
- Units in plot labels are now automatically inferred from wrapped :py:meth:`pint.Quantity` arrays. (:pull:`5561`). By `Tom Nicholas `_. Breaking changes ~~~~~~~~~~~~~~~~ - The default ``mode`` for :py:meth:`Dataset.to_zarr` when ``region`` is set has changed to the new ``mode="r+"``, which only allows for overriding pre-existing array values. This is a safer default than the prior ``mode="a"``, and allows for higher performance writes (:pull:`5252`). By `Stephan Hoyer `_. - The main parameter to :py:func:`combine_by_coords` is renamed to ``data_objects`` instead of ``datasets`` so anyone calling this method using a named parameter will need to update the name accordingly (:issue:`3248`, :pull:`4696`). By `Augustus Ijams `_. Deprecations ~~~~~~~~~~~~ - Removed the deprecated ``dim`` kwarg to :py:func:`DataArray.integrate` (:pull:`5630`) - Removed the deprecated ``keep_attrs`` kwarg to :py:func:`DataArray.rolling` (:pull:`5630`) - Removed the deprecated ``keep_attrs`` kwarg to :py:func:`DataArray.coarsen` (:pull:`5630`) - Completed deprecation of passing an ``xarray.DataArray`` to :py:func:`Variable` - will now raise a ``TypeError`` (:pull:`5630`) Bug fixes ~~~~~~~~~ - Fix a minor incompatibility between partial datetime string indexing with a :py:class:`CFTimeIndex` and upcoming pandas version 1.3.0 (:issue:`5356`, :pull:`5359`). By `Spencer Clark `_. - Fix 1-level multi-index incorrectly converted to single index (:issue:`5384`, :pull:`5385`). By `Benoit Bovy `_. - Don't cast a duck array in a coordinate to :py:class:`numpy.ndarray` in :py:meth:`DataArray.differentiate` (:pull:`5408`) By `Justus Magin `_. - Fix the ``repr`` of :py:class:`Variable` objects with ``display_expand_data=True`` (:pull:`5406`) By `Justus Magin `_. - Plotting a pcolormesh with ``xscale="log"`` and/or ``yscale="log"`` works as expected after improving the way the interval breaks are generated (:issue:`5333`). By `Santiago Soler `_ - :py:func:`combine_by_coords` can now handle combining a list of unnamed ``DataArray`` as input (:issue:`3248`, :pull:`4696`). By `Augustus Ijams `_. Internal Changes ~~~~~~~~~~~~~~~~ - Run CI on the first & last python versions supported only; currently 3.7 & 3.9. (:pull:`5433`) By `Maximilian Roos `_. - Publish test results & timings on each PR. (:pull:`5537`) By `Maximilian Roos `_. - Explicit indexes refactor: add a ``xarray.Index.query()`` method in which one may eventually provide a custom implementation of label-based data selection (not ready yet for public use). Also refactor the internal, pandas-specific implementation into ``PandasIndex.query()`` and ``PandasMultiIndex.query()`` (:pull:`5322`). By `Benoit Bovy `_. .. _whats-new.0.18.2: v0.18.2 (19 May 2021) --------------------- This release reverts a regression in xarray's unstacking of dask-backed arrays. .. _whats-new.0.18.1: v0.18.1 (18 May 2021) --------------------- This release is intended as a small patch release to be compatible with the new 2021.5.0 ``dask.distributed`` release. It also includes a new ``drop_duplicates`` method, some documentation improvements, the beginnings of our internal Index refactoring, and some bug fixes. Thank you to all 16 contributors! Anderson Banihirwe, Andrew, Benoit Bovy, Brewster Malevich, Giacomo Caria, Illviljan, James Bourbeau, Keewis, Maximilian Roos, Ravin Kumar, Stephan Hoyer, Thomas Nicholas, Tom Nicholas, Zachary Moon. New Features ~~~~~~~~~~~~ - Implement :py:meth:`DataArray.drop_duplicates` to remove duplicate dimension values (:pull:`5239`). By `Andrew Huang `_. 
- Allow passing ``combine_attrs`` strategy names to the ``keep_attrs`` parameter of :py:func:`apply_ufunc` (:pull:`5041`) By `Justus Magin `_. - :py:meth:`Dataset.interp` now allows interpolation with non-numerical datatypes, such as booleans, instead of dropping them. (:issue:`4761` :pull:`5008`). By `Jimmy Westling `_. - Raise more informative error when decoding time variables with invalid reference dates. (:issue:`5199`, :pull:`5288`). By `Giacomo Caria `_. Bug fixes ~~~~~~~~~ - Opening netCDF files from a path that doesn't end in ``.nc`` without supplying an explicit ``engine`` works again (:issue:`5295`), fixing a bug introduced in 0.18.0. By `Stephan Hoyer `_ Documentation ~~~~~~~~~~~~~ - Clean up and enhance docstrings for the :py:class:`DataArray.plot` and ``Dataset.plot.*`` families of methods (:pull:`5285`). By `Zach Moon `_. - Explanation of deprecation cycles and how to implement them added to contributors guide. (:pull:`5289`) By `Tom Nicholas `_. Internal Changes ~~~~~~~~~~~~~~~~ - Explicit indexes refactor: add an ``xarray.Index`` base class and ``Dataset.xindexes`` / ``DataArray.xindexes`` properties. Also rename ``PandasIndexAdapter`` to ``PandasIndex``, which now inherits from ``xarray.Index`` (:pull:`5102`). By `Benoit Bovy `_. - Replace ``SortedKeysDict`` with python's ``dict``, given dicts are now ordered. By `Maximilian Roos `_. - Updated the release guide for developers. Now accounts for actions that are automated via github actions. (:pull:`5274`). By `Tom Nicholas `_. .. _whats-new.0.18.0: v0.18.0 (6 May 2021) -------------------- This release brings a few important performance improvements, a wide range of usability upgrades, lots of bug fixes, and some new features. These include a plugin API to add backend engines, a new theme for the documentation, curve fitting methods, and several new plotting functions. Many thanks to the 38 contributors to this release: Aaron Spring, Alessandro Amici, Alex Marandon, Alistair Miles, Ana Paula Krelling, Anderson Banihirwe, Aureliana Barghini, Baudouin Raoult, Benoit Bovy, Blair Bonnett, David Trémouilles, Deepak Cherian, Gabriel Medeiros Abrahão, Giacomo Caria, Hauke Schulz, Illviljan, Mathias Hauser, Matthias Bussonnier, Mattia Almansi, Maximilian Roos, Ray Bell, Richard Kleijn, Ryan Abernathey, Sam Levang, Spencer Clark, Spencer Jones, Tammas Loughran, Tobias Kölling, Todd, Tom Nicholas, Tom White, Victor Negîrneac, Xianxiang Li, Zeb Nicholls, crusaderky, dschwoerer, johnomotani, keewis New Features ~~~~~~~~~~~~ - apply ``combine_attrs`` on data variables and coordinate variables when concatenating and merging datasets and dataarrays (:pull:`4902`). By `Justus Magin `_. - Add :py:meth:`Dataset.to_pandas` (:pull:`5247`) By `Giacomo Caria `_. - Add :py:meth:`DataArray.plot.surface` which wraps matplotlib's ``plot_surface`` to make surface plots (:issue:`2235` :issue:`5084` :pull:`5101`). By `John Omotani `_. - Allow passing multiple arrays to :py:meth:`Dataset.__setitem__` (:pull:`5216`). By `Giacomo Caria `_. - Add 'cumulative' option to :py:meth:`Dataset.integrate` and :py:meth:`DataArray.integrate` so that result is a cumulative integral, like :py:func:`scipy.integrate.cumulative_trapezoidal` (:pull:`5153`). By `John Omotani `_. - Add ``safe_chunks`` option to :py:meth:`Dataset.to_zarr` which allows overriding checks made to ensure Dask and Zarr chunk compatibility (:issue:`5056`). 
By `Ryan Abernathey `_ - Add :py:meth:`Dataset.query` and :py:meth:`DataArray.query` which enable indexing of datasets and data arrays by evaluating query expressions against the values of the data variables (:pull:`4984`). By `Alistair Miles `_. - Allow passing ``combine_attrs`` to :py:meth:`Dataset.merge` (:pull:`4895`). By `Justus Magin `_. - Support for `dask.graph_manipulation `_ (requires dask >=2021.3) By `Guido Imperiale `_ - Add :py:meth:`Dataset.plot.streamplot` for streamplot plots with :py:class:`Dataset` variables (:pull:`5003`). By `John Omotani `_. - Many of the arguments for the :py:attr:`DataArray.str` methods now support providing an array-like input. In this case, the array provided to the arguments is broadcast against the original array and applied elementwise. - :py:attr:`DataArray.str` now supports ``+``, ``*``, and ``%`` operators. These behave the same as they do for :py:class:`str`, except that they follow array broadcasting rules. - A large number of new :py:attr:`DataArray.str` methods were implemented, :py:meth:`DataArray.str.casefold`, :py:meth:`DataArray.str.cat`, :py:meth:`DataArray.str.extract`, :py:meth:`DataArray.str.extractall`, :py:meth:`DataArray.str.findall`, :py:meth:`DataArray.str.format`, :py:meth:`DataArray.str.get_dummies`, :py:meth:`DataArray.str.islower`, :py:meth:`DataArray.str.join`, :py:meth:`DataArray.str.normalize`, :py:meth:`DataArray.str.partition`, :py:meth:`DataArray.str.rpartition`, :py:meth:`DataArray.str.rsplit`, and :py:meth:`DataArray.str.split`. A number of these methods allow for splitting or joining the strings in an array. (:issue:`4622`) By `Todd Jennings `_ - Thanks to the new pluggable backend infrastructure external packages may now use the ``xarray.backends`` entry point to register additional engines to be used in :py:func:`open_dataset`, see the documentation in :ref:`add_a_backend` (:issue:`4309`, :issue:`4803`, :pull:`4989`, :pull:`4810` and many others). The backend refactor has been sponsored with the "Essential Open Source Software for Science" grant from the `Chan Zuckerberg Initiative `_ and developed by `B-Open `_. By `Aureliana Barghini `_ and `Alessandro Amici `_. - :py:attr:`~core.accessor_dt.DatetimeAccessor.date` added (:issue:`4983`, :pull:`4994`). By `Hauke Schulz `_. - Implement ``__getitem__`` for both :py:class:`~core.groupby.DatasetGroupBy` and :py:class:`~core.groupby.DataArrayGroupBy`, inspired by pandas' :py:meth:`~pandas.core.groupby.GroupBy.get_group`. By `Deepak Cherian `_. - Switch the tutorial functions to use `pooch `_ (which is now a optional dependency) and add :py:func:`tutorial.open_rasterio` as a way to open example rasterio files (:issue:`3986`, :pull:`4102`, :pull:`5074`). By `Justus Magin `_. - Add typing information to unary and binary arithmetic operators operating on :py:class:`Dataset`, :py:class:`DataArray`, :py:class:`Variable`, :py:class:`~core.groupby.DatasetGroupBy` or :py:class:`~core.groupby.DataArrayGroupBy` (:pull:`4904`). By `Richard Kleijn `_. - Add a ``combine_attrs`` parameter to :py:func:`open_mfdataset` (:pull:`4971`). By `Justus Magin `_. - Enable passing arrays with a subset of dimensions to :py:meth:`DataArray.clip` & :py:meth:`Dataset.clip`; these methods now use :py:func:`xarray.apply_ufunc`; (:pull:`5184`). By `Maximilian Roos `_. - Disable the ``cfgrib`` backend if the ``eccodes`` library is not installed (:pull:`5083`). By `Baudouin Raoult `_. - Added :py:meth:`DataArray.curvefit` and :py:meth:`Dataset.curvefit` for general curve fitting applications. 
(:issue:`4300`, :pull:`4849`) By `Sam Levang `_. - Add options to control expand/collapse of sections in display of Dataset and DataArray. The function :py:func:`set_options` now takes keyword arguments ``display_expand_attrs``, ``display_expand_coords``, ``display_expand_data``, ``display_expand_data_vars``, all of which can be one of ``True`` to always expand, ``False`` to always collapse, or ``default`` to expand unless over a pre-defined limit (:pull:`5126`). By `Tom White `_. - Significant speedups in :py:meth:`Dataset.interp` and :py:meth:`DataArray.interp`. (:issue:`4739`, :pull:`4740`). By `Deepak Cherian `_. - Prevent passing ``concat_dim`` to :py:func:`xarray.open_mfdataset` when ``combine='by_coords'`` is specified, which should never have been possible (as :py:func:`xarray.combine_by_coords` has no ``concat_dim`` argument to pass to). Also removes unneeded internal reordering of datasets in :py:func:`xarray.open_mfdataset` when ``combine='by_coords'`` is specified. Fixes (:issue:`5230`). By `Tom Nicholas `_. - Implement ``__setitem__`` for ``xarray.core.indexing.DaskIndexingAdapter`` if dask version supports item assignment. (:issue:`5171`, :pull:`5174`) By `Tammas Loughran `_. Breaking changes ~~~~~~~~~~~~~~~~ - The minimum versions of some dependencies were changed: ============ ====== ==== Package Old New ============ ====== ==== boto3 1.12 1.13 cftime 1.0 1.1 dask 2.11 2.15 distributed 2.11 2.15 matplotlib 3.1 3.2 numba 0.48 0.49 ============ ====== ==== - :py:func:`open_dataset` and :py:func:`open_dataarray` now accept only the first argument as positional, all others need to be passed are keyword arguments. This is part of the refactor to support external backends (:issue:`4309`, :pull:`4989`). By `Alessandro Amici `_. - Functions that are identities for 0d data return the unchanged data if axis is empty. This ensures that Datasets where some variables do not have the averaged dimensions are not accidentally changed (:issue:`4885`, :pull:`5207`). By `David Schwörer `_. - :py:attr:`DataArray.coarsen` and :py:attr:`Dataset.coarsen` no longer support passing ``keep_attrs`` via its constructor. Pass ``keep_attrs`` via the applied function, i.e. use ``ds.coarsen(...).mean(keep_attrs=False)`` instead of ``ds.coarsen(..., keep_attrs=False).mean()``. Further, coarsen now keeps attributes per default (:pull:`5227`). By `Mathias Hauser `_. - switch the default of the :py:func:`merge` ``combine_attrs`` parameter to ``"override"``. This will keep the current behavior for merging the ``attrs`` of variables but stop dropping the ``attrs`` of the main objects (:pull:`4902`). By `Justus Magin `_. Deprecations ~~~~~~~~~~~~ - Warn when passing ``concat_dim`` to :py:func:`xarray.open_mfdataset` when ``combine='by_coords'`` is specified, which should never have been possible (as :py:func:`xarray.combine_by_coords` has no ``concat_dim`` argument to pass to). Also removes unneeded internal reordering of datasets in :py:func:`xarray.open_mfdataset` when ``combine='by_coords'`` is specified. Fixes (:issue:`5230`), via (:pull:`5231`, :pull:`5255`). By `Tom Nicholas `_. - The ``lock`` keyword argument to :py:func:`open_dataset` and :py:func:`open_dataarray` is now a backend specific option. It will give a warning if passed to a backend that doesn't support it instead of being silently ignored. From the next version it will raise an error. This is part of the refactor to support external backends (:issue:`5073`). By `Tom Nicholas `_ and `Alessandro Amici `_. 
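To illustrate the new :py:meth:`DataArray.curvefit` method listed under "New Features" above, a minimal sketch (assumes ``scipy`` is installed; the decay function and data are made up)::

    import numpy as np
    import xarray as xr

    def exp_decay(t, a, tau):
        return a * np.exp(-t / tau)

    t = np.linspace(0, 5, 50)
    da = xr.DataArray(3.0 * np.exp(-t / 1.5), coords={"t": t}, dims="t")
    fit = da.curvefit(coords="t", func=exp_decay)
    # fitted parameters are returned in ``fit.curvefit_coefficients``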
Bug fixes ~~~~~~~~~ - Properly support :py:meth:`DataArray.ffill`, :py:meth:`DataArray.bfill`, :py:meth:`Dataset.ffill`, :py:meth:`Dataset.bfill` along chunked dimensions. (:issue:`2699`). By `Deepak Cherian `_. - Fix 2d plot failure for certain combinations of dimensions when ``x`` is 1d and ``y`` is 2d (:issue:`5097`, :pull:`5099`). By `John Omotani `_. - Ensure standard calendar times encoded with large values (i.e. greater than approximately 292 years), can be decoded correctly without silently overflowing (:pull:`5050`). This was a regression in xarray 0.17.0. By `Zeb Nicholls `_. - Added support for ``numpy.bool_`` attributes in roundtrips using ``h5netcdf`` engine with ``invalid_netcdf=True`` [which casts ``bool`` s to ``numpy.bool_``] (:issue:`4981`, :pull:`4986`). By `Victor Negîrneac `_. - Don't allow passing ``axis`` to :py:meth:`Dataset.reduce` methods (:issue:`3510`, :pull:`4940`). By `Justus Magin `_. - Decode values as signed if attribute ``_Unsigned = "false"`` (:issue:`4954`) By `Tobias Kölling `_. - Keep coords attributes when interpolating when the indexer is not a Variable. (:issue:`4239`, :issue:`4839` :pull:`5031`) By `Jimmy Westling `_. - Ensure standard calendar dates encoded with a calendar attribute with some or all uppercase letters can be decoded or encoded to or from ``np.datetime64[ns]`` dates with or without ``cftime`` installed (:issue:`5093`, :pull:`5180`). By `Spencer Clark `_. - Warn on passing ``keep_attrs`` to ``resample`` and ``rolling_exp`` as they are ignored, pass ``keep_attrs`` to the applied function instead (:pull:`5265`). By `Mathias Hauser `_. Documentation ~~~~~~~~~~~~~ - New section on :ref:`add_a_backend` in the "Internals" chapter aimed to backend developers (:issue:`4803`, :pull:`4810`). By `Aureliana Barghini `_. - Add :py:meth:`Dataset.polyfit` and :py:meth:`DataArray.polyfit` under "See also" in the docstrings of :py:meth:`Dataset.polyfit` and :py:meth:`DataArray.polyfit` (:issue:`5016`, :pull:`5020`). By `Aaron Spring `_. - New sphinx theme & rearrangement of the docs (:pull:`4835`). By `Anderson Banihirwe `_. Internal Changes ~~~~~~~~~~~~~~~~ - Enable displaying mypy error codes and ignore only specific error codes using ``# type: ignore[error-code]`` (:pull:`5096`). By `Mathias Hauser `_. - Replace uses of ``raises_regex`` with the more standard ``pytest.raises(Exception, match="foo")``; (:pull:`5188`), (:pull:`5191`). By `Maximilian Roos `_. .. _whats-new.0.17.0: v0.17.0 (24 Feb 2021) --------------------- This release brings a few important performance improvements, a wide range of usability upgrades, lots of bug fixes, and some new features. These include better ``cftime`` support, a new quiver plot, better ``unstack`` performance, more efficient memory use in rolling operations, and some python packaging improvements. We also have a few documentation improvements (and more planned!). 
Many thanks to the 36 contributors to this release: Alessandro Amici, Anderson Banihirwe, Aureliana Barghini, Ayrton Bourn, Benjamin Bean, Blair Bonnett, Chun Ho Chow, DWesl, Daniel Mesejo-León, Deepak Cherian, Eric Keenan, Illviljan, Jens Hedegaard Nielsen, Jody Klymak, Julien Seguinot, Julius Busecke, Kai Mühlbauer, Leif Denby, Martin Durant, Mathias Hauser, Maximilian Roos, Michael Mann, Ray Bell, RichardScottOZ, Spencer Clark, Tim Gates, Tom Nicholas, Yunus Sevinchan, alexamici, aurghs, crusaderky, dcherian, ghislainp, keewis, rhkleijn Breaking changes ~~~~~~~~~~~~~~~~ - xarray no longer supports python 3.6 The minimum version policy was changed to also apply to projects with irregular releases. As a result, the minimum versions of some dependencies have changed: ============ ====== ==== Package Old New ============ ====== ==== Python 3.6 3.7 setuptools 38.4 40.4 numpy 1.15 1.17 pandas 0.25 1.0 dask 2.9 2.11 distributed 2.9 2.11 bottleneck 1.2 1.3 h5netcdf 0.7 0.8 iris 2.2 2.4 netcdf4 1.4 1.5 pseudonetcdf 3.0 3.1 rasterio 1.0 1.1 scipy 1.3 1.4 seaborn 0.9 0.10 zarr 2.3 2.4 ============ ====== ==== (:issue:`4688`, :pull:`4720`, :pull:`4907`, :pull:`4942`) - As a result of :pull:`4684` the default units encoding for datetime-like values (``np.datetime64[ns]`` or ``cftime.datetime``) will now always be set such that ``int64`` values can be used. In the past, no units finer than "seconds" were chosen, which would sometimes mean that ``float64`` values were required, which would lead to inaccurate I/O round-trips. - Variables referred to in attributes like ``bounds`` and ``grid_mapping`` can be set as coordinate variables. These attributes are moved to :py:attr:`DataArray.encoding` from :py:attr:`DataArray.attrs`. This behaviour is controlled by the ``decode_coords`` kwarg to :py:func:`open_dataset` and :py:func:`open_mfdataset`. The full list of decoded attributes is in :ref:`weather-climate` (:pull:`2844`, :issue:`3689`) - As a result of :pull:`4911` the output from calling :py:meth:`DataArray.sum` or :py:meth:`DataArray.prod` on an integer array with ``skipna=True`` and a non-None value for ``min_count`` will now be a float array rather than an integer array. Deprecations ~~~~~~~~~~~~ - ``dim`` argument to :py:meth:`DataArray.integrate` is being deprecated in favour of a ``coord`` argument, for consistency with :py:meth:`Dataset.integrate`. For now using ``dim`` issues a ``FutureWarning``. It will be removed in version 0.19.0 (:pull:`3993`). By `Tom Nicholas `_. - Deprecated ``autoclose`` kwargs from :py:func:`open_dataset` are removed (:pull:`4725`). By `Aureliana Barghini `_. - the return value of :py:meth:`Dataset.update` is being deprecated to make it work more like :py:meth:`dict.update`. It will be removed in version 0.19.0 (:pull:`4932`). By `Justus Magin `_. New Features ~~~~~~~~~~~~ - :py:meth:`~xarray.cftime_range` and :py:meth:`DataArray.resample` now support millisecond (``"L"`` or ``"ms"``) and microsecond (``"U"`` or ``"us"``) frequencies for ``cftime.datetime`` coordinates (:issue:`4097`, :pull:`4758`). By `Spencer Clark `_. - Significantly higher ``unstack`` performance on numpy-backed arrays which contain missing values; 8x faster than previous versions in our benchmark, and now 2x faster than pandas (:pull:`4746`). By `Maximilian Roos `_. - Add :py:meth:`Dataset.plot.quiver` for quiver plots with :py:class:`Dataset` variables. By `Deepak Cherian `_. - Add ``"drop_conflicts"`` to the strategies supported by the ``combine_attrs`` kwarg (:issue:`4749`, :pull:`4827`). 
By `Justus Magin `_. - Allow installing from git archives (:pull:`4897`). By `Justus Magin `_. - :py:class:`~computation.rolling.DataArrayCoarsen` and :py:class:`~computation.rolling.DatasetCoarsen` now implement a ``reduce`` method, enabling coarsening operations with custom reduction functions (:issue:`3741`, :pull:`4939`). By `Spencer Clark `_. - Most rolling operations use significantly less memory. (:issue:`4325`). By `Deepak Cherian `_. - Add :py:meth:`Dataset.drop_isel` and :py:meth:`DataArray.drop_isel` (:issue:`4658`, :pull:`4819`). By `Daniel Mesejo `_. - Xarray now leverages updates as of cftime version 1.4.1, which enable exact I/O roundtripping of ``cftime.datetime`` objects (:pull:`4758`). By `Spencer Clark `_. - :py:func:`open_dataset` and :py:func:`open_mfdataset` now accept ``fsspec`` URLs (including globs for the latter) for ``engine="zarr"``, and so allow reading from many remote and other file systems (:pull:`4461`) By `Martin Durant `_ - :py:meth:`DataArray.swap_dims` & :py:meth:`Dataset.swap_dims` now accept dims in the form of kwargs as well as a dict, like most similar methods. By `Maximilian Roos `_. Bug fixes ~~~~~~~~~ - Use specific type checks in ``xarray.core.variable.as_compatible_data`` instead of blanket access to ``values`` attribute (:issue:`2097`) By `Yunus Sevinchan `_. - :py:meth:`DataArray.resample` and :py:meth:`Dataset.resample` do not trigger computations anymore if :py:meth:`Dataset.weighted` or :py:meth:`DataArray.weighted` are applied (:issue:`4625`, :pull:`4668`). By `Julius Busecke `_. - :py:func:`merge` with ``combine_attrs='override'`` makes a copy of the attrs (:issue:`4627`). - By default, when possible, xarray will now always use values of type ``int64`` when encoding and decoding ``numpy.datetime64[ns]`` datetimes. This ensures that maximum precision and accuracy are maintained in the round-tripping process (:issue:`4045`, :pull:`4684`). It also enables encoding and decoding standard calendar dates with time units of nanoseconds (:pull:`4400`). By `Spencer Clark `_ and `Mark Harfouche `_. - :py:meth:`DataArray.astype`, :py:meth:`Dataset.astype` and :py:meth:`Variable.astype` support the ``order`` and ``subok`` parameters again. This fixes a regression introduced in version 0.16.1 (:issue:`4644`, :pull:`4683`). By `Richard Kleijn `_ . - Remove dictionary unpacking when using ``.loc`` to avoid collision with ``.sel`` parameters (:pull:`4695`). By `Anderson Banihirwe `_. - Fix the legend created by :py:meth:`Dataset.plot.scatter` (:issue:`4641`, :pull:`4723`). By `Justus Magin `_. - Fix a crash in orthogonal indexing on geographic coordinates with ``engine='cfgrib'`` (:issue:`4733` :pull:`4737`). By `Alessandro Amici `_. - Coordinates with dtype ``str`` or ``bytes`` now retain their dtype on many operations, e.g. ``reindex``, ``align``, ``concat``, ``assign``, previously they were cast to an object dtype (:issue:`2658` and :issue:`4543`). By `Mathias Hauser `_. - Limit number of data rows when printing large datasets. (:issue:`4736`, :pull:`4750`). By `Jimmy Westling `_. - Add ``missing_dims`` parameter to transpose (:issue:`4647`, :pull:`4767`). By `Daniel Mesejo `_. - Resolve intervals before appending other metadata to labels when plotting (:issue:`4322`, :pull:`4794`). By `Justus Magin `_. - Fix regression when decoding a variable with a ``scale_factor`` and ``add_offset`` given as a list of length one (:issue:`4631`). By `Mathias Hauser `_. - Expand user directory paths (e.g. 
``~/``) in :py:func:`open_mfdataset` and :py:meth:`Dataset.to_zarr` (:issue:`4783`, :pull:`4795`). By `Julien Seguinot `_. - Raise DeprecationWarning when trying to typecast a tuple containing a :py:class:`DataArray`. User now prompted to first call ``.data`` on it (:issue:`4483`). By `Chun Ho Chow `_. - Ensure that :py:meth:`Dataset.interp` raises ``ValueError`` when interpolating outside coordinate range and ``bounds_error=True`` (:issue:`4854`, :pull:`4855`). By `Leif Denby `_. - Fix time encoding bug associated with using cftime versions greater than 1.4.0 with xarray (:issue:`4870`, :pull:`4871`). By `Spencer Clark `_. - Stop :py:meth:`DataArray.sum` and :py:meth:`DataArray.prod` computing lazy arrays when called with a ``min_count`` parameter (:issue:`4898`, :pull:`4911`). By `Blair Bonnett `_. - Fix bug preventing the ``min_count`` parameter to :py:meth:`DataArray.sum` and :py:meth:`DataArray.prod` working correctly when calculating over all axes of a float64 array (:issue:`4898`, :pull:`4911`). By `Blair Bonnett `_. - Fix decoding of vlen strings using h5py versions greater than 3.0.0 with h5netcdf backend (:issue:`4570`, :pull:`4893`). By `Kai Mühlbauer `_. - Allow converting :py:class:`Dataset` or :py:class:`DataArray` objects with a ``MultiIndex`` and at least one other dimension to a ``pandas`` object (:issue:`3008`, :pull:`4442`). By `ghislainp `_. Documentation ~~~~~~~~~~~~~ - Add information about requirements for accessor classes (:issue:`2788`, :pull:`4657`). By `Justus Magin `_. - Start a list of external I/O integrating with ``xarray`` (:issue:`683`, :pull:`4566`). By `Justus Magin `_. - Add concat examples and improve combining documentation (:issue:`4620`, :pull:`4645`). By `Ray Bell `_ and `Justus Magin `_. - explicitly mention that :py:meth:`Dataset.update` updates inplace (:issue:`2951`, :pull:`4932`). By `Justus Magin `_. - Added docs on vectorized indexing (:pull:`4711`). By `Eric Keenan `_. Internal Changes ~~~~~~~~~~~~~~~~ - Speed up of the continuous integration tests on azure. - Switched to mamba and use matplotlib-base for a faster installation of all dependencies (:pull:`4672`). - Use ``pytest.mark.skip`` instead of ``pytest.mark.xfail`` for some tests that can currently not succeed (:pull:`4685`). - Run the tests in parallel using pytest-xdist (:pull:`4694`). By `Justus Magin `_ and `Mathias Hauser `_. - Use ``pyproject.toml`` instead of the ``setup_requires`` option for ``setuptools`` (:pull:`4897`). By `Justus Magin `_. - Replace all usages of ``assert x.identical(y)`` with ``assert_identical(x, y)`` for clearer error messages (:pull:`4752`). By `Maximilian Roos `_. - Speed up attribute style access (e.g. ``ds.somevar`` instead of ``ds["somevar"]``) and tab completion in IPython (:issue:`4741`, :pull:`4742`). By `Richard Kleijn `_. - Added the ``set_close`` method to ``Dataset`` and ``DataArray`` for backends to specify how to voluntary release all resources. (:pull:`#4809`) By `Alessandro Amici `_. - Update type hints to work with numpy v1.20 (:pull:`4878`). By `Mathias Hauser `_. - Ensure warnings cannot be turned into exceptions in :py:func:`testing.assert_equal` and the other ``assert_*`` functions (:pull:`4864`). By `Mathias Hauser `_. - Performance improvement when constructing DataArrays. Significantly speeds up repr for Datasets with large number of variables. By `Deepak Cherian `_. .. 
_whats-new.0.16.2: v0.16.2 (30 Nov 2020) --------------------- This release brings the ability to write to limited regions of ``zarr`` files, open zarr files with :py:func:`open_dataset` and :py:func:`open_mfdataset`, increased support for propagating ``attrs`` using the ``keep_attrs`` flag, as well as numerous bugfixes and documentation improvements. Many thanks to the 31 contributors who contributed to this release: Aaron Spring, Akio Taniguchi, Aleksandar Jelenak, alexamici, Alexandre Poux, Anderson Banihirwe, Andrew Pauling, Ashwin Vishnu, aurghs, Brian Ward, Caleb, crusaderky, Dan Nowacki, darikg, David Brochart, David Huard, Deepak Cherian, Dion Häfner, Gerardo Rivera, Gerrit Holl, Illviljan, inakleinbottle, Jacob Tomlinson, James A. Bednar, jenssss, Joe Hamman, johnomotani, Joris Van den Bossche, Julia Kent, Julius Busecke, Kai Mühlbauer, keewis, Keisuke Fujii, Kyle Cranmer, Luke Volpatti, Mathias Hauser, Maximilian Roos, Michaël Defferrard, Michal Baumgartner, Nick R. Papior, Pascal Bourgault, Peter Hausamann, PGijsbers, Ray Bell, Romain Martinez, rpgoldman, Russell Manser, Sahid Velji, Samnan Rahee, Sander, Spencer Clark, Stephan Hoyer, Thomas Zilio, Tobias Kölling, Tom Augspurger, Wei Ji, Yash Saboo, Zeb Nicholls, Deprecations ~~~~~~~~~~~~ - :py:attr:`~core.accessor_dt.DatetimeAccessor.weekofyear` and :py:attr:`~core.accessor_dt.DatetimeAccessor.week` have been deprecated. Use ``DataArray.dt.isocalendar().week`` instead (:pull:`4534`). By `Mathias Hauser `_. `Maximilian Roos `_, and `Spencer Clark `_. - :py:attr:`DataArray.rolling` and :py:attr:`Dataset.rolling` no longer support passing ``keep_attrs`` via its constructor. Pass ``keep_attrs`` via the applied function, i.e. use ``ds.rolling(...).mean(keep_attrs=False)`` instead of ``ds.rolling(..., keep_attrs=False).mean()`` Rolling operations now keep their attributes per default (:pull:`4510`). By `Mathias Hauser `_. New Features ~~~~~~~~~~~~ - :py:func:`open_dataset` and :py:func:`open_mfdataset` now works with ``engine="zarr"`` (:issue:`3668`, :pull:`4003`, :pull:`4187`). By `Miguel Jimenez `_ and `Wei Ji Leong `_. - Unary & binary operations follow the ``keep_attrs`` flag (:issue:`3490`, :issue:`4065`, :issue:`3433`, :issue:`3595`, :pull:`4195`). By `Deepak Cherian `_. - Added :py:meth:`~core.accessor_dt.DatetimeAccessor.isocalendar()` that returns a Dataset with year, week, and weekday calculated according to the ISO 8601 calendar. Requires pandas version 1.1.0 or greater (:pull:`4534`). By `Mathias Hauser `_, `Maximilian Roos `_, and `Spencer Clark `_. - :py:meth:`Dataset.to_zarr` now supports a ``region`` keyword for writing to limited regions of existing Zarr stores (:pull:`4035`). See :ref:`io.zarr.appending` for full details. By `Stephan Hoyer `_. - Added typehints in :py:func:`align` to reflect that the same type received in ``objects`` arg will be returned (:pull:`4522`). By `Michal Baumgartner `_. - :py:meth:`Dataset.weighted` and :py:meth:`DataArray.weighted` are now executing value checks lazily if weights are provided as dask arrays (:issue:`4541`, :pull:`4559`). By `Julius Busecke `_. - Added the ``keep_attrs`` keyword to ``rolling_exp.mean()``; it now keeps attributes per default. By `Mathias Hauser `_ (:pull:`4592`). - Added ``freq`` as property to :py:class:`CFTimeIndex` and into the ``CFTimeIndex.repr``. (:issue:`2416`, :pull:`4597`) By `Aaron Spring `_. Bug fixes ~~~~~~~~~ - Fix bug where reference times without padded years (e.g. 
``since 1-1-1``) would lose their units when being passed by ``encode_cf_datetime`` (:issue:`4422`, :pull:`4506`). Such units are ambiguous about which digit represents the years (is it YMD or DMY?). Now, if such formatting is encountered, it is assumed that the first digit is the years, they are padded appropriately (to e.g. ``since 0001-1-1``) and a warning that this assumption is being made is issued. Previously, without ``cftime``, such times would be silently parsed incorrectly (at least based on the CF conventions) e.g. "since 1-1-1" would be parsed (via ``pandas`` and ``dateutil``) to ``since 2001-1-1``. By `Zeb Nicholls `_.
- Fix :py:meth:`DataArray.plot.step`. By `Deepak Cherian `_.
- Fix bug where reading a scalar value from a NetCDF file opened with the ``h5netcdf`` backend would raise a ``ValueError`` when ``decode_cf=True`` (:issue:`4471`, :pull:`4485`). By `Gerrit Holl `_.
- Fix bug where datetime64 times are silently changed to incorrect values if they are outside the valid date range for ns precision when provided in some other units (:issue:`4427`, :pull:`4454`). By `Andrew Pauling `_.
- Fix silently overwriting the ``engine`` key when passing :py:func:`open_dataset` a file object to an incompatible netCDF (:issue:`4457`). Now incompatible combinations of files and engines raise an exception instead. By `Alessandro Amici `_.
- The ``min_count`` argument to :py:meth:`DataArray.sum()` and :py:meth:`DataArray.prod()` is now ignored when not applicable, i.e. when ``skipna=False`` or when ``skipna=None`` and the dtype does not have a missing value (:issue:`4352`). By `Mathias Hauser `_.
- :py:func:`combine_by_coords` now raises an informative error when passing coordinates with differing calendars (:issue:`4495`). By `Mathias Hauser `_.
- :py:attr:`DataArray.rolling` and :py:attr:`Dataset.rolling` now also keep the attributes and names of (wrapped) ``DataArray`` objects, previously only the global attributes were retained (:issue:`4497`, :pull:`4510`). By `Mathias Hauser `_.
- Improve performance where reading small slices from huge dimensions was slower than necessary (:pull:`4560`). By `Dion Häfner `_.
- Fix bug where ``dask_gufunc_kwargs`` was silently changed in :py:func:`apply_ufunc` (:pull:`4576`). By `Kai Mühlbauer `_.

Documentation
~~~~~~~~~~~~~

- Document the API not supported with duck arrays (:pull:`4530`). By `Justus Magin `_.
- Mention the possibility to pass functions to :py:meth:`Dataset.where` or :py:meth:`DataArray.where` in the parameter documentation (:issue:`4223`, :pull:`4613`). By `Justus Magin `_.
- Update the docstring of :py:class:`DataArray` and :py:class:`Dataset` (:pull:`4532`). By `Jimmy Westling `_.
- Raise a more informative error when :py:meth:`DataArray.to_dataframe` is called on a scalar (:issue:`4228`). By `Pieter Gijsbers `_.
- Fix grammar and typos in the :ref:`contributing` guide (:pull:`4545`). By `Sahid Velji `_.
- Fix grammar and typos in the :doc:`user-guide/io` guide (:pull:`4553`). By `Sahid Velji `_.
- Update link to NumPy docstring standard in the :ref:`contributing` guide (:pull:`4558`). By `Sahid Velji `_.
- Add docstrings to ``isnull`` and ``notnull``, and fix the displayed signature (:issue:`2760`, :pull:`4618`). By `Justus Magin `_.

Internal Changes
~~~~~~~~~~~~~~~~

- Optional dependencies can be installed along with xarray by specifying extras as ``pip install "xarray[extra]"`` where ``extra`` can be one of ``io``, ``accel``, ``parallel``, ``viz`` and ``complete``. See docs for updated :ref:`installation instructions `.
(:issue:`2888`, :pull:`4480`). By `Ashwin Vishnu `_, `Justus Magin `_ and `Mathias Hauser `_. - Removed stray spaces that stem from black removing new lines (:pull:`4504`). By `Mathias Hauser `_. - Ensure tests are not skipped in the ``py38-all-but-dask`` test environment (:issue:`4509`). By `Mathias Hauser `_. - Ignore select numpy warnings around missing values, where xarray handles the values appropriately (:pull:`4536`). By `Maximilian Roos `_. - Replace the internal use of ``pd.Index.__or__`` and ``pd.Index.__and__`` with ``pd.Index.union`` and ``pd.Index.intersection`` as they will stop working as set operations in the future (:issue:`4565`). By `Mathias Hauser `_. - Add GitHub action for running nightly tests against upstream dependencies (:pull:`4583`). By `Anderson Banihirwe `_. - Ensure all figures are closed properly in plot tests (:pull:`4600`). By `Yash Saboo `_, `Nirupam K N `_ and `Mathias Hauser `_. .. _whats-new.0.16.1: v0.16.1 (2020-09-20) --------------------- This patch release fixes an incompatibility with a recent pandas change, which was causing an issue indexing with a ``datetime64``. It also includes improvements to ``rolling``, ``to_dataframe``, ``cov`` & ``corr`` methods and bug fixes. Our documentation has a number of improvements, including fixing all doctests and confirming their accuracy on every commit. Many thanks to the 36 contributors who contributed to this release: Aaron Spring, Akio Taniguchi, Aleksandar Jelenak, Alexandre Poux, Caleb, Dan Nowacki, Deepak Cherian, Gerardo Rivera, Jacob Tomlinson, James A. Bednar, Joe Hamman, Julia Kent, Kai Mühlbauer, Keisuke Fujii, Mathias Hauser, Maximilian Roos, Nick R. Papior, Pascal Bourgault, Peter Hausamann, Romain Martinez, Russell Manser, Samnan Rahee, Sander, Spencer Clark, Stephan Hoyer, Thomas Zilio, Tobias Kölling, Tom Augspurger, alexamici, crusaderky, darikg, inakleinbottle, jenssss, johnomotani, keewis, and rpgoldman. Breaking changes ~~~~~~~~~~~~~~~~ - :py:meth:`DataArray.astype` and :py:meth:`Dataset.astype` now preserve attributes. Keep the old behavior by passing ``keep_attrs=False`` (:issue:`2049`, :pull:`4314`). By `Dan Nowacki `_ and `Gabriel Joel Mitchell `_. New Features ~~~~~~~~~~~~ - :py:meth:`~xarray.DataArray.rolling` and :py:meth:`~xarray.Dataset.rolling` now accept more than 1 dimension. (:pull:`4219`) By `Keisuke Fujii `_. - :py:meth:`~xarray.DataArray.to_dataframe` and :py:meth:`~xarray.Dataset.to_dataframe` now accept a ``dim_order`` parameter that allows specifying the order of the resulting dataframe's dimensions (:issue:`4331`, :pull:`4333`). By `Thomas Zilio `_. - Support multiple outputs in :py:func:`xarray.apply_ufunc` when using ``dask='parallelized'``. (:issue:`1815`, :pull:`4060`). By `Kai Mühlbauer `_. - ``min_count`` can be supplied to reductions such as ``.sum`` when specifying multiple dimensions to reduce over (:pull:`4356`). By `Maximilian Roos `_. - :py:func:`xarray.cov` and :py:func:`xarray.corr` now handle missing values (:pull:`4351`). By `Maximilian Roos `_. - Add support for parsing datetime strings formatted following the default string representation of cftime objects, i.e. YYYY-MM-DD hh:mm:ss, in partial datetime string indexing, as well as :py:meth:`~xarray.cftime_range` (:issue:`4337`). By `Spencer Clark `_. - Build ``CFTimeIndex.__repr__`` explicitly as :py:class:`pandas.Index`. Add ``calendar`` as a new property for :py:class:`CFTimeIndex` and show ``calendar`` and ``length`` in ``CFTimeIndex.__repr__`` (:issue:`2416`, :pull:`4092`) By `Aaron Spring `_.
- Use a wrapped array's ``_repr_inline_`` method to construct the collapsed ``repr`` of :py:class:`DataArray` and :py:class:`Dataset` objects and document the new method in :doc:`internals/index`. (:pull:`4248`). By `Justus Magin `_. - Allow per-variable fill values in most functions. (:pull:`4237`). By `Justus Magin `_. - Expose ``use_cftime`` option in :py:func:`~xarray.open_zarr` (:issue:`2886`, :pull:`3229`) By `Samnan Rahee `_ and `Anderson Banihirwe `_. Bug fixes ~~~~~~~~~ - Fix indexing with datetime64 scalars with pandas 1.1 (:issue:`4283`). By `Stephan Hoyer `_ and `Justus Magin `_. - Variables which are chunked using dask only along some dimensions can be chunked while storing with zarr along previously unchunked dimensions (:pull:`4312`) By `Tobias Kölling `_. - Fixed a bug in backend caused by basic installation of Dask (:issue:`4164`, :pull:`4318`) `Sam Morley `_. - Fixed a few bugs with :py:meth:`Dataset.polyfit` when encountering deficient matrix ranks (:issue:`4190`, :pull:`4193`). By `Pascal Bourgault `_. - Fixed inconsistencies between docstring and functionality for :py:meth:`DataArray.str.get` and :py:meth:`DataArray.str.wrap` (:issue:`4334`). By `Mathias Hauser `_. - Fixed overflow issue causing incorrect results in computing means of :py:class:`cftime.datetime` arrays (:issue:`4341`). By `Spencer Clark `_. - Fixed :py:meth:`Dataset.coarsen`, :py:meth:`DataArray.coarsen` dropping attributes on original object (:issue:`4120`, :pull:`4360`). By `Julia Kent `_. - fix the signature of the plot methods. (:pull:`4359`) By `Justus Magin `_. - Fix :py:func:`xarray.apply_ufunc` with ``vectorize=True`` and ``exclude_dims`` (:issue:`3890`). By `Mathias Hauser `_. - Fix ``KeyError`` when doing linear interpolation to an nd ``DataArray`` that contains NaNs (:pull:`4233`). By `Jens Svensmark `_ - Fix incorrect legend labels for :py:meth:`Dataset.plot.scatter` (:issue:`4126`). By `Peter Hausamann `_. - Fix ``dask.optimize`` on ``DataArray`` producing an invalid Dask task graph (:issue:`3698`) By `Tom Augspurger `_ - Fix ``pip install .`` when no ``.git`` directory exists; namely when the xarray source directory has been rsync'ed by PyCharm Professional for a remote deployment over SSH. By `Guido Imperiale `_ - Preserve dimension and coordinate order during :py:func:`xarray.concat` (:issue:`2811`, :issue:`4072`, :pull:`4419`). By `Kai Mühlbauer `_. - Avoid relying on :py:class:`set` objects for the ordering of the coordinates (:pull:`4409`) By `Justus Magin `_. Documentation ~~~~~~~~~~~~~ - Update the docstring of :py:meth:`DataArray.copy` to remove incorrect mention of 'dataset' (:issue:`3606`) By `Sander van Rijn `_. - Removed skipna argument from :py:meth:`DataArray.count`, :py:meth:`DataArray.any`, :py:meth:`DataArray.all`. (:issue:`755`) By `Sander van Rijn `_ - Update the contributing guide to use merges instead of rebasing and state that we squash-merge. (:pull:`4355`). By `Justus Magin `_. - Make sure the examples from the docstrings actually work (:pull:`4408`). By `Justus Magin `_. - Updated Vectorized Indexing to a clearer example. By `Maximilian Roos `_ Internal Changes ~~~~~~~~~~~~~~~~ - Fixed all doctests and enabled their running in CI. By `Justus Magin `_. 
- Relaxed the :ref:`mindeps_policy` to support: - all versions of setuptools released in the last 42 months (but no older than 38.4) - all versions of dask and dask.distributed released in the last 12 months (but no older than 2.9) - all versions of other packages released in the last 12 months All are up from 6 months (:issue:`4295`) `Guido Imperiale `_. - Use :py:func:`dask.array.apply_gufunc ` instead of :py:func:`dask.array.blockwise` in :py:func:`xarray.apply_ufunc` when using ``dask='parallelized'``. (:pull:`4060`, :pull:`4391`, :pull:`4392`) By `Kai Mühlbauer `_. - Align ``mypy`` versions to ``0.782`` across ``requirements`` and ``.pre-commit-config.yml`` files. (:pull:`4390`) By `Maximilian Roos `_ - Only load resource files when running inside a Jupyter Notebook (:issue:`4294`) By `Guido Imperiale `_ - Silenced most ``numpy`` warnings such as ``Mean of empty slice``. (:pull:`4369`) By `Maximilian Roos `_ - Enable type checking for :py:func:`concat` (:issue:`4238`) By `Mathias Hauser `_. - Updated plot functions for matplotlib version 3.3 and silenced warnings in the plot tests (:pull:`4365`). By `Mathias Hauser `_. - Versions in ``pre-commit.yaml`` are now pinned, to reduce the chances of conflicting versions. (:pull:`4388`) By `Maximilian Roos `_ .. _whats-new.0.16.0: v0.16.0 (2020-07-11) --------------------- This release adds ``xarray.cov`` & ``xarray.corr`` for covariance & correlation respectively; the ``idxmax`` & ``idxmin`` methods, the ``polyfit`` method & ``xarray.polyval`` for fitting polynomials, as well as a number of documentation improvements, other features, and bug fixes. Many thanks to all 44 contributors who contributed to this release: Akio Taniguchi, Andrew Williams, Aurélien Ponte, Benoit Bovy, Dave Cole, David Brochart, Deepak Cherian, Elliott Sales de Andrade, Etienne Combrisson, Hossein Madadi, Huite, Joe Hamman, Kai Mühlbauer, Keisuke Fujii, Maik Riechert, Marek Jacob, Mathias Hauser, Matthieu Ancellin, Maximilian Roos, Noah D Brenowitz, Oriol Abril, Pascal Bourgault, Phillip Butcher, Prajjwal Nijhara, Ray Bell, Ryan Abernathey, Ryan May, Spencer Clark, Spencer Hill, Srijan Saurav, Stephan Hoyer, Taher Chegini, Todd, Tom Nicholas, Yohai Bar Sinai, Yunus Sevinchan, arabidopsis, aurghs, clausmichele, dmey, johnomotani, keewis, raphael dussin, risebell Breaking changes ~~~~~~~~~~~~~~~~ - Minimum supported versions for the following packages have changed: ``dask >=2.9``, ``distributed>=2.9``. By `Deepak Cherian `_ - ``groupby`` operations will restore coord dimension order. Pass ``restore_coord_dims=False`` to revert to previous behavior. - :meth:`DataArray.transpose` will now transpose coordinates by default. Pass ``transpose_coords=False`` to revert to previous behaviour. By `Maximilian Roos `_ - Alternate draw styles for :py:meth:`plot.step` must be passed using the ``drawstyle`` (or ``ds``) keyword argument, instead of the ``linestyle`` (or ``ls``) keyword argument, in line with the `upstream change in Matplotlib `_. (:pull:`3274`) By `Elliott Sales de Andrade `_ - The old ``auto_combine`` function has now been removed in favour of the :py:func:`combine_by_coords` and :py:func:`combine_nested` functions. This also means that the default behaviour of :py:func:`open_mfdataset` has changed to use ``combine='by_coords'`` as the default argument value. (:issue:`2616`, :pull:`3926`) By `Tom Nicholas `_. - The ``DataArray`` and ``Variable`` HTML reprs now expand the data section by default (:issue:`4176`) By `Stephan Hoyer `_. 
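As a rough, illustrative sketch of the ``auto_combine`` removal noted above (the file names and the ``time`` concatenation dimension below are placeholders, not part of the release itself), the replacement APIs can be used like this::

    import xarray as xr

    # open_mfdataset now defaults to combine="by_coords"; passing it
    # explicitly keeps the intent obvious.
    ds = xr.open_mfdataset("data/*.nc", combine="by_coords")

    # The same combination logic is available for already-open datasets.
    parts = [xr.open_dataset("part1.nc"), xr.open_dataset("part2.nc")]
    combined = xr.combine_by_coords(parts)

    # combine_nested covers the old positional behaviour of auto_combine:
    # datasets are passed as a (possibly nested) list together with the
    # dimension(s) to concatenate along.
    stacked = xr.combine_nested(parts, concat_dim="time")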
New Features ~~~~~~~~~~~~ - :py:meth:`DataArray.argmin` and :py:meth:`DataArray.argmax` now support sequences of 'dim' arguments, and if a sequence is passed return a dict (which can be passed to :py:meth:`DataArray.isel` to get the value of the minimum) of the indices for each dimension of the minimum or maximum of a DataArray. (:pull:`3936`) By `John Omotani `_, thanks to `Keisuke Fujii `_ for work in :pull:`1469`. - Added :py:func:`xarray.cov` and :py:func:`xarray.corr` (:issue:`3784`, :pull:`3550`, :pull:`4089`). By `Andrew Williams `_ and `Robin Beer `_. - Implement :py:meth:`DataArray.idxmax`, :py:meth:`DataArray.idxmin`, :py:meth:`Dataset.idxmax`, :py:meth:`Dataset.idxmin`. (:issue:`60`, :pull:`3871`) By `Todd Jennings `_ - Added :py:meth:`DataArray.polyfit` and :py:func:`xarray.polyval` for fitting polynomials. (:issue:`3349`, :pull:`3733`, :pull:`4099`) By `Pascal Bourgault `_. - Added :py:meth:`xarray.infer_freq` for extending frequency inferring to CFTime indexes and data (:pull:`4033`). By `Pascal Bourgault `_. - ``chunks='auto'`` is now supported in the ``chunks`` argument of :py:meth:`Dataset.chunk`. (:issue:`4055`) By `Andrew Williams `_ - Control over attributes of result in :py:func:`merge`, :py:func:`concat`, :py:func:`combine_by_coords` and :py:func:`combine_nested` using combine_attrs keyword argument. (:issue:`3865`, :pull:`3877`) By `John Omotani `_ - ``missing_dims`` argument to :py:meth:`Dataset.isel`, :py:meth:`DataArray.isel` and :py:meth:`Variable.isel` to allow replacing the exception when a dimension passed to ``isel`` is not present with a warning, or just ignore the dimension. (:issue:`3866`, :pull:`3923`) By `John Omotani `_ - Support dask handling for :py:meth:`DataArray.idxmax`, :py:meth:`DataArray.idxmin`, :py:meth:`Dataset.idxmax`, :py:meth:`Dataset.idxmin`. (:pull:`3922`, :pull:`4135`) By `Kai Mühlbauer `_ and `Pascal Bourgault `_. - More support for unit aware arrays with pint (:pull:`3643`, :pull:`3975`, :pull:`4163`) By `Justus Magin `_. - Support overriding existing variables in ``to_zarr()`` with ``mode='a'`` even without ``append_dim``, as long as dimension sizes do not change. By `Stephan Hoyer `_. - Allow plotting of boolean arrays. (:pull:`3766`) By `Marek Jacob `_ - Enable using MultiIndex levels as coordinates in 1D and 2D plots (:issue:`3927`). By `Mathias Hauser `_. - A ``days_in_month`` accessor for :py:class:`xarray.CFTimeIndex`, analogous to the ``days_in_month`` accessor for a :py:class:`pandas.DatetimeIndex`, which returns the days in the month each datetime in the index. Now days in month weights for both standard and non-standard calendars can be obtained using the :py:class:`~core.accessor_dt.DatetimeAccessor` (:pull:`3935`). This feature requires cftime version 1.1.0 or greater. By `Spencer Clark `_. - For the netCDF3 backend, added dtype coercions for unsigned integer types. (:issue:`4014`, :pull:`4018`) By `Yunus Sevinchan `_ - :py:meth:`map_blocks` now accepts a ``template`` kwarg. This allows use cases where the result of a computation could not be inferred automatically. By `Deepak Cherian `_ - :py:meth:`map_blocks` can now handle dask-backed xarray objects in ``args``. 
(:pull:`3818`) By `Deepak Cherian `_ - Add keyword ``decode_timedelta`` to :py:func:`xarray.open_dataset` (and :py:func:`xarray.open_dataarray`, :py:func:`xarray.decode_cf`) that allows disabling/enabling the decoding of timedeltas independently of time decoding (:issue:`1621`) By `Aureliana Barghini `_ Enhancements ~~~~~~~~~~~~ - Performance improvement of :py:meth:`DataArray.interp` and :py:func:`Dataset.interp`: independent interpolations are now performed sequentially rather than in one large multidimensional space. (:issue:`2223`) By `Keisuke Fujii `_. - :py:meth:`DataArray.interp` now supports interpolation over chunked dimensions (:pull:`4155`). By `Alexandre Poux `_. - Major performance improvement for :py:meth:`Dataset.from_dataframe` when the dataframe has a MultiIndex (:pull:`4184`). By `Stephan Hoyer `_. - :py:meth:`DataArray.reset_index` and :py:meth:`Dataset.reset_index` now keep coordinate attributes (:pull:`4103`). By `Oriol Abril `_. - Axes kwargs such as ``facecolor`` can now be passed to :py:meth:`DataArray.plot` in ``subplot_kws``. This works for both single axes plots and FacetGrid plots. By `Raphael Dussin `_. - Array items with long string reprs are now limited to a reasonable width (:pull:`3900`) By `Maximilian Roos `_ - Large arrays whose numpy reprs would have greater than 40 lines are now limited to a reasonable length. (:pull:`3905`) By `Maximilian Roos `_ Bug fixes ~~~~~~~~~ - Fix errors combining attrs in :py:func:`open_mfdataset` (:issue:`4009`, :pull:`4173`) By `John Omotani `_ - If groupby receives a ``DataArray`` with name=None, assign a default name (:issue:`158`) By `Phil Butcher `_. - Support dark mode in VS code (:issue:`4024`) By `Keisuke Fujii `_. - Fix bug when converting multiindexed pandas objects to sparse xarray objects. (:issue:`4019`) By `Deepak Cherian `_. - ``ValueError`` is raised when ``fill_value`` is not a scalar in :py:meth:`full_like`. (:issue:`3977`) By `Huite Bootsma `_. - Fix wrong order in converting a ``pd.Series`` with a MultiIndex to ``DataArray``. (:issue:`3951`, :issue:`4186`) By `Keisuke Fujii `_ and `Stephan Hoyer `_. - Fix renaming of coords when one or more stacked coords is not in sorted order during stack+groupby+apply operations. (:issue:`3287`, :pull:`3906`) By `Spencer Hill `_ - Fix a regression where deleting a coordinate from a copied :py:class:`DataArray` can affect the original :py:class:`DataArray`. (:issue:`3899`, :pull:`3871`) By `Todd Jennings `_ - Fix :py:class:`~xarray.plot.FacetGrid` plots with a single contour. (:issue:`3569`, :pull:`3915`). By `Deepak Cherian `_ - Use divergent colormap if ``levels`` spans 0. (:issue:`3524`) By `Deepak Cherian `_ - Fix :py:class:`~xarray.plot.FacetGrid` when ``vmin == vmax``. (:issue:`3734`) By `Deepak Cherian `_ - Fix plotting when ``levels`` is a scalar and ``norm`` is provided. (:issue:`3735`) By `Deepak Cherian `_ - Fix bug where plotting line plots with 2D coordinates depended on dimension order. (:issue:`3933`) By `Tom Nicholas `_. - Fix ``RasterioDeprecationWarning`` when using a ``vrt`` in ``open_rasterio``. (:issue:`3964`) By `Taher Chegini `_. - Fix ``AttributeError`` on displaying a :py:class:`Variable` in a notebook context. (:issue:`3972`, :pull:`3973`) By `Ian Castleden `_. - Fix bug causing :py:meth:`DataArray.interpolate_na` to always drop attributes, and added ``keep_attrs`` argument. (:issue:`3968`) By `Tom Nicholas `_. - Fix bug in time parsing failing to fall back to cftime.
This was causing time variables with a time unit of ``'msecs'`` to fail to parse. (:pull:`3998`) By `Ryan May `_. - Fix weighted mean when passing boolean weights (:issue:`4074`). By `Mathias Hauser `_. - Fix html repr in untrusted notebooks: fallback to plain text repr. (:pull:`4053`) By `Benoit Bovy `_. - Fix :py:meth:`DataArray.to_unstacked_dataset` for single-dimension variables. (:issue:`4049`) By `Deepak Cherian `_ - Fix :py:func:`open_rasterio` for ``WarpedVRT`` with specified ``src_crs``. (:pull:`4104`) By `Dave Cole `_. Documentation ~~~~~~~~~~~~~ - update the docstring of :py:meth:`DataArray.assign_coords` : clarify how to add a new coordinate to an existing dimension and illustrative example (:issue:`3952`, :pull:`3958`) By `Etienne Combrisson `_. - update the docstring of :py:meth:`Dataset.diff` and :py:meth:`DataArray.diff` so it does document the ``dim`` parameter as required. (:issue:`1040`, :pull:`3909`) By `Justus Magin `_. - Updated :doc:`Calculating Seasonal Averages from Timeseries of Monthly Means ` example notebook to take advantage of the new ``days_in_month`` accessor for :py:class:`xarray.CFTimeIndex` (:pull:`3935`). By `Spencer Clark `_. - Updated the list of current core developers. (:issue:`3892`) By `Tom Nicholas `_. - Add example for multi-dimensional extrapolation and note different behavior of ``kwargs`` in :py:meth:`Dataset.interp` and :py:meth:`DataArray.interp` for 1-d and n-d interpolation (:pull:`3956`). By `Matthias Riße `_. - Apply ``black`` to all the code in the documentation (:pull:`4012`) By `Justus Magin `_. - Narrative documentation now describes :py:meth:`map_blocks`: :ref:`dask.automatic-parallelization`. By `Deepak Cherian `_. - Document ``.plot``, ``.dt``, ``.str`` accessors the way they are called. (:issue:`3625`, :pull:`3988`) By `Justus Magin `_. - Add documentation for the parameters and return values of :py:meth:`DataArray.sel`. By `Justus Magin `_. Internal Changes ~~~~~~~~~~~~~~~~ - Raise more informative error messages for chunk size conflicts when writing to zarr files. By `Deepak Cherian `_. - Run the ``isort`` pre-commit hook only on python source files and update the ``flake8`` version. (:issue:`3750`, :pull:`3711`) By `Justus Magin `_. - Add `blackdoc `_ to the list of checkers for development. (:pull:`4177`) By `Justus Magin `_. - Add a CI job that runs the tests with every optional dependency except ``dask``. (:issue:`3794`, :pull:`3919`) By `Justus Magin `_. - Use ``async`` / ``await`` for the asynchronous distributed tests. (:issue:`3987`, :pull:`3989`) By `Justus Magin `_. - Various internal code clean-ups (:pull:`4026`, :pull:`4038`). By `Prajjwal Nijhara `_. .. _whats-new.0.15.1: v0.15.1 (23 Mar 2020) --------------------- This release brings many new features such as :py:meth:`Dataset.weighted` methods for weighted array reductions, a new jupyter repr by default, and the start of units integration with pint. There's also the usual batch of usability improvements, documentation additions, and bug fixes. Breaking changes ~~~~~~~~~~~~~~~~ - Raise an error when assigning to the ``.values`` or ``.data`` attribute of dimension coordinates i.e. ``IndexVariable`` objects. This has been broken since v0.12.0. Please use :py:meth:`DataArray.assign_coords` or :py:meth:`Dataset.assign_coords` instead. (:issue:`3470`, :pull:`3862`) By `Deepak Cherian `_ New Features ~~~~~~~~~~~~ - Weighted array reductions are now supported via the new :py:meth:`DataArray.weighted` and :py:meth:`Dataset.weighted` methods. See :ref:`compute.weighted`. 
(:issue:`422`, :pull:`2922`). By `Mathias Hauser `_. - The new jupyter notebook repr (``Dataset._repr_html_`` and ``DataArray._repr_html_``) (introduced in 0.14.1) is now on by default. To disable, use ``xarray.set_options(display_style="text")``. By `Julia Signell `_. - Added support for :py:class:`pandas.DatetimeIndex`-style rounding of ``cftime.datetime`` objects directly via a :py:class:`CFTimeIndex` or via the :py:class:`~core.accessor_dt.DatetimeAccessor`. By `Spencer Clark `_ - Support new h5netcdf backend keyword ``phony_dims`` (available from h5netcdf v0.8.0) for :py:class:`~xarray.backends.H5NetCDFStore`. By `Kai Mühlbauer `_. - Add partial support for unit aware arrays with pint. (:pull:`3706`, :pull:`3611`) By `Justus Magin `_. - :py:meth:`Dataset.groupby` and :py:meth:`DataArray.groupby` now raise a ``TypeError`` on multiple string arguments. Receiving multiple string arguments often means a user is attempting to pass multiple dimensions as separate arguments and should instead pass a single list of dimensions. (:pull:`3802`) By `Maximilian Roos `_ - :py:func:`map_blocks` can now apply functions that add new unindexed dimensions. By `Deepak Cherian `_ - An ellipsis (``...``) is now supported in the ``dims`` argument of :py:meth:`Dataset.stack` and :py:meth:`DataArray.stack`, meaning all unlisted dimensions, similar to its meaning in :py:meth:`DataArray.transpose`. (:pull:`3826`) By `Maximilian Roos `_ - :py:meth:`Dataset.where` and :py:meth:`DataArray.where` accept a lambda as a first argument, which is then called on the input, replicating pandas' behavior. By `Maximilian Roos `_. - ``skipna`` is available in :py:meth:`Dataset.quantile`, :py:meth:`DataArray.quantile`, :py:meth:`core.groupby.DatasetGroupBy.quantile`, :py:meth:`core.groupby.DataArrayGroupBy.quantile` (:issue:`3843`, :pull:`3844`) By `Aaron Spring `_. - Add a diff summary for ``testing.assert_allclose``. (:issue:`3617`, :pull:`3847`) By `Justus Magin `_. Bug fixes ~~~~~~~~~ - Fix :py:meth:`Dataset.interp` when the indexing array shares coordinates with the indexed variable (:issue:`3252`). By `David Huard `_. - Fix recombination of groups in :py:meth:`Dataset.groupby` and :py:meth:`DataArray.groupby` when performing an operation that changes the size of the groups along the grouped dimension. By `Eric Jansen `_. - Fix use of multi-index with categorical values (:issue:`3674`). By `Matthieu Ancellin `_. - Fix alignment with ``join="override"`` when some dimensions are unindexed. (:issue:`3681`). By `Deepak Cherian `_. - Fix :py:meth:`Dataset.swap_dims` and :py:meth:`DataArray.swap_dims` producing index with name reflecting the previous dimension name instead of the new one (:issue:`3748`, :pull:`3752`). By `Joseph K Aicher `_. - Use ``dask_array_type`` instead of ``dask_array.Array`` for type checking. (:issue:`3779`, :pull:`3787`) By `Justus Magin `_. - :py:func:`concat` can now handle coordinate variables only present in one of the objects to be concatenated when ``coords="different"``. By `Deepak Cherian `_. - xarray now respects the over, under and bad colors if set on a provided colormap. (:issue:`3590`, :pull:`3601`) By `johnomotani `_. - ``coarsen`` and ``rolling`` now respect ``xr.set_options(keep_attrs=True)`` to preserve attributes. :py:meth:`Dataset.coarsen` accepts a keyword argument ``keep_attrs`` to change this setting. (:issue:`3376`, :pull:`3801`) By `Andrew Thomas `_. - Delete associated indexes when deleting coordinate variables. (:issue:`3746`). By `Deepak Cherian `_.
- Fix :py:meth:`Dataset.to_zarr` when using ``append_dim`` and ``group`` simultaneously. (:issue:`3170`). By `Matthias Meyer `_. - Fix html repr on :py:class:`Dataset` with non-string keys (:pull:`3807`). By `Maximilian Roos `_. Documentation ~~~~~~~~~~~~~ - Fix documentation of :py:class:`DataArray` removing the deprecated mention that when omitted, ``dims`` are inferred from a ``coords``-dict. (:pull:`3821`) By `Sander van Rijn `_. - Improve the :py:func:`where` docstring. By `Maximilian Roos `_ - Update the installation instructions: only explicitly list recommended dependencies (:issue:`3756`). By `Mathias Hauser `_. Internal Changes ~~~~~~~~~~~~~~~~ - Remove the internal ``import_seaborn`` function which handled the deprecation of the ``seaborn.apionly`` entry point (:issue:`3747`). By `Mathias Hauser `_. - Don't test pint integration in combination with datetime objects. (:issue:`3778`, :pull:`3788`) By `Justus Magin `_. - Change test_open_mfdataset_list_attr to only run with dask installed (:issue:`3777`, :pull:`3780`). By `Bruno Pagani `_. - Preserve the ability to index with ``method="nearest"`` with a :py:class:`CFTimeIndex` with pandas versions greater than 1.0.1 (:issue:`3751`). By `Spencer Clark `_. - Greater flexibility and improved test coverage of subtracting various types of objects from a :py:class:`CFTimeIndex`. By `Spencer Clark `_. - Update Azure CI MacOS image, given pending removal. By `Maximilian Roos `_ - Remove xfails for scipy 1.0.1 for tests that append to netCDF files (:pull:`3805`). By `Mathias Hauser `_. - Remove conversion to ``pandas.Panel``, given its removal in pandas in favor of xarray's objects. By `Maximilian Roos `_ .. _whats-new.0.15.0: v0.15.0 (30 Jan 2020) --------------------- This release brings many improvements to xarray's documentation: our examples are now binderized notebooks (`click here `_) and we have new example notebooks from our SciPy 2019 sprint (many thanks to our contributors!). This release also features many API improvements such as a new :py:class:`~core.accessor_dt.TimedeltaAccessor` and support for :py:class:`CFTimeIndex` in :py:meth:`~DataArray.interpolate_na`); as well as many bug fixes. Breaking changes ~~~~~~~~~~~~~~~~ - Bumped minimum tested versions for dependencies: - numpy 1.15 - pandas 0.25 - dask 2.2 - distributed 2.2 - scipy 1.3 - Remove ``compat`` and ``encoding`` kwargs from ``DataArray``, which have been deprecated since 0.12. (:pull:`3650`). Instead, specify the ``encoding`` kwarg when writing to disk or set the :py:attr:`DataArray.encoding` attribute directly. By `Maximilian Roos `_. - :py:func:`xarray.dot`, :py:meth:`DataArray.dot`, and the ``@`` operator now use ``align="inner"`` (except when ``xarray.set_options(arithmetic_join="exact")``; :issue:`3694`) by `Mathias Hauser `_. New Features ~~~~~~~~~~~~ - Implement :py:meth:`DataArray.pad` and :py:meth:`Dataset.pad`. (:issue:`2605`, :pull:`3596`). By `Mark Boer `_. - :py:meth:`DataArray.sel` and :py:meth:`Dataset.sel` now support :py:class:`pandas.CategoricalIndex`. (:issue:`3669`) By `Keisuke Fujii `_. - Support using an existing, opened h5netcdf ``File`` with :py:class:`~xarray.backends.H5NetCDFStore`. This permits creating an :py:class:`~xarray.Dataset` from a h5netcdf ``File`` that has been opened using other means (:issue:`3618`). By `Kai Mühlbauer `_. - Implement ``median`` and ``nanmedian`` for dask arrays. This works by rechunking to a single chunk along all reduction axes. (:issue:`2999`). By `Deepak Cherian `_. 
- :py:func:`~xarray.concat` now preserves attributes from the first Variable. (:issue:`2575`, :issue:`2060`, :issue:`1614`) By `Deepak Cherian `_. - :py:meth:`Dataset.quantile`, :py:meth:`DataArray.quantile` and ``GroupBy.quantile`` now work with dask Variables. By `Deepak Cherian `_. - Added the ``count`` reduction method to both :py:class:`~computation.rolling.DatasetCoarsen` and :py:class:`~computation.rolling.DataArrayCoarsen` objects. (:pull:`3500`) By `Deepak Cherian `_ - Add ``meta`` kwarg to :py:func:`~xarray.apply_ufunc`; this is passed on to :py:func:`dask.array.blockwise`. (:pull:`3660`) By `Deepak Cherian `_. - Add ``attrs_file`` option in :py:func:`~xarray.open_mfdataset` to choose the source file for global attributes in a multi-file dataset (:issue:`2382`, :pull:`3498`). By `Julien Seguinot `_. - :py:meth:`Dataset.swap_dims` and :py:meth:`DataArray.swap_dims` now allow swapping to dimension names that don't exist yet. (:pull:`3636`) By `Justus Magin `_. - Extend :py:class:`~core.accessor_dt.DatetimeAccessor` properties and support ``.dt`` accessor for timedeltas via :py:class:`~core.accessor_dt.TimedeltaAccessor` (:pull:`3612`) By `Anderson Banihirwe `_. - Improvements to interpolating along time axes (:issue:`3641`, :pull:`3631`). By `David Huard `_. - Support :py:class:`CFTimeIndex` in :py:meth:`DataArray.interpolate_na` - define 1970-01-01 as the default offset for the interpolation index for both :py:class:`pandas.DatetimeIndex` and :py:class:`CFTimeIndex`, - use microseconds in the conversion from timedelta objects to floats to avoid overflow errors. Bug fixes ~~~~~~~~~ - Applying a user-defined function that adds new dimensions using :py:func:`apply_ufunc` and ``vectorize=True`` now works with ``dask > 2.0``. (:issue:`3574`, :pull:`3660`). By `Deepak Cherian `_. - Fix :py:meth:`~xarray.combine_by_coords` to allow for combining incomplete hypercubes of Datasets (:issue:`3648`). By `Ian Bolliger `_. - Fix :py:func:`~xarray.combine_by_coords` when combining cftime coordinates which span long time intervals (:issue:`3535`). By `Spencer Clark `_. - Fix plotting with transposed 2D non-dimensional coordinates. (:issue:`3138`, :pull:`3441`) By `Deepak Cherian `_. - :py:meth:`plot.FacetGrid.set_titles` can now replace existing row titles of a :py:class:`~xarray.plot.FacetGrid` plot. In addition :py:class:`~xarray.plot.FacetGrid` gained two new attributes: :py:attr:`~xarray.plot.FacetGrid.col_labels` and :py:attr:`~xarray.plot.FacetGrid.row_labels` contain :py:class:`matplotlib.text.Text` handles for both column and row labels. These can be used to manually change the labels. By `Deepak Cherian `_. - Fix issue with Dask-backed datasets raising a ``KeyError`` on some computations involving :py:func:`map_blocks` (:pull:`3598`). By `Tom Augspurger `_. - Ensure :py:meth:`Dataset.quantile`, :py:meth:`DataArray.quantile` issue the correct error when ``q`` is out of bounds (:issue:`3634`) by `Mathias Hauser `_. - Fix regression in xarray 0.14.1 that prevented encoding times with certain ``dtype``, ``_FillValue``, and ``missing_value`` encodings (:issue:`3624`). By `Spencer Clark `_ - Raise an error when trying to use :py:meth:`Dataset.rename_dims` to rename to an existing name (:issue:`3438`, :pull:`3645`) By `Justus Magin `_. - :py:meth:`Dataset.rename`, :py:meth:`DataArray.rename` now check for conflicts with MultiIndex level names. - :py:meth:`Dataset.merge` no longer fails when passed a :py:class:`DataArray` instead of a :py:class:`Dataset`. By `Tom Nicholas `_. 
- Fix a regression in :py:meth:`Dataset.drop`: allow passing any iterable when dropping variables (:issue:`3552`, :pull:`3693`) By `Justus Magin `_. - Fixed errors emitted by ``mypy --strict`` in modules that import xarray. (:issue:`3695`) by `Guido Imperiale `_. - Allow plotting of binned coordinates on the y axis in :py:meth:`plot.line` and :py:meth:`plot.step` plots (:issue:`3571`, :pull:`3685`) by `Julien Seguinot `_. - setuptools is now marked as a dependency of xarray (:pull:`3628`) by `Richard Höchenberger `_. Documentation ~~~~~~~~~~~~~ - Switch doc examples to use `nbsphinx `_ and replace ``sphinx_gallery`` scripts with Jupyter notebooks. (:pull:`3105`, :pull:`3106`, :pull:`3121`) By `Ryan Abernathey `_. - Added :doc:`example notebook ` demonstrating use of xarray with Regional Ocean Modeling System (ROMS) ocean hydrodynamic model output. (:pull:`3116`) By `Robert Hetland `_. - Added :doc:`example notebook ` demonstrating the visualization of ERA5 GRIB data. (:pull:`3199`) By `Zach Bruick `_ and `Stephan Siemen `_. - Added examples for :py:meth:`DataArray.quantile`, :py:meth:`Dataset.quantile` and ``GroupBy.quantile``. (:pull:`3576`) By `Justus Magin `_. - Add new :doc:`example notebook ` example notebook demonstrating vectorization of a 1D function using :py:func:`apply_ufunc` , dask and numba. By `Deepak Cherian `_. - Added example for :py:func:`~xarray.map_blocks`. (:pull:`3667`) By `Riley X. Brady `_. Internal Changes ~~~~~~~~~~~~~~~~ - Make sure dask names change when rechunking by different chunk sizes. Conversely, make sure they stay the same when rechunking by the same chunk size. (:issue:`3350`) By `Deepak Cherian `_. - 2x to 5x speed boost (on small arrays) for :py:meth:`Dataset.isel`, :py:meth:`DataArray.isel`, and :py:meth:`DataArray.__getitem__` when indexing by int, slice, list of int, scalar ndarray, or 1-dimensional ndarray. (:pull:`3533`) by `Guido Imperiale `_. - Removed internal method ``Dataset._from_vars_and_coord_names``, which was dominated by ``Dataset._construct_direct``. (:pull:`3565`) By `Maximilian Roos `_. - Replaced versioneer with setuptools-scm. Moved contents of setup.py to setup.cfg. Removed pytest-runner from setup.py, as per deprecation notice on the pytest-runner project. (:pull:`3714`) by `Guido Imperiale `_. - Use of isort is now enforced by CI. (:pull:`3721`) by `Guido Imperiale `_ .. _whats-new.0.14.1: v0.14.1 (19 Nov 2019) --------------------- Breaking changes ~~~~~~~~~~~~~~~~ - Broken compatibility with ``cftime < 1.0.3`` . By `Deepak Cherian `_. .. warning:: cftime version 1.0.4 is broken (`cftime/126 `_); please use version 1.0.4.2 instead. - All leftover support for dates from non-standard calendars through ``netcdftime``, the module included in versions of netCDF4 prior to 1.4 that eventually became the `cftime `_ package, has been removed in favor of relying solely on the standalone ``cftime`` package (:pull:`3450`). By `Spencer Clark `_. New Features ~~~~~~~~~~~~ - Added the ``sparse`` option to :py:meth:`~xarray.DataArray.unstack`, :py:meth:`~xarray.Dataset.unstack`, :py:meth:`~xarray.DataArray.reindex`, :py:meth:`~xarray.Dataset.reindex` (:issue:`3518`). By `Keisuke Fujii `_. - Added the ``fill_value`` option to :py:meth:`DataArray.unstack` and :py:meth:`Dataset.unstack` (:issue:`3518`, :pull:`3541`). By `Keisuke Fujii `_. - Added the ``max_gap`` kwarg to :py:meth:`~xarray.DataArray.interpolate_na` and :py:meth:`~xarray.Dataset.interpolate_na`. This controls the maximum size of the data gap that will be filled by interpolation. 
By `Deepak Cherian `_. - Added :py:meth:`Dataset.drop_sel` & :py:meth:`DataArray.drop_sel` for dropping labels. :py:meth:`Dataset.drop_vars` & :py:meth:`DataArray.drop_vars` have been added for dropping variables (including coordinates). The existing :py:meth:`Dataset.drop` & :py:meth:`DataArray.drop` methods remain as a backward compatible option for dropping either labels or variables, but using the more specific methods is encouraged. (:pull:`3475`) By `Maximilian Roos `_ - Added :py:meth:`Dataset.map` & ``GroupBy.map`` & ``Resample.map`` for mapping / applying a function over each item in the collection, reflecting the widely used and least surprising name for this operation. The existing ``apply`` methods remain for backward compatibility, though using the ``map`` methods is encouraged. (:pull:`3459`) By `Maximilian Roos `_ - :py:meth:`Dataset.transpose` and :py:meth:`DataArray.transpose` now support an ellipsis (``...``) to represent all 'other' dimensions. For example, to move one dimension to the front, use ``.transpose('x', ...)``. (:pull:`3421`) By `Maximilian Roos `_ - Changed ``xr.ALL_DIMS`` to equal python's ``Ellipsis`` (``...``), and changed internal usages to use ``...`` directly. As before, you can use this to instruct a ``groupby`` operation to reduce over all dimensions. While we have no plans to remove ``xr.ALL_DIMS``, we suggest using ``...``. (:pull:`3418`) By `Maximilian Roos `_ - :py:func:`xarray.dot`, and :py:meth:`DataArray.dot` now support the ``dims=...`` option to sum over the union of dimensions of all input arrays (:issue:`3423`) by `Mathias Hauser `_. - Added new ``Dataset._repr_html_`` and ``DataArray._repr_html_`` to improve representation of objects in Jupyter. By default this feature is turned off for now. Enable it with ``xarray.set_options(display_style="html")``. (:pull:`3425`) by `Benoit Bovy `_ and `Julia Signell `_. - Implement `dask deterministic hashing `_ for xarray objects. Note that xarray objects with a dask.array backend already used deterministic hashing in previous releases; this change implements it when whole xarray objects are embedded in a dask graph, e.g. when :py:meth:`DataArray.map_blocks` is invoked. (:issue:`3378`, :pull:`3446`, :pull:`3515`) By `Deepak Cherian `_ and `Guido Imperiale `_. - xarray now respects the ``DataArray.encoding["coordinates"]`` attribute when writing to disk. See :ref:`io.coordinates` for more. (:issue:`3351`, :pull:`3487`) By `Deepak Cherian `_. - Add the documented-but-missing :py:meth:`~core.groupby.DatasetGroupBy.quantile`. (:issue:`3525`, :pull:`3527`). By `Justus Magin `_. Bug fixes ~~~~~~~~~ - Ensure an index of type ``CFTimeIndex`` is not converted to a ``DatetimeIndex`` when calling :py:meth:`Dataset.rename`, :py:meth:`Dataset.rename_dims` and :py:meth:`Dataset.rename_vars`. By `Mathias Hauser `_. (:issue:`3522`). - Fix a bug in :py:meth:`DataArray.set_index` in case that an existing dimension becomes a level variable of MultiIndex. (:pull:`3520`). By `Keisuke Fujii `_. - Harmonize ``_FillValue``, ``missing_value`` during encoding and decoding steps. (:pull:`3502`) By `Anderson Banihirwe `_. - Fix regression introduced in v0.14.0 that would cause a crash if dask is installed but cloudpickle isn't (:issue:`3401`) by `Rhys Doyle `_ - Fix grouping over variables with NaNs. (:issue:`2383`, :pull:`3406`). By `Deepak Cherian `_.
- Make alignment and concatenation significantly more efficient by using dask names to compare dask objects prior to comparing values after computation. This change makes it more convenient to carry around large non-dimensional coordinate variables backed by dask arrays. Existing workarounds involving ``reset_coords(drop=True)`` should now be unnecessary in most cases. (:issue:`3068`, :issue:`3311`, :issue:`3454`, :pull:`3453`). By `Deepak Cherian `_. - Add support for cftime>=1.0.4. By `Anderson Banihirwe `_. - Rolling reduction operations no longer compute dask arrays by default. (:issue:`3161`). In addition, the ``allow_lazy`` kwarg to ``reduce`` is deprecated. By `Deepak Cherian `_. - Fix ``GroupBy.reduce`` when reducing over multiple dimensions. (:issue:`3402`). By `Deepak Cherian `_ - Allow appending datetime and bool data variables to zarr stores. (:issue:`3480`). By `Akihiro Matsukawa `_. - Add support for numpy >=1.18 (); bugfix mean() on datetime64 arrays on dask backend (:issue:`3409`, :pull:`3537`). By `Guido Imperiale `_. - Add support for pandas >=0.26 (:issue:`3440`). By `Deepak Cherian `_. - Add support for pseudonetcdf >=3.1 (:pull:`3485`). By `Barron Henderson `_. Documentation ~~~~~~~~~~~~~ - Fix leap year condition in `monthly means example `_. By `Mickaël Lalande `_. - Fix the documentation of :py:meth:`DataArray.resample` and :py:meth:`Dataset.resample`, explicitly stating that a datetime-like dimension is required. (:pull:`3400`) By `Justus Magin `_. - Update the :ref:`terminology` page to address multidimensional coordinates. (:pull:`3410`) By `Jon Thielen `_. - Fix the documentation of :py:meth:`Dataset.integrate` and :py:meth:`DataArray.integrate` and add an example to :py:meth:`Dataset.integrate`. (:pull:`3469`) By `Justus Magin `_. Internal Changes ~~~~~~~~~~~~~~~~ - Added integration tests against `pint `_. (:pull:`3238`, :pull:`3447`, :pull:`3493`, :pull:`3508`) by `Justus Magin `_. .. note:: At the moment of writing, these tests *as well as the ability to use pint in general* require `a highly experimental version of pint `_ (install with ``pip install git+https://github.com/andrewgsavage/pint.git@refs/pull/6/head)``. Even with it, interaction with non-numpy array libraries, e.g. dask or sparse, is broken. - Use Python 3.6 idioms throughout the codebase. (:pull:`3419`) By `Maximilian Roos `_ - Run basic CI tests on Python 3.8. (:pull:`3477`) By `Maximilian Roos `_ - Enable type checking on default sentinel values (:pull:`3472`) By `Maximilian Roos `_ - Add ``Variable._replace`` for simpler replacing of a subset of attributes (:pull:`3472`) By `Maximilian Roos `_ .. _whats-new.0.14.0: v0.14.0 (14 Oct 2019) --------------------- Breaking changes ~~~~~~~~~~~~~~~~ - This release introduces a rolling policy for minimum dependency versions: :ref:`mindeps_policy`. Several minimum versions have been increased: ============ ================== ==== Package Old New ============ ================== ==== Python 3.5.3 3.6 numpy 1.12 1.14 pandas 0.19.2 0.24 dask 0.16 (tested: 2.4) 1.2 bottleneck 1.1 (tested: 1.2) 1.2 matplotlib 1.5 (tested: 3.1) 3.1 ============ ================== ==== Obsolete patch versions (x.y.Z) are not tested anymore. The oldest supported versions of all optional dependencies are now covered by automated tests (before, only the very latest versions were tested). (:issue:`3222`, :issue:`3293`, :issue:`3340`, :issue:`3346`, :issue:`3358`). By `Guido Imperiale `_. - Dropped the ``drop=False`` optional parameter from :py:meth:`Variable.isel`. 
It was unused and doesn't make sense for a Variable. (:pull:`3375`). By `Guido Imperiale `_. - Remove internal usage of :py:class:`collections.OrderedDict`. After dropping support for Python <=3.5, most uses of ``OrderedDict`` in xarray were no longer necessary. We have removed the internal use of the ``OrderedDict`` in favor of Python's builtin ``dict`` object which is now ordered itself. This change will be most obvious when interacting with the ``attrs`` property on Dataset and DataArray objects. (:issue:`3380`, :pull:`3389`). By `Joe Hamman `_. New functions/methods ~~~~~~~~~~~~~~~~~~~~~ - Added :py:func:`~xarray.map_blocks`, modeled after :py:func:`dask.array.map_blocks`. Also added :py:meth:`Dataset.unify_chunks`, :py:meth:`DataArray.unify_chunks` and :py:meth:`testing.assert_chunks_equal`. (:pull:`3276`). By `Deepak Cherian `_ and `Guido Imperiale `_. Enhancements ~~~~~~~~~~~~ - ``core.groupby.GroupBy`` enhancements. By `Deepak Cherian `_. - Added a repr (:pull:`3344`). Example:: >>> da.groupby("time.season") DataArrayGroupBy, grouped over 'season' 4 groups with labels 'DJF', 'JJA', 'MAM', 'SON' - Added a ``GroupBy.dims`` property that mirrors the dimensions of each group (:issue:`3344`). - Speed up :py:meth:`Dataset.isel` up to 33% and :py:meth:`DataArray.isel` up to 25% for small arrays (:issue:`2799`, :pull:`3375`). By `Guido Imperiale `_. Bug fixes ~~~~~~~~~ - Reintroduce support for :mod:`weakref` (broken in v0.13.0). Support has been reinstated for :py:class:`~xarray.DataArray` and :py:class:`~xarray.Dataset` objects only. Internal xarray objects remain unaddressable by weakref in order to save memory (:issue:`3317`). By `Guido Imperiale `_. - Line plots with the ``x`` or ``y`` argument set to a 1D non-dimensional coord now plot the correct data for 2D DataArrays (:issue:`3334`). By `Tom Nicholas `_. - Make :py:func:`~xarray.concat` more robust when merging variables present in some datasets but not others (:issue:`508`). By `Deepak Cherian `_. - The default behaviour of reducing across all dimensions for :py:class:`~xarray.core.groupby.DataArrayGroupBy` objects has now been properly removed as was done for :py:class:`~xarray.core.groupby.DatasetGroupBy` in 0.13.0 (:issue:`3337`). Use ``xarray.ALL_DIMS`` if you need to replicate previous behaviour. Also raise nicer error message when no groups are created (:issue:`1764`). By `Deepak Cherian `_. - Fix error in concatenating unlabeled dimensions (:pull:`3362`). By `Deepak Cherian `_. - Warn if the ``dim`` kwarg is passed to rolling operations. This is redundant since a dimension is specified when the :py:class:`~computation.rolling.DatasetRolling` or :py:class:`~computation.rolling.DataArrayRolling` object is created. (:pull:`3362`). By `Deepak Cherian `_. Documentation ~~~~~~~~~~~~~ - Created a glossary of important xarray terms (:issue:`2410`, :pull:`3352`). By `Gregory Gundersen `_. - Created a "How do I..." section (:ref:`howdoi`) for solutions to common questions. (:pull:`3357`). By `Deepak Cherian `_. - Add examples for :py:meth:`Dataset.swap_dims` and :py:meth:`DataArray.swap_dims` (:pull:`3331`, :pull:`3331`). By `Justus Magin `_. - Add examples for :py:meth:`align`, :py:meth:`merge`, :py:meth:`combine_by_coords`, :py:meth:`full_like`, :py:meth:`zeros_like`, :py:meth:`ones_like`, :py:meth:`Dataset.pipe`, :py:meth:`Dataset.assign`, :py:meth:`Dataset.reindex`, :py:meth:`Dataset.fillna` (:pull:`3328`). By `Anderson Banihirwe `_. 
- Fixed documentation to clean up an unwanted file created in ``ipython`` example (:pull:`3353`). By `Gregory Gundersen `_. .. _whats-new.0.13.0: v0.13.0 (17 Sep 2019) --------------------- This release includes many exciting changes: wrapping of `NEP18 `_ compliant numpy-like arrays; new :py:meth:`~Dataset.plot.scatter` plotting method that can scatter two ``DataArrays`` in a ``Dataset`` against each other; support for converting pandas DataFrames to xarray objects that wrap ``pydata/sparse``; and more! Breaking changes ~~~~~~~~~~~~~~~~ - This release increases the minimum required Python version from 3.5.0 to 3.5.3 (:issue:`3089`). By `Guido Imperiale `_. - The ``isel_points`` and ``sel_points`` methods are removed, having been deprecated since v0.10.0. These are redundant with the ``isel`` / ``sel`` methods. See :ref:`vectorized_indexing` for the details By `Maximilian Roos `_ - The ``inplace`` kwarg for public methods now raises an error, having been deprecated since v0.11.0. By `Maximilian Roos `_ - :py:func:`~xarray.concat` now requires the ``dim`` argument. Its ``indexers``, ``mode`` and ``concat_over`` kwargs have now been removed. By `Deepak Cherian `_ - Passing a list of colors in ``cmap`` will now raise an error, having been deprecated since v0.6.1. - Most xarray objects now define ``__slots__``. This reduces overall RAM usage by ~22% (not counting the underlying numpy buffers); on CPython 3.7/x64, a trivial DataArray has gone down from 1.9kB to 1.5kB. Caveats: - Pickle streams produced by older versions of xarray can't be loaded using this release, and vice versa. - Any user code that was accessing the ``__dict__`` attribute of xarray objects will break. The best practice to attach custom metadata to xarray objects is to use the ``attrs`` dictionary. - Any user code that defines custom subclasses of xarray classes must now explicitly define ``__slots__`` itself. Subclasses that don't add any attributes must state so by defining ``__slots__ = ()`` right after the class header. Omitting ``__slots__`` will now cause a ``FutureWarning`` to be logged, and will raise an error in a later release. (:issue:`3250`) by `Guido Imperiale `_. - The default dimension for :py:meth:`Dataset.groupby`, :py:meth:`Dataset.resample`, :py:meth:`DataArray.groupby` and :py:meth:`DataArray.resample` reductions is now the grouping or resampling dimension. - :py:meth:`DataArray.to_dataset` requires ``name`` to be passed as a kwarg (previously ambiguous positional arguments were deprecated) - Reindexing with variables of a different dimension now raise an error (previously deprecated) - ``xarray.broadcast_array`` is removed (previously deprecated in favor of :py:func:`~xarray.broadcast`) - ``Variable.expand_dims`` is removed (previously deprecated in favor of :py:meth:`Variable.set_dims`) New functions/methods ~~~~~~~~~~~~~~~~~~~~~ - xarray can now wrap around any `NEP18 `_ compliant numpy-like library (important: read notes about ``NUMPY_EXPERIMENTAL_ARRAY_FUNCTION`` in the above link). Added explicit test coverage for `sparse `_. (:issue:`3117`, :issue:`3202`). This requires ``sparse>=0.8.0``. By `Nezar Abdennur `_ and `Guido Imperiale `_. - :py:meth:`~Dataset.from_dataframe` and :py:meth:`~DataArray.from_series` now support ``sparse=True`` for converting pandas objects into xarray objects wrapping sparse arrays. This is particularly useful with sparsely populated hierarchical indexes. (:issue:`3206`) By `Stephan Hoyer `_. 
- The xarray package is now discoverable by mypy (although typing hints coverage is not complete yet). mypy type checking is now enforced by CI. Libraries that depend on xarray and use mypy can now remove from their setup.cfg the lines:: [mypy-xarray] ignore_missing_imports = True (:issue:`2877`, :issue:`3088`, :issue:`3090`, :issue:`3112`, :issue:`3117`, :issue:`3207`) By `Guido Imperiale `_ and `Maximilian Roos `_. - Added :py:meth:`DataArray.broadcast_like` and :py:meth:`Dataset.broadcast_like`. By `Deepak Cherian `_ and `David Mertz `_. - Dataset plotting API for visualizing dependencies between two DataArrays! Currently only :py:meth:`Dataset.plot.scatter` is implemented. By `Yohai Bar Sinai `_ and `Deepak Cherian `_ - Added :py:meth:`DataArray.head`, :py:meth:`DataArray.tail` and :py:meth:`DataArray.thin`; as well as :py:meth:`Dataset.head`, :py:meth:`Dataset.tail` and :py:meth:`Dataset.thin` methods. (:issue:`319`) By `Gerardo Rivera `_. Enhancements ~~~~~~~~~~~~ - Multiple enhancements to :py:func:`~xarray.concat` and :py:func:`~xarray.open_mfdataset`. By `Deepak Cherian `_ - Added ``compat='override'``. When merging, this option picks the variable from the first dataset and skips all comparisons. - Added ``join='override'``. When aligning, this only checks that index sizes are equal among objects and skips checking indexes for equality. - :py:func:`~xarray.concat` and :py:func:`~xarray.open_mfdataset` now support the ``join`` kwarg. It is passed down to :py:func:`~xarray.align`. - :py:func:`~xarray.concat` now calls :py:func:`~xarray.merge` on variables that are not concatenated (i.e. variables without ``concat_dim`` when ``data_vars`` or ``coords`` are ``"minimal"``). :py:func:`~xarray.concat` passes its new ``compat`` kwarg down to :py:func:`~xarray.merge`. (:issue:`2064`) Users can avoid a common bottleneck when using :py:func:`~xarray.open_mfdataset` on a large number of files with variables that are known to be aligned and some of which need not be concatenated. Slow equality comparisons can now be avoided, for example:: data = xr.open_mfdataset(files, concat_dim='time', data_vars='minimal', coords='minimal', compat='override', join='override') - In :py:meth:`~xarray.Dataset.to_zarr`, passing ``mode`` is not mandatory if ``append_dim`` is set, as it will automatically be set to ``'a'`` internally. By `David Brochart `_. - Added the ability to initialize an empty or full DataArray with a single value. (:issue:`277`) By `Gerardo Rivera `_. - :py:func:`~xarray.Dataset.to_netcdf()` now supports the ``invalid_netcdf`` kwarg when used with ``engine="h5netcdf"``. It is passed to ``h5netcdf.File``. By `Ulrich Herter `_. - ``xarray.Dataset.drop`` now supports keyword arguments; dropping index labels by using both ``dim`` and ``labels`` or using a :py:class:`~core.coordinates.DataArrayCoordinates` object are deprecated (:issue:`2910`). By `Gregory Gundersen `_. - Added examples of :py:meth:`Dataset.set_index` and :py:meth:`DataArray.set_index`, as well as more specific error messages when the user passes invalid arguments (:issue:`3176`). By `Gregory Gundersen `_. - :py:meth:`Dataset.filter_by_attrs` now filters the coordinates as well as the variables. By `Spencer Jones `_. Bug fixes ~~~~~~~~~ - Improve "missing dimensions" error message for :py:func:`~xarray.apply_ufunc` (:issue:`2078`). By `Rick Russotto `_. - :py:meth:`~xarray.DataArray.assign_coords` now supports dictionary arguments (:issue:`3231`). By `Gregory Gundersen `_.
- Fix regression introduced in v0.12.2 where ``copy(deep=True)`` would convert unicode indices to dtype=object (:issue:`3094`). By `Guido Imperiale `_. - Improved error handling and documentation for ``.expand_dims()`` read-only view. - Fix tests for big-endian systems (:issue:`3125`). By `Graham Inggs `_. - XFAIL several tests which are expected to fail on ARM systems due to a ``datetime`` issue in NumPy (:issue:`2334`). By `Graham Inggs `_. - Fix KeyError that arises when using .sel method with float values different from coords float type (:issue:`3137`). By `Hasan Ahmad `_. - Fixed bug in ``combine_by_coords()`` causing a ``ValueError`` if the input had an unused dimension with coordinates which were not monotonic (:issue:`3150`). By `Tom Nicholas `_. - Fixed crash when applying ``distributed.Client.compute()`` to a DataArray (:issue:`3171`). By `Guido Imperiale `_. - Better error message when using groupby on an empty DataArray (:issue:`3037`). By `Hasan Ahmad `_. - Fix error that arises when using open_mfdataset on a series of netcdf files having differing values for a variable attribute of type list. (:issue:`3034`) By `Hasan Ahmad `_. - Prevent :py:meth:`~xarray.DataArray.argmax` and :py:meth:`~xarray.DataArray.argmin` from calling dask compute (:issue:`3237`). By `Ulrich Herter `_. - Plots in 2 dimensions (pcolormesh, contour) now allow to specify levels as numpy array (:issue:`3284`). By `Mathias Hauser `_. - Fixed bug in :meth:`DataArray.quantile` failing to keep attributes when ``keep_attrs`` was True (:issue:`3304`). By `David Huard `_. Documentation ~~~~~~~~~~~~~ - Created a `PR checklist `_ as a quick reference for tasks before creating a new PR or pushing new commits. By `Gregory Gundersen `_. - Fixed documentation to clean up unwanted files created in ``ipython`` examples (:issue:`3227`). By `Gregory Gundersen `_. .. _whats-new.0.12.3: v0.12.3 (10 July 2019) ---------------------- New functions/methods ~~~~~~~~~~~~~~~~~~~~~ - New methods :py:meth:`Dataset.to_stacked_array` and :py:meth:`DataArray.to_unstacked_dataset` for reshaping Datasets of variables with different dimensions (:issue:`1317`). This is useful for feeding data from xarray into machine learning models, as described in :ref:`reshape.stacking_different`. By `Noah Brenowitz `_. Enhancements ~~~~~~~~~~~~ - Support for renaming ``Dataset`` variables and dimensions independently with :py:meth:`~Dataset.rename_vars` and :py:meth:`~Dataset.rename_dims` (:issue:`3026`). By `Julia Kent `_. - Add ``scales``, ``offsets``, ``units`` and ``descriptions`` attributes to :py:class:`~xarray.DataArray` returned by :py:func:`~xarray.open_rasterio`. (:issue:`3013`) By `Erle Carrara `_. Bug fixes ~~~~~~~~~ - Resolved deprecation warnings from newer versions of matplotlib and dask. - Compatibility fixes for the upcoming pandas 0.25 and NumPy 1.17 releases. By `Stephan Hoyer `_. - Fix summaries for multiindex coordinates (:issue:`3079`). By `Jonas Hörsch `_. - Fix HDF5 error that could arise when reading multiple groups from a file at once (:issue:`2954`). By `Stephan Hoyer `_. .. _whats-new.0.12.2: v0.12.2 (29 June 2019) ---------------------- New functions/methods ~~~~~~~~~~~~~~~~~~~~~ - Two new functions, :py:func:`~xarray.combine_nested` and :py:func:`~xarray.combine_by_coords`, allow for combining datasets along any number of dimensions, instead of the one-dimensional list of datasets supported by :py:func:`~xarray.concat`. 
The new ``combine_nested`` will accept the datasets as a nested list-of-lists, and combine by applying a series of concat and merge operations. The new ``combine_by_coords`` instead uses the dimension coordinates of datasets to order them. :py:func:`~xarray.open_mfdataset` can use either ``combine_nested`` or ``combine_by_coords`` to combine datasets along multiple dimensions, by specifying the argument ``combine='nested'`` or ``combine='by_coords'``. The older function ``auto_combine`` has been deprecated, because its functionality has been subsumed by the new functions. To avoid FutureWarnings switch to using ``combine_nested`` or ``combine_by_coords``, (or set the ``combine`` argument in ``open_mfdataset``). (:issue:`2159`) By `Tom Nicholas `_. - :py:meth:`~xarray.DataArray.rolling_exp` and :py:meth:`~xarray.Dataset.rolling_exp` added, similar to pandas' ``pd.DataFrame.ewm`` method. Calling ``.mean`` on the resulting object will return an exponentially weighted moving average. By `Maximilian Roos `_. - New :py:func:`DataArray.str ` for string related manipulations, based on ``pandas.Series.str``. By `0x0L `_. - Added ``strftime`` method to ``.dt`` accessor, making it simpler to hand a datetime ``DataArray`` to other code expecting formatted dates and times. (:issue:`2090`). :py:meth:`~xarray.CFTimeIndex.strftime` is also now available on :py:class:`CFTimeIndex`. By `Alan Brammer `_ and `Ryan May `_. - ``GroupBy.quantile`` is now a method of ``GroupBy`` objects (:issue:`3018`). By `David Huard `_. - Argument and return types are added to most methods on ``DataArray`` and ``Dataset``, allowing static type checking both within xarray and external libraries. Type checking with `mypy `_ is enabled in CI (though not required yet). By `Guido Imperiale `_ and `Maximilian Roos `_. Enhancements to existing functionality ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Add ``keepdims`` argument for reduce operations (:issue:`2170`) By `Scott Wales `_. - Enable ``@`` operator for DataArray. This is equivalent to :py:meth:`DataArray.dot` By `Maximilian Roos `_. - Add ``fill_value`` argument for reindex, align, and merge operations to enable custom fill values. (:issue:`2876`) By `Zach Griffith `_. - :py:meth:`DataArray.transpose` now accepts a keyword argument ``transpose_coords`` which enables transposition of coordinates in the same way as :py:meth:`Dataset.transpose`. :py:meth:`DataArray.groupby` :py:meth:`DataArray.groupby_bins`, and :py:meth:`DataArray.resample` now accept a keyword argument ``restore_coord_dims`` which keeps the order of the dimensions of multi-dimensional coordinates intact (:issue:`1856`). By `Peter Hausamann `_. - Clean up Python 2 compatibility in code (:issue:`2950`) By `Guido Imperiale `_. - Better warning message when supplying invalid objects to ``xr.merge`` (:issue:`2948`). By `Mathias Hauser `_. - Add ``errors`` keyword argument to ``Dataset.drop`` and :py:meth:`Dataset.drop_dims` that allows ignoring errors if a passed label or dimension is not in the dataset (:issue:`2994`). By `Andrew Ross `_. IO related enhancements ~~~~~~~~~~~~~~~~~~~~~~~ - Implement :py:func:`~xarray.load_dataset` and :py:func:`~xarray.load_dataarray` as alternatives to :py:func:`~xarray.open_dataset` and :py:func:`~xarray.open_dataarray` to open, load into memory, and close files, returning the Dataset or DataArray. These functions are helpful for avoiding file-lock errors when trying to write to files opened using ``open_dataset()`` or ``open_dataarray()``. (:issue:`2887`) By `Dan Nowacki `_. 
- It is now possible to extend existing :ref:`io.zarr` datasets, by using ``mode='a'`` and the new ``append_dim`` argument in :py:meth:`~xarray.Dataset.to_zarr`. By `Jendrik Jördening `_, `David Brochart `_, `Ryan Abernathey `_ and `Shikhar Goenka `_. - ``xr.open_zarr`` now accepts manually specified chunks with the ``chunks=`` parameter. ``auto_chunk=True`` is equivalent to ``chunks='auto'`` for backwards compatibility. The ``overwrite_encoded_chunks`` parameter is added to remove the original zarr chunk encoding. By `Lily Wang `_. - netCDF chunksizes are now only dropped when original_shape is different, not when it isn't found. (:issue:`2207`) By `Karel van de Plassche `_. - Character arrays' character dimension name decoding and encoding handled by ``var.encoding['char_dim_name']`` (:issue:`2895`) By `James McCreight `_. - open_rasterio() now supports rasterio.vrt.WarpedVRT with custom transform, width and height (:issue:`2864`). By `Julien Michel `_. Bug fixes ~~~~~~~~~ - Rolling operations on xarray objects containing dask arrays could silently compute the incorrect result or use large amounts of memory (:issue:`2940`). By `Stephan Hoyer `_. - Don't set encoding attributes on bounds variables when writing to netCDF. (:issue:`2921`) By `Deepak Cherian `_. - NetCDF4 output: variables with unlimited dimensions must be chunked (not contiguous) on output. (:issue:`1849`) By `James McCreight `_. - indexing with an empty list creates an object with zero-length axis (:issue:`2882`) By `Mayeul d'Avezac `_. - Return correct count for scalar datetime64 arrays (:issue:`2770`) By `Dan Nowacki `_. - Fixed max, min exception when applied to a multiIndex (:issue:`2923`) By `Ian Castleden `_ - A deep copy deep-copies the coords (:issue:`1463`) By `Martin Pletcher `_. - Increased support for ``missing_value`` (:issue:`2871`) By `Deepak Cherian `_. - Removed usages of ``pytest.config``, which is deprecated (:issue:`2988`) By `Maximilian Roos `_. - Fixed performance issues with cftime installed (:issue:`3000`) By `0x0L `_. - Replace incorrect usages of ``message`` in pytest assertions with ``match`` (:issue:`3011`) By `Maximilian Roos `_. - Add explicit pytest markers, now required by pytest (:issue:`3032`). By `Maximilian Roos `_. - Test suite fixes for newer versions of pytest (:issue:`3011`, :issue:`3032`). By `Maximilian Roos `_ and `Stephan Hoyer `_. .. _whats-new.0.12.1: v0.12.1 (4 April 2019) ---------------------- Enhancements ~~~~~~~~~~~~ - Allow ``expand_dims`` method to support inserting/broadcasting dimensions with size > 1. (:issue:`2710`) By `Martin Pletcher `_. Bug fixes ~~~~~~~~~ - Dataset.copy(deep=True) now creates a deep copy of the attrs (:issue:`2835`). By `Andras Gefferth `_. - Fix incorrect ``indexes`` resulting from various ``Dataset`` operations (e.g., ``swap_dims``, ``isel``, ``reindex``, ``[]``) (:issue:`2842`, :issue:`2856`). By `Stephan Hoyer `_. .. _whats-new.0.12.0: v0.12.0 (15 March 2019) ----------------------- Highlights include: - Removed support for Python 2. This is the first version of xarray that is Python 3 only! - New :py:meth:`~xarray.DataArray.coarsen` and :py:meth:`~xarray.DataArray.integrate` methods. See :ref:`compute.coarsen` and :ref:`compute.using_coordinates` for details. - Many improvements to cftime support. See below for details. Deprecations ~~~~~~~~~~~~ - The ``compat`` argument to ``Dataset`` and the ``encoding`` argument to ``DataArray`` are deprecated and will be removed in a future release. (:issue:`1188`) By `Maximilian Roos `_. 
cftime related enhancements ~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Resampling of standard and non-standard calendars indexed by :py:class:`~xarray.CFTimeIndex` is now possible. (:issue:`2191`). By `Jwen Fai Low `_ and `Spencer Clark `_. - Taking the mean of arrays of :py:class:`cftime.datetime` objects, and by extension, use of :py:meth:`~xarray.DataArray.coarsen` with :py:class:`cftime.datetime` coordinates is now possible. By `Spencer Clark `_. - Internal plotting now supports ``cftime.datetime`` objects as time series. (:issue:`2164`) By `Julius Busecke `_ and `Spencer Clark `_. - :py:meth:`~xarray.cftime_range` now supports QuarterBegin and QuarterEnd offsets (:issue:`2663`). By `Jwen Fai Low `_ - :py:meth:`~xarray.open_dataset` now accepts a ``use_cftime`` argument, which can be used to require that ``cftime.datetime`` objects are always used, or never used when decoding dates encoded with a standard calendar. This can be used to ensure consistent date types are returned when using :py:meth:`~xarray.open_mfdataset` (:issue:`1263`) and/or to silence serialization warnings raised if dates from a standard calendar are found to be outside the :py:class:`pandas.Timestamp`-valid range (:issue:`2754`). By `Spencer Clark `_. - :py:meth:`pandas.Series.dropna` is now supported for a :py:class:`pandas.Series` indexed by a :py:class:`~xarray.CFTimeIndex` (:issue:`2688`). By `Spencer Clark `_. Other enhancements ~~~~~~~~~~~~~~~~~~ - Added ability to open netcdf4/hdf5 file-like objects with ``open_dataset``. Requires (h5netcdf>0.7 and h5py>2.9.0). (:issue:`2781`) By `Scott Henderson `_ - Add ``data=False`` option to ``to_dict()`` methods. (:issue:`2656`) By `Ryan Abernathey `_ - :py:meth:`DataArray.coarsen` and :py:meth:`Dataset.coarsen` are newly added. See :ref:`compute.coarsen` for details. (:issue:`2525`) By `Keisuke Fujii `_. - Upsampling an array via interpolation with resample is now dask-compatible, as long as the array is not chunked along the resampling dimension. By `Spencer Clark `_. - :py:func:`xarray.testing.assert_equal` and :py:func:`xarray.testing.assert_identical` now provide a more detailed report showing what exactly differs between the two objects (dimensions / coordinates / variables / attributes) (:issue:`1507`). By `Benoit Bovy `_. - Add ``tolerance`` option to ``resample()`` methods ``bfill``, ``pad``, ``nearest``. (:issue:`2695`) By `Hauke Schulz `_. - :py:meth:`DataArray.integrate` and :py:meth:`Dataset.integrate` are newly added. See :ref:`compute.using_coordinates` for the detail. (:issue:`1332`) By `Keisuke Fujii `_. - Added :py:meth:`~xarray.Dataset.drop_dims` (:issue:`1949`). By `Kevin Squire `_. Bug fixes ~~~~~~~~~ - Silenced warnings that appear when using pandas 0.24. By `Stephan Hoyer `_ - Interpolating via resample now internally specifies ``bounds_error=False`` as an argument to ``scipy.interpolate.interp1d``, allowing for interpolation from higher frequencies to lower frequencies. Datapoints outside the bounds of the original time coordinate are now filled with NaN (:issue:`2197`). By `Spencer Clark `_. - Line plots with the ``x`` argument set to a non-dimensional coord now plot the correct data for 1D DataArrays. (:issue:`2725`). By `Tom Nicholas `_. - Subtracting a scalar ``cftime.datetime`` object from a :py:class:`CFTimeIndex` now results in a :py:class:`pandas.TimedeltaIndex` instead of raising a ``TypeError`` (:issue:`2671`). By `Spencer Clark `_. 
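A small sketch of the last bug fix above, assuming the optional ``cftime`` package is installed:

.. code:: python

    import cftime
    import xarray as xr

    index = xr.CFTimeIndex([cftime.DatetimeNoLeap(2000, 1, day) for day in range(1, 4)])

    # previously a TypeError; now a pandas.TimedeltaIndex
    index - cftime.DatetimeNoLeap(2000, 1, 1)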
- backend_kwargs are no longer ignored when using open_dataset with pynio engine
  (:issue:`2380`) By `Jonathan Joyce `_.
- Fix ``open_rasterio`` creating a WKT CRS instead of PROJ.4 with
  ``rasterio`` 1.0.14+ (:issue:`2715`).
  By `David Hoese `_.
- Masking data arrays with :py:meth:`xarray.DataArray.where` now returns an
  array with the name of the original masked array (:issue:`2748` and :issue:`2457`).
  By `Yohai Bar-Sinai `_.
- Fixed error when trying to reduce a DataArray using a function which does not
  require an axis argument. (:issue:`2768`)
  By `Tom Nicholas `_.
- Concatenating a sequence of :py:class:`~xarray.DataArray` with varying names
  sets the name of the output array to ``None``, instead of the name of the
  first input array. If the names are all the same, the output keeps that name
  (previously the name of the first DataArray in the list was always used).
  (:issue:`2775`).
  By `Tom Nicholas `_.
- Per the `CF conventions section on calendars `_,
  specifying ``'standard'`` as the calendar type in
  :py:meth:`~xarray.cftime_range` now correctly refers to the ``'gregorian'``
  calendar instead of the ``'proleptic_gregorian'`` calendar (:issue:`2761`).

.. _whats-new.0.11.3:

v0.11.3 (26 January 2019)
-------------------------

Bug fixes
~~~~~~~~~

- Saving files with times encoded with reference dates with timezones
  (e.g. '2000-01-01T00:00:00-05:00') no longer raises an error (:issue:`2649`).
  By `Spencer Clark `_.
- Fixed performance regression with ``open_mfdataset`` (:issue:`2662`).
  By `Tom Nicholas `_.
- Fixed supplying an explicit dimension in the ``concat_dim`` argument to
  ``open_mfdataset`` (:issue:`2647`). By `Ben Root `_.

.. _whats-new.0.11.2:

v0.11.2 (2 January 2019)
------------------------

Removes inadvertently introduced setup dependency on pytest-runner
(:issue:`2641`). Otherwise, this release is exactly equivalent to 0.11.1.

.. warning::

  This is the last xarray release that will support Python 2.7. Future releases
  will be Python 3 only, but older versions of xarray will always be available
  for Python 2.7 users. For more details, see:

  - :issue:`Xarray Github issue discussing dropping Python 2 <1829>`
  - `Python 3 Statement `__
  - `Tips on porting to Python 3 `__

.. _whats-new.0.11.1:

v0.11.1 (29 December 2018)
--------------------------

This minor release includes a number of enhancements and bug fixes, and two
(slightly) breaking changes.

Breaking changes
~~~~~~~~~~~~~~~~

- Minimum rasterio version increased from 0.36 to 1.0 (for ``open_rasterio``)
- Time bounds variables are now also decoded according to CF conventions
  (:issue:`2565`). The previous behavior was to decode them only if they had
  specific time attributes, now these attributes are copied automatically from
  the corresponding time coordinate. This might break downstream code that was
  relying on these variables not being decoded.
  By `Fabien Maussion `_.

Enhancements
~~~~~~~~~~~~

- Ability to read and write consolidated metadata in zarr stores (:issue:`2558`).
  By `Ryan Abernathey `_.
- :py:class:`CFTimeIndex` uses slicing for string indexing when possible (like
  :py:class:`pandas.DatetimeIndex`), which avoids unnecessary copies.
  By `Stephan Hoyer `_
- Enable passing ``rasterio.io.DatasetReader`` or ``rasterio.vrt.WarpedVRT`` to
  ``open_rasterio`` instead of a file path string. Allows for in-memory
  reprojection, see (:issue:`2588`).
  By `Scott Henderson `_.
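A rough sketch of the in-memory reprojection workflow enabled by the last entry above; ``example.tif`` and the target CRS are placeholders, and rasterio must be installed:

.. code:: python

    import rasterio
    from rasterio.vrt import WarpedVRT

    import xarray as xr

    with rasterio.open("example.tif") as src:
        # reproject on the fly without writing an intermediate file
        with WarpedVRT(src, crs="EPSG:4326") as vrt:
            da = xr.open_rasterio(vrt)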
- Like :py:class:`pandas.DatetimeIndex`, :py:class:`CFTimeIndex` now supports "dayofyear" and "dayofweek" accessors (:issue:`2597`). Note this requires a version of cftime greater than 1.0.2. By `Spencer Clark `_. - The option ``'warn_for_unclosed_files'`` (False by default) has been added to allow users to enable a warning when files opened by xarray are deallocated but were not explicitly closed. This is mostly useful for debugging; we recommend enabling it in your test suites if you use xarray for IO. By `Stephan Hoyer `_ - Support Dask ``HighLevelGraphs`` by `Matthew Rocklin `_. - :py:meth:`DataArray.resample` and :py:meth:`Dataset.resample` now supports the ``loffset`` kwarg just like pandas. By `Deepak Cherian `_ - Datasets are now guaranteed to have a ``'source'`` encoding, so the source file name is always stored (:issue:`2550`). By `Tom Nicholas `_. - The ``apply`` methods for ``DatasetGroupBy``, ``DataArrayGroupBy``, ``DatasetResample`` and ``DataArrayResample`` now support passing positional arguments to the applied function as a tuple to the ``args`` argument. By `Matti Eskelinen `_. - 0d slices of ndarrays are now obtained directly through indexing, rather than extracting and wrapping a scalar, avoiding unnecessary copying. By `Daniel Wennberg `_. - Added support for ``fill_value`` with :py:meth:`~xarray.DataArray.shift` and :py:meth:`~xarray.Dataset.shift` By `Maximilian Roos `_ Bug fixes ~~~~~~~~~ - Ensure files are automatically closed, if possible, when no longer referenced by a Python variable (:issue:`2560`). By `Stephan Hoyer `_ - Fixed possible race conditions when reading/writing to disk in parallel (:issue:`2595`). By `Stephan Hoyer `_ - Fix h5netcdf saving scalars with filters or chunks (:issue:`2563`). By `Martin Raspaud `_. - Fix parsing of ``_Unsigned`` attribute set by OPENDAP servers. (:issue:`2583`). By `Deepak Cherian `_ - Fix failure in time encoding when exporting to netCDF with versions of pandas less than 0.21.1 (:issue:`2623`). By `Spencer Clark `_. - Fix MultiIndex selection to update label and level (:issue:`2619`). By `Keisuke Fujii `_. .. _whats-new.0.11.0: v0.11.0 (7 November 2018) ------------------------- Breaking changes ~~~~~~~~~~~~~~~~ - Finished deprecations (changed behavior with this release): - ``Dataset.T`` has been removed as a shortcut for :py:meth:`Dataset.transpose`. Call :py:meth:`Dataset.transpose` directly instead. - Iterating over a ``Dataset`` now includes only data variables, not coordinates. Similarly, calling ``len`` and ``bool`` on a ``Dataset`` now includes only data variables. - ``DataArray.__contains__`` (used by Python's ``in`` operator) now checks array data, not coordinates. - The old resample syntax from before xarray 0.10, e.g., ``data.resample('1D', dim='time', how='mean')``, is no longer supported will raise an error in most cases. You need to use the new resample syntax instead, e.g., ``data.resample(time='1D').mean()`` or ``data.resample({'time': '1D'}).mean()``. - New deprecations (behavior will be changed in xarray 0.12): - Reduction of :py:meth:`DataArray.groupby` and :py:meth:`DataArray.resample` without dimension argument will change in the next release. Now we warn a FutureWarning. By `Keisuke Fujii `_. - The ``inplace`` kwarg of a number of ``DataArray`` and ``Dataset`` methods is being deprecated and will be removed in the next release. By `Deepak Cherian `_. 
- Refactored storage backends: - Xarray's storage backends now automatically open and close files when necessary, rather than requiring opening a file with ``autoclose=True``. A global least-recently-used cache is used to store open files; the default limit of 128 open files should suffice in most cases, but can be adjusted if necessary with ``xarray.set_options(file_cache_maxsize=...)``. The ``autoclose`` argument to ``open_dataset`` and related functions has been deprecated and is now a no-op. This change, along with an internal refactor of xarray's storage backends, should significantly improve performance when reading and writing netCDF files with Dask, especially when working with many files or using Dask Distributed. By `Stephan Hoyer `_ - Support for non-standard calendars used in climate science: - Xarray will now always use :py:class:`cftime.datetime` objects, rather than by default trying to coerce them into ``np.datetime64[ns]`` objects. A :py:class:`~xarray.CFTimeIndex` will be used for indexing along time coordinates in these cases. - A new method :py:meth:`~xarray.CFTimeIndex.to_datetimeindex` has been added to aid in converting from a :py:class:`~xarray.CFTimeIndex` to a :py:class:`pandas.DatetimeIndex` for the remaining use-cases where using a :py:class:`~xarray.CFTimeIndex` is still a limitation (e.g. for resample or plotting). - Setting the ``enable_cftimeindex`` option is now a no-op and emits a ``FutureWarning``. Enhancements ~~~~~~~~~~~~ - :py:meth:`xarray.DataArray.plot.line` can now accept multidimensional coordinate variables as input. ``hue`` must be a dimension name in this case. (:issue:`2407`) By `Deepak Cherian `_. - Added support for Python 3.7. (:issue:`2271`). By `Joe Hamman `_. - Added support for plotting data with ``pandas.Interval`` coordinates, such as those created by :py:meth:`~xarray.DataArray.groupby_bins` By `Maximilian Maahn `_. - Added :py:meth:`~xarray.CFTimeIndex.shift` for shifting the values of a CFTimeIndex by a specified frequency. (:issue:`2244`). By `Spencer Clark `_. - Added support for using ``cftime.datetime`` coordinates with :py:meth:`~xarray.DataArray.differentiate`, :py:meth:`~xarray.Dataset.differentiate`, :py:meth:`~xarray.DataArray.interp`, and :py:meth:`~xarray.Dataset.interp`. By `Spencer Clark `_ - There is now a global option to either always keep or always discard dataset and dataarray attrs upon operations. The option is set with ``xarray.set_options(keep_attrs=True)``, and the default is to use the old behaviour. By `Tom Nicholas `_. - Added a new backend for the GRIB file format based on ECMWF *cfgrib* python driver and *ecCodes* C-library. (:issue:`2475`) By `Alessandro Amici `_, sponsored by `ECMWF `_. - Resample now supports a dictionary mapping from dimension to frequency as its first argument, e.g., ``data.resample({'time': '1D'}).mean()``. This is consistent with other xarray functions that accept either dictionaries or keyword arguments. By `Stephan Hoyer `_. - The preferred way to access tutorial data is now to load it lazily with :py:meth:`xarray.tutorial.open_dataset`. :py:meth:`xarray.tutorial.load_dataset` calls ``Dataset.load()`` prior to returning (and is now deprecated). This was changed in order to facilitate using tutorial datasets with dask. By `Joe Hamman `_. - ``DataArray`` can now use ``xr.set_option(keep_attrs=True)`` and retain attributes in binary operations, such as (``+, -, * ,/``). Default behaviour is unchanged (*Attributes will be dismissed*). 
By `Michael Blaschek `_ Bug fixes ~~~~~~~~~ - ``FacetGrid`` now properly uses the ``cbar_kwargs`` keyword argument. (:issue:`1504`, :issue:`1717`) By `Deepak Cherian `_. - Addition and subtraction operators used with a CFTimeIndex now preserve the index's type. (:issue:`2244`). By `Spencer Clark `_. - We now properly handle arrays of ``datetime.datetime`` and ``datetime.timedelta`` provided as coordinates. (:issue:`2512`) By `Deepak Cherian `_. - ``xarray.DataArray.roll`` correctly handles multidimensional arrays. (:issue:`2445`) By `Keisuke Fujii `_. - ``xarray.plot()`` now properly accepts a ``norm`` argument and does not override the norm's ``vmin`` and ``vmax``. (:issue:`2381`) By `Deepak Cherian `_. - ``xarray.DataArray.std()`` now correctly accepts ``ddof`` keyword argument. (:issue:`2240`) By `Keisuke Fujii `_. - Restore matplotlib's default of plotting dashed negative contours when a single color is passed to ``DataArray.contour()`` e.g. ``colors='k'``. By `Deepak Cherian `_. - Fix a bug that caused some indexing operations on arrays opened with ``open_rasterio`` to error (:issue:`2454`). By `Stephan Hoyer `_. - Subtracting one CFTimeIndex from another now returns a ``pandas.TimedeltaIndex``, analogous to the behavior for DatetimeIndexes (:issue:`2484`). By `Spencer Clark `_. - Adding a TimedeltaIndex to, or subtracting a TimedeltaIndex from a CFTimeIndex is now allowed (:issue:`2484`). By `Spencer Clark `_. - Avoid use of Dask's deprecated ``get=`` parameter in tests by `Matthew Rocklin `_. - An ``OverflowError`` is now accurately raised and caught during the encoding process if a reference date is used that is so distant that the dates must be encoded using cftime rather than NumPy (:issue:`2272`). By `Spencer Clark `_. - Chunked datasets can now roundtrip to Zarr storage continually with ``to_zarr`` and ``open_zarr`` (:issue:`2300`). By `Lily Wang `_. .. _whats-new.0.10.9: v0.10.9 (21 September 2018) --------------------------- This minor release contains a number of backwards compatible enhancements. Announcements of note: - Xarray is now a NumFOCUS fiscally sponsored project! Read `the announcement `_ for more details. - We have a new :doc:`roadmap` that outlines our future development plans. - ``Dataset.apply`` now properly documents the way ``func`` is called. By `Matti Eskelinen `_. Enhancements ~~~~~~~~~~~~ - :py:meth:`~xarray.DataArray.differentiate` and :py:meth:`~xarray.Dataset.differentiate` are newly added. (:issue:`1332`) By `Keisuke Fujii `_. - Default colormap for sequential and divergent data can now be set via :py:func:`~xarray.set_options()` (:issue:`2394`) By `Julius Busecke `_. - min_count option is newly supported in :py:meth:`~xarray.DataArray.sum`, :py:meth:`~xarray.DataArray.prod` and :py:meth:`~xarray.Dataset.sum`, and :py:meth:`~xarray.Dataset.prod`. (:issue:`2230`) By `Keisuke Fujii `_. - :py:func:`~plot.plot()` now accepts the kwargs ``xscale, yscale, xlim, ylim, xticks, yticks`` just like pandas. Also ``xincrease=False, yincrease=False`` now use matplotlib's axis inverting methods instead of setting limits. By `Deepak Cherian `_. (:issue:`2224`) - DataArray coordinates and Dataset coordinates and data variables are now displayed as ``a b ... y z`` rather than ``a b c d ...``. (:issue:`1186`) By `Seth P `_. - A new CFTimeIndex-enabled :py:func:`cftime_range` function for use in generating dates from standard or non-standard calendars. By `Spencer Clark `_. 
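For example, a minimal use of the new :py:func:`cftime_range` function described above, with a non-standard calendar (requires the ``cftime`` package):

.. code:: python

    import xarray as xr

    # the noleap calendar has no 2000-02-29, so the range jumps straight to March
    times = xr.cftime_range(start="2000-02-27", periods=4, freq="D", calendar="noleap")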
- When interpolating over a ``datetime64`` axis, you can now provide a datetime string instead of a ``datetime64`` object. E.g. ``da.interp(time='1991-02-01')`` (:issue:`2284`) By `Deepak Cherian `_. - A clear error message is now displayed if a ``set`` or ``dict`` is passed in place of an array (:issue:`2331`) By `Maximilian Roos `_. - Applying ``unstack`` to a large DataArray or Dataset is now much faster if the MultiIndex has not been modified after stacking the indices. (:issue:`1560`) By `Maximilian Maahn `_. - You can now control whether or not to offset the coordinates when using the ``roll`` method and the current behavior, coordinates rolled by default, raises a deprecation warning unless explicitly setting the keyword argument. (:issue:`1875`) By `Andrew Huang `_. - You can now call ``unstack`` without arguments to unstack every MultiIndex in a DataArray or Dataset. By `Julia Signell `_. - Added the ability to pass a data kwarg to ``copy`` to create a new object with the same metadata as the original object but using new values. By `Julia Signell `_. Bug fixes ~~~~~~~~~ - ``xarray.plot.imshow()`` correctly uses the ``origin`` argument. (:issue:`2379`) By `Deepak Cherian `_. - Fixed ``DataArray.to_iris()`` failure while creating ``DimCoord`` by falling back to creating ``AuxCoord``. Fixed dependency on ``var_name`` attribute being set. (:issue:`2201`) By `Thomas Voigt `_. - Fixed a bug in ``zarr`` backend which prevented use with datasets with invalid chunk size encoding after reading from an existing store (:issue:`2278`). By `Joe Hamman `_. - Tests can be run in parallel with pytest-xdist By `Tony Tung `_. - Follow up the renamings in dask; from dask.ghost to dask.overlap By `Keisuke Fujii `_. - Now raises a ValueError when there is a conflict between dimension names and level names of MultiIndex. (:issue:`2299`) By `Keisuke Fujii `_. - Follow up the renamings in dask; from dask.ghost to dask.overlap By `Keisuke Fujii `_. - Now :py:func:`~xarray.apply_ufunc` raises a ValueError when the size of ``input_core_dims`` is inconsistent with the number of arguments. (:issue:`2341`) By `Keisuke Fujii `_. - Fixed ``Dataset.filter_by_attrs()`` behavior not matching ``netCDF4.Dataset.get_variables_by_attributes()``. When more than one ``key=value`` is passed into ``Dataset.filter_by_attrs()`` it will now return a Dataset with variables which pass all the filters. (:issue:`2315`) By `Andrew Barna `_. .. _whats-new.0.10.8: v0.10.8 (18 July 2018) ---------------------- Breaking changes ~~~~~~~~~~~~~~~~ - Xarray no longer supports python 3.4. Additionally, the minimum supported versions of the following dependencies has been updated and/or clarified: - pandas: 0.18 -> 0.19 - NumPy: 1.11 -> 1.12 - Dask: 0.9 -> 0.16 - Matplotlib: unspecified -> 1.5 (:issue:`2204`). By `Joe Hamman `_. Enhancements ~~~~~~~~~~~~ - :py:meth:`~xarray.DataArray.interp_like` and :py:meth:`~xarray.Dataset.interp_like` methods are newly added. (:issue:`2218`) By `Keisuke Fujii `_. - Added support for curvilinear and unstructured generic grids to :py:meth:`~xarray.DataArray.to_cdms2` and :py:meth:`~xarray.DataArray.from_cdms2` (:issue:`2262`). By `Stephane Raynaud `_. Bug fixes ~~~~~~~~~ - Fixed a bug in ``zarr`` backend which prevented use with datasets with incomplete chunks in multiple dimensions (:issue:`2225`). By `Joe Hamman `_. - Fixed a bug in :py:meth:`~Dataset.to_netcdf` which prevented writing datasets when the arrays had different chunk sizes (:issue:`2254`). By `Mike Neish `_. 
- Fixed masking during the conversion to cdms2 objects by :py:meth:`~xarray.DataArray.to_cdms2` (:issue:`2262`). By `Stephane Raynaud `_. - Fixed a bug in 2D plots which incorrectly raised an error when 2D coordinates weren't monotonic (:issue:`2250`). By `Fabien Maussion `_. - Fixed warning raised in :py:meth:`~Dataset.to_netcdf` due to deprecation of ``effective_get`` in dask (:issue:`2238`). By `Joe Hamman `_. .. _whats-new.0.10.7: v0.10.7 (7 June 2018) --------------------- Enhancements ~~~~~~~~~~~~ - Plot labels now make use of metadata that follow CF conventions (:issue:`2135`). By `Deepak Cherian `_ and `Ryan Abernathey `_. - Line plots now support facetting with ``row`` and ``col`` arguments (:issue:`2107`). By `Yohai Bar Sinai `_. - :py:meth:`~xarray.DataArray.interp` and :py:meth:`~xarray.Dataset.interp` methods are newly added. See :ref:`interp` for the detail. (:issue:`2079`) By `Keisuke Fujii `_. Bug fixes ~~~~~~~~~ - Fixed a bug in ``rasterio`` backend which prevented use with ``distributed``. The ``rasterio`` backend now returns pickleable objects (:issue:`2021`). By `Joe Hamman `_. .. _whats-new.0.10.6: v0.10.6 (31 May 2018) --------------------- The minor release includes a number of bug-fixes and backwards compatible enhancements. Enhancements ~~~~~~~~~~~~ - New PseudoNetCDF backend for many Atmospheric data formats including GEOS-Chem, CAMx, NOAA arlpacked bit and many others. See ``io.PseudoNetCDF`` for more details. By `Barron Henderson `_. - The :py:class:`Dataset` constructor now aligns :py:class:`DataArray` arguments in ``data_vars`` to indexes set explicitly in ``coords``, where previously an error would be raised. (:issue:`674`) By `Maximilian Roos `_. - :py:meth:`~DataArray.sel`, :py:meth:`~DataArray.isel` & :py:meth:`~DataArray.reindex`, (and their :py:class:`Dataset` counterparts) now support supplying a ``dict`` as a first argument, as an alternative to the existing approach of supplying ``kwargs``. This allows for more robust behavior of dimension names which conflict with other keyword names, or are not strings. By `Maximilian Roos `_. - :py:meth:`~DataArray.rename` now supports supplying ``**kwargs``, as an alternative to the existing approach of supplying a ``dict`` as the first argument. By `Maximilian Roos `_. - :py:meth:`~DataArray.cumsum` and :py:meth:`~DataArray.cumprod` now support aggregation over multiple dimensions at the same time. This is the default behavior when dimensions are not specified (previously this raised an error). By `Stephan Hoyer `_ - :py:meth:`DataArray.dot` and :py:func:`dot` are partly supported with older dask<0.17.4. (related to :issue:`2203`) By `Keisuke Fujii `_. - Xarray now uses `Versioneer `__ to manage its version strings. (:issue:`1300`). By `Joe Hamman `_. Bug fixes ~~~~~~~~~ - Fixed a regression in 0.10.4, where explicitly specifying ``dtype='S1'`` or ``dtype=str`` in ``encoding`` with ``to_netcdf()`` raised an error (:issue:`2149`). `Stephan Hoyer `_ - :py:func:`apply_ufunc` now directly validates output variables (:issue:`1931`). By `Stephan Hoyer `_. - Fixed a bug where ``to_netcdf(..., unlimited_dims='bar')`` yielded NetCDF files with spurious 0-length dimensions (i.e. ``b``, ``a``, and ``r``) (:issue:`2134`). By `Joe Hamman `_. - Removed spurious warnings with ``Dataset.update(Dataset)`` (:issue:`2161`) and ``array.equals(array)`` when ``array`` contains ``NaT`` (:issue:`2162`). By `Stephan Hoyer `_. 
- Aggregations with :py:meth:`Dataset.reduce` (including ``mean``, ``sum``, etc) no longer drop unrelated coordinates (:issue:`1470`). Also fixed a bug where non-scalar data-variables that did not include the aggregation dimension were improperly skipped. By `Stephan Hoyer `_ - Fix :meth:`~DataArray.stack` with non-unique coordinates on pandas 0.23 (:issue:`2160`). By `Stephan Hoyer `_ - Selecting data indexed by a length-1 ``CFTimeIndex`` with a slice of strings now behaves as it does when using a length-1 ``DatetimeIndex`` (i.e. it no longer falsely returns an empty array when the slice includes the value in the index) (:issue:`2165`). By `Spencer Clark `_. - Fix ``DataArray.groupby().reduce()`` mutating coordinates on the input array when grouping over dimension coordinates with duplicated entries (:issue:`2153`). By `Stephan Hoyer `_ - Fix ``Dataset.to_netcdf()`` cannot create group with ``engine="h5netcdf"`` (:issue:`2177`). By `Stephan Hoyer `_ .. _whats-new.0.10.4: v0.10.4 (16 May 2018) ---------------------- The minor release includes a number of bug-fixes and backwards compatible enhancements. A highlight is ``CFTimeIndex``, which offers support for non-standard calendars used in climate modeling. Documentation ~~~~~~~~~~~~~ - New FAQ entry, :ref:`ecosystem`. By `Deepak Cherian `_. - :ref:`assigning_values` now includes examples on how to select and assign values to a :py:class:`~xarray.DataArray` with ``.loc``. By `Chiara Lepore `_. Enhancements ~~~~~~~~~~~~ - Add an option for using a ``CFTimeIndex`` for indexing times with non-standard calendars and/or outside the Timestamp-valid range; this index enables a subset of the functionality of a standard ``pandas.DatetimeIndex``. See :ref:`CFTimeIndex` for full details. (:issue:`789`, :issue:`1084`, :issue:`1252`) By `Spencer Clark `_ with help from `Stephan Hoyer `_. - Allow for serialization of ``cftime.datetime`` objects (:issue:`789`, :issue:`1084`, :issue:`2008`, :issue:`1252`) using the standalone ``cftime`` library. By `Spencer Clark `_. - Support writing lists of strings as netCDF attributes (:issue:`2044`). By `Dan Nowacki `_. - :py:meth:`~xarray.Dataset.to_netcdf` with ``engine='h5netcdf'`` now accepts h5py encoding settings ``compression`` and ``compression_opts``, along with the NetCDF4-Python style settings ``gzip=True`` and ``complevel``. This allows using any compression plugin installed in hdf5, e.g. LZF (:issue:`1536`). By `Guido Imperiale `_. - :py:meth:`~xarray.dot` on dask-backed data will now call :func:`dask.array.einsum`. This greatly boosts speed and allows chunking on the core dims. The function now requires dask >= 0.17.3 to work on dask-backed data (:issue:`2074`). By `Guido Imperiale `_. - ``plot.line()`` learned new kwargs: ``xincrease``, ``yincrease`` that change the direction of the respective axes. By `Deepak Cherian `_. - Added the ``parallel`` option to :py:func:`open_mfdataset`. This option uses ``dask.delayed`` to parallelize the open and preprocessing steps within ``open_mfdataset``. This is expected to provide performance improvements when opening many files, particularly when used in conjunction with dask's multiprocessing or distributed schedulers (:issue:`1981`). By `Joe Hamman `_. - New ``compute`` option in :py:meth:`~xarray.Dataset.to_netcdf`, :py:meth:`~xarray.Dataset.to_zarr`, and :py:func:`~xarray.save_mfdataset` to allow for the lazy computation of netCDF and zarr stores. This feature is currently only supported by the netCDF4 and zarr backends. (:issue:`1784`). By `Joe Hamman `_. 
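A minimal sketch of the deferred-write pattern that the new ``compute`` option enables (``out.nc`` is a placeholder path and dask must be installed):

.. code:: python

    import xarray as xr

    ds = xr.Dataset({"a": ("x", [1, 2, 3])})

    delayed = ds.to_netcdf("out.nc", compute=False)  # returns a dask.delayed object
    delayed.compute()  # the file is only written at this point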
Bug fixes ~~~~~~~~~ - ``ValueError`` is raised when coordinates with the wrong size are assigned to a :py:class:`DataArray`. (:issue:`2112`) By `Keisuke Fujii `_. - Fixed a bug in :py:meth:`~xarray.DataArray.rolling` with bottleneck. Also, fixed a bug in rolling an integer dask array. (:issue:`2113`) By `Keisuke Fujii `_. - Fixed a bug where ``keep_attrs=True`` flag was neglected if :py:func:`apply_ufunc` was used with :py:class:`Variable`. (:issue:`2114`) By `Keisuke Fujii `_. - When assigning a :py:class:`DataArray` to :py:class:`Dataset`, any conflicted non-dimensional coordinates of the DataArray are now dropped. (:issue:`2068`) By `Keisuke Fujii `_. - Better error handling in ``open_mfdataset`` (:issue:`2077`). By `Stephan Hoyer `_. - ``plot.line()`` does not call ``autofmt_xdate()`` anymore. Instead it changes the rotation and horizontal alignment of labels without removing the x-axes of any other subplots in the figure (if any). By `Deepak Cherian `_. - Colorbar limits are now determined by excluding ±Infs too. By `Deepak Cherian `_. By `Joe Hamman `_. - Fixed ``to_iris`` to maintain lazy dask array after conversion (:issue:`2046`). By `Alex Hilson `_ and `Stephan Hoyer `_. .. _whats-new.0.10.3: v0.10.3 (13 April 2018) ------------------------ The minor release includes a number of bug-fixes and backwards compatible enhancements. Enhancements ~~~~~~~~~~~~ - :py:meth:`~xarray.DataArray.isin` and :py:meth:`~xarray.Dataset.isin` methods, which test each value in the array for whether it is contained in the supplied list, returning a bool array. See :ref:`selecting values with isin` for full details. Similar to the ``np.isin`` function. By `Maximilian Roos `_. - Some speed improvement to construct :py:class:`~xarray.computation.rolling.DataArrayRolling` object (:issue:`1993`) By `Keisuke Fujii `_. - Handle variables with different values for ``missing_value`` and ``_FillValue`` by masking values for both attributes; previously this resulted in a ``ValueError``. (:issue:`2016`) By `Ryan May `_. Bug fixes ~~~~~~~~~ - Fixed ``decode_cf`` function to operate lazily on dask arrays (:issue:`1372`). By `Ryan Abernathey `_. - Fixed labeled indexing with slice bounds given by xarray objects with datetime64 or timedelta64 dtypes (:issue:`1240`). By `Stephan Hoyer `_. - Attempting to convert an xarray.Dataset into a numpy array now raises an informative error message. By `Stephan Hoyer `_. - Fixed a bug in decode_cf_datetime where ``int32`` arrays weren't parsed correctly (:issue:`2002`). By `Fabien Maussion `_. - When calling ``xr.auto_combine()`` or ``xr.open_mfdataset()`` with a ``concat_dim``, the resulting dataset will have that one-element dimension (it was silently dropped, previously) (:issue:`1988`). By `Ben Root `_. .. _whats-new.0.10.2: v0.10.2 (13 March 2018) ----------------------- The minor release includes a number of bug-fixes and enhancements, along with one possibly **backwards incompatible change**. Backwards incompatible changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - The addition of ``__array_ufunc__`` for xarray objects (see below) means that NumPy `ufunc methods`_ (e.g., ``np.add.reduce``) that previously worked on ``xarray.DataArray`` objects by converting them into NumPy arrays will now raise ``NotImplementedError`` instead. In all cases, the work-around is simple: convert your objects explicitly into NumPy arrays before calling the ufunc (e.g., with ``.values``). .. 
_ufunc methods: https://numpy.org/doc/stable/reference/ufuncs.html#methods Enhancements ~~~~~~~~~~~~ - Added :py:func:`~xarray.dot`, equivalent to :py:func:`numpy.einsum`. Also, :py:func:`~xarray.DataArray.dot` now supports ``dims`` option, which specifies the dimensions to sum over. (:issue:`1951`) By `Keisuke Fujii `_. - Support for writing xarray datasets to netCDF files (netcdf4 backend only) when using the `dask.distributed `_ scheduler (:issue:`1464`). By `Joe Hamman `_. - Support lazy vectorized-indexing. After this change, flexible indexing such as orthogonal/vectorized indexing, becomes possible for all the backend arrays. Also, lazy ``transpose`` is now also supported. (:issue:`1897`) By `Keisuke Fujii `_. - Implemented NumPy's ``__array_ufunc__`` protocol for all xarray objects (:issue:`1617`). This enables using NumPy ufuncs directly on ``xarray.Dataset`` objects with recent versions of NumPy (v1.13 and newer): .. code:: python ds = xr.Dataset({"a": 1}) np.sin(ds) This obliviates the need for the ``xarray.ufuncs`` module, which will be deprecated in the future when xarray drops support for older versions of NumPy. By `Stephan Hoyer `_. - Improve :py:func:`~xarray.DataArray.rolling` logic. :py:func:`~xarray.computation.rolling.DataArrayRolling` object now supports :py:func:`~xarray.computation.rolling.DataArrayRolling.construct` method that returns a view of the DataArray / Dataset object with the rolling-window dimension added to the last axis. This enables more flexible operation, such as strided rolling, windowed rolling, ND-rolling, short-time FFT and convolution. (:issue:`1831`, :issue:`1142`, :issue:`819`) By `Keisuke Fujii `_. - :py:func:`~plot.line()` learned to make plots with data on x-axis if so specified. (:issue:`575`) By `Deepak Cherian `_. Bug fixes ~~~~~~~~~ - Raise an informative error message when using ``apply_ufunc`` with numpy v1.11 (:issue:`1956`). By `Stephan Hoyer `_. - Fix the precision drop after indexing datetime64 arrays (:issue:`1932`). By `Keisuke Fujii `_. - Silenced irrelevant warnings issued by ``open_rasterio`` (:issue:`1964`). By `Stephan Hoyer `_. - Fix kwarg ``colors`` clashing with auto-inferred ``cmap`` (:issue:`1461`) By `Deepak Cherian `_. - Fix :py:func:`~xarray.plot.imshow` error when passed an RGB array with size one in a spatial dimension. By `Zac Hatfield-Dodds `_. .. _whats-new.0.10.1: v0.10.1 (25 February 2018) -------------------------- The minor release includes a number of bug-fixes and backwards compatible enhancements. Documentation ~~~~~~~~~~~~~ - Added a new guide on :ref:`contributing` (:issue:`640`) By `Joe Hamman `_. - Added apply_ufunc example to :ref:`/examples/weather-data.ipynb#Toy-weather-data` (:issue:`1844`). By `Liam Brannigan `_. - New entry ``Why don’t aggregations return Python scalars?`` in the :ref:`faq` (:issue:`1726`). By `0x0L `_. Enhancements ~~~~~~~~~~~~ **New functions and methods**: - Added :py:meth:`DataArray.to_iris` and :py:meth:`DataArray.from_iris` for converting data arrays to and from Iris_ Cubes with the same data and coordinates (:issue:`621` and :issue:`37`). By `Neil Parley `_ and `Duncan Watson-Parris `_. - Experimental support for using `Zarr`_ as storage layer for xarray (:issue:`1223`). By `Ryan Abernathey `_ and `Joe Hamman `_. - New :py:meth:`~xarray.DataArray.rank` on arrays and datasets. Requires bottleneck (:issue:`1731`). By `0x0L `_. - ``.dt`` accessor can now ceil, floor and round timestamps to specified frequency. By `Deepak Cherian `_. 
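For instance, a small sketch of the new rounding methods on the ``.dt`` accessor (the timestamps are made up):

.. code:: python

    import pandas as pd
    import xarray as xr

    times = xr.DataArray(
        pd.date_range("2000-01-01 01:23:45", periods=3, freq="17min"), dims="time"
    )

    times.dt.floor("H")  # .dt.ceil and .dt.round work the same way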
**Plotting enhancements**:

- :func:`xarray.plot.imshow` now handles RGB and RGBA images.
  Saturation can be adjusted with ``vmin`` and ``vmax``, or with ``robust=True``.
  By `Zac Hatfield-Dodds `_.
- :py:func:`~plot.contourf()` learned to contour 2D variables that have both a
  1D coordinate (e.g. time) and a 2D coordinate (e.g. depth as a function of
  time) (:issue:`1737`).
  By `Deepak Cherian `_.
- :py:func:`~plot.plot()` rotates x-axis ticks if x-axis is time.
  By `Deepak Cherian `_.
- :py:func:`~plot.line()` can draw multiple lines if provided with a 2D
  variable.
  By `Deepak Cherian `_.

**Other enhancements**:

- Reduce methods such as :py:func:`DataArray.sum()` now handle object-type
  arrays.

  .. code:: python

      da = xr.DataArray(np.array([True, False, np.nan], dtype=object), dims="x")
      da.sum()

  (:issue:`1866`)
  By `Keisuke Fujii `_.
- Reduce methods such as :py:func:`DataArray.sum()` now accept ``dtype``
  arguments. (:issue:`1838`)
  By `Keisuke Fujii `_.
- Added nodatavals attribute to DataArray when using
  :py:func:`~xarray.open_rasterio`. (:issue:`1736`).
  By `Alan Snow `_.
- Use ``pandas.Grouper`` class in xarray resample methods rather than the
  deprecated ``pandas.TimeGrouper`` class (:issue:`1766`).
  By `Joe Hamman `_.
- Experimental support for parsing ENVI metadata to coordinates and attributes
  in :py:func:`xarray.open_rasterio`.
  By `Matti Eskelinen `_.
- Reduce memory usage when decoding a variable with a scale_factor, by
  converting 8-bit and 16-bit integers to float32 instead of float64
  (:pull:`1840`), and keeping float16 and float32 as float32 (:issue:`1842`).
  Correspondingly, encoded variables may also be saved with a smaller dtype.
  By `Zac Hatfield-Dodds `_.
- Speed of reindexing/alignment with dask array is orders of magnitude faster
  when inserting missing values (:issue:`1847`).
  By `Stephan Hoyer `_.
- Fix ``axis`` keyword ignored when applying ``np.squeeze`` to ``DataArray``
  (:issue:`1487`).
  By `Florian Pinault `_.
- ``netcdf4-python`` has moved its time handling in the ``netcdftime`` module
  to a standalone package (`netcdftime`_). As such, xarray now considers
  `netcdftime`_ an optional dependency. One benefit of this change is that it
  allows for encoding/decoding of datetimes with non-standard calendars without
  the ``netcdf4-python`` dependency (:issue:`1084`).
  By `Joe Hamman `_.

.. _Zarr: http://zarr.readthedocs.io/
.. _Iris: http://scitools-iris.readthedocs.io
.. _netcdftime: https://unidata.github.io/netcdftime

Bug fixes
~~~~~~~~~

- Rolling aggregation with ``center=True`` option now gives the same result
  with pandas including the last element (:issue:`1046`).
  By `Keisuke Fujii `_.
- Support indexing with a 0d-np.ndarray (:issue:`1921`).
  By `Keisuke Fujii `_.
- Added warning in api.py of a netCDF4 bug that occurs when the filepath has 88
  characters (:issue:`1745`).
  By `Liam Brannigan `_.
- Fixed encoding of multi-dimensional coordinates in
  :py:meth:`~Dataset.to_netcdf` (:issue:`1763`).
  By `Mike Neish `_.
- Fixed chunking with non-file-based rasterio datasets (:issue:`1816`) and
  refactored rasterio test suite.
  By `Ryan Abernathey `_
- Bug fix in open_dataset(engine='pydap') (:issue:`1775`)
  By `Keisuke Fujii `_.
- Bug fix in vectorized assignment (:issue:`1743`, :issue:`1744`).
Now item assignment to :py:meth:`DataArray.__setitem__` checks coordinates of target, destination and keys. If there are any conflict among these coordinates, ``IndexError`` will be raised. By `Keisuke Fujii `_. - Properly point ``DataArray.__dask_scheduler__`` to ``dask.threaded.get``. By `Matthew Rocklin `_. - Bug fixes in :py:meth:`DataArray.plot.imshow`: all-NaN arrays and arrays with size one in some dimension can now be plotted, which is good for exploring satellite imagery (:issue:`1780`). By `Zac Hatfield-Dodds `_. - Fixed ``UnboundLocalError`` when opening netCDF file (:issue:`1781`). By `Stephan Hoyer `_. - The ``variables``, ``attrs``, and ``dimensions`` properties have been deprecated as part of a bug fix addressing an issue where backends were unintentionally loading the datastores data and attributes repeatedly during writes (:issue:`1798`). By `Joe Hamman `_. - Compatibility fixes to plotting module for NumPy 1.14 and pandas 0.22 (:issue:`1813`). By `Joe Hamman `_. - Bug fix in encoding coordinates with ``{'_FillValue': None}`` in netCDF metadata (:issue:`1865`). By `Chris Roth `_. - Fix indexing with lists for arrays loaded from netCDF files with ``engine='h5netcdf`` (:issue:`1864`). By `Stephan Hoyer `_. - Corrected a bug with incorrect coordinates for non-georeferenced geotiff files (:issue:`1686`). Internally, we now use the rasterio coordinate transform tool instead of doing the computations ourselves. A ``parse_coordinates`` kwarg has been added to :py:func:`~open_rasterio` (set to ``True`` per default). By `Fabien Maussion `_. - The colors of discrete colormaps are now the same regardless if ``seaborn`` is installed or not (:issue:`1896`). By `Fabien Maussion `_. - Fixed dtype promotion rules in :py:func:`where` and :py:func:`concat` to match pandas (:issue:`1847`). A combination of strings/numbers or unicode/bytes now promote to object dtype, instead of strings or unicode. By `Stephan Hoyer `_. - Fixed bug where :py:meth:`~xarray.DataArray.isnull` was loading data stored as dask arrays (:issue:`1937`). By `Joe Hamman `_. .. _whats-new.0.10.0: v0.10.0 (20 November 2017) -------------------------- This is a major release that includes bug fixes, new features and a few backwards incompatible changes. Highlights include: - Indexing now supports broadcasting over dimensions, similar to NumPy's vectorized indexing (but better!). - :py:meth:`~DataArray.resample` has a new groupby-like API like pandas. - :py:func:`~xarray.apply_ufunc` facilitates wrapping and parallelizing functions written for NumPy arrays. - Performance improvements, particularly for dask and :py:func:`open_mfdataset`. Breaking changes ~~~~~~~~~~~~~~~~ - xarray now supports a form of vectorized indexing with broadcasting, where the result of indexing depends on dimensions of indexers, e.g., ``array.sel(x=ind)`` with ``ind.dims == ('y',)``. Alignment between coordinates on indexed and indexing objects is also now enforced. Due to these changes, existing uses of xarray objects to index other xarray objects will break in some cases. The new indexing API is much more powerful, supporting outer, diagonal and vectorized indexing in a single interface. The ``isel_points`` and ``sel_points`` methods are deprecated, since they are now redundant with the ``isel`` / ``sel`` methods. See :ref:`vectorized_indexing` for the details (:issue:`1444`, :issue:`1436`). By `Keisuke Fujii `_ and `Stephan Hoyer `_. 
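A brief sketch of the new behaviour, using ``isel`` with a ``DataArray`` indexer; the result takes its dimensions from the indexer:

.. code:: python

    import xarray as xr

    arr = xr.DataArray([[1, 2, 3], [4, 5, 6]], dims=("x", "y"))
    ind = xr.DataArray([0, 1, 0], dims="z")

    arr.isel(x=ind)  # the result has dims ('z', 'y')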
- A new resampling interface to match pandas' groupby-like API was added to :py:meth:`Dataset.resample` and :py:meth:`DataArray.resample` (:issue:`1272`). :ref:`Timeseries resampling ` is fully supported for data with arbitrary dimensions as is both downsampling and upsampling (including linear, quadratic, cubic, and spline interpolation). Old syntax: .. jupyter-input:: ds.resample("24H", dim="time", how="max") New syntax: .. jupyter-input:: ds.resample(time="24H").max() Note that both versions are currently supported, but using the old syntax will produce a warning encouraging users to adopt the new syntax. By `Daniel Rothenberg `_. - Calling ``repr()`` or printing xarray objects at the command line or in a Jupyter Notebook will not longer automatically compute dask variables or load data on arrays lazily loaded from disk (:issue:`1522`). By `Guido Imperiale `_. - Supplying ``coords`` as a dictionary to the ``DataArray`` constructor without also supplying an explicit ``dims`` argument is no longer supported. This behavior was deprecated in version 0.9 but will now raise an error (:issue:`727`). - Several existing features have been deprecated and will change to new behavior in xarray v0.11. If you use any of them with xarray v0.10, you should see a ``FutureWarning`` that describes how to update your code: - ``Dataset.T`` has been deprecated an alias for ``Dataset.transpose()`` (:issue:`1232`). In the next major version of xarray, it will provide short- cut lookup for variables or attributes with name ``'T'``. - ``DataArray.__contains__`` (e.g., ``key in data_array``) currently checks for membership in ``DataArray.coords``. In the next major version of xarray, it will check membership in the array data found in ``DataArray.values`` instead (:issue:`1267`). - Direct iteration over and counting a ``Dataset`` (e.g., ``[k for k in ds]``, ``ds.keys()``, ``ds.values()``, ``len(ds)`` and ``if ds``) currently includes all variables, both data and coordinates. For improved usability and consistency with pandas, in the next major version of xarray these will change to only include data variables (:issue:`884`). Use ``ds.variables``, ``ds.data_vars`` or ``ds.coords`` as alternatives. - Changes to minimum versions of dependencies: - Old numpy < 1.11 and pandas < 0.18 are no longer supported (:issue:`1512`). By `Keisuke Fujii `_. - The minimum supported version bottleneck has increased to 1.1 (:issue:`1279`). By `Joe Hamman `_. Enhancements ~~~~~~~~~~~~ **New functions/methods** - New helper function :py:func:`~xarray.apply_ufunc` for wrapping functions written to work on NumPy arrays to support labels on xarray objects (:issue:`770`). ``apply_ufunc`` also support automatic parallelization for many functions with dask. See :ref:`compute.wrapping-custom` and :ref:`dask.automatic-parallelization` for details. By `Stephan Hoyer `_. - Added new method :py:meth:`Dataset.to_dask_dataframe`, convert a dataset into a dask dataframe. This allows lazy loading of data from a dataset containing dask arrays (:issue:`1462`). By `James Munroe `_. - New function :py:func:`~xarray.where` for conditionally switching between values in xarray objects, like :py:func:`numpy.where`: .. jupyter-input:: import xarray as xr arr = xr.DataArray([[1, 2, 3], [4, 5, 6]], dims=("x", "y")) xr.where(arr % 2, "even", "odd") .. jupyter-output:: array([['even', 'odd', 'even'], ['odd', 'even', 'odd']], dtype='`_. - Added :py:func:`~xarray.show_versions` function to aid in debugging (:issue:`1485`). By `Joe Hamman `_. 
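To complement the entries above, a rough sketch of the new :py:meth:`Dataset.to_dask_dataframe` method (dask must be installed; the sample data is made up):

.. code:: python

    import xarray as xr

    ds = xr.Dataset({"a": ("x", [1, 2, 3]), "b": ("x", [4.0, 5.0, 6.0])}).chunk({"x": 2})

    df = ds.to_dask_dataframe()  # a lazy dask DataFrame
    df.compute()  # materializes a pandas DataFrame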
**Performance improvements** - :py:func:`~xarray.concat` was computing variables that aren't in memory (e.g. dask-based) multiple times; :py:func:`~xarray.open_mfdataset` was loading them multiple times from disk. Now, both functions will instead load them at most once and, if they do, store them in memory in the concatenated array/dataset (:issue:`1521`). By `Guido Imperiale `_. - Speed-up (x 100) of ``xarray.conventions.decode_cf_datetime``. By `Christian Chwala `_. **IO related improvements** - Unicode strings (``str`` on Python 3) are now round-tripped successfully even when written as character arrays (e.g., as netCDF3 files or when using ``engine='scipy'``) (:issue:`1638`). This is controlled by the ``_Encoding`` attribute convention, which is also understood directly by the netCDF4-Python interface. See :ref:`io.string-encoding` for full details. By `Stephan Hoyer `_. - Support for ``data_vars`` and ``coords`` keywords from :py:func:`~xarray.concat` added to :py:func:`~xarray.open_mfdataset` (:issue:`438`). Using these keyword arguments can significantly reduce memory usage and increase speed. By `Oleksandr Huziy `_. - Support for :py:class:`pathlib.Path` objects added to :py:func:`~xarray.open_dataset`, :py:func:`~xarray.open_mfdataset`, ``xarray.to_netcdf``, and :py:func:`~xarray.save_mfdataset` (:issue:`799`): .. jupyter-input:: from pathlib import Path # In Python 2, use pathlib2! data_dir = Path("data/") one_file = data_dir / "dta_for_month_01.nc" xr.open_dataset(one_file) By `Willi Rath `_. - You can now explicitly disable any default ``_FillValue`` (``NaN`` for floating point values) by passing the encoding ``{'_FillValue': None}`` (:issue:`1598`). By `Stephan Hoyer `_. - More attributes available in :py:attr:`~xarray.Dataset.attrs` dictionary when raster files are opened with :py:func:`~xarray.open_rasterio`. By `Greg Brener `_. - Support for NetCDF files using an ``_Unsigned`` attribute to indicate that a a signed integer data type should be interpreted as unsigned bytes (:issue:`1444`). By `Eric Bruning `_. - Support using an existing, opened netCDF4 ``Dataset`` with :py:class:`~xarray.backends.NetCDF4DataStore`. This permits creating an :py:class:`~xarray.Dataset` from a netCDF4 ``Dataset`` that has been opened using other means (:issue:`1459`). By `Ryan May `_. - Changed :py:class:`~xarray.backends.PydapDataStore` to take a Pydap dataset. This permits opening Opendap datasets that require authentication, by instantiating a Pydap dataset with a session object. Also added :py:meth:`xarray.backends.PydapDataStore.open` which takes a url and session object (:issue:`1068`). By `Philip Graae `_. - Support reading and writing unlimited dimensions with h5netcdf (:issue:`1636`). By `Joe Hamman `_. **Other improvements** - Added ``_ipython_key_completions_`` to xarray objects, to enable autocompletion for dictionary-like access in IPython, e.g., ``ds['tem`` + tab -> ``ds['temperature']`` (:issue:`1628`). By `Keisuke Fujii `_. - Support passing keyword arguments to ``load``, ``compute``, and ``persist`` methods. Any keyword arguments supplied to these methods are passed on to the corresponding dask function (:issue:`1523`). By `Joe Hamman `_. - Encoding attributes are now preserved when xarray objects are concatenated. The encoding is copied from the first object (:issue:`1297`). By `Joe Hamman `_ and `Gerrit Holl `_. - Support applying rolling window operations using bottleneck's moving window functions on data stored as dask arrays (:issue:`1279`). By `Joe Hamman `_. 
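A hedged sketch of the rolling-on-dask support noted in the last entry (dask is required, bottleneck is optional, and the data here are random placeholders):

.. code:: python

    import numpy as np
    import xarray as xr

    da = xr.DataArray(np.random.rand(100), dims="time").chunk({"time": 25})

    da.rolling(time=7).mean()  # stays lazy; nothing is computed until requested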
- Experimental support for the Dask collection interface (:issue:`1674`). By `Matthew Rocklin `_. Bug fixes ~~~~~~~~~ - Suppress ``RuntimeWarning`` issued by ``numpy`` for "invalid value comparisons" (e.g. ``NaN``). Xarray now behaves similarly to pandas in its treatment of binary and unary operations on objects with NaNs (:issue:`1657`). By `Joe Hamman `_. - Unsigned int support for reduce methods with ``skipna=True`` (:issue:`1562`). By `Keisuke Fujii `_. - Fixes to ensure xarray works properly with pandas 0.21: - Fix :py:meth:`~xarray.DataArray.isnull` method (:issue:`1549`). - :py:meth:`~xarray.DataArray.to_series` and :py:meth:`~xarray.Dataset.to_dataframe` should not return a ``pandas.MultiIndex`` for 1D data (:issue:`1548`). - Fix plotting with datetime64 axis labels (:issue:`1661`). By `Stephan Hoyer `_. - :py:func:`~xarray.open_rasterio` method now shifts the rasterio coordinates so that they are centered in each pixel (:issue:`1468`). By `Greg Brener `_. - :py:meth:`~xarray.Dataset.rename` method now doesn't throw errors if some ``Variable`` is renamed to the same name as another ``Variable`` as long as that other ``Variable`` is also renamed (:issue:`1477`). This method now does throw when two ``Variables`` would end up with the same name after the rename (since one of them would get overwritten in this case). By `Prakhar Goel `_. - Fix :py:func:`xarray.testing.assert_allclose` to actually use ``atol`` and ``rtol`` arguments when called on ``DataArray`` objects (:issue:`1488`). By `Stephan Hoyer `_. - xarray ``quantile`` methods now properly raise a ``TypeError`` when applied to objects with data stored as ``dask`` arrays (:issue:`1529`). By `Joe Hamman `_. - Fix positional indexing to allow the use of unsigned integers (:issue:`1405`). By `Joe Hamman `_ and `Gerrit Holl `_. - Creating a :py:class:`Dataset` now raises ``MergeError`` if a coordinate shares a name with a dimension but is comprised of arbitrary dimensions (:issue:`1120`). By `Joe Hamman `_. - :py:func:`~xarray.open_rasterio` method now skips rasterio's ``crs`` attribute if its value is ``None`` (:issue:`1520`). By `Leevi Annala `_. - Fix :py:func:`xarray.DataArray.to_netcdf` to return bytes when no path is provided (:issue:`1410`). By `Joe Hamman `_. - Fix :py:func:`xarray.save_mfdataset` to properly raise an informative error when objects other than ``Dataset`` are provided (:issue:`1555`). By `Joe Hamman `_. - :py:func:`xarray.Dataset.copy` would not preserve the encoding property (:issue:`1586`). By `Guido Imperiale `_. - :py:func:`xarray.concat` would eagerly load dask variables into memory if the first argument was a numpy variable (:issue:`1588`). By `Guido Imperiale `_. - Fix bug in :py:meth:`~xarray.Dataset.to_netcdf` when writing in append mode (:issue:`1215`). By `Joe Hamman `_. - Fix ``netCDF4`` backend to properly roundtrip the ``shuffle`` encoding option (:issue:`1606`). By `Joe Hamman `_. - Fix bug when using ``pytest`` class decorators to skipping certain unittests. The previous behavior unintentionally causing additional tests to be skipped (:issue:`1531`). By `Joe Hamman `_. - Fix pynio backend for upcoming release of pynio with Python 3 support (:issue:`1611`). By `Ben Hillman `_. - Fix ``seaborn`` import warning for Seaborn versions 0.8 and newer when the ``apionly`` module was deprecated. (:issue:`1633`). By `Joe Hamman `_. - Fix COMPAT: MultiIndex checking is fragile (:issue:`1833`). By `Florian Pinault `_. - Fix ``rasterio`` backend for Rasterio versions 1.0alpha10 and newer. (:issue:`1641`). 
By `Chris Holden `_. Bug fixes after rc1 ~~~~~~~~~~~~~~~~~~~ - Suppress warning in IPython autocompletion, related to the deprecation of ``.T`` attributes (:issue:`1675`). By `Keisuke Fujii `_. - Fix a bug in lazily-indexing netCDF array. (:issue:`1688`) By `Keisuke Fujii `_. - (Internal bug) MemoryCachedArray now supports the orthogonal indexing. Also made some internal cleanups around array wrappers (:issue:`1429`). By `Keisuke Fujii `_. - (Internal bug) MemoryCachedArray now always wraps ``np.ndarray`` by ``NumpyIndexingAdapter``. (:issue:`1694`) By `Keisuke Fujii `_. - Fix importing xarray when running Python with ``-OO`` (:issue:`1706`). By `Stephan Hoyer `_. - Saving a netCDF file with a coordinates with a spaces in its names now raises an appropriate warning (:issue:`1689`). By `Stephan Hoyer `_. - Fix two bugs that were preventing dask arrays from being specified as coordinates in the DataArray constructor (:issue:`1684`). By `Joe Hamman `_. - Fixed ``apply_ufunc`` with ``dask='parallelized'`` for scalar arguments (:issue:`1697`). By `Stephan Hoyer `_. - Fix "Chunksize cannot exceed dimension size" error when writing netCDF4 files loaded from disk (:issue:`1225`). By `Stephan Hoyer `_. - Validate the shape of coordinates with names matching dimensions in the DataArray constructor (:issue:`1709`). By `Stephan Hoyer `_. - Raise ``NotImplementedError`` when attempting to save a MultiIndex to a netCDF file (:issue:`1547`). By `Stephan Hoyer `_. - Remove netCDF dependency from rasterio backend tests. By `Matti Eskelinen `_ Bug fixes after rc2 ~~~~~~~~~~~~~~~~~~~ - Fixed unexpected behavior in ``Dataset.set_index()`` and ``DataArray.set_index()`` introduced by pandas 0.21.0. Setting a new index with a single variable resulted in 1-level ``pandas.MultiIndex`` instead of a simple ``pandas.Index`` (:issue:`1722`). By `Benoit Bovy `_. - Fixed unexpected memory loading of backend arrays after ``print``. (:issue:`1720`). By `Keisuke Fujii `_. .. _whats-new.0.9.6: v0.9.6 (8 June 2017) -------------------- This release includes a number of backwards compatible enhancements and bug fixes. Enhancements ~~~~~~~~~~~~ - New :py:meth:`~xarray.Dataset.sortby` method to ``Dataset`` and ``DataArray`` that enable sorting along dimensions (:issue:`967`). See :ref:`the docs ` for examples. By `Chun-Wei Yuan `_ and `Kyle Heuton `_. - Add ``.dt`` accessor to DataArrays for computing datetime-like properties for the values they contain, similar to ``pandas.Series`` (:issue:`358`). By `Daniel Rothenberg `_. - Renamed internal dask arrays created by ``open_dataset`` to match new dask conventions (:issue:`1343`). By `Ryan Abernathey `_. - :py:meth:`~xarray.as_variable` is now part of the public API (:issue:`1303`). By `Benoit Bovy `_. - :py:func:`~xarray.align` now supports ``join='exact'``, which raises an error instead of aligning when indexes to be aligned are not equal. By `Stephan Hoyer `_. - New function :py:func:`~xarray.open_rasterio` for opening raster files with the `rasterio `_ library. See :ref:`the docs ` for details. By `Joe Hamman `_, `Nic Wayand `_ and `Fabien Maussion `_ Bug fixes ~~~~~~~~~ - Fix error from repeated indexing of datasets loaded from disk (:issue:`1374`). By `Stephan Hoyer `_. - Fix a bug where ``.isel_points`` wrongly assigns unselected coordinate to ``data_vars``. By `Keisuke Fujii `_. - Tutorial datasets are now checked against a reference MD5 sum to confirm successful download (:issue:`1392`). By `Matthew Gidden `_. 
- ``DataArray.chunk()`` now accepts dask-specific kwargs like ``Dataset.chunk()`` does. By `Fabien Maussion `_.
- Support for ``engine='pydap'`` with recent releases of Pydap (3.2.2+), including on Python 3 (:issue:`1174`).
Documentation ~~~~~~~~~~~~~
- A new `gallery `_ makes it possible to add interactive examples to the documentation. By `Fabien Maussion `_.
Testing ~~~~~~~
- Fix test suite failure caused by changes to the ``pandas.cut`` function (:issue:`1386`). By `Ryan Abernathey `_.
- Enhanced the test suite with an ``@network`` decorator, which is controlled via the ``--run-network-tests`` command line argument to ``py.test`` (:issue:`1393`). By `Matthew Gidden `_.
.. _whats-new.0.9.5: v0.9.5 (17 April, 2017) ----------------------- Remove an inadvertently introduced print statement.
.. _whats-new.0.9.3: v0.9.3 (16 April, 2017) ----------------------- This minor release includes bug fixes and backwards compatible enhancements.
Enhancements ~~~~~~~~~~~~
- New :py:meth:`~xarray.DataArray.persist` method for Datasets and DataArrays to enable persisting data in distributed memory when using Dask (:issue:`1344`). By `Matthew Rocklin `_.
- New :py:meth:`~xarray.DataArray.expand_dims` method for ``DataArray`` and ``Dataset`` (:issue:`1326`). By `Keisuke Fujii `_.
Bug fixes ~~~~~~~~~
- Fix ``.where()`` with ``drop=True`` when arguments do not have indexes (:issue:`1350`). This bug, introduced in v0.9, resulted in xarray producing incorrect results in some cases. By `Stephan Hoyer `_.
- Fixed writing to file-like objects with :py:meth:`~xarray.Dataset.to_netcdf` (:issue:`1320`). By `Stephan Hoyer `_.
- Fixed explicitly setting ``engine='scipy'`` with ``to_netcdf`` when not providing a path (:issue:`1321`). By `Stephan Hoyer `_.
- Fixed ``open_dataarray`` not properly passing its parameters on to ``open_dataset`` (:issue:`1359`). By `Stephan Hoyer `_.
- Ensure the test suite works when run from an installed version of xarray (:issue:`1336`). Use ``@pytest.mark.slow`` instead of a custom flag to mark slow tests. By `Stephan Hoyer `_.
.. _whats-new.0.9.2: v0.9.2 (2 April 2017) --------------------- This minor release includes bug fixes and backwards compatible enhancements.
Enhancements ~~~~~~~~~~~~
- ``.rolling()`` on ``Dataset`` is now supported (:issue:`859`). By `Keisuke Fujii `_.
- When bottleneck version 1.1 or later is installed, use bottleneck for rolling ``var``, ``argmin``, ``argmax``, and ``rank`` computations. Also, rolling median now accepts a ``min_periods`` argument (:issue:`1276`). By `Joe Hamman `_.
- When ``.plot()`` is called on a 2D DataArray and only one dimension is specified with ``x=`` or ``y=``, the other dimension is now guessed (:issue:`1291`). By `Vincent Noel `_.
- Added new method :py:meth:`~Dataset.assign_attrs` to ``DataArray`` and ``Dataset``, a chained-method compatible implementation of the ``dict.update`` method on attrs (:issue:`1281`). By `Henry S. Harrison `_.
- Added new ``autoclose=True`` argument to :py:func:`~xarray.open_mfdataset` to explicitly close opened files when not in use, to prevent an ``OSError`` related to too many open files (:issue:`1198`). Note that the default is ``autoclose=False``, which is consistent with previous xarray behavior. By `Phillip J. Wolfram `_.
- The ``repr()`` of ``Dataset`` and ``DataArray`` attributes uses a similar format to coordinates and variables, with vertically aligned entries truncated to fit on a single line (:issue:`1319`).
Hopefully this will stop people writing ``data.attrs = {}`` and discarding metadata in notebooks for the sake of cleaner output. The full metadata is still available as ``data.attrs``. By `Zac Hatfield-Dodds `_.
- Enhanced the test suite with ``@slow`` and ``@flaky`` decorators, which are controlled via the ``--run-flaky`` and ``--skip-slow`` command line arguments to ``py.test`` (:issue:`1336`). By `Stephan Hoyer `_ and `Phillip J. Wolfram `_.
- New aggregation on rolling objects :py:meth:`~computation.rolling.DataArrayRolling.count`, which provides a rolling count of valid values (:issue:`1138`).
Bug fixes ~~~~~~~~~
- Rolling operations now preserve the original dimension order (:issue:`1125`). By `Keisuke Fujii `_.
- Fixed ``sel`` with ``method='nearest'`` on Python 2.7 and 64-bit Windows (:issue:`1140`). By `Stephan Hoyer `_.
- Fixed ``where`` with ``drop=True`` for empty masks (:issue:`1341`). By `Stephan Hoyer `_ and `Phillip J. Wolfram `_.
.. _whats-new.0.9.1: v0.9.1 (30 January 2017) ------------------------ Renamed the "Unindexed dimensions" section in the ``Dataset`` and ``DataArray`` repr (added in v0.9.0) to "Dimensions without coordinates" (:issue:`1199`).
.. _whats-new.0.9.0: v0.9.0 (25 January 2017) ------------------------ This major release includes five months' worth of enhancements and bug fixes from 24 contributors, including some significant changes that are not fully backwards compatible. Highlights include:
- Coordinates are now *optional* in the xarray data model, even for dimensions.
- Changes to caching, lazy loading and pickling to improve xarray's experience for parallel computing.
- Improvements for accessing and manipulating ``pandas.MultiIndex`` levels.
- Many new methods and functions, including :py:meth:`~DataArray.quantile`, :py:meth:`~DataArray.cumsum`, :py:meth:`~DataArray.cumprod`, :py:attr:`~DataArray.combine_first`, :py:meth:`~DataArray.set_index`, :py:meth:`~DataArray.reset_index`, :py:meth:`~DataArray.reorder_levels`, :py:func:`~xarray.full_like`, :py:func:`~xarray.zeros_like`, :py:func:`~xarray.ones_like`, :py:func:`~xarray.open_dataarray`, :py:meth:`~DataArray.compute`, :py:meth:`Dataset.info`, :py:func:`testing.assert_equal`, :py:func:`testing.assert_identical`, and :py:func:`testing.assert_allclose`.
Breaking changes ~~~~~~~~~~~~~~~~
- Index coordinates for each dimension are now optional, and are no longer created by default (:issue:`1017`). You can identify such dimensions without coordinates by their appearance in the list of "Dimensions without coordinates" in the ``Dataset`` or ``DataArray`` repr: .. jupyter-input:: xr.Dataset({"foo": (("x", "y"), [[1, 2]])}) .. jupyter-output:: Dimensions: (x: 1, y: 2) Dimensions without coordinates: x, y Data variables: foo (x, y) int64 1 2 This has a number of implications:
- :py:func:`~align` and :py:meth:`~Dataset.reindex` can now raise an error if dimension labels are missing and dimensions have different sizes.
- Because pandas does not support missing indexes, methods such as ``to_dataframe``/``from_dataframe`` and ``stack``/``unstack`` no longer roundtrip faithfully on all inputs. Use :py:meth:`~Dataset.reset_index` to remove undesired indexes.
- ``Dataset.__delitem__`` and :py:meth:`~Dataset.drop` no longer delete/drop variables that have dimensions matching a deleted/dropped variable.
- ``DataArray.coords.__delitem__`` is now allowed on variables matching dimension names.
- ``.sel`` and ``.loc`` now handle indexing along a dimension without coordinate labels by doing integer-based indexing.
See :ref:`indexing.missing_coordinates` for an example. - :py:attr:`~Dataset.indexes` is no longer guaranteed to include all dimensions names as keys. The new method :py:meth:`~Dataset.get_index` has been added to get an index for a dimension guaranteed, falling back to produce a default ``RangeIndex`` if necessary. - The default behavior of ``merge`` is now ``compat='no_conflicts'``, so some merges will now succeed in cases that previously raised ``xarray.MergeError``. Set ``compat='broadcast_equals'`` to restore the previous default. See :ref:`combining.no_conflicts` for more details. - Reading :py:attr:`~DataArray.values` no longer always caches values in a NumPy array :issue:`1128`. Caching of ``.values`` on variables read from netCDF files on disk is still the default when :py:func:`open_dataset` is called with ``cache=True``. By `Guido Imperiale `_ and `Stephan Hoyer `_. - Pickling a ``Dataset`` or ``DataArray`` linked to a file on disk no longer caches its values into memory before pickling (:issue:`1128`). Instead, pickle stores file paths and restores objects by reopening file references. This enables preliminary, experimental use of xarray for opening files with `dask.distributed `_. By `Stephan Hoyer `_. - Coordinates used to index a dimension are now loaded eagerly into :py:class:`pandas.Index` objects, instead of loading the values lazily. By `Guido Imperiale `_. - Automatic levels for 2d plots are now guaranteed to land on ``vmin`` and ``vmax`` when these kwargs are explicitly provided (:issue:`1191`). The automated level selection logic also slightly changed. By `Fabien Maussion `_. - ``DataArray.rename()`` behavior changed to strictly change the ``DataArray.name`` if called with string argument, or strictly change coordinate names if called with dict-like argument. By `Markus Gonser `_. - By default ``to_netcdf()`` add a ``_FillValue = NaN`` attributes to float types. By `Frederic Laliberte `_. - ``repr`` on ``DataArray`` objects uses an shortened display for NumPy array data that is less likely to overflow onto multiple pages (:issue:`1207`). By `Stephan Hoyer `_. - xarray no longer supports python 3.3, versions of dask prior to v0.9.0, or versions of bottleneck prior to v1.0. Deprecations ~~~~~~~~~~~~ - Renamed the ``Coordinate`` class from xarray's low level API to :py:class:`~xarray.IndexVariable`. ``Variable.to_variable`` and ``Variable.to_coord`` have been renamed to :py:meth:`~xarray.Variable.to_base_variable` and :py:meth:`~xarray.Variable.to_index_variable`. - Deprecated supplying ``coords`` as a dictionary to the ``DataArray`` constructor without also supplying an explicit ``dims`` argument. The old behavior encouraged relying on the iteration order of dictionaries, which is a bad practice (:issue:`727`). - Removed a number of methods deprecated since v0.7.0 or earlier: ``load_data``, ``vars``, ``drop_vars``, ``dump``, ``dumps`` and the ``variables`` keyword argument to ``Dataset``. - Removed the dummy module that enabled ``import xray``. Enhancements ~~~~~~~~~~~~ - Added new method :py:meth:`~DataArray.combine_first` to ``DataArray`` and ``Dataset``, based on the pandas method of the same name (see :ref:`combine`). By `Chun-Wei Yuan `_. - Added the ability to change default automatic alignment (arithmetic_join="inner") for binary operations via :py:func:`~xarray.set_options()` (see :ref:`math automatic alignment`). By `Chun-Wei Yuan `_. - Add checking of ``attr`` names and values when saving to netCDF, raising useful error messages if they are invalid. 
(:issue:`911`). By `Robin Wilson `_. - Added ability to save ``DataArray`` objects directly to netCDF files using :py:meth:`~xarray.DataArray.to_netcdf`, and to load directly from netCDF files using :py:func:`~xarray.open_dataarray` (:issue:`915`). These remove the need to convert a ``DataArray`` to a ``Dataset`` before saving as a netCDF file, and deals with names to ensure a perfect 'roundtrip' capability. By `Robin Wilson `_. - Multi-index levels are now accessible as "virtual" coordinate variables, e.g., ``ds['time']`` can pull out the ``'time'`` level of a multi-index (see :ref:`coordinates`). ``sel`` also accepts providing multi-index levels as keyword arguments, e.g., ``ds.sel(time='2000-01')`` (see :ref:`multi-level indexing`). By `Benoit Bovy `_. - Added ``set_index``, ``reset_index`` and ``reorder_levels`` methods to easily create and manipulate (multi-)indexes (see :ref:`reshape.set_index`). By `Benoit Bovy `_. - Added the ``compat`` option ``'no_conflicts'`` to ``merge``, allowing the combination of xarray objects with disjoint (:issue:`742`) or overlapping (:issue:`835`) coordinates as long as all present data agrees. By `Johnnie Gray `_. See :ref:`combining.no_conflicts` for more details. - It is now possible to set ``concat_dim=None`` explicitly in :py:func:`~xarray.open_mfdataset` to disable inferring a dimension along which to concatenate. By `Stephan Hoyer `_. - Added methods :py:meth:`DataArray.compute`, :py:meth:`Dataset.compute`, and :py:meth:`Variable.compute` as a non-mutating alternative to :py:meth:`~DataArray.load`. By `Guido Imperiale `_. - Adds DataArray and Dataset methods :py:meth:`~xarray.DataArray.cumsum` and :py:meth:`~xarray.DataArray.cumprod`. By `Phillip J. Wolfram `_. - New properties :py:attr:`Dataset.sizes` and :py:attr:`DataArray.sizes` for providing consistent access to dimension length on both ``Dataset`` and ``DataArray`` (:issue:`921`). By `Stephan Hoyer `_. - New keyword argument ``drop=True`` for :py:meth:`~DataArray.sel`, :py:meth:`~DataArray.isel` and :py:meth:`~DataArray.squeeze` for dropping scalar coordinates that arise from indexing. ``DataArray`` (:issue:`242`). By `Stephan Hoyer `_. - New top-level functions :py:func:`~xarray.full_like`, :py:func:`~xarray.zeros_like`, and :py:func:`~xarray.ones_like` By `Guido Imperiale `_. - Overriding a preexisting attribute with :py:func:`~xarray.register_dataset_accessor` or :py:func:`~xarray.register_dataarray_accessor` now issues a warning instead of raising an error (:issue:`1082`). By `Stephan Hoyer `_. - Options for axes sharing between subplots are exposed to :py:class:`~xarray.plot.FacetGrid` and :py:func:`~xarray.plot.plot`, so axes sharing can be disabled for polar plots. By `Bas Hoonhout `_. - New utility functions :py:func:`~xarray.testing.assert_equal`, :py:func:`~xarray.testing.assert_identical`, and :py:func:`~xarray.testing.assert_allclose` for asserting relationships between xarray objects, designed for use in a pytest test suite. - ``figsize``, ``size`` and ``aspect`` plot arguments are now supported for all plots (:issue:`897`). See :ref:`plotting.figsize` for more details. By `Stephan Hoyer `_ and `Fabien Maussion `_. - New :py:meth:`~Dataset.info` method to summarize ``Dataset`` variables and attributes. The method prints to a buffer (e.g. ``stdout``) with output similar to what the command line utility ``ncdump -h`` produces (:issue:`1150`). By `Joe Hamman `_. 
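For instance, a minimal sketch of ``Dataset.info()`` (the dataset contents here are made up purely for illustration):

.. code:: python

    import xarray as xr

    ds = xr.Dataset(
        {"temperature": (("x",), [21.4, 19.8])}, attrs={"title": "demo"}
    )
    # prints an ``ncdump -h``-style summary of dimensions, variables and attributes
    ds.info()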
- Added the ability to write unlimited netCDF dimensions with the ``scipy`` and ``netcdf4`` backends via the new ``xray.Dataset.encoding`` attribute or via the ``unlimited_dims`` argument to ``xray.Dataset.to_netcdf``. By `Joe Hamman `_.
- New :py:meth:`~DataArray.quantile` method to calculate quantiles from DataArray objects (:issue:`1187`). By `Joe Hamman `_.
Bug fixes ~~~~~~~~~
- ``groupby_bins`` now restores empty bins by default (:issue:`1019`). By `Ryan Abernathey `_.
- Fix issues for dates outside the valid range of pandas timestamps (:issue:`975`). By `Mathias Hauser `_.
- Unstacking produced a flipped array after stacking decreasing coordinate values (:issue:`980`). By `Stephan Hoyer `_.
- Setting ``dtype`` via the ``encoding`` parameter of ``to_netcdf`` failed if the encoded dtype was the same as the dtype of the original array (:issue:`873`). By `Stephan Hoyer `_.
- Fix issues with variables where both attributes ``_FillValue`` and ``missing_value`` are set to ``NaN`` (:issue:`997`). By `Marco Zühlke `_.
- ``.where()`` and ``.fillna()`` now preserve attributes (:issue:`1009`). By `Fabien Maussion `_.
- Applying :py:func:`broadcast()` to an xarray object based on the dask backend won't accidentally convert the array from dask to numpy anymore (:issue:`978`). By `Guido Imperiale `_.
- ``Dataset.concat()`` now preserves variable order (:issue:`1027`). By `Fabien Maussion `_.
- Fixed an issue with pcolormesh (:issue:`781`). A new ``infer_intervals`` keyword gives control over whether the cell intervals should be computed or not. By `Fabien Maussion `_.
- Grouping over a dimension with non-unique values with ``groupby`` now gives correct groups. By `Stephan Hoyer `_.
- Fixed accessing coordinate variables with non-string names from ``.coords``. By `Stephan Hoyer `_.
- :py:meth:`~xarray.DataArray.rename` now simultaneously renames the array and any coordinate with the same name, when supplied via a :py:class:`dict` (:issue:`1116`). By `Yves Delley `_.
- Fixed sub-optimal performance in certain operations with object arrays (:issue:`1121`). By `Yves Delley `_.
- Fix ``.groupby(group)`` when ``group`` has datetime dtype (:issue:`1132`). By `Jonas Sølvsteen `_.
- Fixed a bug with facetgrid (the ``norm`` keyword was ignored, :issue:`1159`). By `Fabien Maussion `_.
- Resolved a concurrency bug that could cause Python to crash when simultaneously reading and writing netCDF4 files with dask (:issue:`1172`). By `Stephan Hoyer `_.
- Fix to make ``.copy()`` actually copy dask arrays, which will be relevant for future releases of dask in which dask arrays will be mutable (:issue:`1180`). By `Stephan Hoyer `_.
- Fix opening NetCDF files with multi-dimensional time variables (:issue:`1229`). By `Stephan Hoyer `_.
Performance improvements ~~~~~~~~~~~~~~~~~~~~~~~~
- ``xarray.Dataset.isel_points`` and ``xarray.Dataset.sel_points`` now use vectorised indexing in numpy and dask (:issue:`1161`), which can result in several orders of magnitude speedup. By `Jonathan Chambers `_.
.. _whats-new.0.8.2: v0.8.2 (18 August 2016) ----------------------- This release includes a number of bug fixes and minor enhancements.
Breaking changes ~~~~~~~~~~~~~~~~
- :py:func:`~xarray.broadcast` and :py:func:`~xarray.concat` now auto-align inputs, using ``join='outer'``. Previously, these functions raised ``ValueError`` for non-aligned inputs. By `Guido Imperiale `_.
Enhancements ~~~~~~~~~~~~
- New documentation on :ref:`panel transition`. By `Maximilian Roos `_.
- New ``Dataset`` and ``DataArray`` methods :py:meth:`~xarray.Dataset.to_dict` and :py:meth:`~xarray.Dataset.from_dict` to allow easy conversion between dictionaries and xarray objects (:issue:`432`). See :ref:`dictionary IO` for more details. By `Julia Signell `_. - Added ``exclude`` and ``indexes`` optional parameters to :py:func:`~xarray.align`, and ``exclude`` optional parameter to :py:func:`~xarray.broadcast`. By `Guido Imperiale `_. - Better error message when assigning variables without dimensions (:issue:`971`). By `Stephan Hoyer `_. - Better error message when reindex/align fails due to duplicate index values (:issue:`956`). By `Stephan Hoyer `_. Bug fixes ~~~~~~~~~ - Ensure xarray works with h5netcdf v0.3.0 for arrays with ``dtype=str`` (:issue:`953`). By `Stephan Hoyer `_. - ``Dataset.__dir__()`` (i.e. the method python calls to get autocomplete options) failed if one of the dataset's keys was not a string (:issue:`852`). By `Maximilian Roos `_. - ``Dataset`` constructor can now take arbitrary objects as values (:issue:`647`). By `Maximilian Roos `_. - Clarified ``copy`` argument for :py:meth:`~xarray.DataArray.reindex` and :py:func:`~xarray.align`, which now consistently always return new xarray objects (:issue:`927`). - Fix ``open_mfdataset`` with ``engine='pynio'`` (:issue:`936`). By `Stephan Hoyer `_. - ``groupby_bins`` sorted bin labels as strings (:issue:`952`). By `Stephan Hoyer `_. - Fix bug introduced by v0.8.0 that broke assignment to datasets when both the left and right side have the same non-unique index values (:issue:`956`). .. _whats-new.0.8.1: v0.8.1 (5 August 2016) ---------------------- Bug fixes ~~~~~~~~~ - Fix bug in v0.8.0 that broke assignment to Datasets with non-unique indexes (:issue:`943`). By `Stephan Hoyer `_. .. _whats-new.0.8.0: v0.8.0 (2 August 2016) ---------------------- This release includes four months of new features and bug fixes, including several breaking changes. .. _v0.8.0.breaking: Breaking changes ~~~~~~~~~~~~~~~~ - Dropped support for Python 2.6 (:issue:`855`). - Indexing on multi-index now drop levels, which is consistent with pandas. It also changes the name of the dimension / coordinate when the multi-index is reduced to a single index (:issue:`802`). - Contour plots no longer add a colorbar per default (:issue:`866`). Filled contour plots are unchanged. - ``DataArray.values`` and ``.data`` now always returns an NumPy array-like object, even for 0-dimensional arrays with object dtype (:issue:`867`). Previously, ``.values`` returned native Python objects in such cases. To convert the values of scalar arrays to Python objects, use the ``.item()`` method. Enhancements ~~~~~~~~~~~~ - Groupby operations now support grouping over multidimensional variables. A new method called :py:meth:`~xarray.Dataset.groupby_bins` has also been added to allow users to specify bins for grouping. The new features are described in :ref:`groupby.multidim` and :ref:`/examples/multidimensional-coords.ipynb`. By `Ryan Abernathey `_. - DataArray and Dataset method :py:meth:`where` now supports a ``drop=True`` option that clips coordinate elements that are fully masked. By `Phillip J. Wolfram `_. - New top level :py:func:`merge` function allows for combining variables from any number of ``Dataset`` and/or ``DataArray`` variables. See :ref:`merge` for more details. By `Stephan Hoyer `_. 
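For example, a minimal sketch of the new top-level ``merge`` function (the variables ``a`` and ``b`` are made up purely for illustration):

.. code:: python

    import xarray as xr

    ds = xr.Dataset({"a": ("x", [1, 2, 3])})
    da = xr.DataArray([4.0, 5.0, 6.0], dims="x", name="b")
    # a Dataset with data variables 'a' and 'b' sharing the dimension 'x'
    merged = xr.merge([ds, da])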
- :py:meth:`DataArray.resample` and :py:meth:`Dataset.resample` now support the ``keep_attrs=False`` option that determines whether variable and dataset attributes are retained in the resampled object. By `Jeremy McGibbon `_. - Better multi-index support in :py:meth:`DataArray.sel`, :py:meth:`DataArray.loc`, :py:meth:`Dataset.sel` and :py:meth:`Dataset.loc`, which now behave more closely to pandas and which also accept dictionaries for indexing based on given level names and labels (see :ref:`multi-level indexing`). By `Benoit Bovy `_. - New (experimental) decorators :py:func:`~xarray.register_dataset_accessor` and :py:func:`~xarray.register_dataarray_accessor` for registering custom xarray extensions without subclassing. They are described in the new documentation page on :ref:`internals`. By `Stephan Hoyer `_. - Round trip boolean datatypes. Previously, writing boolean datatypes to netCDF formats would raise an error since netCDF does not have a ``bool`` datatype. This feature reads/writes a ``dtype`` attribute to boolean variables in netCDF files. By `Joe Hamman `_. - 2D plotting methods now have two new keywords (``cbar_ax`` and ``cbar_kwargs``), allowing more control on the colorbar (:issue:`872`). By `Fabien Maussion `_. - New Dataset method :py:meth:`Dataset.filter_by_attrs`, akin to ``netCDF4.Dataset.get_variables_by_attributes``, to easily filter data variables using its attributes. `Filipe Fernandes `_. Bug fixes ~~~~~~~~~ - Attributes were being retained by default for some resampling operations when they should not. With the ``keep_attrs=False`` option, they will no longer be retained by default. This may be backwards-incompatible with some scripts, but the attributes may be kept by adding the ``keep_attrs=True`` option. By `Jeremy McGibbon `_. - Concatenating xarray objects along an axis with a MultiIndex or PeriodIndex preserves the nature of the index (:issue:`875`). By `Stephan Hoyer `_. - Fixed bug in arithmetic operations on DataArray objects whose dimensions are numpy structured arrays or recarrays :issue:`861`, :issue:`837`. By `Maciek Swat `_. - ``decode_cf_timedelta`` now accepts arrays with ``ndim`` >1 (:issue:`842`). This fixes issue :issue:`665`. `Filipe Fernandes `_. - Fix a bug where ``xarray.ufuncs`` that take two arguments would incorrectly use to numpy functions instead of dask.array functions (:issue:`876`). By `Stephan Hoyer `_. - Support for pickling functions from ``xarray.ufuncs`` (:issue:`901`). By `Stephan Hoyer `_. - ``Variable.copy(deep=True)`` no longer converts MultiIndex into a base Index (:issue:`769`). By `Benoit Bovy `_. - Fixes for groupby on dimensions with a multi-index (:issue:`867`). By `Stephan Hoyer `_. - Fix printing datasets with unicode attributes on Python 2 (:issue:`892`). By `Stephan Hoyer `_. - Fixed incorrect test for dask version (:issue:`891`). By `Stephan Hoyer `_. - Fixed ``dim`` argument for ``isel_points``/``sel_points`` when a ``pandas.Index`` is passed. By `Stephan Hoyer `_. - :py:func:`~xarray.plot.contour` now plots the correct number of contours (:issue:`866`). By `Fabien Maussion `_. .. _whats-new.0.7.2: v0.7.2 (13 March 2016) ---------------------- This release includes two new, entirely backwards compatible features and several bug fixes. Enhancements ~~~~~~~~~~~~ - New DataArray method :py:meth:`DataArray.dot` for calculating the dot product of two DataArrays along shared dimensions. By `Dean Pospisil `_. 
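A minimal sketch of the new ``dot`` method (the array values are illustrative only):

.. code:: python

    import numpy as np
    import xarray as xr

    da = xr.DataArray(np.arange(6).reshape(2, 3), dims=("x", "y"))
    weights = xr.DataArray([0.25, 0.5, 0.25], dims="y")
    # sums the products over the shared dimension 'y', leaving only 'x'
    da.dot(weights)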
- Rolling window operations on DataArray objects are now supported via a new :py:meth:`DataArray.rolling` method. For example: .. jupyter-input:: import xarray as xr import numpy as np arr = xr.DataArray(np.arange(0, 7.5, 0.5).reshape(3, 5), dims=("x", "y")) arr .. jupyter-output:: array([[ 0. , 0.5, 1. , 1.5, 2. ], [ 2.5, 3. , 3.5, 4. , 4.5], [ 5. , 5.5, 6. , 6.5, 7. ]]) Coordinates: * x (x) int64 0 1 2 * y (y) int64 0 1 2 3 4 .. jupyter-input:: arr.rolling(y=3, min_periods=2).mean() .. jupyter-output:: array([[ nan, 0.25, 0.5 , 1. , 1.5 ], [ nan, 2.75, 3. , 3.5 , 4. ], [ nan, 5.25, 5.5 , 6. , 6.5 ]]) Coordinates: * x (x) int64 0 1 2 * y (y) int64 0 1 2 3 4 See :ref:`compute.rolling` for more details. By `Joe Hamman `_. Bug fixes ~~~~~~~~~ - Fixed an issue where plots using pcolormesh and Cartopy axes were being distorted by the inference of the axis interval breaks. This change chooses not to modify the coordinate variables when the axes have the attribute ``projection``, allowing Cartopy to handle the extent of pcolormesh plots (:issue:`781`). By `Joe Hamman `_. - 2D plots now better handle additional coordinates which are not ``DataArray`` dimensions (:issue:`788`). By `Fabien Maussion `_. .. _whats-new.0.7.1: v0.7.1 (16 February 2016) ------------------------- This is a bug fix release that includes two small, backwards compatible enhancements. We recommend that all users upgrade. Enhancements ~~~~~~~~~~~~ - Numerical operations now return empty objects on no overlapping labels rather than raising ``ValueError`` (:issue:`739`). - :py:class:`~pandas.Series` is now supported as valid input to the ``Dataset`` constructor (:issue:`740`). Bug fixes ~~~~~~~~~ - Restore checks for shape consistency between data and coordinates in the DataArray constructor (:issue:`758`). - Single dimension variables no longer transpose as part of a broader ``.transpose``. This behavior was causing ``pandas.PeriodIndex`` dimensions to lose their type (:issue:`749`) - :py:class:`~xarray.Dataset` labels remain as their native type on ``.to_dataset``. Previously they were coerced to strings (:issue:`745`) - Fixed a bug where replacing a ``DataArray`` index coordinate would improperly align the coordinate (:issue:`725`). - ``DataArray.reindex_like`` now maintains the dtype of complex numbers when reindexing leads to NaN values (:issue:`738`). - ``Dataset.rename`` and ``DataArray.rename`` support the old and new names being the same (:issue:`724`). - Fix :py:meth:`~xarray.Dataset.from_dataframe` for DataFrames with Categorical column and a MultiIndex index (:issue:`737`). - Fixes to ensure xarray works properly after the upcoming pandas v0.18 and NumPy v1.11 releases. Acknowledgments ~~~~~~~~~~~~~~~ The following individuals contributed to this release: - Edward Richards - Maximilian Roos - Rafael Guedes - Spencer Hill - Stephan Hoyer .. _whats-new.0.7.0: v0.7.0 (21 January 2016) ------------------------ This major release includes redesign of :py:class:`~xarray.DataArray` internals, as well as new methods for reshaping, rolling and shifting data. It includes preliminary support for :py:class:`pandas.MultiIndex`, as well as a number of other features and bug fixes, several of which offer improved compatibility with pandas. New name ~~~~~~~~ The project formerly known as "xray" is now "xarray", pronounced "x-array"! This avoids a namespace conflict with the entire field of x-ray science. 
Renaming our project seemed like the right thing to do, especially because some scientists who work with actual x-rays are interested in using this project in their work. Thanks for your understanding and patience in this transition. You can now find our documentation and code repository at new URLs: - https://docs.xarray.dev - https://github.com/pydata/xarray/ To ease the transition, we have simultaneously released v0.7.0 of both ``xray`` and ``xarray`` on the Python Package Index. These packages are identical. For now, ``import xray`` still works, except it issues a deprecation warning. This will be the last xray release. Going forward, we recommend switching your import statements to ``import xarray as xr``. .. _v0.7.0.breaking: Breaking changes ~~~~~~~~~~~~~~~~ - The internal data model used by ``xray.DataArray`` has been rewritten to fix several outstanding issues (:issue:`367`, :issue:`634`, `this stackoverflow report`_). Internally, ``DataArray`` is now implemented in terms of ``._variable`` and ``._coords`` attributes instead of holding variables in a ``Dataset`` object. This refactor ensures that if a DataArray has the same name as one of its coordinates, the array and the coordinate no longer share the same data. In practice, this means that creating a DataArray with the same ``name`` as one of its dimensions no longer automatically uses that array to label the corresponding coordinate. You will now need to provide coordinate labels explicitly. Here's the old behavior: .. jupyter-input:: xray.DataArray([4, 5, 6], dims="x", name="x") .. jupyter-output:: array([4, 5, 6]) Coordinates: * x (x) int64 4 5 6 and the new behavior (compare the values of the ``x`` coordinate): .. jupyter-input:: xray.DataArray([4, 5, 6], dims="x", name="x") .. jupyter-output:: array([4, 5, 6]) Coordinates: * x (x) int64 0 1 2 - It is no longer possible to convert a DataArray to a Dataset with ``xray.DataArray.to_dataset`` if it is unnamed. This will now raise ``ValueError``. If the array is unnamed, you need to supply the ``name`` argument. .. _this stackoverflow report: http://stackoverflow.com/questions/33158558/python-xray-extract-first-and-last-time-value-within-each-month-of-a-timeseries Enhancements ~~~~~~~~~~~~ - Basic support for :py:class:`~pandas.MultiIndex` coordinates on xray objects, including indexing, :py:meth:`~DataArray.stack` and :py:meth:`~DataArray.unstack`: .. jupyter-input:: df = pd.DataFrame({"foo": range(3), "x": ["a", "b", "b"], "y": [0, 0, 1]}) s = df.set_index(["x", "y"])["foo"] arr = xray.DataArray(s, dims="z") arr .. jupyter-output:: array([0, 1, 2]) Coordinates: * z (z) object ('a', 0) ('b', 0) ('b', 1) .. jupyter-input:: arr.indexes["z"] .. jupyter-output:: MultiIndex(levels=[[u'a', u'b'], [0, 1]], labels=[[0, 1, 1], [0, 0, 1]], names=[u'x', u'y']) .. jupyter-input:: arr.unstack("z") .. jupyter-output:: array([[ 0., nan], [ 1., 2.]]) Coordinates: * x (x) object 'a' 'b' * y (y) int64 0 1 .. jupyter-input:: arr.unstack("z").stack(z=("x", "y")) .. jupyter-output:: array([ 0., nan, 1., 2.]) Coordinates: * z (z) object ('a', 0) ('a', 1) ('b', 0) ('b', 1) See :ref:`reshape.stack` for more details. .. warning:: xray's MultiIndex support is still experimental, and we have a long to- do list of desired additions (:issue:`719`), including better display of multi-index levels when printing a ``Dataset``, and support for saving datasets with a MultiIndex to a netCDF file. User contributions in this area would be greatly appreciated. 
- Support for reading GRIB, HDF4 and other file formats via PyNIO_.
- Better error message when a variable is supplied with the same name as one of its dimensions.
- Plotting: more control over colormap parameters (:issue:`642`). ``vmin`` and ``vmax`` will not be silently ignored anymore. Setting ``center=False`` prevents automatic selection of a divergent colormap.
- New ``xray.Dataset.shift`` and ``xray.Dataset.roll`` methods for shifting/rotating datasets or arrays along a dimension: .. code:: python array = xray.DataArray([5, 6, 7, 8], dims="x") array.shift(x=2) array.roll(x=2) Notice that ``shift`` moves data independently of coordinates, but ``roll`` moves both data and coordinates.
- Assigning a ``pandas`` object directly as a ``Dataset`` variable is now permitted. Its index names correspond to the ``dims`` of the ``Dataset``, and its data is aligned.
- Passing a :py:class:`pandas.DataFrame` or ``pandas.Panel`` to a Dataset constructor is now permitted.
- New function ``xray.broadcast`` for explicitly broadcasting ``DataArray`` and ``Dataset`` objects against each other. For example: .. code:: python a = xray.DataArray([1, 2, 3], dims="x") b = xray.DataArray([5, 6], dims="y") a b a2, b2 = xray.broadcast(a, b) a2 b2 .. _PyNIO: https://www.pyngl.ucar.edu/Nio.shtml
Bug fixes ~~~~~~~~~
- Fixes for several issues found on ``DataArray`` objects with the same name as one of their coordinates (see :ref:`v0.7.0.breaking` for more details).
- ``DataArray.to_masked_array`` now always returns a masked array whose mask is an array, not a scalar value (:issue:`684`).
- Allow an (imperfect) repr of Coords when the underlying index is a PeriodIndex (:issue:`645`).
- Attempting to assign a ``Dataset`` or ``DataArray`` variable/attribute using attribute-style syntax (e.g., ``ds.foo = 42``) now raises an error rather than silently failing (:issue:`656`, :issue:`714`).
- You can now pass pandas objects with non-numpy dtypes (e.g., ``categorical`` or ``datetime64`` with a timezone) into xray without an error (:issue:`716`).
Acknowledgments ~~~~~~~~~~~~~~~ The following individuals contributed to this release: - Antony Lee - Fabien Maussion - Joe Hamman - Maximilian Roos - Stephan Hoyer - Takeshi Kanmae - femtotrader
v0.6.1 (21 October 2015) ------------------------ This release contains a number of bug and compatibility fixes, as well as enhancements to plotting, indexing and writing files to disk. Note that the minimum required version of dask for use with xray is now version 0.6.
API Changes ~~~~~~~~~~~
- The handling of colormaps and discrete color lists for 2D plots in ``xray.DataArray.plot`` was changed to provide more compatibility with matplotlib's ``contour`` and ``contourf`` functions (:issue:`538`). Discrete lists of colors should now be specified using the ``colors`` keyword, rather than ``cmap``.
Enhancements ~~~~~~~~~~~~
- Faceted plotting through ``xray.plot.FacetGrid`` and the ``xray.plot.plot`` method. See :ref:`plotting.faceting` for more details and examples.
- ``xray.Dataset.sel`` and ``xray.Dataset.reindex`` now support the ``tolerance`` argument for controlling nearest-neighbor selection (:issue:`629`): .. jupyter-input:: array = xray.DataArray([1, 2, 3], dims="x") array.reindex(x=[0.9, 1.5], method="nearest", tolerance=0.2) .. jupyter-output:: array([ 2., nan]) Coordinates: * x (x) float64 0.9 1.5 This feature requires pandas v0.17 or newer.
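The same keyword also applies to ``sel``. A minimal sketch, with explicit ``x`` coordinates added for clarity (not part of the original entry):

.. code:: python

    import xray  # the package was renamed to xarray in v0.7.0

    array = xray.DataArray([1, 2, 3], [("x", [0, 1, 2])])
    # matches the nearest label x=1 (within the 0.2 tolerance) and returns 2;
    # labels with no index value within ``tolerance`` raise a KeyError instead
    array.sel(x=0.9, method="nearest", tolerance=0.2)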
- New ``encoding`` argument in ``xray.Dataset.to_netcdf`` for writing netCDF files with compression, as described in the new documentation section on :ref:`io.netcdf.writing_encoded`. - Add ``xray.Dataset.real`` and ``xray.Dataset.imag`` attributes to Dataset and DataArray (:issue:`553`). - More informative error message with ``xray.Dataset.from_dataframe`` if the frame has duplicate columns. - xray now uses deterministic names for dask arrays it creates or opens from disk. This allows xray users to take advantage of dask's nascent support for caching intermediate computation results. See :issue:`555` for an example. Bug fixes ~~~~~~~~~ - Forwards compatibility with the latest pandas release (v0.17.0). We were using some internal pandas routines for datetime conversion, which unfortunately have now changed upstream (:issue:`569`). - Aggregation functions now correctly skip ``NaN`` for data for ``complex128`` dtype (:issue:`554`). - Fixed indexing 0d arrays with unicode dtype (:issue:`568`). - ``xray.DataArray.name`` and Dataset keys must be a string or None to be written to netCDF (:issue:`533`). - ``xray.DataArray.where`` now uses dask instead of numpy if either the array or ``other`` is a dask array. Previously, if ``other`` was a numpy array the method was evaluated eagerly. - Global attributes are now handled more consistently when loading remote datasets using ``engine='pydap'`` (:issue:`574`). - It is now possible to assign to the ``.data`` attribute of DataArray objects. - ``coordinates`` attribute is now kept in the encoding dictionary after decoding (:issue:`610`). - Compatibility with numpy 1.10 (:issue:`617`). Acknowledgments ~~~~~~~~~~~~~~~ The following individuals contributed to this release: - Ryan Abernathey - Pete Cable - Clark Fitzgerald - Joe Hamman - Stephan Hoyer - Scott Sinclair v0.6.0 (21 August 2015) ----------------------- This release includes numerous bug fixes and enhancements. Highlights include the introduction of a plotting module and the new Dataset and DataArray methods ``xray.Dataset.isel_points``, ``xray.Dataset.sel_points``, ``xray.Dataset.where`` and ``xray.Dataset.diff``. There are no breaking changes from v0.5.2. Enhancements ~~~~~~~~~~~~ - Plotting methods have been implemented on DataArray objects ``xray.DataArray.plot`` through integration with matplotlib (:issue:`185`). For an introduction, see :ref:`plotting`. - Variables in netCDF files with multiple missing values are now decoded as NaN after issuing a warning if open_dataset is called with mask_and_scale=True. - We clarified our rules for when the result from an xray operation is a copy vs. a view (see :ref:`copies_vs_views` for more details). - Dataset variables are now written to netCDF files in order of appearance when using the netcdf4 backend (:issue:`479`). - Added ``xray.Dataset.isel_points`` and ``xray.Dataset.sel_points`` to support pointwise indexing of Datasets and DataArrays (:issue:`475`). .. jupyter-input:: da = xray.DataArray( ...: np.arange(56).reshape((7, 8)), ...: coords={"x": list("abcdefg"), "y": 10 * np.arange(8)}, ...: dims=["x", "y"], ...: ) da .. jupyter-output:: array([[ 0, 1, 2, 3, 4, 5, 6, 7], [ 8, 9, 10, 11, 12, 13, 14, 15], [16, 17, 18, 19, 20, 21, 22, 23], [24, 25, 26, 27, 28, 29, 30, 31], [32, 33, 34, 35, 36, 37, 38, 39], [40, 41, 42, 43, 44, 45, 46, 47], [48, 49, 50, 51, 52, 53, 54, 55]]) Coordinates: * y (y) int64 0 10 20 30 40 50 60 70 * x (x) |S1 'a' 'b' 'c' 'd' 'e' 'f' 'g' .. 
jupyter-input:: # we can index by position along each dimension da.isel_points(x=[0, 1, 6], y=[0, 1, 0], dim="points") .. jupyter-output:: array([ 0, 9, 48]) Coordinates: y (points) int64 0 10 0 x (points) |S1 'a' 'b' 'g' * points (points) int64 0 1 2 .. jupyter-input:: # or equivalently by label da.sel_points(x=["a", "b", "g"], y=[0, 10, 0], dim="points") .. jupyter-output:: array([ 0, 9, 48]) Coordinates: y (points) int64 0 10 0 x (points) |S1 'a' 'b' 'g' * points (points) int64 0 1 2
- New ``xray.Dataset.where`` method for masking xray objects according to some criteria. This works particularly well with multi-dimensional data: .. code:: python ds = xray.Dataset(coords={"x": range(100), "y": range(100)}) ds["distance"] = np.sqrt(ds.x**2 + ds.y**2) ds.distance.where(ds.distance < 100).plot()
- Added new methods ``xray.DataArray.diff`` and ``xray.Dataset.diff`` for finite difference calculations along a given axis.
- New ``xray.DataArray.to_masked_array`` convenience method for returning a ``numpy.ma.MaskedArray``. .. code:: python da = xray.DataArray(np.random.random_sample(size=(5, 4))) da.where(da < 0.5) da.where(da < 0.5).to_masked_array(copy=True)
- Added a new ``drop_variables`` flag to ``xray.open_dataset`` for excluding variables from being parsed. This may be useful to drop variables with problems or inconsistent values.
Bug fixes ~~~~~~~~~
- Fixed aggregation functions (e.g., sum and mean) on big-endian arrays when bottleneck is installed (:issue:`489`).
- Dataset aggregation functions dropped variables with unsigned integer dtype (:issue:`505`).
- ``.any()`` and ``.all()`` were not lazy when used on xray objects containing dask arrays.
- Fixed an error when attempting to save datetime64 variables to netCDF files when the first element is ``NaT`` (:issue:`528`).
- Fix pickle on DataArray objects (:issue:`515`).
- Fixed unnecessary coercion of float64 to float32 when using netcdf3 and netcdf4_classic formats (:issue:`526`).
v0.5.2 (16 July 2015) --------------------- This release contains bug fixes, several additional options for opening and saving netCDF files, and a backwards incompatible rewrite of the advanced options for ``xray.concat``.
Backwards incompatible changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- The optional arguments ``concat_over`` and ``mode`` in ``xray.concat`` have been removed and replaced by ``data_vars`` and ``coords``. The new arguments are both more easily understood and more robustly implemented, and allowed us to fix a bug where ``concat`` accidentally loaded data into memory. If you set values for these optional arguments manually, you will need to update your code. The default behavior should be unchanged.
Enhancements ~~~~~~~~~~~~
- ``xray.open_mfdataset`` now supports a ``preprocess`` argument for preprocessing datasets prior to concatenation. This is useful if datasets cannot be otherwise merged automatically, e.g., if the original datasets have conflicting index coordinates (:issue:`443`).
- ``xray.open_dataset`` and ``xray.open_mfdataset`` now use a global thread lock by default for reading from netCDF files with dask. This avoids possible segmentation faults for reading from netCDF4 files when HDF5 is not configured properly for concurrent access (:issue:`444`).
- Added support for serializing arrays of complex numbers with ``engine='h5netcdf'``.
- The new ``xray.save_mfdataset`` function allows for saving multiple datasets to disk simultaneously. This is useful when processing large datasets with dask.array.
For example, to save a dataset too big to fit into memory to one file per year, we could write: .. jupyter-input:: years, datasets = zip(*ds.groupby("time.year")) paths = ["%s.nc" % y for y in years] xray.save_mfdataset(datasets, paths) Bug fixes ~~~~~~~~~ - Fixed ``min``, ``max``, ``argmin`` and ``argmax`` for arrays with string or unicode types (:issue:`453`). - ``xray.open_dataset`` and ``xray.open_mfdataset`` support supplying chunks as a single integer. - Fixed a bug in serializing scalar datetime variable to netCDF. - Fixed a bug that could occur in serialization of 0-dimensional integer arrays. - Fixed a bug where concatenating DataArrays was not always lazy (:issue:`464`). - When reading datasets with h5netcdf, bytes attributes are decoded to strings. This allows conventions decoding to work properly on Python 3 (:issue:`451`). v0.5.1 (15 June 2015) --------------------- This minor release fixes a few bugs and an inconsistency with pandas. It also adds the ``pipe`` method, copied from pandas. Enhancements ~~~~~~~~~~~~ - Added ``xray.Dataset.pipe``, replicating the `new pandas method`_ in version 0.16.2. See :ref:`transforming datasets` for more details. - ``xray.Dataset.assign`` and ``xray.Dataset.assign_coords`` now assign new variables in sorted (alphabetical) order, mirroring the behavior in pandas. Previously, the order was arbitrary. .. _new pandas method: http://pandas.pydata.org/pandas-docs/version/0.16.2/whatsnew.html#pipe Bug fixes ~~~~~~~~~ - ``xray.concat`` fails in an edge case involving identical coordinate variables (:issue:`425`) - We now decode variables loaded from netCDF3 files with the scipy engine using native endianness (:issue:`416`). This resolves an issue when aggregating these arrays with bottleneck installed. v0.5 (1 June 2015) ------------------ Highlights ~~~~~~~~~~ The headline feature in this release is experimental support for out-of-core computing (data that doesn't fit into memory) with :doc:`user-guide/dask`. This includes a new top-level function ``xray.open_mfdataset`` that makes it easy to open a collection of netCDF (using dask) as a single ``xray.Dataset`` object. For more on dask, read the `blog post introducing xray + dask`_ and the new documentation section :doc:`user-guide/dask`. .. _blog post introducing xray + dask: https://www.anaconda.com/blog/developer-blog/xray-dask-out-core-labeled-arrays-python/ Dask makes it possible to harness parallelism and manipulate gigantic datasets with xray. It is currently an optional dependency, but it may become required in the future. Backwards incompatible changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - The logic used for choosing which variables are concatenated with ``xray.concat`` has changed. Previously, by default any variables which were equal across a dimension were not concatenated. This lead to some surprising behavior, where the behavior of groupby and concat operations could depend on runtime values (:issue:`268`). For example: .. jupyter-input:: ds = xray.Dataset({"x": 0}) xray.concat([ds, ds], dim="y") .. jupyter-output:: Dimensions: () Coordinates: *empty* Data variables: x int64 0 Now, the default always concatenates data variables: .. code:: python In [1]: ds = xray.Dataset({"x": 0}) In [2]: xray.concat([ds, ds], dim="y") Out[2]: Size: 16B Dimensions: (y: 2) Dimensions without coordinates: y Data variables: x (y) int64 16B 0 0 .. code:: python xray.concat([ds, ds], dim="y") To obtain the old behavior, supply the argument ``concat_over=[]``. 
Enhancements ~~~~~~~~~~~~ - New ``xray.Dataset.to_dataarray`` and enhanced ``xray.DataArray.to_dataset`` methods make it easy to switch back and forth between arrays and datasets: .. code:: python ds = xray.Dataset( {"a": 1, "b": ("x", [1, 2, 3])}, coords={"c": 42}, attrs={"Conventions": "None"}, ) ds.to_dataarray() ds.to_dataarray().to_dataset(dim="variable") - New ``xray.Dataset.fillna`` method to fill missing values, modeled off the pandas method of the same name: .. code:: python array = xray.DataArray([np.nan, 1, np.nan, 3], dims="x") array.fillna(0) ``fillna`` works on both ``Dataset`` and ``DataArray`` objects, and uses index based alignment and broadcasting like standard binary operations. It also can be applied by group, as illustrated in :ref:`/examples/weather-data.ipynb#Fill-missing-values-with-climatology`. - New ``xray.Dataset.assign`` and ``xray.Dataset.assign_coords`` methods patterned off the new :py:meth:`DataFrame.assign ` method in pandas: .. code:: python ds = xray.Dataset({"y": ("x", [1, 2, 3])}) ds.assign(z=lambda ds: ds.y**2) ds.assign_coords(z=("x", ["a", "b", "c"])) These methods return a new Dataset (or DataArray) with updated data or coordinate variables. - ``xray.Dataset.sel`` now supports the ``method`` parameter, which works like the parameter of the same name on ``xray.Dataset.reindex``. It provides a simple interface for doing nearest-neighbor interpolation: .. use verbatim because I can't seem to install pandas 0.16.1 on RTD :( .. jupyter-input:: ds.sel(x=1.1, method="nearest") .. jupyter-output:: Dimensions: () Coordinates: x int64 1 Data variables: y int64 2 .. jupyter-input:: ds.sel(x=[1.1, 2.1], method="pad") .. jupyter-output:: Dimensions: (x: 2) Coordinates: * x (x) int64 1 2 Data variables: y (x) int64 2 3 See :ref:`nearest neighbor lookups` for more details. - You can now control the underlying backend used for accessing remote datasets (via OPeNDAP) by specifying ``engine='netcdf4'`` or ``engine='pydap'``. - xray now provides experimental support for reading and writing netCDF4 files directly via `h5py`_ with the `h5netcdf`_ package, avoiding the netCDF4-Python package. You will need to install h5netcdf and specify ``engine='h5netcdf'`` to try this feature. - Accessing data from remote datasets now has retrying logic (with exponential backoff) that should make it robust to occasional bad responses from DAP servers. - You can control the width of the Dataset repr with ``xray.set_options``. It can be used either as a context manager, in which case the default is restored outside the context: .. code:: python ds = xray.Dataset({"x": np.arange(1000)}) with xray.set_options(display_width=40): print(ds) Or to set a global option: .. jupyter-input:: xray.set_options(display_width=80) The default value for the ``display_width`` option is 80. .. _h5py: http://www.h5py.org/ .. _h5netcdf: https://github.com/shoyer/h5netcdf Deprecations ~~~~~~~~~~~~ - The method ``load_data()`` has been renamed to the more succinct ``xray.Dataset.load``. v0.4.1 (18 March 2015) ---------------------- The release contains bug fixes and several new features. All changes should be fully backwards compatible. Enhancements ~~~~~~~~~~~~ - New documentation sections on :ref:`time-series` and :ref:`combining multiple files`. - ``xray.Dataset.resample`` lets you resample a dataset or data array to a new temporal resolution. The syntax is the `same as pandas`_, except you need to supply the time dimension explicitly: .. 
code:: python time = pd.date_range("2000-01-01", freq="6H", periods=10) array = xray.DataArray(np.arange(10), [("time", time)]) array.resample("1D", dim="time") You can specify how to do the resampling with the ``how`` argument and other options such as ``closed`` and ``label`` let you control labeling: .. code:: python array.resample("1D", dim="time", how="sum", label="right") If the desired temporal resolution is higher than the original data (upsampling), xray will insert missing values: .. code:: python array.resample("3H", "time") - ``first`` and ``last`` methods on groupby objects let you take the first or last examples from each group along the grouped axis: .. code:: python array.groupby("time.day").first() These methods combine well with ``resample``: .. code:: python array.resample("1D", dim="time", how="first") - ``xray.Dataset.swap_dims`` allows for easily swapping one dimension out for another: .. code:: python ds = xray.Dataset({"x": range(3), "y": ("x", list("abc"))}) ds.swap_dims({"x": "y"}) This was possible in earlier versions of xray, but required some contortions. - ``xray.open_dataset`` and ``xray.Dataset.to_netcdf`` now accept an ``engine`` argument to explicitly select which underlying library (netcdf4 or scipy) is used for reading/writing a netCDF file. .. _same as pandas: http://pandas.pydata.org/pandas-docs/stable/timeseries.html#up-and-downsampling Bug fixes ~~~~~~~~~ - Fixed a bug where data netCDF variables read from disk with ``engine='scipy'`` could still be associated with the file on disk, even after closing the file (:issue:`341`). This manifested itself in warnings about mmapped arrays and segmentation faults (if the data was accessed). - Silenced spurious warnings about all-NaN slices when using nan-aware aggregation methods (:issue:`344`). - Dataset aggregations with ``keep_attrs=True`` now preserve attributes on data variables, not just the dataset itself. - Tests for xray now pass when run on Windows (:issue:`360`). - Fixed a regression in v0.4 where saving to netCDF could fail with the error ``ValueError: could not automatically determine time units``. v0.4 (2 March, 2015) -------------------- This is one of the biggest releases yet for xray: it includes some major changes that may break existing code, along with the usual collection of minor enhancements and bug fixes. On the plus side, this release includes all hitherto planned breaking changes, so the upgrade path for xray should be smoother going forward. Breaking changes ~~~~~~~~~~~~~~~~ - We now automatically align index labels in arithmetic, dataset construction, merging and updating. This means the need for manually invoking methods like ``xray.align`` and ``xray.Dataset.reindex_like`` should be vastly reduced. :ref:`For arithmetic`, we align based on the **intersection** of labels: .. code:: python lhs = xray.DataArray([1, 2, 3], [("x", [0, 1, 2])]) rhs = xray.DataArray([2, 3, 4], [("x", [1, 2, 3])]) lhs + rhs :ref:`For dataset construction and merging`, we align based on the **union** of labels: .. code:: python xray.Dataset({"foo": lhs, "bar": rhs}) :ref:`For update and __setitem__`, we align based on the **original** object: .. code:: python lhs.coords["rhs"] = rhs lhs - Aggregations like ``mean`` or ``median`` now skip missing values by default: .. code:: python xray.DataArray([1, 2, np.nan, 3]).mean() You can turn this behavior off by supplying the keyword argument ``skipna=False``. 
These operations are lightning fast thanks to integration with bottleneck_, which is a new optional dependency for xray (numpy is used if bottleneck is not installed). - Scalar coordinates no longer conflict with constant arrays with the same value (e.g., in arithmetic, merging datasets and concat), even if they have different shape (:issue:`243`). For example, the coordinate ``c`` here persists through arithmetic, even though it has different shapes on each DataArray: .. code:: python a = xray.DataArray([1, 2], coords={"c": 0}, dims="x") b = xray.DataArray([1, 2], coords={"c": ("x", [0, 0])}, dims="x") (a + b).coords This functionality can be controlled through the ``compat`` option, which has also been added to the ``xray.Dataset`` constructor. - Datetime shortcuts such as ``'time.month'`` now return a ``DataArray`` with the name ``'month'``, not ``'time.month'`` (:issue:`345`). This makes it easier to index the resulting arrays when they are used with ``groupby``: .. code:: python time = xray.DataArray( pd.date_range("2000-01-01", periods=365), dims="time", name="time" ) counts = time.groupby("time.month").count() counts.sel(month=2) Previously, you would need to use something like ``counts.sel(**{'time.month': 2}})``, which is much more awkward. - The ``season`` datetime shortcut now returns an array of string labels such ``'DJF'``: .. code-block:: ipython In[92]: ds = xray.Dataset({"t": pd.date_range("2000-01-01", periods=12, freq="M")}) In[93]: ds["t.season"] Out[93]: array(['DJF', 'DJF', 'MAM', ..., 'SON', 'SON', 'DJF'], dtype='`_. - Use functions that return generic ndarrays with DataArray.groupby.apply and Dataset.apply (:issue:`327` and :issue:`329`). Thanks Jeff Gerard! - Consolidated the functionality of ``dumps`` (writing a dataset to a netCDF3 bytestring) into ``xray.Dataset.to_netcdf`` (:issue:`333`). - ``xray.Dataset.to_netcdf`` now supports writing to groups in netCDF4 files (:issue:`333`). It also finally has a full docstring -- you should read it! - ``xray.open_dataset`` and ``xray.Dataset.to_netcdf`` now work on netCDF3 files when netcdf4-python is not installed as long as scipy is available (:issue:`333`). - The new ``xray.Dataset.drop`` and ``xray.DataArray.drop`` methods makes it easy to drop explicitly listed variables or index labels: .. code:: python # drop variables ds = xray.Dataset({"x": 0, "y": 1}) ds.drop("x") # drop index labels arr = xray.DataArray([1, 2, 3], coords=[("x", list("abc"))]) arr.drop(["a", "c"], dim="x") - ``xray.Dataset.broadcast_equals`` has been added to correspond to the new ``compat`` option. - Long attributes are now truncated at 500 characters when printing a dataset (:issue:`338`). This should make things more convenient for working with datasets interactively. - Added a new documentation example, :ref:`/examples/monthly-means.ipynb`. Thanks Joe Hamman! Bug fixes ~~~~~~~~~ - Several bug fixes related to decoding time units from netCDF files (:issue:`316`, :issue:`330`). Thanks Stefan Pfenninger! - xray no longer requires ``decode_coords=False`` when reading datasets with unparsable coordinate attributes (:issue:`308`). - Fixed ``DataArray.loc`` indexing with ``...`` (:issue:`318`). - Fixed an edge case that resulting in an error when reindexing multi-dimensional variables (:issue:`315`). - Slicing with negative step sizes (:issue:`312`). - Invalid conversion of string arrays to numeric dtype (:issue:`305`). - Fixed ``repr()`` on dataset objects with non-standard dates (:issue:`347`). 
Deprecations ~~~~~~~~~~~~ - ``dump`` and ``dumps`` have been deprecated in favor of ``xray.Dataset.to_netcdf``. - ``drop_vars`` has been deprecated in favor of ``xray.Dataset.drop``. Future plans ~~~~~~~~~~~~ The biggest feature I'm excited about working toward in the immediate future is supporting out-of-core operations in xray using Dask_, a part of the Blaze_ project. For a preview of using Dask with weather data, read `this blog post`_ by Matthew Rocklin. See :issue:`328` for more details. .. _Dask: https://dask.org .. _Blaze: https://blaze.pydata.org .. _this blog post: https://matthewrocklin.com/blog/work/2015/02/13/Towards-OOC-Slicing-and-Stacking v0.3.2 (23 December, 2014) -------------------------- This release focused on bug-fixes, speedups and resolving some niggling inconsistencies. There are a few cases where the behavior of xray differs from the previous version. However, I expect that in almost all cases your code will continue to run unmodified. .. warning:: xray now requires pandas v0.15.0 or later. This was necessary for supporting TimedeltaIndex without too many painful hacks. Backwards incompatible changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Arrays of :py:class:`datetime.datetime` objects are now automatically cast to ``datetime64[ns]`` arrays when stored in an xray object, using machinery borrowed from pandas: .. code:: python from datetime import datetime xray.Dataset({"t": [datetime(2000, 1, 1)]}) - xray now has support (including serialization to netCDF) for :py:class:`~pandas.TimedeltaIndex`. :py:class:`datetime.timedelta` objects are thus accordingly cast to ``timedelta64[ns]`` objects when appropriate. - Masked arrays are now properly coerced to use ``NaN`` as a sentinel value (:issue:`259`). Enhancements ~~~~~~~~~~~~ - Due to popular demand, we have added experimental attribute style access as a shortcut for dataset variables, coordinates and attributes: .. code:: python ds = xray.Dataset({"tmin": ([], 25, {"units": "celsius"})}) ds.tmin.units Tab-completion for these variables should work in editors such as IPython. However, setting variables or attributes in this fashion is not yet supported because there are some unresolved ambiguities (:issue:`300`). - You can now use a dictionary for indexing with labeled dimensions. This provides a safe way to do assignment with labeled dimensions: .. code:: python array = xray.DataArray(np.zeros(5), dims=["x"]) array[dict(x=slice(3))] = 1 array - Non-index coordinates can now be faithfully written to and restored from netCDF files. This is done according to CF conventions when possible by using the ``coordinates`` attribute on a data variable. When not possible, xray defines a global ``coordinates`` attribute. - Preliminary support for converting ``xray.DataArray`` objects to and from CDAT_ ``cdms2`` variables. - We sped up any operation that involves creating a new Dataset or DataArray (e.g., indexing, aggregation, arithmetic) by a factor of 30 to 50%. The full speed up requires cyordereddict_ to be installed. .. _CDAT: http://uvcdat.llnl.gov/ .. 
_cyordereddict: https://github.com/shoyer/cyordereddict Bug fixes ~~~~~~~~~ - Fix for ``to_dataframe()`` with 0d string/object coordinates (:issue:`287`) - Fix for ``to_netcdf`` with 0d string variable (:issue:`284`) - Fix writing datetime64 arrays to netcdf if NaT is present (:issue:`270`) - Fix ``align`` silently upcasting data arrays when NaNs are inserted (:issue:`264`) Future plans ~~~~~~~~~~~~ - I am contemplating switching to the terms "coordinate variables" and "data variables" instead of the (currently used) "coordinates" and "variables", following their use in `CF Conventions`_ (:issue:`293`). This would mostly have implications for the documentation, but I would also change the ``Dataset`` attribute ``vars`` to ``data``. - I am no longer certain that automatic label alignment for arithmetic would be a good idea for xray -- it is a feature from pandas that I have not missed (:issue:`186`). - The main API breakage that I *do* anticipate in the next release is finally making all aggregation operations skip missing values by default (:issue:`130`). I'm pretty sick of writing ``ds.reduce(np.nanmean, 'time')``. - The next version of xray (0.4) will remove deprecated features and aliases whose use currently raises a warning. If you have opinions about any of these anticipated changes, I would love to hear them -- please add a note to any of the referenced GitHub issues. .. _CF Conventions: http://cfconventions.org/Data/cf-conventions/cf-conventions-1.6/build/cf-conventions.html v0.3.1 (22 October, 2014) ------------------------- This is mostly a bug-fix release to make xray compatible with the latest release of pandas (v0.15). We added several features to better support working with missing values and exporting xray objects to pandas. We also reorganized the internal API for serializing and deserializing datasets, but this change should be almost entirely transparent to users. Other than breaking the experimental DataStore API, there should be no backwards incompatible changes. New features ~~~~~~~~~~~~ - Added ``xray.Dataset.count`` and ``xray.Dataset.dropna`` methods, copied from pandas, for working with missing values (:issue:`247`, :issue:`58`). - Added ``xray.DataArray.to_pandas`` for converting a data array into the pandas object with the same dimensionality (1D to Series, 2D to DataFrame, etc.) (:issue:`255`). - Support for reading gzipped netCDF3 files (:issue:`239`). - Reduced memory usage when writing netCDF files (:issue:`251`). - 'missing_value' is now supported as an alias for the '_FillValue' attribute on netCDF variables (:issue:`245`). - Trivial indexes, equivalent to ``range(n)`` where ``n`` is the length of the dimension, are no longer written to disk (:issue:`245`). Bug fixes ~~~~~~~~~ - Compatibility fixes for pandas v0.15 (:issue:`262`). - Fixes for display and indexing of ``NaT`` (not-a-time) (:issue:`238`, :issue:`240`). - Fix slicing by label when an argument is a data array (:issue:`250`). - Test data is now shipped with the source distribution (:issue:`253`). - Ensure order does not matter when doing arithmetic with scalar data arrays (:issue:`254`). - Order of dimensions preserved with ``DataArray.to_dataframe`` (:issue:`260`). v0.3 (21 September 2014) ------------------------ New features ~~~~~~~~~~~~ - **Revamped coordinates**: "coordinates" now refer to all arrays that are not used to index a dimension. Coordinates are intended to allow for keeping track of arrays of metadata that describe the grid on which the points in "variable" arrays lie.
They are preserved (when unambiguous) even through mathematical operations. - **Dataset math**: ``xray.Dataset`` objects now support all arithmetic operations directly. Dataset-array operations map across all dataset variables; dataset-dataset operations act on each pair of variables with the same name. - **GroupBy math**: This provides a convenient shortcut for normalizing by the average value of a group. - The dataset ``__repr__`` method has been entirely overhauled; dataset objects now show their values when printed. - You can now index a dataset with a list of variables to return a new dataset: ``ds[['foo', 'bar']]``. Backwards incompatible changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - ``Dataset.__eq__`` and ``Dataset.__ne__`` are now element-wise operations instead of comparing all values to obtain a single boolean. Use the method ``xray.Dataset.equals`` instead. Deprecations ~~~~~~~~~~~~ - ``Dataset.noncoords`` is deprecated: use ``Dataset.vars`` instead. - ``Dataset.select_vars`` deprecated: index a ``Dataset`` with a list of variable names instead. - ``DataArray.select_vars`` and ``DataArray.drop_vars`` deprecated: use ``xray.DataArray.reset_coords`` instead. v0.2 (14 August 2014) --------------------- This is a major release that includes some new features and quite a few bug fixes. Here are the highlights: - There is now a direct constructor for ``DataArray`` objects, which makes it possible to create a DataArray without using a Dataset. This is highlighted in the refreshed ``tutorial``. - You can perform aggregation operations like ``mean`` directly on ``xray.Dataset`` objects, thanks to Joe Hamman. These aggregation methods also work on grouped datasets. - xray now works on Python 2.6, thanks to Anna Kuznetsova. - A number of methods and attributes were given more sensible (usually shorter) names: ``labeled`` -> ``sel``, ``indexed`` -> ``isel``, ``select`` -> ``select_vars``, ``unselect`` -> ``drop_vars``, ``dimensions`` -> ``dims``, ``coordinates`` -> ``coords``, ``attributes`` -> ``attrs``. - New ``xray.Dataset.load_data`` and ``xray.Dataset.close`` methods for datasets facilitate lower-level control of data loaded from disk. v0.1.1 (20 May 2014) -------------------- xray 0.1.1 is a bug-fix release that includes changes that should be almost entirely backwards compatible with v0.1: - Python 3 support (:issue:`53`) - Required numpy version relaxed to 1.7 (:issue:`129`) - Return numpy.datetime64 arrays for non-standard calendars (:issue:`126`) - Support for opening datasets associated with NetCDF4 groups (:issue:`127`) - Bug-fixes for concatenating datetime arrays (:issue:`134`) Special thanks to new contributors Thomas Kluyver, Joe Hamman and Alistair Miles. v0.1 (2 May 2014) ----------------- Initial release. python-xarray-2026.01.0/doc/api-hidden.rst0000664000175000017500000005110215136607163020343 0ustar alastairalastair..
Generate API reference pages, but don't display these in tables. .. This extra page is a work around for sphinx not having any support for .. hiding an autosummary table. :orphan: .. currentmodule:: xarray .. autosummary:: :toctree: generated/ core.coordinates.DatasetCoordinates.get core.coordinates.DatasetCoordinates.items core.coordinates.DatasetCoordinates.keys core.coordinates.DatasetCoordinates.values core.coordinates.DatasetCoordinates.dims core.coordinates.DatasetCoordinates.sizes core.coordinates.DatasetCoordinates.dtypes core.coordinates.DatasetCoordinates.variables core.coordinates.DatasetCoordinates.xindexes core.coordinates.DatasetCoordinates.indexes core.coordinates.DatasetCoordinates.to_dataset core.coordinates.DatasetCoordinates.to_index core.coordinates.DatasetCoordinates.update core.coordinates.DatasetCoordinates.assign core.coordinates.DatasetCoordinates.merge core.coordinates.DatasetCoordinates.copy core.coordinates.DatasetCoordinates.equals core.coordinates.DatasetCoordinates.identical computation.rolling.DatasetCoarsen.boundary computation.rolling.DatasetCoarsen.coord_func computation.rolling.DatasetCoarsen.obj computation.rolling.DatasetCoarsen.side computation.rolling.DatasetCoarsen.trim_excess computation.rolling.DatasetCoarsen.windows computation.rolling.DatasetRolling.center computation.rolling.DatasetRolling.dim computation.rolling.DatasetRolling.min_periods computation.rolling.DatasetRolling.obj computation.rolling.DatasetRolling.rollings computation.rolling.DatasetRolling.window computation.weighted.DatasetWeighted.obj computation.weighted.DatasetWeighted.weights Dataset.load_store Dataset.dump_to_store DataArray.astype DataArray.item core.coordinates.DataArrayCoordinates.get core.coordinates.DataArrayCoordinates.items core.coordinates.DataArrayCoordinates.keys core.coordinates.DataArrayCoordinates.values core.coordinates.DataArrayCoordinates.dims core.coordinates.DataArrayCoordinates.sizes core.coordinates.DataArrayCoordinates.dtypes core.coordinates.DataArrayCoordinates.variables core.coordinates.DataArrayCoordinates.xindexes core.coordinates.DataArrayCoordinates.indexes core.coordinates.DataArrayCoordinates.to_dataset core.coordinates.DataArrayCoordinates.to_index core.coordinates.DataArrayCoordinates.update core.coordinates.DataArrayCoordinates.assign core.coordinates.DataArrayCoordinates.merge core.coordinates.DataArrayCoordinates.copy core.coordinates.DataArrayCoordinates.equals core.coordinates.DataArrayCoordinates.identical computation.rolling.DataArrayCoarsen.boundary computation.rolling.DataArrayCoarsen.coord_func computation.rolling.DataArrayCoarsen.obj computation.rolling.DataArrayCoarsen.side computation.rolling.DataArrayCoarsen.trim_excess computation.rolling.DataArrayCoarsen.windows computation.rolling.DataArrayRolling.center computation.rolling.DataArrayRolling.dim computation.rolling.DataArrayRolling.min_periods computation.rolling.DataArrayRolling.obj computation.rolling.DataArrayRolling.window computation.rolling.DataArrayRolling.window_labels computation.weighted.DataArrayWeighted.obj computation.weighted.DataArrayWeighted.weights core.coordinates.DataTreeCoordinates.get core.coordinates.DataTreeCoordinates.items core.coordinates.DataTreeCoordinates.keys core.coordinates.DataTreeCoordinates.values core.coordinates.DataTreeCoordinates.dims core.coordinates.DataTreeCoordinates.sizes core.coordinates.DataTreeCoordinates.dtypes core.coordinates.DataTreeCoordinates.variables core.coordinates.DataTreeCoordinates.xindexes 
core.coordinates.DataTreeCoordinates.indexes core.coordinates.DataTreeCoordinates.to_dataset core.coordinates.DataTreeCoordinates.to_index core.coordinates.DataTreeCoordinates.update core.coordinates.DataTreeCoordinates.assign core.coordinates.DataTreeCoordinates.merge core.coordinates.DataTreeCoordinates.copy core.coordinates.DataTreeCoordinates.equals core.coordinates.DataTreeCoordinates.identical core.accessor_dt.DatetimeAccessor.ceil core.accessor_dt.DatetimeAccessor.floor core.accessor_dt.DatetimeAccessor.round core.accessor_dt.DatetimeAccessor.strftime core.accessor_dt.DatetimeAccessor.calendar core.accessor_dt.DatetimeAccessor.date core.accessor_dt.DatetimeAccessor.day core.accessor_dt.DatetimeAccessor.dayofweek core.accessor_dt.DatetimeAccessor.dayofyear core.accessor_dt.DatetimeAccessor.days_in_month core.accessor_dt.DatetimeAccessor.daysinmonth core.accessor_dt.DatetimeAccessor.hour core.accessor_dt.DatetimeAccessor.is_leap_year core.accessor_dt.DatetimeAccessor.is_month_end core.accessor_dt.DatetimeAccessor.is_month_start core.accessor_dt.DatetimeAccessor.is_quarter_end core.accessor_dt.DatetimeAccessor.is_quarter_start core.accessor_dt.DatetimeAccessor.is_year_end core.accessor_dt.DatetimeAccessor.is_year_start core.accessor_dt.DatetimeAccessor.isocalendar core.accessor_dt.DatetimeAccessor.microsecond core.accessor_dt.DatetimeAccessor.minute core.accessor_dt.DatetimeAccessor.month core.accessor_dt.DatetimeAccessor.nanosecond core.accessor_dt.DatetimeAccessor.quarter core.accessor_dt.DatetimeAccessor.season core.accessor_dt.DatetimeAccessor.second core.accessor_dt.DatetimeAccessor.time core.accessor_dt.DatetimeAccessor.week core.accessor_dt.DatetimeAccessor.weekday core.accessor_dt.DatetimeAccessor.weekofyear core.accessor_dt.DatetimeAccessor.year core.accessor_dt.TimedeltaAccessor.ceil core.accessor_dt.TimedeltaAccessor.floor core.accessor_dt.TimedeltaAccessor.round core.accessor_dt.TimedeltaAccessor.days core.accessor_dt.TimedeltaAccessor.microseconds core.accessor_dt.TimedeltaAccessor.nanoseconds core.accessor_dt.TimedeltaAccessor.seconds core.accessor_str.StringAccessor.capitalize core.accessor_str.StringAccessor.casefold core.accessor_str.StringAccessor.cat core.accessor_str.StringAccessor.center core.accessor_str.StringAccessor.contains core.accessor_str.StringAccessor.count core.accessor_str.StringAccessor.decode core.accessor_str.StringAccessor.encode core.accessor_str.StringAccessor.endswith core.accessor_str.StringAccessor.extract core.accessor_str.StringAccessor.extractall core.accessor_str.StringAccessor.find core.accessor_str.StringAccessor.findall core.accessor_str.StringAccessor.format core.accessor_str.StringAccessor.get core.accessor_str.StringAccessor.get_dummies core.accessor_str.StringAccessor.index core.accessor_str.StringAccessor.isalnum core.accessor_str.StringAccessor.isalpha core.accessor_str.StringAccessor.isdecimal core.accessor_str.StringAccessor.isdigit core.accessor_str.StringAccessor.islower core.accessor_str.StringAccessor.isnumeric core.accessor_str.StringAccessor.isspace core.accessor_str.StringAccessor.istitle core.accessor_str.StringAccessor.isupper core.accessor_str.StringAccessor.join core.accessor_str.StringAccessor.len core.accessor_str.StringAccessor.ljust core.accessor_str.StringAccessor.lower core.accessor_str.StringAccessor.lstrip core.accessor_str.StringAccessor.match core.accessor_str.StringAccessor.normalize core.accessor_str.StringAccessor.pad core.accessor_str.StringAccessor.partition core.accessor_str.StringAccessor.repeat 
core.accessor_str.StringAccessor.replace core.accessor_str.StringAccessor.rfind core.accessor_str.StringAccessor.rindex core.accessor_str.StringAccessor.rjust core.accessor_str.StringAccessor.rpartition core.accessor_str.StringAccessor.rsplit core.accessor_str.StringAccessor.rstrip core.accessor_str.StringAccessor.slice core.accessor_str.StringAccessor.slice_replace core.accessor_str.StringAccessor.split core.accessor_str.StringAccessor.startswith core.accessor_str.StringAccessor.strip core.accessor_str.StringAccessor.swapcase core.accessor_str.StringAccessor.title core.accessor_str.StringAccessor.translate core.accessor_str.StringAccessor.upper core.accessor_str.StringAccessor.wrap core.accessor_str.StringAccessor.zfill Variable.all Variable.any Variable.argmax Variable.argmin Variable.argsort Variable.astype Variable.broadcast_equals Variable.chunk Variable.clip Variable.coarsen Variable.compute Variable.concat Variable.conj Variable.conjugate Variable.copy Variable.count Variable.cumprod Variable.cumsum Variable.equals Variable.fillna Variable.get_axis_num Variable.identical Variable.isel Variable.isnull Variable.item Variable.load Variable.load_async Variable.max Variable.mean Variable.median Variable.min Variable.no_conflicts Variable.notnull Variable.pad Variable.prod Variable.quantile Variable.rank Variable.reduce Variable.roll Variable.rolling_window Variable.round Variable.searchsorted Variable.set_dims Variable.shift Variable.squeeze Variable.stack Variable.std Variable.sum Variable.to_base_variable Variable.to_coord Variable.to_dict Variable.to_index Variable.to_index_variable Variable.to_variable Variable.transpose Variable.unstack Variable.var Variable.where Variable.T Variable.attrs Variable.chunks Variable.data Variable.dims Variable.dtype Variable.encoding Variable.drop_encoding Variable.imag Variable.nbytes Variable.ndim Variable.real Variable.shape Variable.size Variable.sizes Variable.values IndexVariable.all IndexVariable.any IndexVariable.argmax IndexVariable.argmin IndexVariable.argsort IndexVariable.astype IndexVariable.broadcast_equals IndexVariable.chunk IndexVariable.clip IndexVariable.coarsen IndexVariable.compute IndexVariable.concat IndexVariable.conj IndexVariable.conjugate IndexVariable.copy IndexVariable.count IndexVariable.cumprod IndexVariable.cumsum IndexVariable.equals IndexVariable.fillna IndexVariable.get_axis_num IndexVariable.get_level_variable IndexVariable.identical IndexVariable.isel IndexVariable.isnull IndexVariable.item IndexVariable.load IndexVariable.max IndexVariable.mean IndexVariable.median IndexVariable.min IndexVariable.no_conflicts IndexVariable.notnull IndexVariable.pad IndexVariable.prod IndexVariable.quantile IndexVariable.rank IndexVariable.reduce IndexVariable.roll IndexVariable.rolling_window IndexVariable.round IndexVariable.searchsorted IndexVariable.set_dims IndexVariable.shift IndexVariable.squeeze IndexVariable.stack IndexVariable.std IndexVariable.sum IndexVariable.to_base_variable IndexVariable.to_coord IndexVariable.to_dict IndexVariable.to_index IndexVariable.to_index_variable IndexVariable.to_variable IndexVariable.transpose IndexVariable.unstack IndexVariable.var IndexVariable.where IndexVariable.T IndexVariable.attrs IndexVariable.chunks IndexVariable.data IndexVariable.dims IndexVariable.dtype IndexVariable.encoding IndexVariable.imag IndexVariable.level_names IndexVariable.name IndexVariable.nbytes IndexVariable.ndim IndexVariable.real IndexVariable.shape IndexVariable.size IndexVariable.sizes IndexVariable.values 
NamedArray.all NamedArray.any NamedArray.attrs NamedArray.broadcast_to NamedArray.chunks NamedArray.chunksizes NamedArray.copy NamedArray.count NamedArray.cumprod NamedArray.cumsum NamedArray.data NamedArray.dims NamedArray.dtype NamedArray.expand_dims NamedArray.get_axis_num NamedArray.max NamedArray.mean NamedArray.median NamedArray.min NamedArray.nbytes NamedArray.ndim NamedArray.prod NamedArray.reduce NamedArray.shape NamedArray.size NamedArray.sizes NamedArray.std NamedArray.sum NamedArray.var plot.plot plot.line plot.step plot.hist plot.contour plot.contourf plot.imshow plot.pcolormesh plot.scatter plot.surface CFTimeIndex.all CFTimeIndex.any CFTimeIndex.append CFTimeIndex.argsort CFTimeIndex.argmax CFTimeIndex.argmin CFTimeIndex.asof CFTimeIndex.asof_locs CFTimeIndex.astype CFTimeIndex.calendar CFTimeIndex.ceil CFTimeIndex.contains CFTimeIndex.copy CFTimeIndex.days_in_month CFTimeIndex.delete CFTimeIndex.difference CFTimeIndex.drop CFTimeIndex.drop_duplicates CFTimeIndex.droplevel CFTimeIndex.dropna CFTimeIndex.duplicated CFTimeIndex.equals CFTimeIndex.factorize CFTimeIndex.fillna CFTimeIndex.floor CFTimeIndex.format CFTimeIndex.get_indexer CFTimeIndex.get_indexer_for CFTimeIndex.get_indexer_non_unique CFTimeIndex.get_level_values CFTimeIndex.get_loc CFTimeIndex.get_slice_bound CFTimeIndex.get_value CFTimeIndex.groupby CFTimeIndex.holds_integer CFTimeIndex.identical CFTimeIndex.insert CFTimeIndex.intersection CFTimeIndex.is_ CFTimeIndex.is_boolean CFTimeIndex.is_categorical CFTimeIndex.is_floating CFTimeIndex.is_integer CFTimeIndex.is_interval CFTimeIndex.is_numeric CFTimeIndex.is_object CFTimeIndex.isin CFTimeIndex.isna CFTimeIndex.isnull CFTimeIndex.item CFTimeIndex.join CFTimeIndex.map CFTimeIndex.max CFTimeIndex.memory_usage CFTimeIndex.min CFTimeIndex.notna CFTimeIndex.notnull CFTimeIndex.nunique CFTimeIndex.putmask CFTimeIndex.ravel CFTimeIndex.reindex CFTimeIndex.rename CFTimeIndex.repeat CFTimeIndex.round CFTimeIndex.searchsorted CFTimeIndex.set_names CFTimeIndex.shift CFTimeIndex.slice_indexer CFTimeIndex.slice_locs CFTimeIndex.sort CFTimeIndex.sort_values CFTimeIndex.sortlevel CFTimeIndex.strftime CFTimeIndex.symmetric_difference CFTimeIndex.take CFTimeIndex.to_datetimeindex CFTimeIndex.to_flat_index CFTimeIndex.to_frame CFTimeIndex.to_list CFTimeIndex.to_numpy CFTimeIndex.to_series CFTimeIndex.tolist CFTimeIndex.transpose CFTimeIndex.union CFTimeIndex.unique CFTimeIndex.value_counts CFTimeIndex.view CFTimeIndex.where CFTimeIndex.T CFTimeIndex.array CFTimeIndex.asi8 CFTimeIndex.date_type CFTimeIndex.day CFTimeIndex.dayofweek CFTimeIndex.dayofyear CFTimeIndex.dtype CFTimeIndex.empty CFTimeIndex.freq CFTimeIndex.has_duplicates CFTimeIndex.hasnans CFTimeIndex.hour CFTimeIndex.inferred_type CFTimeIndex.is_monotonic_increasing CFTimeIndex.is_monotonic_decreasing CFTimeIndex.is_unique CFTimeIndex.microsecond CFTimeIndex.minute CFTimeIndex.month CFTimeIndex.name CFTimeIndex.names CFTimeIndex.nbytes CFTimeIndex.ndim CFTimeIndex.nlevels CFTimeIndex.second CFTimeIndex.shape CFTimeIndex.size CFTimeIndex.values CFTimeIndex.year indexes.RangeIndex.start indexes.RangeIndex.stop indexes.RangeIndex.step backends.NetCDF4DataStore.close backends.NetCDF4DataStore.encode backends.NetCDF4DataStore.encode_attribute backends.NetCDF4DataStore.encode_variable backends.NetCDF4DataStore.get_attrs backends.NetCDF4DataStore.get_dimensions backends.NetCDF4DataStore.get_encoding backends.NetCDF4DataStore.get_variables backends.NetCDF4DataStore.load backends.NetCDF4DataStore.open 
backends.NetCDF4DataStore.open_store_variable backends.NetCDF4DataStore.prepare_variable backends.NetCDF4DataStore.set_attribute backends.NetCDF4DataStore.set_attributes backends.NetCDF4DataStore.set_dimension backends.NetCDF4DataStore.set_dimensions backends.NetCDF4DataStore.set_variable backends.NetCDF4DataStore.set_variables backends.NetCDF4DataStore.store backends.NetCDF4DataStore.store_dataset backends.NetCDF4DataStore.sync backends.NetCDF4DataStore.autoclose backends.NetCDF4DataStore.ds backends.NetCDF4DataStore.format backends.NetCDF4DataStore.is_remote backends.NetCDF4DataStore.lock backends.NetCDF4BackendEntrypoint.description backends.NetCDF4BackendEntrypoint.url backends.NetCDF4BackendEntrypoint.guess_can_open backends.NetCDF4BackendEntrypoint.open_dataset backends.H5NetCDFStore.autoclose backends.H5NetCDFStore.close backends.H5NetCDFStore.encode backends.H5NetCDFStore.encode_attribute backends.H5NetCDFStore.encode_variable backends.H5NetCDFStore.format backends.H5NetCDFStore.get_attrs backends.H5NetCDFStore.get_dimensions backends.H5NetCDFStore.get_encoding backends.H5NetCDFStore.get_variables backends.H5NetCDFStore.is_remote backends.H5NetCDFStore.load backends.H5NetCDFStore.lock backends.H5NetCDFStore.open backends.H5NetCDFStore.open_store_variable backends.H5NetCDFStore.prepare_variable backends.H5NetCDFStore.set_attribute backends.H5NetCDFStore.set_attributes backends.H5NetCDFStore.set_dimension backends.H5NetCDFStore.set_dimensions backends.H5NetCDFStore.set_variable backends.H5NetCDFStore.set_variables backends.H5NetCDFStore.store backends.H5NetCDFStore.store_dataset backends.H5NetCDFStore.sync backends.H5NetCDFStore.ds backends.H5netcdfBackendEntrypoint.description backends.H5netcdfBackendEntrypoint.url backends.H5netcdfBackendEntrypoint.guess_can_open backends.H5netcdfBackendEntrypoint.open_dataset backends.PydapDataStore.close backends.PydapDataStore.get_attrs backends.PydapDataStore.get_dimensions backends.PydapDataStore.get_encoding backends.PydapDataStore.get_variables backends.PydapDataStore.load backends.PydapDataStore.open backends.PydapDataStore.open_store_variable backends.PydapBackendEntrypoint.description backends.PydapBackendEntrypoint.url backends.PydapBackendEntrypoint.guess_can_open backends.PydapBackendEntrypoint.open_dataset backends.ScipyDataStore.close backends.ScipyDataStore.encode backends.ScipyDataStore.encode_attribute backends.ScipyDataStore.encode_variable backends.ScipyDataStore.get_attrs backends.ScipyDataStore.get_dimensions backends.ScipyDataStore.get_encoding backends.ScipyDataStore.get_variables backends.ScipyDataStore.load backends.ScipyDataStore.open_store_variable backends.ScipyDataStore.prepare_variable backends.ScipyDataStore.set_attribute backends.ScipyDataStore.set_attributes backends.ScipyDataStore.set_dimension backends.ScipyDataStore.set_dimensions backends.ScipyDataStore.set_variable backends.ScipyDataStore.set_variables backends.ScipyDataStore.store backends.ScipyDataStore.store_dataset backends.ScipyDataStore.sync backends.ScipyDataStore.ds backends.ScipyBackendEntrypoint.description backends.ScipyBackendEntrypoint.url backends.ScipyBackendEntrypoint.guess_can_open backends.ScipyBackendEntrypoint.open_dataset backends.ZarrStore.close backends.ZarrStore.encode_attribute backends.ZarrStore.encode_variable backends.ZarrStore.get_attrs backends.ZarrStore.get_dimensions backends.ZarrStore.get_variables backends.ZarrStore.open_group backends.ZarrStore.open_store_variable backends.ZarrStore.set_attributes 
backends.ZarrStore.set_dimensions backends.ZarrStore.set_variables backends.ZarrStore.store backends.ZarrStore.sync backends.ZarrStore.ds backends.ZarrBackendEntrypoint.description backends.ZarrBackendEntrypoint.url backends.ZarrBackendEntrypoint.guess_can_open backends.ZarrBackendEntrypoint.open_dataset backends.StoreBackendEntrypoint.description backends.StoreBackendEntrypoint.url backends.StoreBackendEntrypoint.guess_can_open backends.StoreBackendEntrypoint.open_dataset backends.FileManager.acquire backends.FileManager.acquire_context backends.FileManager.close backends.CachingFileManager.acquire backends.CachingFileManager.acquire_context backends.CachingFileManager.close backends.DummyFileManager.acquire backends.DummyFileManager.acquire_context backends.DummyFileManager.close backends.BackendArray backends.BackendEntrypoint.guess_can_open backends.BackendEntrypoint.open_dataset core.indexing.IndexingSupport core.indexing.explicit_indexing_adapter core.indexing.BasicIndexer core.indexing.OuterIndexer core.indexing.VectorizedIndexer core.indexing.LazilyIndexedArray core.indexing.LazilyVectorizedIndexedArray conventions.decode_cf_variables coding.variables.CFMaskCoder coding.variables.CFScaleOffsetCoder coding.strings.CharacterArrayCoder coding.strings.EncodedStringCoder coding.times.CFTimedeltaCoder coding.times.CFDatetimeCoder groupers.Grouper groupers.Resampler groupers.EncodedGroups ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������python-xarray-2026.01.0/doc/conf.py�����������������������������������������������������������������0000664�0001750�0001750�00000036156�15136607163�017122� 0����������������������������������������������������������������������������������������������������ustar �alastair������������������������alastair���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������import datetime import inspect import os import pathlib import subprocess import sys from contextlib import suppress from textwrap import dedent, indent import packaging.version import sphinx_autosummary_accessors import yaml from sphinx.application import Sphinx from sphinx.util import logging import xarray LOGGER = logging.getLogger("conf") allowed_failures = set() print("python exec:", sys.executable) print("sys.path:", sys.path) print(f"xarray: {xarray.__version__}, {xarray.__file__}") with suppress(ImportError): import matplotlib matplotlib.use("Agg") try: import cartopy # noqa: F401 except ImportError: allowed_failures.update( [ "gallery/plot_cartopy_facetgrid.py", ] ) # -- General configuration ------------------------------------------------ # If your documentation needs a minimal Sphinx version, state it here. # needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. 
extensions = [ "sphinxcontrib.mermaid", "sphinx.ext.autodoc", "sphinx.ext.autosummary", "sphinx.ext.intersphinx", "sphinx.ext.extlinks", "sphinx.ext.mathjax", "sphinx.ext.napoleon", "jupyter_sphinx", "nbsphinx", "sphinx_autosummary_accessors", "sphinx.ext.linkcode", "sphinxext.opengraph", "sphinx_copybutton", "sphinxext.rediraffe", "sphinx_design", "sphinx_inline_tabs", "sphinx_remove_toctrees", "sphinx_llm.txt", ] extlinks = { "issue": ("https://github.com/pydata/xarray/issues/%s", "GH%s"), "pull": ("https://github.com/pydata/xarray/pull/%s", "PR%s"), "discussion": ("https://github.com/pydata/xarray/discussions/%s", "D%s"), } # sphinx-copybutton configuration copybutton_prompt_text = r">>> |\.\.\. |\$ |In \[\d*\]: | {2,5}\.{3,}: | {5,8}: " copybutton_prompt_is_regexp = True # NBSphinx configuration nbsphinx_timeout = 600 nbsphinx_execute = "always" nbsphinx_allow_errors = False nbsphinx_requirejs_path = "" # png2x/retina rendering of figues in docs would also need to modify custom.css: # https://github.com/spatialaudio/nbsphinx/issues/464#issuecomment-652729126 # .rst-content .image-reference img { # max-width: unset; # width: 100% !important; # height: auto !important; # } # nbsphinx_execute_arguments = [ # "--InlineBackend.figure_formats=['png2x']", # ] nbsphinx_prolog = """ {% set docname = env.doc2path(env.docname, base=None) %} You can run this notebook in a `live session `_ |Binder| or view it `on Github `_. .. |Binder| image:: https://mybinder.org/badge.svg :target: https://mybinder.org/v2/gh/pydata/xarray/main?urlpath=lab/tree/doc/{{ docname }} """ # AutoDoc configuration autosummary_generate = True autodoc_typehints = "none" # Napoleon configuration napoleon_google_docstring = False napoleon_numpy_docstring = True napoleon_use_param = False napoleon_use_rtype = False napoleon_preprocess_types = True napoleon_type_aliases = { # general terms "sequence": ":term:`sequence`", "iterable": ":term:`iterable`", "callable": ":py:func:`callable`", "dict_like": ":term:`dict-like `", "dict-like": ":term:`dict-like `", "path-like": ":term:`path-like `", "mapping": ":term:`mapping`", "file-like": ":term:`file-like `", # special terms # "same type as caller": "*same type as caller*", # does not work, yet # "same type as values": "*same type as values*", # does not work, yet # stdlib type aliases "MutableMapping": "~collections.abc.MutableMapping", "sys.stdout": ":obj:`sys.stdout`", "timedelta": "~datetime.timedelta", "string": ":class:`string `", # numpy terms "array_like": ":term:`array_like`", "array-like": ":term:`array-like `", "scalar": ":term:`scalar`", "array": ":term:`array`", "hashable": ":term:`hashable `", # matplotlib terms "color-like": ":py:func:`color-like `", "matplotlib colormap name": ":doc:`matplotlib colormap name `", "matplotlib axes object": ":py:class:`matplotlib axes object `", "colormap": ":py:class:`colormap `", # xarray terms "dim name": ":term:`dimension name `", "var name": ":term:`variable name `", # objects without namespace: xarray "DataArray": "~xarray.DataArray", "Dataset": "~xarray.Dataset", "Variable": "~xarray.Variable", "DataTree": "~xarray.DataTree", "DatasetGroupBy": "~xarray.core.groupby.DatasetGroupBy", "DataArrayGroupBy": "~xarray.core.groupby.DataArrayGroupBy", "Grouper": "~xarray.groupers.Grouper", "Resampler": "~xarray.groupers.Resampler", # objects without namespace: numpy "ndarray": "~numpy.ndarray", "MaskedArray": "~numpy.ma.MaskedArray", "dtype": "~numpy.dtype", "ComplexWarning": "~numpy.ComplexWarning", # objects without namespace: pandas 
"Index": "~pandas.Index", "MultiIndex": "~pandas.MultiIndex", "CategoricalIndex": "~pandas.CategoricalIndex", "TimedeltaIndex": "~pandas.TimedeltaIndex", "DatetimeIndex": "~pandas.DatetimeIndex", "IntervalIndex": "~pandas.IntervalIndex", "Series": "~pandas.Series", "DataFrame": "~pandas.DataFrame", "Categorical": "~pandas.Categorical", "Path": "~~pathlib.Path", # objects with abbreviated namespace (from pandas) "pd.Index": "~pandas.Index", "pd.NaT": "~pandas.NaT", } autodoc_type_aliases = napoleon_type_aliases # Keep both in sync # mermaid config mermaid_version = "11.6.0" # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates", sphinx_autosummary_accessors.templates_path] # The master toctree document. master_doc = "index" remove_from_toctrees = ["generated/*"] # The language for content autogenerated by Sphinx. language = "en" # General information about the project. project = "xarray" copyright = f"2014-{datetime.datetime.now().year}, xarray Developers" # The short Y.M.D version. v = packaging.version.parse(xarray.__version__) version = ".".join(str(p) for p in v.release) # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: # today = '' # Else, today_fmt is used as the format for a strftime call. today_fmt = "%Y-%m-%d" # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. exclude_patterns = ["_build", "debug.ipynb", "**.ipynb_checkpoints"] # The name of the Pygments (syntax highlighting) style to use. pygments_style = "sphinx" # -- Options for HTML output ---------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. html_theme = "pydata_sphinx_theme" html_title = "" html_context = { "github_user": "pydata", "github_repo": "xarray", "github_version": "main", "doc_path": "doc", } # https://pydata-sphinx-theme.readthedocs.io/en/stable/user_guide/layout.html#references html_theme_options = { #"announcement":"🍾 Xarray is now 10 years old! 🎉", "logo": {"image_dark": "https://docs.xarray.dev/en/stable/_static/logos/Xarray_Logo_FullColor_InverseRGB_Final.svg"}, "github_url":"https://github.com/pydata/xarray", "show_version_warning_banner":True, "use_edit_page_button":True, "header_links_before_dropdown": 8, "navbar_align": "left", "footer_center":["last-updated"], # Instead of adding these to the header bar they are linked in 'getting help' and 'contributing' # "icon_links": [ # { # "name": "Discord", # "url": "https://discord.com/invite/wEKPCt4PDu", # "icon": "fa-brands fa-discord", # }, # { # "name": "X", # "url": "https://x.com/xarray_dev", # "icon": "fa-brands fa-x-twitter", # }, # { # "name": "Bluesky", # "url": "https://bsky.app/profile/xarray.bsky.social", # "icon": "fa-brands fa-bluesky", # }, # ] } # pydata_sphinx_theme use_edit_page_button with github link seems better html_show_sourcelink = False # The name of an image file (relative to this directory) to place at the top # of the sidebar. html_logo = "_static/logos/Xarray_Logo_RGB_Final.svg" # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. html_favicon = "_static/logos/Xarray_Icon_Final.svg" # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. 
They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ["_static"] html_css_files = ["style.css"] # configuration for sphinxext.opengraph ogp_site_url = "https://docs.xarray.dev/en/latest/" ogp_image = "https://docs.xarray.dev/en/stable/_static/logos/Xarray_Logo_RGB_Final.png" ogp_custom_meta_tags = ( '', '', '', ) # Redirects for pages that were moved to new locations rediraffe_redirects = { "terminology.rst": "user-guide/terminology.rst", "data-structures.rst": "user-guide/data-structures.rst", "indexing.rst": "user-guide/indexing.rst", "interpolation.rst": "user-guide/interpolation.rst", "computation.rst": "user-guide/computation.rst", "groupby.rst": "user-guide/groupby.rst", "reshaping.rst": "user-guide/reshaping.rst", "combining.rst": "user-guide/combining.rst", "time-series.rst": "user-guide/time-series.rst", "weather-climate.rst": "user-guide/weather-climate.rst", "pandas.rst": "user-guide/pandas.rst", "io.rst": "user-guide/io.rst", "dask.rst": "user-guide/dask.rst", "plotting.rst": "user-guide/plotting.rst", "duckarrays.rst": "user-guide/duckarrays.rst", "related-projects.rst": "user-guide/ecosystem.rst", "faq.rst": "get-help/faq.rst", "why-xarray.rst": "getting-started-guide/why-xarray.rst", "installing.rst": "getting-started-guide/installing.rst", "quick-overview.rst": "getting-started-guide/quick-overview.rst", "contributing.rst": "contribute/contributing.rst", "developers-meeting.rst": "contribute/developers-meeting.rst", } # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. html_last_updated_fmt = today_fmt # Output file base name for HTML help builder. htmlhelp_basename = "xarraydoc" # Example configuration for intersphinx: refer to the Python standard library. 
intersphinx_mapping = { "cftime": ("https://unidata.github.io/cftime", None), "cubed": ("https://cubed-dev.github.io/cubed/", None), "dask": ("https://docs.dask.org/en/latest", None), "flox": ("https://flox.readthedocs.io/en/latest/", None), "hypothesis": ("https://hypothesis.readthedocs.io/en/latest/", None), "iris": ("https://scitools-iris.readthedocs.io/en/latest", None), "matplotlib": ("https://matplotlib.org/stable/", None), "numba": ("https://numba.readthedocs.io/en/stable/", None), "numpy": ("https://numpy.org/doc/stable", None), "pandas": ("https://pandas.pydata.org/pandas-docs/stable", None), "python": ("https://docs.python.org/3/", None), "scipy": ("https://docs.scipy.org/doc/scipy", None), "sparse": ("https://sparse.pydata.org/en/latest/", None), "xarray-tutorial": ("https://tutorial.xarray.dev/", None), "zarr": ("https://zarr.readthedocs.io/en/stable/", None), "xarray-lmfit": ("https://xarray-lmfit.readthedocs.io/stable", None), } # based on numpy doc/source/conf.py def linkcode_resolve(domain, info): """ Determine the URL corresponding to Python object """ if domain != "py": return None modname = info["module"] fullname = info["fullname"] submod = sys.modules.get(modname) if submod is None: return None obj = submod for part in fullname.split("."): try: obj = getattr(obj, part) except AttributeError: return None try: fn = inspect.getsourcefile(inspect.unwrap(obj)) except TypeError: fn = None if not fn: return None try: source, lineno = inspect.getsourcelines(obj) except OSError: lineno = None if lineno: linespec = f"#L{lineno}-L{lineno + len(source) - 1}" else: linespec = "" fn = os.path.relpath(fn, start=os.path.dirname(xarray.__file__)) if "+" in xarray.__version__: return f"https://github.com/pydata/xarray/blob/main/xarray/{fn}{linespec}" else: return ( f"https://github.com/pydata/xarray/blob/" f"v{xarray.__version__}/xarray/{fn}{linespec}" ) def html_page_context(app, pagename, templatename, context, doctree): # Disable edit button for docstring generated pages if "generated" in pagename: context["theme_use_edit_page_button"] = False def update_gallery(app: Sphinx): """Update the gallery page.""" LOGGER.info("Updating gallery page...") gallery = yaml.safe_load(pathlib.Path(app.srcdir, "gallery.yml").read_bytes()) for key in gallery: items = [ f""" .. grid-item-card:: :text-align: center :link: {item['path']} .. image:: {item['thumbnail']} :alt: {item['title']} +++ {item['title']} """ for item in gallery[key] ] items_md = indent(dedent("\n".join(items)), prefix=" ") markdown = f""" .. grid:: 1 2 2 2 :gutter: 2 {items_md} """ pathlib.Path(app.srcdir, f"{key}-gallery.txt").write_text(markdown) LOGGER.info(f"{key} gallery page updated.") LOGGER.info("Gallery page updated.") def update_videos(app: Sphinx): """Update the videos page.""" LOGGER.info("Updating videos page...") videos = yaml.safe_load(pathlib.Path(app.srcdir, "videos.yml").read_bytes()) items = [] for video in videos: authors = " | ".join(video["authors"]) item = f""" .. grid-item-card:: {" ".join(video["title"].split())} :text-align: center .. raw:: html {video['src']} +++ {authors} """ items.append(item) items_md = indent(dedent("\n".join(items)), prefix=" ") markdown = f""" .. 
grid:: 1 2 2 2 :gutter: 2 {items_md} """ pathlib.Path(app.srcdir, "videos-gallery.txt").write_text(markdown) LOGGER.info("Videos page updated.") def setup(app: Sphinx): app.connect("html-page-context", html_page_context) app.connect("builder-inited", update_gallery) app.connect("builder-inited", update_videos) python-xarray-2026.01.0/doc/getting-started-guide/0000775000175000017500000000000015136607163022010 5ustar alastairalastairpython-xarray-2026.01.0/doc/getting-started-guide/index.rst0000664000175000017500000000053715136607163023656 0ustar alastairalastair################ Getting Started ################ The getting started guide aims to get you using Xarray productively as quickly as possible. It is designed as an entry point for new users, and it provided an introduction to Xarray's main concepts. .. toctree:: :maxdepth: 2 why-xarray installing quick-overview tutorials-and-videos python-xarray-2026.01.0/doc/getting-started-guide/installing.rst0000664000175000017500000001444215136607163024713 0ustar alastairalastair.. _installing: Installation ============ Required dependencies --------------------- - Python (3.11 or later) - `numpy `__ (1.26 or later) - `packaging `__ (24.1 or later) - `pandas `__ (2.2 or later) .. _optional-dependencies: Optional dependencies --------------------- .. note:: If you are using pip to install xarray, optional dependencies can be installed by specifying *extras*. :ref:`installation-instructions` for both pip and conda are given below. For netCDF and IO ~~~~~~~~~~~~~~~~~ - `netCDF4 `__: recommended if you want to use xarray for reading or writing netCDF files - `scipy `__: used as a fallback for reading/writing netCDF3 - `pydap `__: used as a fallback for accessing OPeNDAP - `h5netcdf `__: an alternative library for reading and writing netCDF4 files that does not use the netCDF-C libraries - `zarr `__: for chunked, compressed, N-dimensional arrays. - `cftime `__: recommended if you want to encode/decode datetimes for non-standard calendars or dates before year 1678 or after year 2262. - `iris `__: for conversion to and from iris' Cube objects For accelerating xarray ~~~~~~~~~~~~~~~~~~~~~~~ - `scipy `__: necessary to enable the interpolation features for xarray objects - `bottleneck `__: speeds up NaN-skipping and rolling window aggregations by a large factor - `numbagg `_: for exponential rolling window operations For parallel computing ~~~~~~~~~~~~~~~~~~~~~~ - `dask.array `__: required for :ref:`dask`. For plotting ~~~~~~~~~~~~ - `matplotlib `__: required for :ref:`plotting` - `cartopy `__: recommended for :ref:`plot-maps` - `seaborn `__: for better color palettes - `nc-time-axis `__: for plotting cftime.datetime objects Alternative data containers ~~~~~~~~~~~~~~~~~~~~~~~~~~~ - `sparse `_: for sparse arrays - `pint `_: for units of measure - Any numpy-like objects that support `NEP-18 `_. Note that while such libraries theoretically should work, they are untested. Integration tests are in the process of being written for individual libraries. .. _mindeps_policy: Minimum dependency versions --------------------------- Xarray adopts a rolling policy regarding the minimum supported version of its dependencies: - **Python:** 30 months (`NEP-29 `_) - **numpy:** 18 months (`NEP-29 `_) - **all other libraries:** 12 months This means the latest minor (X.Y) version from N months prior. Patch versions (x.y.Z) are not pinned, and only the latest available at the moment of publishing the xarray release is guaranteed to work. 
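If you want to check how your own environment compares against this policy, one quick way (a minimal sketch; the exact list of libraries reported depends on your xarray version and on what is installed) is to print the installed versions of xarray and its dependencies with :py:func:`xarray.show_versions`:

.. code:: python

    import xarray as xr

    # Prints the versions of xarray and of its required and optional
    # dependencies; libraries that are not installed are reported as None.
    xr.show_versions()
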
You can see the actual minimum tested versions: ``_ .. _installation-instructions: Instructions ------------ Xarray itself is a pure Python package, but its dependencies are not. The easiest way to get everything installed is to use conda_. To install xarray with its recommended dependencies using the conda command line tool:: $ conda install -c conda-forge xarray dask netCDF4 bottleneck .. _conda: https://docs.conda.io If you require other :ref:`optional-dependencies` add them to the line above. We recommend using the community maintained `conda-forge `__ channel, as some of the dependencies are difficult to build. New releases may also appear in conda-forge before being updated in the default channel. If you don't use conda, be sure you have the required dependencies (numpy and pandas) installed first. Then, install xarray with pip:: $ python -m pip install xarray We also maintain other dependency sets for different subsets of functionality:: $ python -m pip install "xarray[io]" # Install optional dependencies for handling I/O $ python -m pip install "xarray[accel]" # Install optional dependencies for accelerating xarray $ python -m pip install "xarray[parallel]" # Install optional dependencies for dask arrays $ python -m pip install "xarray[viz]" # Install optional dependencies for visualization $ python -m pip install "xarray[complete]" # Install all the above The above commands should install most of the `optional dependencies`_. However, some packages which are either not listed on PyPI or require extra installation steps are excluded. To know which dependencies would be installed, take a look at the ``[project.optional-dependencies]`` section in ``pyproject.toml``: .. literalinclude:: ../../pyproject.toml :language: toml :start-at: [project.optional-dependencies] :end-before: [build-system] Development versions -------------------- To install the most recent development version, install from github:: $ python -m pip install git+https://github.com/pydata/xarray.git or from TestPyPI:: $ python -m pip install --index-url https://test.pypi.org/simple --extra-index-url https://pypi.org/simple --pre xarray Testing ------- To run the test suite after installing xarray, install (via pypi or conda) `py.test `__ and run ``pytest`` in the root directory of the xarray repository. Performance Monitoring ~~~~~~~~~~~~~~~~~~~~~~ .. TODO: uncomment once we have a working setup see https://github.com/pydata/xarray/pull/5066 A fixed-point performance monitoring of (a part of) our code can be seen on `this page `__. To run these benchmark tests in a local machine, first install - `airspeed-velocity `__: a tool for benchmarking Python packages over their lifetime. and run ``asv run # this will install some conda environments in ./.asv/envs`` python-xarray-2026.01.0/doc/getting-started-guide/why-xarray.rst0000664000175000017500000001333715136607163024664 0ustar alastairalastairOverview: Why xarray? ===================== Xarray introduces labels in the form of dimensions, coordinates and attributes on top of raw NumPy-like multidimensional arrays, which allows for a more intuitive, more concise, and less error-prone developer experience. What labels enable ------------------ Multi-dimensional (a.k.a. N-dimensional, ND) arrays (sometimes called "tensors") are an essential part of computational science. They are encountered in a wide range of fields, including physics, astronomy, geoscience, bioinformatics, engineering, finance, and deep learning. 
In Python, NumPy_ provides the fundamental data structure and API for working with raw ND arrays. However, real-world datasets are usually more than just raw numbers; they have labels which encode information about how the array values map to locations in space, time, etc. Xarray doesn't just keep track of labels on arrays -- it uses them to provide a powerful and concise interface. For example: - Apply operations over dimensions by name: ``x.sum('time')``. - Select values by label (or logical location) instead of integer location: ``x.loc['2014-01-01']`` or ``x.sel(time='2014-01-01')``. - Mathematical operations (e.g., ``x - y``) vectorize across multiple dimensions (array broadcasting) based on dimension names, not shape. - Easily use the `split-apply-combine `_ paradigm with ``groupby``: ``x.groupby('time.dayofyear').mean()``. - Database-like alignment based on coordinate labels that smoothly handles missing values: ``x, y = xr.align(x, y, join='outer')``. - Keep track of arbitrary metadata in the form of a Python dictionary: ``x.attrs``. The N-dimensional nature of xarray's data structures makes it suitable for dealing with multi-dimensional scientific data, and its use of dimension names instead of axis labels (``dim='time'`` instead of ``axis=0``) makes such arrays much more manageable than the raw numpy ndarray: with xarray, you don't need to keep track of the order of an array's dimensions or insert dummy dimensions of size 1 to align arrays (e.g., using ``np.newaxis``). The immediate payoff of using xarray is that you'll write less code. The long-term payoff is that you'll understand what you were thinking when you come back to look at it weeks or months later. Core data structures -------------------- Xarray has two core data structures, which build upon and extend the core strengths of NumPy_ and pandas_. Both data structures are fundamentally N-dimensional: - :py:class:`~xarray.DataArray` is our implementation of a labeled, N-dimensional array. It is an N-D generalization of a :py:class:`pandas.Series`. The name ``DataArray`` itself is borrowed from Fernando Perez's datarray_ project, which prototyped a similar data structure. - :py:class:`~xarray.Dataset` is a multi-dimensional, in-memory array database. It is a dict-like container of ``DataArray`` objects aligned along any number of shared dimensions, and serves a similar purpose in xarray to the :py:class:`pandas.DataFrame`. The value of attaching labels to numpy's :py:class:`numpy.ndarray` may be fairly obvious, but the dataset may need more motivation. The power of the dataset over a plain dictionary is that, in addition to pulling out arrays by name, it is possible to select or combine data along a dimension across all arrays simultaneously. Like a :py:class:`~pandas.DataFrame`, datasets facilitate array operations with heterogeneous data -- the difference is that the arrays in a dataset can have not only different data types, but also different numbers of dimensions. This data model is borrowed from the netCDF_ file format, which also provides xarray with a natural and portable serialization format. NetCDF is very popular in the geosciences, and there are existing libraries for reading and writing netCDF in many programming languages, including Python. Xarray distinguishes itself from many tools for working with netCDF data in-so-far as it provides data structures for in-memory analytics that both utilize and preserve labels. You only need to do the tedious work of adding metadata once, not every time you save a file. 
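To make these two structures concrete, here is a minimal sketch (the variable names are purely illustrative) that builds a ``DataArray``, wraps it in a ``Dataset``, and applies an operation by dimension name:

.. code:: python

    import numpy as np
    import xarray as xr

    # A DataArray: an N-dimensional array with named dimensions,
    # coordinate labels and arbitrary metadata.
    temperature = xr.DataArray(
        np.random.randn(2, 3),
        dims=("time", "location"),
        coords={"location": ["a", "b", "c"]},
        attrs={"units": "degC"},
    )

    # A Dataset: a dict-like collection of DataArrays that share dimensions.
    ds = xr.Dataset({"temperature": temperature})

    # Operations refer to dimensions by name rather than by axis number.
    mean_over_time = ds["temperature"].mean(dim="time")
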
Goals and aspirations --------------------- Xarray contributes domain-agnostic data-structures and tools for labeled multi-dimensional arrays to Python's SciPy_ ecosystem for numerical computing. In particular, xarray builds upon and integrates with NumPy_ and pandas_: - Our user-facing interfaces aim to be more explicit versions of those found in NumPy/pandas. - Compatibility with the broader ecosystem is a major goal: it should be easy to get your data in and out. - We try to keep a tight focus on functionality and interfaces related to labeled data, and leverage other Python libraries for everything else, e.g., NumPy/pandas for fast arrays/indexing (xarray itself contains no compiled code), Dask_ for parallel computing, matplotlib_ for plotting, etc. Xarray is a collaborative and community driven project, run entirely on volunteer effort (see :ref:`contributing`). Our target audience is anyone who needs N-dimensional labeled arrays in Python. Originally, development was driven by the data analysis needs of physical scientists (especially geoscientists who already know and love netCDF_), but it has become a much more broadly useful tool, and is still under active development. See our technical :ref:`roadmap` for more details, and feel free to reach out with questions about whether xarray is the right tool for your needs. .. _datarray: https://github.com/BIDS/datarray .. _Dask: https://www.dask.org .. _matplotlib: https://matplotlib.org .. _netCDF: https://www.unidata.ucar.edu/software/netcdf .. _NumPy: https://numpy.org .. _pandas: https://pandas.pydata.org .. _SciPy: https://www.scipy.org python-xarray-2026.01.0/doc/getting-started-guide/quick-overview.rst0000664000175000017500000003102315136607163025521 0ustar alastairalastair############## Quick overview ############## Here are some quick examples of what you can do with :py:class:`xarray.DataArray` objects. Everything is explained in much more detail in the rest of the documentation. To begin, import numpy, pandas and xarray using their customary abbreviations: .. jupyter-execute:: import numpy as np import pandas as pd import xarray as xr Create a DataArray ------------------ You can make a DataArray from scratch by supplying data in the form of a numpy array or list, with optional *dimensions* and *coordinates*: .. jupyter-execute:: data = xr.DataArray(np.random.randn(2, 3), dims=("x", "y"), coords={"x": [10, 20]}) data In this case, we have generated a 2D array, assigned the names *x* and *y* to the two dimensions respectively and associated two *coordinate labels* '10' and '20' with the two locations along the x dimension. If you supply a pandas :py:class:`~pandas.Series` or :py:class:`~pandas.DataFrame`, metadata is copied directly: .. jupyter-execute:: xr.DataArray(pd.Series(range(3), index=list("abc"), name="foo")) Here are the key properties for a ``DataArray``: .. jupyter-execute:: # like in pandas, values is a numpy array that you can modify in-place data.values data.dims data.coords # you can use this dictionary to store arbitrary metadata data.attrs Indexing -------- Xarray supports four kinds of indexing. Since we have assigned coordinate labels to the x dimension we can use label-based indexing along that dimension just like pandas. The four examples below all yield the same result (the value at ``x=10``) but at varying levels of convenience and intuitiveness. .. 
jupyter-execute:: # positional and by integer label, like numpy data[0, :] # loc or "location": positional and coordinate label, like pandas data.loc[10] # isel or "integer select": by dimension name and integer label data.isel(x=0) # sel or "select": by dimension name and coordinate label data.sel(x=10) Unlike positional indexing, label-based indexing frees us from having to know how our array is organized. All we need to know are the dimension name and the label we wish to index i.e. ``data.sel(x=10)`` works regardless of whether ``x`` is the first or second dimension of the array and regardless of whether ``10`` is the first or second element of ``x``. We have already told xarray that x is the first dimension when we created ``data``: xarray keeps track of this so we don't have to. For more, see :ref:`indexing`. Attributes ---------- While you're setting up your DataArray, it's often a good idea to set metadata attributes. A useful choice is to set ``data.attrs['long_name']`` and ``data.attrs['units']`` since xarray will use these, if present, to automatically label your plots. These special names were chosen following the `NetCDF Climate and Forecast (CF) Metadata Conventions `_. ``attrs`` is just a Python dictionary, so you can assign anything you wish. .. jupyter-execute:: data.attrs["long_name"] = "random velocity" data.attrs["units"] = "metres/sec" data.attrs["description"] = "A random variable created as an example." data.attrs["random_attribute"] = 123 data.attrs # you can add metadata to coordinates too data.x.attrs["units"] = "x units" Computation ----------- Data arrays work very similarly to numpy ndarrays: .. jupyter-execute:: data + 10 np.sin(data) # transpose data.T data.sum() However, aggregation operations can use dimension names instead of axis numbers: .. jupyter-execute:: data.mean(dim="x") Arithmetic operations broadcast based on dimension name. This means you don't need to insert dummy dimensions for alignment: .. jupyter-execute:: a = xr.DataArray(np.random.randn(3), [data.coords["y"]]) b = xr.DataArray(np.random.randn(4), dims="z") a b a + b It also means that in most cases you do not need to worry about the order of dimensions: .. jupyter-execute:: data - data.T Operations also align based on index labels: .. jupyter-execute:: data[:-1] - data[:1] For more, see :ref:`compute`. GroupBy ------- Xarray supports grouped operations using a very similar API to pandas (see :ref:`groupby`): .. jupyter-execute:: labels = xr.DataArray(["E", "F", "E"], [data.coords["y"]], name="labels") labels data.groupby(labels).mean("y") data.groupby(labels).map(lambda x: x - x.min()) Plotting -------- Visualizing your datasets is quick and convenient: .. jupyter-execute:: data.plot() Note the automatic labeling with names and units. Our effort in adding metadata attributes has paid off! Many aspects of these figures are customizable: see :ref:`plotting`. pandas ------ Xarray objects can be easily converted to and from pandas objects using the :py:meth:`~xarray.DataArray.to_series`, :py:meth:`~xarray.DataArray.to_dataframe` and :py:meth:`~pandas.DataFrame.to_xarray` methods: .. jupyter-execute:: series = data.to_series() series # convert back series.to_xarray() Datasets -------- :py:class:`xarray.Dataset` is a dict-like container of aligned ``DataArray`` objects. You can think of it as a multi-dimensional generalization of the :py:class:`pandas.DataFrame`: .. 
jupyter-execute:: ds = xr.Dataset(dict(foo=data, bar=("x", [1, 2]), baz=np.pi)) ds This creates a dataset with three DataArrays named ``foo``, ``bar`` and ``baz``. Use dictionary or dot indexing to pull out ``Dataset`` variables as ``DataArray`` objects but note that assignment only works with dictionary indexing: .. jupyter-execute:: ds["foo"] ds.foo When creating ``ds``, we specified that ``foo`` is identical to ``data`` created earlier, ``bar`` is one-dimensional with single dimension ``x`` and associated values '1' and '2', and ``baz`` is a scalar not associated with any dimension in ``ds``. Variables in datasets can have different ``dtype`` and even different dimensions, but all dimensions are assumed to refer to points in the same shared coordinate system i.e. if two variables have dimension ``x``, that dimension must be identical in both variables. For example, when creating ``ds`` xarray automatically *aligns* ``bar`` with ``DataArray`` ``foo``, i.e., they share the same coordinate system so that ``ds.bar['x'] == ds.foo['x'] == ds['x']``. Consequently, the following works without explicitly specifying the coordinate ``x`` when creating ``ds['bar']``: .. jupyter-execute:: ds.bar.sel(x=10) You can do almost everything you can do with ``DataArray`` objects with ``Dataset`` objects (including indexing and arithmetic) if you prefer to work with multiple variables at once. Read & write netCDF files ------------------------- NetCDF is the recommended file format for xarray objects. Users from the geosciences will recognize that the :py:class:`~xarray.Dataset` data model looks very similar to a netCDF file (which, in fact, inspired it). You can directly read and write xarray objects to disk using :py:meth:`~xarray.Dataset.to_netcdf`, :py:func:`~xarray.open_dataset` and :py:func:`~xarray.open_dataarray`: .. jupyter-execute:: filename = "example.nc" .. jupyter-execute:: :hide-code: # Ensure the file is located in a unique temporary directory # so that it doesn't conflict with parallel builds of the # documentation. import tempfile import os.path tempdir = tempfile.TemporaryDirectory() filename = os.path.join(tempdir.name, filename) .. jupyter-execute:: ds.to_netcdf(filename) reopened = xr.open_dataset(filename) reopened .. jupyter-execute:: :hide-code: reopened.close() tempdir.cleanup() It is common for datasets to be distributed across multiple files (commonly one file per timestep). Xarray supports this use-case by providing the :py:meth:`~xarray.open_mfdataset` and the :py:meth:`~xarray.save_mfdataset` methods. For more, see :ref:`io`. .. _quick-overview-datatrees: DataTrees --------- :py:class:`xarray.DataTree` is a tree-like container of :py:class:`~xarray.DataArray` objects, organised into multiple mutually alignable groups. You can think of it like a (recursive) ``dict`` of :py:class:`~xarray.Dataset` objects, where coordinate variables and their indexes are inherited down to children. Let's first make some example xarray datasets: .. jupyter-execute:: import numpy as np import xarray as xr data = xr.DataArray(np.random.randn(2, 3), dims=("x", "y"), coords={"x": [10, 20]}) ds = xr.Dataset({"foo": data, "bar": ("x", [1, 2]), "baz": np.pi}) ds ds2 = ds.interp(coords={"x": [10, 12, 14, 16, 18, 20]}) ds2 ds3 = xr.Dataset( {"people": ["alice", "bob"], "heights": ("people", [1.57, 1.82])}, coords={"species": "human"}, ) ds3 Now we'll put these datasets into a hierarchical DataTree: .. 
jupyter-execute:: dt = xr.DataTree.from_dict( {"simulation/coarse": ds, "simulation/fine": ds2, "/": ds3} ) dt This created a DataTree with nested groups. We have one root group, containing information about individual people. This root group can be named, but here it is unnamed, and is referenced with ``"/"``. This structure is similar to a unix-like filesystem. The root group then has one subgroup ``simulation``, which contains no data itself but does contain another two subgroups, named ``fine`` and ``coarse``. The (sub)subgroups ``fine`` and ``coarse`` contain two very similar datasets. They both have an ``"x"`` dimension, but the dimension is of different lengths in each group, which makes the data in each group unalignable. In the root group we placed some completely unrelated information, in order to show how a tree can store heterogeneous data. Remember to keep unalignable dimensions in sibling groups because a DataTree inherits coordinates down through its child nodes. You can see this inheritance in the above representation of the DataTree. The coordinates ``people`` and ``species`` defined in the root ``/`` node are shown in the child nodes both ``/simulation/coarse`` and ``/simulation/fine``. All coordinates in parent-descendent lineage must be alignable to form a DataTree. If your input data is not aligned, you can still get a nested ``dict`` of :py:class:`~xarray.Dataset` objects with :py:func:`~xarray.open_groups` and then apply any required changes to ensure alignment before converting to a :py:class:`~xarray.DataTree`. The constraints on each group are the same as the constraint on DataArrays within a single dataset with the addition of requiring parent-descendent coordinate agreement. We created the subgroups using a filesystem-like syntax, and accessing groups works the same way. We can access individual DataArrays in a similar fashion. .. jupyter-execute:: dt["simulation/coarse/foo"] We can also view the data in a particular group as a read-only :py:class:`~xarray.Datatree.DatasetView` using :py:attr:`xarray.Datatree.dataset`: .. jupyter-execute:: dt["simulation/coarse"].dataset We can get a copy of the :py:class:`~xarray.Dataset` including the inherited coordinates by calling the :py:class:`~xarray.datatree.to_dataset` method: .. jupyter-execute:: ds_inherited = dt["simulation/coarse"].to_dataset() ds_inherited And you can get a copy of just the node local values of :py:class:`~xarray.Dataset` by setting the ``inherit`` keyword to ``False``: .. jupyter-execute:: ds_node_local = dt["simulation/coarse"].to_dataset(inherit=False) ds_node_local .. note:: We intend to eventually implement most :py:class:`~xarray.Dataset` methods (indexing, aggregation, arithmetic, etc) on :py:class:`~xarray.DataTree` objects, but many methods have not been implemented yet. .. Operations map over subtrees, so we can take a mean over the ``x`` dimension of both the ``fine`` and ``coarse`` groups just by: .. .. jupyter-execute:: .. avg = dt["simulation"].mean(dim="x") .. avg .. Here the ``"x"`` dimension used is always the one local to that subgroup. .. You can do almost everything you can do with :py:class:`~xarray.Dataset` objects with :py:class:`~xarray.DataTree` objects .. (including indexing and arithmetic), as operations will be mapped over every subgroup in the tree. .. This allows you to work with multiple groups of non-alignable variables at once. .. 
tip:: If all of your variables are mutually alignable (i.e., they live on the same grid, such that every common dimension name maps to the same length), then you probably don't need :py:class:`xarray.DataTree`, and should consider just sticking with :py:class:`xarray.Dataset`. python-xarray-2026.01.0/doc/getting-started-guide/tutorials-and-videos.rst0000664000175000017500000000252015136607163026616 0ustar alastairalastair Tutorials and Videos ==================== There are an abundance of tutorials and videos available for learning how to use *xarray*. Often, these tutorials are taught to workshop attendees at conferences or other events. We highlight a number of these resources below, but this is by no means an exhaustive list! Tutorials ---------- - `Xarray's Tutorials`_ repository - The `UW eScience Institute's Geohackweek`_ tutorial on xarray for geospatial data scientists. - `Nicolas Fauchereau's 2015 tutorial`_ on xarray for netCDF users. Videos ------- .. include:: ../videos-gallery.txt Books, Chapters and Articles ----------------------------- - Stephan Hoyer and Joe Hamman's `Journal of Open Research Software paper`_ describing the xarray project. .. _Xarray's Tutorials: https://xarray-contrib.github.io/xarray-tutorial/ .. _Journal of Open Research Software paper: https://doi.org/10.5334/jors.148 .. _UW eScience Institute's Geohackweek : https://geohackweek.github.io/nDarrays/ .. _tutorial: https://github.com/Unidata/unidata-users-workshop/blob/master/notebooks/xray-tutorial.ipynb .. _with answers: https://github.com/Unidata/unidata-users-workshop/blob/master/notebooks/xray-tutorial-with-answers.ipynb .. _Nicolas Fauchereau's 2015 tutorial: https://nbviewer.iPython.org/github/nicolasfauchereau/metocean/blob/master/notebooks/xray.ipynb python-xarray-2026.01.0/doc/contribute/0000775000175000017500000000000015136607163017766 5ustar alastairalastairpython-xarray-2026.01.0/doc/contribute/index.rst0000664000175000017500000000142715136607163021633 0ustar alastairalastair######################## Xarray Developer's Guide ######################## We welcome your skills and enthusiasm at the Xarray project! There are numerous opportunities to contribute beyond just writing code. All contributions, including bug reports, bug fixes, documentation improvements, enhancement suggestions, and other ideas are welcome. Please review our Contributor's guide for more guidance. In this section you will also find documentation on the internal organization of Xarray's source code, the roadmap for current development priorities, as well as how to engage with core maintainers of the Xarray codebase. .. toctree:: :maxdepth: 2 :hidden: contributing ../internals/index ../roadmap ../whats-new developers-meeting Team python-xarray-2026.01.0/doc/contribute/contributing.rst0000664000175000017500000012460415136607163023236 0ustar alastairalastair.. _contributing: ********************** Contributing to xarray ********************** .. note:: Large parts of this document came from the `Pandas Contributing Guide `_. Overview ======== We welcome your skills and enthusiasm at the xarray project!. There are numerous opportunities to contribute beyond just writing code. All contributions, including bug reports, bug fixes, documentation improvements, enhancement suggestions, and other ideas are welcome. If you have any questions on the process or how to fix something feel free to ask us! The recommended place to ask a question is on `GitHub Discussions `_ , but we also have a `Discord `_ and a `mailing list `_. 
There is also a `"python-xarray" tag on Stack Overflow `_ which we monitor for questions. We also have a biweekly community call, details of which are announced on the `Developers meeting `_. You are very welcome to join! Though we would love to hear from you, there is no expectation to contribute during the meeting either - you are always welcome to just sit in and listen. This project is a community effort, and everyone is welcome to contribute. Everyone within the community is expected to abide by our `code of conduct `_. Where to start? =============== If you are brand new to *xarray* or open-source development, we recommend going through the `GitHub "issues" tab `_ to find issues that interest you. Some issues are particularly suited for new contributors by the label `Documentation `__ and `good first issue `_ where you could start out. These are well documented issues, that do not require a deep understanding of the internals of xarray. Once you've found an interesting issue, you can return here to get your development environment setup. The xarray project does not assign issues. Issues are "assigned" by opening a Pull Request(PR). .. _contributing.bug_reports: Bug reports and enhancement requests ==================================== Bug reports are an important part of making *xarray* more stable. Having a complete bug report will allow others to reproduce the bug and provide insight into fixing. Trying out the bug-producing code on the *main* branch is often a worthwhile exercise to confirm that the bug still exists. It is also worth searching existing bug reports and pull requests to see if the issue has already been reported and/or fixed. Submitting a bug report ----------------------- If you find a bug in the code or documentation, do not hesitate to submit a ticket to the `Issue Tracker `_. You are also welcome to post feature requests or pull requests. If you are reporting a bug, please use the provided template which includes the following: #. Include a short, self-contained Python snippet reproducing the problem. You can format the code nicely by using `GitHub Flavored Markdown `_:: ```python import xarray as xr ds = xr.Dataset(...) ... ``` #. Include the full version string of *xarray* and its dependencies. You can use the built in function:: ```python import xarray as xr xr.show_versions() ... ``` #. Explain why the current behavior is wrong/not desired and what you expect instead. The issue will then show up to the *xarray* community and be open to comments/ideas from others. See this `stackoverflow article for tips on writing a good bug report `_ . .. _contributing.github: Now that you have an issue you want to fix, enhancement to add, or documentation to improve, you need to learn how to work with GitHub and the *xarray* code base. .. _contributing.version_control: Version control, Git, and GitHub ================================ The code is hosted on `GitHub `_. To contribute you will need to sign up for a `free GitHub account `_. We use `Git `_ for version control to allow many people to work together on the project. Some great resources for learning Git: * the `GitHub help pages `_. * the `NumPy's documentation `_. * Matthew Brett's `Pydagogue `_. Getting started with Git ------------------------ `GitHub has instructions for setting up Git `__ including installing git, setting up your SSH key, and configuring git. All these steps need to be completed before you can work seamlessly between your local repository and GitHub. .. 
note:: The following instructions assume you want to learn how to interact with github via the git command-line utility, but contributors who are new to git may find it easier to use other tools instead such as `Github Desktop `_. .. _contributing.dev_workflow: Development workflow ==================== To keep your work well organized, with readable history, and in turn make it easier for project maintainers to see what you've done, and why you did it, we recommend you to follow workflow: 1. `Create an account `_ on GitHub if you do not already have one. 2. You will need your own fork to work on the code. Go to the `xarray project page `_ and hit the ``Fork`` button near the top of the page. This creates a copy of the code under your account on the GitHub server. 3. Clone your fork to your machine:: git clone https://github.com/your-user-name/xarray.git cd xarray git remote add upstream https://github.com/pydata/xarray.git This creates the directory ``xarray`` and connects your repository to the upstream (main project) *xarray* repository. 4. Copy tags across from the xarray repository:: git fetch --tags upstream This will ensure that when you create a development environment a reasonable version number is created. .. _contributing.dev_env: Creating a development environment ---------------------------------- To test out code changes locally, you'll need to build *xarray* from source, which requires a Python environment. If you're making documentation changes, you can skip to :ref:`contributing.documentation` but you won't be able to build the documentation locally before pushing your changes. .. note:: For small changes, such as fixing a typo, you don't necessarily need to build and test xarray locally. If you make your changes then :ref:`commit and push them to a new branch `, xarray's automated :ref:`continuous integration tests ` will run and check your code in various ways. You can then try to fix these problems by committing and pushing more commits to the same branch. You can also avoid building the documentation locally by instead :ref:`viewing the updated documentation via the CI `. To speed up this feedback loop or for more complex development tasks you should build and test xarray locally. .. _contributing.dev_python: Creating a Python Environment ----------------------------- .. attention:: Xarray recently switched development workflows to use `Pixi `_ instead of Conda (PR https://github.com/pydata/xarray/pull/10888 ). If there are any edits to the contributing instructions that would improve clarity, please open a PR! Xarray uses `Pixi `_ to manage development environments. Before starting any development, you'll need to create an isolated xarray development environment: - `Install Pixi `_ - preferably the same version as the one listed in our ``ci.yaml`` `file `_ - Some features in Pixi are in active development, and xarray depends on these features. Using the same version results in the best dev experience. - Instructions for installing specific versions of Pixi can be seen on the Pixi installation page. - Make sure that you have :ref:`cloned the repository ` - ``cd`` to the *xarray* source directory That's it! Now you're ready to contribute to Xarray. Pixi defines multiple environments as well as tasks to help you with development. 
These include tasks for: - running the test suite - building the documentation - running the static type checker - running code formatters and linters Some of these tasks can be run in several environments (e.g., the test suite is run in environments with different, dependencies as well as different Python versions to make sure we have wide support for Xarray). Some of these tasks are only run in a single environment (e.g., building the documentation or running pre-commit hooks). You can see all available environments and tasks by running:: pixi list For example: - ``pixi run doc`` will build the documentation - ``pixi run mypy`` will run the static type checker - ``pixi run test`` will run the test suite - ``pixi run pre-commit`` will run all code formatters and linters - defined via the pre-commit hooks When running ``pixi run test`` you will be prompted to select which environment you want to use. You can specify the environment directly by providing the ``-e`` flag, e.g., ``pixi run -e my_environment test`` . Our CI setup uses Pixi as well - you can easily reproduce CI tests by running the same tasks in the same environments as defined in the CI. You can enter any of the defined environments with:: pixi shell -e my_environment This is similar to "activating" an environment in Conda. To exit this shell type ``exit`` or press ``Ctrl-D``. All these Pixi environments and tasks are defined in the ``pixi.toml`` file in the root of the repository. Install pre-commit hooks ------------------------- You can either run pre-commit manually via Pixi as described above, or set up git hooks to run pre-commit automatically. This is done by: .. code-block:: sh pixi shell -e pre-commit # enter the pre-commit environment pre-commit install # install the git hooks # or pre-commit uninstall # uninstall the git hooks Now, every time you make a git commit, all the pre-commit hooks will be run automatically using the pre-commit that comes with Pixi. Alternatively you can use a separate installation of ``pre-commit`` (e.g., install globally using Pixi (``pixi install -g pre_commit``), or via `Homebrew `_ ). If you want to commit without running ``pre-commit`` hooks, you can use ``git commit --no-verify``. Update the ``main`` branch -------------------------- First make sure you have :ref:`created a development environment `. Before starting a new set of changes, fetch all changes from ``upstream/main``, and start a new feature branch from that. From time to time you should fetch the upstream changes from GitHub: :: git fetch --tags upstream git merge upstream/main This will combine your commits with the latest *xarray* git ``main``. If this leads to merge conflicts, you must resolve these before submitting your pull request. If you have uncommitted changes, you will need to ``git stash`` them prior to updating. This will effectively store your changes, which can be reapplied after updating. Create a new feature branch --------------------------- Create a branch to save your changes, even before you start making changes. You want your ``main branch`` to contain only production-ready code:: git checkout -b shiny-new-feature This changes your working directory to the ``shiny-new-feature`` branch. Keep any changes in this branch specific to one bug or feature so it is clear what the branch brings to *xarray*. You can have many "shiny-new-features" and switch in between them using the ``git checkout`` command. Generally, you will want to keep your feature branches on your public GitHub fork of xarray. 
To do this, you ``git push`` this new branch up to your GitHub repo. Generally (if you followed the instructions in these pages, and by default), git will have a link to your fork of the GitHub repo, called ``origin``. You push up to your own fork with: :: git push origin shiny-new-feature In git >= 1.7 you can ensure that the link is correctly set by using the ``--set-upstream`` option: :: git push --set-upstream origin shiny-new-feature From now on git will know that ``shiny-new-feature`` is related to the ``shiny-new-feature branch`` in the GitHub repo. The editing workflow -------------------- 1. Make some changes 2. See which files have changed with ``git status``. You'll see a listing like this one: :: # On branch shiny-new-feature # Changed but not updated: # (use "git add ..." to update what will be committed) # (use "git checkout -- ..." to discard changes in working directory) # # modified: README 3. Check what the actual changes are with ``git diff``. 4. Build the `documentation `__ for the documentation changes. 5. `Run the test suite `_ for code changes. Commit and push your changes ---------------------------- 1. To commit all modified files into the local copy of your repo, do ``git commit -am 'A commit message'``. 2. To push the changes up to your forked repo on GitHub, do a ``git push``. Open a pull request ------------------- When you're ready or need feedback on your code, open a Pull Request (PR) so that the xarray developers can give feedback and eventually include your suggested code into the ``main`` branch. `Pull requests (PRs) on GitHub `_ are the mechanism for contributing to xarray's code and documentation. Enter a title for the set of changes with some explanation of what you've done. Follow the PR template, which looks like this. :: [ ]Closes #xxxx [ ]Tests added [ ]User visible changes (including notable bug fixes) are documented in whats-new.rst [ ]New functions/methods are listed in api.rst Mention anything you'd like particular attention for - such as a complicated change or some code you are not happy with. If you don't think your request is ready to be merged, just say so in your pull request message and use the "Draft PR" feature of GitHub. This is a good way of getting some preliminary code review. .. _contributing.documentation: Contributing to the documentation ================================= If you're not the developer type, contributing to the documentation is still of huge value. You don't even have to be an expert on *xarray* to do so! In fact, there are sections of the docs that are worse off after being written by experts. If something in the docs doesn't make sense to you, updating the relevant section after you figure it out is a great way to ensure it will help the next person. .. contents:: Documentation: :local: About the *xarray* documentation -------------------------------- The documentation is written in **reStructuredText**, which is almost like writing in plain English, and built using `Sphinx `__. The Sphinx Documentation has an excellent `introduction to reST `__. Review the Sphinx docs to perform more complex changes to the documentation as well. Some other important things to know about the docs: - The *xarray* documentation consists of two parts: the docstrings in the code itself and the docs in this folder ``xarray/doc/``. 
The docstrings are meant to provide a clear explanation of the usage of the individual functions, while the documentation in this folder consists of tutorial-like overviews per topic together with some other information (what's new, installation, etc.). - The docstrings follow the **NumPy Docstring Standard**, which is used widely in the Scientific Python community. This standard specifies the format of the different sections of the docstring. Refer to the `documentation for the Numpy docstring format `_ for a detailed explanation, or look at some of the existing functions to extend it in a similar manner. - The documentation makes heavy use of the `jupyter-sphinx extension `_. The ``jupyter-execute`` directive lets you put code in the documentation which will be run during the doc build. For example: .. code:: rst .. jupyter-execute:: x = 2 x**3 will be rendered as: .. jupyter-execute:: x = 2 x**3 Almost all code examples in the docs are run (and the output saved) during the doc build. This approach means that code examples will always be up to date, but it does make building the docs a bit more complex. - Our API documentation in ``doc/api.rst`` houses the auto-generated documentation from the docstrings. For classes, there are a few subtleties around controlling which methods and attributes have pages auto-generated. Every method should be included in a ``toctree`` in ``api.rst``, else Sphinx will emit a warning. How to build the *xarray* documentation --------------------------------------- Requirements ~~~~~~~~~~~~ Make sure to follow the instructions on :ref:`creating a development environment` above. Once you have Pixi installed, you can build the documentation using the command:: pixi run doc Then you can find the HTML output files in the folder ``xarray/doc/_build/html/``. To see what the documentation now looks like with your changes, you can view the HTML build locally by opening the files in your local browser. For example, if you normally use Google Chrome as your browser, you could enter:: google-chrome _build/html/quick-overview.html in the terminal, running from within the ``doc/`` folder. You should now see a new tab pop open in your local browser showing the ``quick-overview`` page of the documentation. The different pages of this local build of the documentation are linked together, so you can browse the whole documentation by following links the same way you would on the officially-hosted xarray docs site. The first time you build the docs, it will take quite a while because it has to run all the code examples and build all the generated docstring pages. In subsequent invocations, Sphinx will try to only build the pages that have been modified. If you want to do a full clean build, do:: pixi run doc-clean Writing ReST pages ------------------ Most documentation is either in the docstrings of individual classes and methods, in explicit ``.rst`` files, or in examples and tutorials. All of these use the `ReST `_ syntax and are processed by `Sphinx `_. This section contains additional information and conventions for how ReST is used in the xarray documentation.
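For docstrings specifically, the NumPy docstring layout mentioned above is the convention to follow. The snippet below is only a minimal, illustrative sketch of that layout; the function and parameter names are invented for the example and are not part of the xarray API:

.. code-block:: python

    def example_function(data, dim=None):
        """One-line summary of what the function does.

        An optional longer description can follow, explaining the
        behaviour in more detail.

        Parameters
        ----------
        data : DataArray
            The input data to operate on.
        dim : str, optional
            Name of the dimension along which to operate.

        Returns
        -------
        DataArray
            Description of the object that is returned.
        """
        ...

Looking at existing docstrings in the code base and extending them in a similar manner, as suggested above, is usually the quickest way to get the details right.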
Section formatting ~~~~~~~~~~~~~~~~~~ We aim to follow the recommendations from the `Python documentation `_ and the `Sphinx reStructuredText documentation `_ for section markup characters, - ``*`` with overline, for chapters - ``=``, for heading - ``-``, for sections - ``~``, for subsections - ``**`` text ``**``, for **bold** text Referring to other documents and sections ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `Sphinx `_ allows internal `references `_ between documents. Documents can be linked with the ``:doc:`` directive: :: See the :doc:`/getting-started-guide/installing` See the :doc:`/getting-started-guide/quick-overview` will render as: See the `Installation `_ See the `Quick Overview `_ Including figures and files ~~~~~~~~~~~~~~~~~~~~~~~~~~~ Image files can be directly included in pages with the ``image::`` directive. .. _contributing.code: Contributing to the code base ============================= .. contents:: Code Base: :local: Code standards -------------- Writing good code is not just about what you write. It is also about *how* you write it. During :ref:`Continuous Integration ` testing, several tools will be run to check your code for stylistic errors. Generating any warnings will cause the test to fail. Thus, good style is a requirement for submitting code to *xarray*. In addition, because a lot of people use our library, it is important that we do not make sudden changes to the code that could have the potential to break a lot of user code as a result, that is, we need it to be as *backwards compatible* as possible to avoid mass breakages. Code Formatting ~~~~~~~~~~~~~~~ xarray uses several tools to ensure a consistent code format throughout the project: - `ruff `_ for formatting, code quality checks and standardized order in imports, and - `mypy `_ for static type checking on `type hints `_. We highly recommend that you setup `pre-commit hooks `_ to automatically run all the above tools every time you make a git commit. This can be done by running:: pre-commit install from the root of the xarray repository. You can skip the pre-commit checks with ``git commit --no-verify``. Backwards Compatibility ~~~~~~~~~~~~~~~~~~~~~~~ Please try to maintain backwards compatibility. *xarray* has a growing number of users with lots of existing code, so don't break it if at all possible. If you think breakage is required, clearly state why as part of the pull request. Be especially careful when changing function and method signatures, because any change may require a deprecation warning. For example, if your pull request means that the argument ``old_arg`` to ``func`` is no longer valid, instead of simply raising an error if a user passes ``old_arg``, we would instead catch it: .. code-block:: python def func(new_arg, old_arg=None): if old_arg is not None: from xarray.core.utils import emit_user_level_warning emit_user_level_warning( "`old_arg` has been deprecated, and in the future will raise an error." "Please use `new_arg` from now on.", DeprecationWarning, ) # Still do what the user intended here This temporary check would then be removed in a subsequent version of xarray. This process of first warning users before actually breaking their code is known as a "deprecation cycle", and makes changes significantly easier to handle both for users of xarray, and for developers of other libraries that depend on xarray. .. 
_contributing.ci: Testing With Continuous Integration ----------------------------------- The *xarray* test suite runs automatically via the `GitHub Actions `__, continuous integration service, once your pull request is submitted. A pull-request will be considered for merging when you have an all 'green' build. If any tests are failing, then you will get a red 'X', where you can click through to see the individual failed tests. This is an example of a green build. .. image:: ../_static/ci.png .. note:: Each time you push to your PR branch, a new run of the tests will be triggered on the CI. If they haven't already finished, tests for any older commits on the same branch will be automatically cancelled. .. _contributing.tdd: Test-driven development/code writing ------------------------------------ *xarray* is serious about testing and strongly encourages contributors to embrace `test-driven development (TDD) `_. This development process "relies on the repetition of a very short development cycle: first the developer writes an (initially failing) automated test case that defines a desired improvement or new function, then produces the minimum amount of code to pass that test." So, before actually writing any code, you should write your tests. Often the test can be taken from the original GitHub issue. However, it is always worth considering additional use cases and writing corresponding tests. Adding tests is one of the most common requests after code is pushed to *xarray*. Therefore, it is worth getting in the habit of writing tests ahead of time so that this is never an issue. Like many packages, *xarray* uses `pytest `_ and the convenient extensions in `numpy.testing `_. Writing tests ~~~~~~~~~~~~~ All tests should go into the ``tests`` subdirectory of the specific package. This folder contains many current examples of tests, and we suggest looking to these for inspiration. The ``xarray.testing`` module has many special ``assert`` functions that make it easier to make statements about whether DataArray or Dataset objects are equivalent. The easiest way to verify that your code is correct is to explicitly construct the result you expect, then compare the actual result to the expected correct result:: def test_constructor_from_0d(): expected = Dataset({None: ([], 0)})[None] actual = DataArray(0) assert_identical(expected, actual) Transitioning to ``pytest`` ~~~~~~~~~~~~~~~~~~~~~~~~~~~ *xarray* existing test structure is *mostly* class-based, meaning that you will typically find tests wrapped in a class. .. code-block:: python class TestReallyCoolFeature: ... Going forward, we are moving to a more *functional* style using the `pytest `__ framework, which offers a richer testing framework that will facilitate testing and developing. Thus, instead of writing test classes, we will write test functions like this: .. code-block:: python def test_really_cool_feature(): ... Using ``pytest`` ~~~~~~~~~~~~~~~~ Here is an example of a self-contained set of tests that illustrate multiple features that we like to use. - functional style: tests are like ``test_*`` and *only* take arguments that are either fixtures or parameters - ``pytest.mark`` can be used to set metadata on test functions, e.g. ``skip`` or ``xfail``. 
- using ``parametrize``: allow testing of multiple cases - to set a mark on a parameter, ``pytest.param(..., marks=...)`` syntax should be used - ``fixture``, code for object construction, on a per-test basis - using bare ``assert`` for scalars and truth-testing - ``assert_equal`` and ``assert_identical`` from the ``xarray.testing`` module for xarray object comparisons. - the typical pattern of constructing an ``expected`` and comparing versus the ``result`` We would name this file ``test_cool_feature.py`` and put in an appropriate place in the ``xarray/tests/`` structure. .. code-block:: python import pytest import numpy as np import xarray as xr from xarray.testing import assert_equal @pytest.mark.parametrize("dtype", ["int8", "int16", "int32", "int64"]) def test_dtypes(dtype): assert str(np.dtype(dtype)) == dtype @pytest.mark.parametrize( "dtype", [ "float32", pytest.param("int16", marks=pytest.mark.skip), pytest.param( "int32", marks=pytest.mark.xfail(reason="to show how it works") ), ], ) def test_mark(dtype): assert str(np.dtype(dtype)) == "float32" @pytest.fixture def dataarray(): return xr.DataArray([1, 2, 3]) @pytest.fixture(params=["int8", "int16", "int32", "int64"]) def dtype(request): return request.param def test_series(dataarray, dtype): result = dataarray.astype(dtype) assert result.dtype == dtype expected = xr.DataArray(np.array([1, 2, 3], dtype=dtype)) assert_equal(result, expected) A test run of this yields .. code-block:: shell ((xarray) $ pytest test_cool_feature.py -v ================================= test session starts ================================== platform darwin -- Python 3.10.6, pytest-7.2.0, pluggy-1.0.0 -- cachedir: .pytest_cache plugins: hypothesis-6.56.3, cov-4.0.0 collected 11 items xarray/tests/test_cool_feature.py::test_dtypes[int8] PASSED [ 9%] xarray/tests/test_cool_feature.py::test_dtypes[int16] PASSED [ 18%] xarray/tests/test_cool_feature.py::test_dtypes[int32] PASSED [ 27%] xarray/tests/test_cool_feature.py::test_dtypes[int64] PASSED [ 36%] xarray/tests/test_cool_feature.py::test_mark[float32] PASSED [ 45%] xarray/tests/test_cool_feature.py::test_mark[int16] SKIPPED (unconditional skip) [ 54%] xarray/tests/test_cool_feature.py::test_mark[int32] XFAIL (to show how it works) [ 63%] xarray/tests/test_cool_feature.py::test_series[int8] PASSED [ 72%] xarray/tests/test_cool_feature.py::test_series[int16] PASSED [ 81%] xarray/tests/test_cool_feature.py::test_series[int32] PASSED [ 90%] xarray/tests/test_cool_feature.py::test_series[int64] PASSED [100%] ==================== 9 passed, 1 skipped, 1 xfailed in 1.83 seconds ==================== Tests that we have ``parametrized`` are now accessible via the test name, for example we could run these with ``-k int8`` to sub-select *only* those tests which match ``int8``. .. code-block:: shell ((xarray) bash-3.2$ pytest test_cool_feature.py -v -k int8 ================================== test session starts ================================== platform darwin -- Python 3.10.6, pytest-7.2.0, pluggy-1.0.0 -- cachedir: .pytest_cache plugins: hypothesis-6.56.3, cov-4.0.0 collected 11 items test_cool_feature.py::test_dtypes[int8] PASSED test_cool_feature.py::test_series[int8] PASSED Running the test suite ---------------------- The tests can then be run directly inside your Git clone (without having to install *xarray*) by typing:: pytest xarray The tests suite is exhaustive and takes a few minutes. Often it is worth running only a subset of tests first around your changes before running the entire suite. 
The easiest way to do this is with:: pytest xarray/path/to/test.py -k regex_matching_test_name Or with one of the following constructs:: pytest xarray/tests/[test-module].py pytest xarray/tests/[test-module].py::[TestClass] pytest xarray/tests/[test-module].py::[TestClass]::[test_method] Using `pytest-xdist `_, one can speed up local testing on multicore machines, by running pytest with the optional -n argument:: pytest xarray -n 4 This can significantly reduce the time it takes to locally run tests before submitting a pull request. For more, see the `pytest `_ documentation. Running the performance test suite ---------------------------------- Performance matters and it is worth considering whether your code has introduced performance regressions. *xarray* is starting to write a suite of benchmarking tests using `asv `__ to enable easy monitoring of the performance of critical *xarray* operations. These benchmarks are all found in the ``xarray/asv_bench`` directory. To use all features of asv, you will need either ``conda`` or ``virtualenv``. For more details please check the `asv installation webpage `_. To install asv:: python -m pip install asv If you need to run a benchmark, change your directory to ``asv_bench/`` and run:: asv continuous -f 1.1 upstream/main HEAD You can replace ``HEAD`` with the name of the branch you are working on, and report benchmarks that changed by more than 10%. The command uses ``conda`` by default for creating the benchmark environments. If you want to use virtualenv instead, write:: asv continuous -f 1.1 -E virtualenv upstream/main HEAD The ``-E virtualenv`` option should be added to all ``asv`` commands that run benchmarks. The default value is defined in ``asv.conf.json``. Running the full benchmark suite can take up to one hour and use up a few GBs of RAM. Usually it is sufficient to paste only a subset of the results into the pull request to show that the committed changes do not cause unexpected performance regressions. You can run specific benchmarks using the ``-b`` flag, which takes a regular expression. For example, this will only run tests from a ``xarray/asv_bench/benchmarks/groupby.py`` file:: asv continuous -f 1.1 upstream/main HEAD -b ^groupby If you want to only run a specific group of tests from a file, you can do it using ``.`` as a separator. For example:: asv continuous -f 1.1 upstream/main HEAD -b groupby.GroupByMethods will only run the ``GroupByMethods`` benchmark defined in ``groupby.py``. You can also run the benchmark suite using the version of *xarray* already installed in your current Python environment. This can be useful if you do not have ``virtualenv`` or ``conda``, or are using the ``setup.py develop`` approach discussed above; for the in-place build you need to set ``PYTHONPATH``, e.g. ``PYTHONPATH="$PWD/.." asv [remaining arguments]``. You can run benchmarks using an existing Python environment by:: asv run -e -E existing or, to use a specific Python interpreter,:: asv run -e -E existing:python3.10 This will display stderr from the benchmarks, and use your local ``python`` that comes from your ``$PATH``. Learn `how to write a benchmark and how to use asv from the documentation `_ . .. TODO: uncomment once we have a working setup see https://github.com/pydata/xarray/pull/5066 The *xarray* benchmarking suite is run remotely and the results are available `here `_. Documenting your code --------------------- Changes should be reflected in the release notes located in ``doc/whats-new.rst``. 
This file contains an ongoing change log for each release. Add an entry to this file to document your fix, enhancement or (unavoidable) breaking change. Make sure to include the GitHub issue number when adding your entry (using ``:issue:`1234```, where ``1234`` is the issue/pull request number). If your code is an enhancement, it is most likely necessary to add usage examples to the existing documentation. This can be done by following the :ref:`guidelines for contributing to the documentation `. .. _contributing.changes: Contributing your changes to *xarray* ===================================== .. _contributing.committing: Committing your code -------------------- Keep style fixes to a separate commit to make your pull request more readable. Once you've made changes, you can see them by typing:: git status If you have created a new file, it is not being tracked by git. Add it by typing:: git add path/to/file-to-be-added.py Doing 'git status' again should give something like:: # On branch shiny-new-feature # # modified: /relative/path/to/file-you-added.py # The following defines how a commit message should ideally be structured: * A subject line with ``< 72`` chars. * One blank line. * Optionally, a commit message body. Please reference the relevant GitHub issues in your commit message using ``GH1234`` or ``#1234``. Either style is fine, but the former is generally preferred. Now you can commit your changes in your local repository:: git commit -m .. _contributing.pushing: Pushing your changes -------------------- When you want your changes to appear publicly on your GitHub page, push your forked feature branch's commits:: git push origin shiny-new-feature Here ``origin`` is the default name given to your remote repository on GitHub. You can see the remote repositories:: git remote -v If you added the upstream repository as described above you will see something like:: origin git@github.com:yourname/xarray.git (fetch) origin git@github.com:yourname/xarray.git (push) upstream git://github.com/pydata/xarray.git (fetch) upstream git://github.com/pydata/xarray.git (push) Now your code is on GitHub, but it is not yet a part of the *xarray* project. For that to happen, a pull request needs to be submitted on GitHub. .. _contributing.review: Review your code ---------------- When you're ready to ask for a code review, file a pull request. Before you do, once again make sure that you have followed all the guidelines outlined in this document regarding code style, tests, performance tests, and documentation. You should also double check your branch changes against the branch it was based on: #. Navigate to your repository on GitHub -- https://github.com/your-user-name/xarray #. Click on ``Branches`` #. Click on the ``Compare`` button for your feature branch #. Select the ``base`` and ``compare`` branches, if necessary. This will be ``main`` and ``shiny-new-feature``, respectively. .. _contributing.pr: Finally, make the pull request ------------------------------ If everything looks good, you are ready to make a pull request. A pull request is how code from a local repository becomes available to the GitHub community and can be looked at and eventually merged into the ``main`` version. This pull request and its associated changes will eventually be committed to the ``main`` branch and available in the next release. To submit a pull request: #. Navigate to your repository on GitHub #. Click on the ``Pull Request`` button #. 
You can then click on ``Commits`` and ``Files Changed`` to make sure everything looks okay one last time #. Write a description of your changes in the ``Preview Discussion`` tab #. Click ``Send Pull Request``. This request then goes to the repository maintainers, and they will review the code. If you have made updates to the documentation, you can now see a preview of the updated docs by clicking on "Details" under the ``docs/readthedocs.org`` check near the bottom of the list of checks that run automatically when submitting a PR, then clicking on the "View Docs" button on the right (not the big green button, the small black one further down). .. image:: ../_static/view-docs.png If you need to make more changes, you can make them in your branch, add them to a new commit, push them to GitHub, and the pull request will automatically be updated. Pushing them to GitHub again is done by:: git push origin shiny-new-feature This will automatically update your pull request with the latest code and restart the :ref:`Continuous Integration ` tests. .. _contributing.delete: Delete your merged branch (optional) ------------------------------------ Once your feature branch is accepted into upstream, you'll probably want to get rid of the branch. First, update your ``main`` branch to check that the merge was successful:: git fetch upstream git checkout main git merge upstream/main Then you can do:: git branch -D shiny-new-feature You need to use an upper-case ``-D`` because the branch was squashed into a single commit before merging. Be careful with this because ``git`` won't warn you if you accidentally delete an unmerged branch. If you didn't delete your branch using GitHub's interface, then it will still exist on GitHub. To delete it there do:: git push origin --delete shiny-new-feature .. _contributing.checklist: PR checklist ------------ - **Properly comment and document your code.** See `"Documenting your code" `_. - **Test that the documentation builds correctly** by typing ``make html`` in the ``doc`` directory. This is not strictly necessary, but this may be easier than waiting for CI to catch a mistake. See `"Contributing to the documentation" `_. - **Test your code**. - Write new tests if needed. See `"Test-driven development/code writing" `_. - Test the code using `Pytest `_. Running all tests (type ``pytest`` in the root directory) takes a while, so feel free to only run the tests you think are needed based on your PR (example: ``pytest xarray/tests/test_dataarray.py``). CI will catch any failing tests. - By default, the upstream dev CI is disabled on pull request and push events. You can override this behavior per commit by adding a ``[test-upstream]`` tag to the first line of the commit message. For documentation-only commits, you can skip the CI per commit by adding a ``[skip-ci]`` tag to the first line of the commit message. - **Properly format your code** and verify that it passes the formatting guidelines set by `ruff `_. See `"Code formatting" `_. You can use `pre-commit `_ to run these automatically on each commit. - Run ``pre-commit run --all-files`` in the root directory. This may modify some files. Confirm and commit any formatting changes. - **Push your code** and `create a PR on GitHub `_. - **Use a helpful title for your pull request** by summarizing the main contributions rather than using the latest commit message. If the PR addresses an `issue `_, please `reference it `_. 
python-xarray-2026.01.0/doc/contribute/developers-meeting.rst0000664000175000017500000000240415136607163024316 0ustar alastairalastair.. _developers-meeting: Developers meeting ------------------ Xarray developers meet bi-weekly every other Wednesday. The meeting occurs on `Zoom `__. Find the `notes for the meeting here `__. There is a :issue:`GitHub issue for changes to the meeting<4001>`. You can subscribe to this calendar to be notified of changes: * `Google Calendar `__ * `iCal `__ .. raw:: html python-xarray-2026.01.0/doc/internals/0000775000175000017500000000000015136607163017607 5ustar alastairalastairpython-xarray-2026.01.0/doc/internals/index.rst0000664000175000017500000000176615136607163021462 0ustar alastairalastair.. _internals: Xarray Internals ================ Xarray builds upon two of the foundational libraries of the scientific Python stack, NumPy and pandas. It is written in pure Python (no C or Cython extensions), which makes it easy to develop and extend. Instead, we push compiled code to :ref:`optional dependencies`. The pages in this section are intended for: * Contributors to xarray who wish to better understand some of the internals, * Developers from other fields who wish to extend xarray with domain-specific logic, perhaps to support a new scientific community of users, * Developers of other packages who wish to interface xarray with their existing tools, e.g. by creating a backend for reading a new file format, or wrapping a custom array type. .. toctree:: :maxdepth: 2 :hidden: internal-design interoperability duck-arrays-integration chunked-arrays extending-xarray how-to-add-new-backend how-to-create-custom-index zarr-encoding-spec time-coding python-xarray-2026.01.0/doc/internals/interoperability.rst0000664000175000017500000000634515136607163023736 0ustar alastairalastair.. _interoperability: Interoperability of Xarray ========================== Xarray is designed to be extremely interoperable, in many orthogonal ways. Making xarray as flexible as possible is the common theme of most of the goals on our :ref:`roadmap`. This interoperability comes via a set of flexible abstractions into which the user can plug in. The current full list is: - :ref:`Custom file backends ` via the :py:class:`~xarray.backends.BackendEntrypoint` system, - Numpy-like :ref:`"duck" array wrapping `, which supports the `Python Array API Standard `_, - :ref:`Chunked distributed array computation ` via the :py:class:`~xarray.namedarray.parallelcompat.ChunkManagerEntrypoint` system, - Custom :py:class:`~xarray.Index` objects for :ref:`flexible label-based lookups `, - Extending xarray objects with domain-specific methods via :ref:`custom accessors `. .. warning:: One obvious way in which xarray could be more flexible is that whilst subclassing xarray objects is possible, we currently don't support it in most transformations, instead recommending composition over inheritance. See the :ref:`internal design page ` for the rationale and look at the corresponding `GH issue `_ if you're interested in improving support for subclassing! .. note:: If you think there is another way in which xarray could become more generically flexible then please tell us your ideas by `raising an issue to request the feature `_! Whilst xarray was originally designed specifically to open ``netCDF4`` files as :py:class:`numpy.ndarray` objects labelled by :py:class:`pandas.Index` objects, it is entirely possible today to: - lazily open an xarray object directly from a custom binary file format (e.g. 
using ``xarray.open_dataset(path, engine='my_custom_format')``, - handle the data as any API-compliant numpy-like array type (e.g. sparse or GPU-backed), - distribute out-of-core computation across that array type in parallel (e.g. via :ref:`dask`), - track the physical units of the data through computations (e.g via `pint-xarray `_), - query the data via custom index logic optimized for specific applications (e.g. an :py:class:`~xarray.Index` object backed by a KDTree structure), - attach domain-specific logic via accessor methods (e.g. to understand geographic Coordinate Reference System metadata), - organize hierarchical groups of xarray data in a :py:class:`xarray.DataTree` (e.g. to treat heterogeneous simulation and observational data together during analysis). All of these features can be provided simultaneously, using libraries compatible with the rest of the scientific python ecosystem. In this situation xarray would be essentially a thin wrapper acting as pure-python framework, providing a common interface and separation of concerns via various domain-agnostic abstractions. Most of the remaining pages in the documentation of xarray's internals describe these various types of interoperability in more detail. python-xarray-2026.01.0/doc/internals/duck-arrays-integration.rst0000664000175000017500000000656415136607163025122 0ustar alastairalastair .. _internals.duckarrays: Integrating with duck arrays ============================= .. warning:: This is an experimental feature. Please report any bugs or other difficulties on `xarray's issue tracker `_. Xarray can wrap custom numpy-like arrays (":term:`duck array`\s") - see the :ref:`user guide documentation `. This page is intended for developers who are interested in wrapping a new custom array type with xarray. .. _internals.duckarrays.requirements: Duck array requirements ~~~~~~~~~~~~~~~~~~~~~~~ Xarray does not explicitly check that required methods are defined by the underlying duck array object before attempting to wrap the given array. However, a wrapped array type should at a minimum define these attributes: * ``shape`` property, * ``dtype`` property, * ``ndim`` property, * ``__array__`` method, * ``__array_ufunc__`` method, * ``__array_function__`` method. These need to be defined consistently with :py:class:`numpy.ndarray`, for example the array ``shape`` property needs to obey `numpy's broadcasting rules `_ (see also the `Python Array API standard's explanation `_ of these same rules). .. _internals.duckarrays.array_api_standard: Python Array API standard support ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ As an integration library xarray benefits greatly from the standardization of duck-array libraries' APIs, and so is a big supporter of the `Python Array API Standard `_. We aim to support any array libraries that follow the Array API standard out-of-the-box. However, xarray does occasionally call some numpy functions which are not (yet) part of the standard (e.g. :py:meth:`xarray.DataArray.pad` calls :py:func:`numpy.pad`). See `xarray issue #7848 `_ for a list of such functions. We can still support dispatching on these functions through the array protocols above, it just means that if you exclusively implement the methods in the Python Array API standard then some features in xarray will not work. Custom inline reprs ~~~~~~~~~~~~~~~~~~~ In certain situations (e.g. 
when printing the collapsed preview of variables of a ``Dataset``), xarray will display the repr of a :term:`duck array` in a single line, truncating it to a certain number of characters. If that would drop too much information, the :term:`duck array` may define a ``_repr_inline_`` method that takes ``max_width`` (number of characters) as an argument .. code:: python class MyDuckArray: ... def _repr_inline_(self, max_width): """format to a single line with at most max_width characters""" ... ... To avoid duplicated information, this method must omit information about the shape and :term:`dtype`. For example, the string representation of a ``dask`` array or a ``sparse`` matrix would be: .. jupyter-execute:: import dask.array as da import xarray as xr import numpy as np import sparse .. jupyter-execute:: a = da.linspace(0, 1, 20, chunks=2) a .. jupyter-execute:: b = np.eye(10) b[[5, 7, 3, 0], [6, 8, 2, 9]] = 2 b = sparse.COO.from_numpy(b) b .. jupyter-execute:: xr.Dataset(dict(a=("x", a), b=(("y", "z"), b))) python-xarray-2026.01.0/doc/internals/internal-design.rst0000664000175000017500000002362615136607163023435 0ustar alastairalastair.. jupyter-execute:: :hide-code: import numpy as np import pandas as pd import xarray as xr np.random.seed(123456) np.set_printoptions(threshold=10, edgeitems=2) .. _internal design: Internal Design =============== This page gives an overview of the internal design of xarray. In totality, the Xarray project defines 4 key data structures. In order of increasing complexity, they are: - :py:class:`xarray.Variable`, - :py:class:`xarray.DataArray`, - :py:class:`xarray.Dataset`, - :py:class:`xarray.DataTree`. The user guide lists only :py:class:`xarray.DataArray` and :py:class:`xarray.Dataset`, but :py:class:`~xarray.Variable` is the fundamental object internally, and :py:class:`~xarray.DataTree` is a natural generalisation of :py:class:`xarray.Dataset`. .. note:: Our :ref:`roadmap` includes plans to document :py:class:`~xarray.Variable` as fully public API. Internally private :ref:`lazy indexing classes ` are used to avoid loading more data than necessary, and flexible indexes classes (derived from :py:class:`~xarray.indexes.Index`) provide performant label-based lookups. .. _internal design.data structures: Data Structures --------------- The :ref:`data structures` page in the user guide explains the basics and concentrates on user-facing behavior, whereas this section explains how xarray's data structure classes actually work internally. .. _internal design.data structures.variable: Variable Objects ~~~~~~~~~~~~~~~~ The core internal data structure in xarray is the :py:class:`~xarray.Variable`, which is used as the basic building block behind xarray's :py:class:`~xarray.Dataset`, :py:class:`~xarray.DataArray` types. A :py:class:`~xarray.Variable` consists of: - ``dims``: A tuple of dimension names. - ``data``: The N-dimensional array (typically a NumPy or Dask array) storing the Variable's data. It must have the same number of dimensions as the length of ``dims``. - ``attrs``: A dictionary of metadata associated with this array. By convention, xarray's built-in operations never use this metadata. - ``encoding``: Another dictionary used to store information about how these variable's data is represented on disk. See :ref:`io.encoding` for more details. :py:class:`~xarray.Variable` has an interface similar to NumPy arrays, but extended to make use of named dimensions. 
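As a concrete illustration of the components listed above, the sketch below builds a ``Variable`` directly from its ``dims``, ``data`` and ``attrs`` using the public constructor; the values are arbitrary and purely illustrative:

.. code-block:: python

    import numpy as np
    import xarray as xr

    # A Variable bundles dimension names, the wrapped array, and metadata.
    var = xr.Variable(
        dims=("x", "y"),
        data=np.arange(6).reshape(2, 3),
        attrs={"units": "metres"},
    )

    var.dims   # ('x', 'y')
    var.data   # the wrapped numpy array
    var.attrs  # {'units': 'metres'}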
For example, it uses ``dim`` in preference to an ``axis`` argument for methods like ``mean``, and supports :ref:`compute.broadcasting`. However, unlike ``Dataset`` and ``DataArray``, the basic ``Variable`` does not include coordinate labels along each axis. :py:class:`~xarray.Variable` is public API, but because of its incomplete support for labeled data, it is mostly intended for advanced uses, such as in xarray itself, for writing new backends, or when creating custom indexes. You can access the variable objects that correspond to xarray objects via the (readonly) :py:attr:`Dataset.variables ` and :py:attr:`DataArray.variable ` attributes. .. _internal design.dataarray: DataArray Objects ~~~~~~~~~~~~~~~~~ The simplest data structure used by most users is :py:class:`~xarray.DataArray`. A :py:class:`~xarray.DataArray` is a composite object consisting of multiple :py:class:`~xarray.Variable` objects which store related data. A single :py:class:`~xarray.Variable` is referred to as the "data variable", and stored under the :py:attr:`~xarray.DataArray.variable` attribute. A :py:class:`~xarray.DataArray` inherits all of the properties of this data variable, i.e. ``dims``, ``data``, ``attrs`` and ``encoding``, all of which are implemented by forwarding on to the underlying ``Variable`` object. In addition, a :py:class:`~xarray.DataArray` stores additional ``Variable`` objects in a dict under the private ``_coords`` attribute, each of which is referred to as a "Coordinate Variable". These coordinate variable objects are only allowed to have ``dims`` that are a subset of the data variable's ``dims``, and each dim has a specific length. This means that the full :py:attr:`~xarray.DataArray.size` of the dataarray can be represented by a dictionary mapping dimension names to integer sizes. The underlying data variable has this exact same size, and the attached coordinate variables have sizes which are some subset of the size of the data variable. Another way of saying this is that all coordinate variables must be "alignable" with the data variable. When a coordinate is accessed by the user (e.g. via the dict-like :py:class:`~xarray.DataArray.__getitem__` syntax), a new ``DataArray`` is constructed by finding all coordinate variables that have compatible dimensions and re-attaching them before the result is returned. This is why most users never see the ``Variable`` class underlying each coordinate variable - it is always promoted to a ``DataArray`` before returning. Lookups are performed by special :py:class:`~xarray.indexes.Index` objects, which are stored in a dict under the private ``_indexes`` attribute. Indexes must be associated with one or more coordinates, and essentially act by translating a query given in physical coordinate space (typically via the :py:meth:`~xarray.DataArray.sel` method) into a set of integer indices in array index space that can be used to index the underlying n-dimensional array-like ``data``. Indexing in array index space (typically performed via the :py:meth:`~xarray.DataArray.isel` method) does not require consulting an ``Index`` object. Finally, a :py:class:`~xarray.DataArray` defines a :py:attr:`~xarray.DataArray.name` attribute, which refers to its data variable but is stored on the wrapping ``DataArray`` class. The ``name`` attribute is primarily used when one or more :py:class:`~xarray.DataArray` objects are promoted into a :py:class:`~xarray.Dataset` (e.g. via :py:meth:`~xarray.DataArray.to_dataset`).
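The relationship between these pieces can be inspected directly. The following is a minimal sketch (the array contents and the name ``"foo"`` are illustrative only):

.. code-block:: python

    import numpy as np
    import xarray as xr

    da = xr.DataArray(
        np.arange(6).reshape(2, 3),
        dims=("x", "y"),
        coords={"x": [10, 20]},
        name="foo",
    )

    type(da.variable)  # xarray.Variable - the wrapped "data variable"
    list(da._coords)   # ['x'] - coordinate Variables live in a private dict
    da.name            # 'foo' - stored on the DataArray wrapper itself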
Note that the underlying :py:class:`~xarray.Variable` objects are all unnamed, so they can always be referred to uniquely via a dict-like mapping. .. _internal design.dataset: Dataset Objects ~~~~~~~~~~~~~~~ The :py:class:`~xarray.Dataset` class is a generalization of the :py:class:`~xarray.DataArray` class that can hold multiple data variables. Internally all data variables and coordinate variables are stored under a single ``variables`` dict, and coordinates are specified by storing their names in a private ``_coord_names`` dict. The dataset's ``dims`` are the set of all dims present across any variable, but (as for dataarrays) coordinate variables cannot have a dimension that is not present on any data variable. When a data variable or coordinate variable is accessed, a new ``DataArray`` is again constructed from all compatible coordinates before returning. .. _internal design.subclassing: .. note:: The way that selecting a variable from a ``DataArray`` or ``Dataset`` actually involves internally wrapping the ``Variable`` object back up into a ``DataArray``/``Dataset`` is the primary reason :ref:`we recommend against subclassing ` Xarray objects. The main problem it creates is that we currently cannot easily guarantee that, for example, selecting a coordinate variable from your ``SubclassedDataArray`` would return an instance of ``SubclassedDataArray`` instead of just an :py:class:`xarray.DataArray`. See `GH issue `_ for more details. .. _internal design.lazy indexing: Lazy Indexing Classes --------------------- Lazy Loading ~~~~~~~~~~~~ If we open a ``Variable`` object from disk using :py:func:`~xarray.open_dataset`, we can see that the actual values of the array wrapped by the data variable are not displayed. .. jupyter-execute:: da = xr.tutorial.open_dataset("air_temperature")["air"] var = da.variable var We can see the size and the dtype of the underlying array, but not the actual values. This is because the values have not yet been loaded. If we look at the private attribute :py:attr:`~xarray.Variable._data` containing the underlying array object, we see something interesting: .. jupyter-execute:: var._data You're looking at one of xarray's internal Lazy Indexing Classes. These powerful classes are hidden from the user, but provide important functionality. Calling the public :py:attr:`~xarray.Variable.data` property loads the underlying array into memory. .. jupyter-execute:: var.data This array is now cached, which we can see by accessing the private attribute again: .. jupyter-execute:: var._data Lazy Indexing ~~~~~~~~~~~~~ The purpose of these lazy indexing classes is to prevent more data being loaded into memory than is necessary for the subsequent analysis, by deferring loading data until after indexing is performed. Let's open the data from disk again. .. jupyter-execute:: da = xr.tutorial.open_dataset("air_temperature")["air"] var = da.variable Now, notice how even after subsetting the data still does not get loaded: .. jupyter-execute:: var.isel(time=0) The shape has changed, but the values are still not shown. Looking at the private attribute again shows how this indexing information was propagated via the hidden lazy indexing classes: .. jupyter-execute:: var.isel(time=0)._data .. note:: Currently only certain indexing operations are lazy, not all array operations. For discussion of making all array operations lazy see `GH issue #5081 `_.
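To make the scope of that note concrete, the sketch below (which assumes the same tutorial dataset as above is available) contrasts an indexing operation, which stays lazy, with an arithmetic operation, which loads the subset into memory:

.. code-block:: python

    import xarray as xr

    var = xr.tutorial.open_dataset("air_temperature")["air"].variable

    subset = var.isel(time=0)  # indexing: still wraps a lazy indexing class
    type(subset._data)

    doubled = subset * 2  # arithmetic: the subset is loaded and computed eagerly
    type(doubled._data)   # now a plain numpy.ndarray (for a non-dask backend)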
Lazy Dask Arrays ~~~~~~~~~~~~~~~~ Note that xarray's implementation of Lazy Indexing classes is completely separate from how :py:class:`dask.array.Array` objects evaluate lazily. Dask-backed xarray objects delay almost all operations until :py:meth:`~xarray.DataArray.compute` is called (either explicitly or implicitly via :py:meth:`~xarray.DataArray.plot` for example). The exceptions to this laziness are operations whose output shape is data-dependent, such as when calling :py:meth:`~xarray.DataArray.where`. python-xarray-2026.01.0/doc/internals/time-coding.rst0000664000175000017500000005725615136607163022557 0ustar alastairalastair.. jupyter-execute:: :hide-code: import numpy as np import pandas as pd import xarray as xr np.random.seed(123456) np.set_printoptions(threshold=20) int64_max = np.iinfo("int64").max int64_min = np.iinfo("int64").min + 1 uint64_max = np.iinfo("uint64").max .. _internals.timecoding: Time Coding =========== This page gives an overview how xarray encodes and decodes times and which conventions and functions are used. Pandas functionality -------------------- to_datetime ~~~~~~~~~~~ The function :py:func:`pandas.to_datetime` is used within xarray for inferring units and for testing purposes. In normal operation :py:func:`pandas.to_datetime` returns a :py:class:`pandas.Timestamp` (for scalar input) or :py:class:`pandas.DatetimeIndex` (for array-like input) which are related to ``np.datetime64`` values with a resolution inherited from the input (can be one of ``'s'``, ``'ms'``, ``'us'``, ``'ns'``). If no resolution can be inherited ``'ns'`` is assumed. That has the implication that the maximum usable time range for those cases is approximately +/- 292 years centered around the Unix epoch (1970-01-01). To accommodate that, we carefully check the units/resolution in the encoding and decoding step. When the arguments are numeric (not strings or ``np.datetime64`` values) ``"unit"`` can be anything from ``'Y'``, ``'W'``, ``'D'``, ``'h'``, ``'m'``, ``'s'``, ``'ms'``, ``'us'`` or ``'ns'``, though the returned resolution will be ``"ns"``. .. jupyter-execute:: print(f"Minimum datetime: {pd.to_datetime(int64_min, unit="ns")}") print(f"Maximum datetime: {pd.to_datetime(int64_max, unit="ns")}") For input values which can't be represented in nanosecond resolution an :py:class:`pandas.OutOfBoundsDatetime` exception is raised: .. jupyter-execute:: try: dtime = pd.to_datetime(int64_max, unit="us") except Exception as err: print(err) .. jupyter-execute:: try: dtime = pd.to_datetime(uint64_max, unit="ns") print("Wrong:", dtime) dtime = pd.to_datetime([uint64_max], unit="ns") except Exception as err: print(err) ``np.datetime64`` values can be extracted with :py:meth:`pandas.Timestamp.to_numpy` and :py:meth:`pandas.DatetimeIndex.to_numpy`. The returned resolution depends on the internal representation. This representation can be changed using :py:meth:`pandas.Timestamp.as_unit` and :py:meth:`pandas.DatetimeIndex.as_unit` respectively. ``as_unit`` takes one of ``'s'``, ``'ms'``, ``'us'``, ``'ns'`` as an argument. That means we are able to represent datetimes with second, millisecond, microsecond or nanosecond resolution. .. jupyter-execute:: time = pd.to_datetime(np.datetime64(0, "D")) print("Datetime:", time, np.asarray([time.to_numpy()]).dtype) print("Datetime as_unit('ms'):", time.as_unit("ms")) print("Datetime to_numpy():", time.as_unit("ms").to_numpy()) .. 
jupyter-execute:: time = pd.to_datetime(np.array([-1000, 1, 2], dtype="datetime64[Y]")) print("DatetimeIndex:", time) print("DatetimeIndex as_unit('us'):", time.as_unit("us")) print("DatetimeIndex to_numpy():", time.as_unit("us").to_numpy()) .. warning:: Input data with resolution higher than ``'ns'`` (eg. ``'ps'``, ``'fs'``, ``'as'``) is truncated (not rounded) at the ``'ns'``-level. This is `currently broken `_ for the ``'ps'`` input, where it is interpreted as ``'ns'``. .. jupyter-execute:: print("Good:", pd.to_datetime([np.datetime64(1901901901901, "as")])) print("Good:", pd.to_datetime([np.datetime64(1901901901901, "fs")])) print(" Bad:", pd.to_datetime([np.datetime64(1901901901901, "ps")])) print("Good:", pd.to_datetime([np.datetime64(1901901901901, "ns")])) print("Good:", pd.to_datetime([np.datetime64(1901901901901, "us")])) print("Good:", pd.to_datetime([np.datetime64(1901901901901, "ms")])) .. warning:: Care has to be taken, as some configurations of input data will raise. The following shows, that we are safe to use :py:func:`pandas.to_datetime` when providing :py:class:`numpy.datetime64` as scalar or numpy array as input. .. jupyter-execute:: print( "Works:", np.datetime64(1901901901901, "s"), pd.to_datetime(np.datetime64(1901901901901, "s")), ) print( "Works:", np.array([np.datetime64(1901901901901, "s")]), pd.to_datetime(np.array([np.datetime64(1901901901901, "s")])), ) try: pd.to_datetime([np.datetime64(1901901901901, "s")]) except Exception as err: print("Raises:", err) try: pd.to_datetime(1901901901901, unit="s") except Exception as err: print("Raises:", err) try: pd.to_datetime([1901901901901], unit="s") except Exception as err: print("Raises:", err) try: pd.to_datetime(np.array([1901901901901]), unit="s") except Exception as err: print("Raises:", err) to_timedelta ~~~~~~~~~~~~ The function :py:func:`pandas.to_timedelta` is used within xarray for inferring units and for testing purposes. In normal operation :py:func:`pandas.to_timedelta` returns a :py:class:`pandas.Timedelta` (for scalar input) or :py:class:`pandas.TimedeltaIndex` (for array-like input) which are ``np.timedelta64`` values with ``ns`` resolution internally. That has the implication, that the usable timedelta covers only roughly 585 years. To accommodate for that, we are working around that limitation in the encoding and decoding step. .. jupyter-execute:: f"Maximum timedelta range: ({pd.to_timedelta(int64_min, unit="ns")}, {pd.to_timedelta(int64_max, unit="ns")})" For input values which can't be represented in nanosecond resolution an :py:class:`pandas.OutOfBoundsTimedelta` exception is raised: .. jupyter-execute:: try: delta = pd.to_timedelta(int64_max, unit="us") except Exception as err: print("First:", err) .. jupyter-execute:: try: delta = pd.to_timedelta(uint64_max, unit="ns") except Exception as err: print("Second:", err) When arguments are numeric (not strings or ``np.timedelta64`` values) "unit" can be anything from ``'W'``, ``'D'``, ``'h'``, ``'m'``, ``'s'``, ``'ms'``, ``'us'`` or ``'ns'``, though the returned resolution will be ``"ns"``. ``np.timedelta64`` values can be extracted with :py:meth:`pandas.Timedelta.to_numpy` and :py:meth:`pandas.TimedeltaIndex.to_numpy`. The returned resolution depends on the internal representation. This representation can be changed using :py:meth:`pandas.Timedelta.as_unit` and :py:meth:`pandas.TimedeltaIndex.as_unit` respectively. ``as_unit`` takes one of ``'s'``, ``'ms'``, ``'us'``, ``'ns'`` as an argument. 
That means we are able to represent timedeltas with second, millisecond, microsecond or nanosecond resolution. .. jupyter-execute:: delta = pd.to_timedelta(np.timedelta64(1, "D")) print("Timedelta:", delta, np.asarray([delta.to_numpy()]).dtype) print("Timedelta as_unit('ms'):", delta.as_unit("ms")) print("Timedelta to_numpy():", delta.as_unit("ms").to_numpy()) .. jupyter-execute:: delta = pd.to_timedelta([0, 1, 2], unit="D") print("TimedeltaIndex:", delta) print("TimedeltaIndex as_unit('ms'):", delta.as_unit("ms")) print("TimedeltaIndex to_numpy():", delta.as_unit("ms").to_numpy()) .. warning:: Care has to be taken, as some configurations of input data will raise. The following shows, that we are safe to use :py:func:`pandas.to_timedelta` when providing :py:class:`numpy.timedelta64` as scalar or numpy array as input. .. jupyter-execute:: print( "Works:", np.timedelta64(1901901901901, "s"), pd.to_timedelta(np.timedelta64(1901901901901, "s")), ) print( "Works:", np.array([np.timedelta64(1901901901901, "s")]), pd.to_timedelta(np.array([np.timedelta64(1901901901901, "s")])), ) try: pd.to_timedelta([np.timedelta64(1901901901901, "s")]) except Exception as err: print("Raises:", err) try: pd.to_timedelta(1901901901901, unit="s") except Exception as err: print("Raises:", err) try: pd.to_timedelta([1901901901901], unit="s") except Exception as err: print("Raises:", err) try: pd.to_timedelta(np.array([1901901901901]), unit="s") except Exception as err: print("Raises:", err) Timestamp ~~~~~~~~~ :py:class:`pandas.Timestamp` is used within xarray to wrap strings of CF encoding reference times and datetime.datetime. When arguments are numeric (not strings) "unit" can be anything from ``'Y'``, ``'W'``, ``'D'``, ``'h'``, ``'m'``, ``'s'``, ``'ms'``, ``'us'`` or ``'ns'``, though the returned resolution will be ``"ns"``. In normal operation :py:class:`pandas.Timestamp` holds the timestamp in the provided resolution, but only one of ``'s'``, ``'ms'``, ``'us'``, ``'ns'``. Lower resolution input is automatically converted to ``'s'``, higher resolution input is truncated to ``'ns'``. The same conversion rules apply here as for :py:func:`pandas.to_timedelta` (see `to_timedelta`_). Depending on the internal resolution Timestamps can be represented in the range: .. jupyter-execute:: for unit in ["s", "ms", "us", "ns"]: print( f"unit: {unit!r} time range ({pd.Timestamp(int64_min, unit=unit)}, {pd.Timestamp(int64_max, unit=unit)})" ) Since relaxing the resolution, this enhances the range to several hundreds of thousands of centuries with microsecond representation. ``NaT`` will be at ``np.iinfo("int64").min`` for all of the different representations. .. warning:: When initialized with a datetime string this is only defined from ``-9999-01-01`` to ``9999-12-31``. .. jupyter-execute:: try: print("Works:", pd.Timestamp("-9999-01-01 00:00:00")) print("Works, too:", pd.Timestamp("9999-12-31 23:59:59")) print(pd.Timestamp("10000-01-01 00:00:00")) except Exception as err: print("Errors:", err) .. note:: :py:class:`pandas.Timestamp` is the only current possibility to correctly import time reference strings. It handles non-ISO formatted strings, keeps the resolution of the strings (``'s'``, ``'ms'`` etc.) and imports time zones. When initialized with :py:class:`numpy.datetime64` instead of a string it even overcomes the above limitation of the possible time range. .. 
jupyter-execute:: try: print("Handles non-ISO:", pd.Timestamp("92-1-8 151542")) print( "Keeps resolution 1:", pd.Timestamp("1992-10-08 15:15:42"), pd.Timestamp("1992-10-08 15:15:42").unit, ) print( "Keeps resolution 2:", pd.Timestamp("1992-10-08 15:15:42.5"), pd.Timestamp("1992-10-08 15:15:42.5").unit, ) print( "Keeps timezone:", pd.Timestamp("1992-10-08 15:15:42.5 -6:00"), pd.Timestamp("1992-10-08 15:15:42.5 -6:00").unit, ) print( "Extends timerange :", pd.Timestamp(np.datetime64("-10000-10-08 15:15:42.5001")), pd.Timestamp(np.datetime64("-10000-10-08 15:15:42.5001")).unit, ) except Exception as err: print("Errors:", err) DatetimeIndex ~~~~~~~~~~~~~ :py:class:`pandas.DatetimeIndex` is used to wrap ``np.datetime64`` values or other datetime-likes when encoding. The resolution of the DatetimeIndex depends on the input, but can be only one of ``'s'``, ``'ms'``, ``'us'``, ``'ns'``. Lower resolution input is automatically converted to ``'s'``, higher resolution input is cut to ``'ns'``. :py:class:`pandas.DatetimeIndex` will raise :py:class:`pandas.OutOfBoundsDatetime` if the input can't be represented in the given resolution. .. jupyter-execute:: try: print( "Works:", pd.DatetimeIndex( np.array(["1992-01-08", "1992-01-09"], dtype="datetime64[D]") ), ) print( "Works:", pd.DatetimeIndex( np.array( ["1992-01-08 15:15:42", "1992-01-09 15:15:42"], dtype="datetime64[s]", ) ), ) print( "Works:", pd.DatetimeIndex( np.array( ["1992-01-08 15:15:42.5", "1992-01-09 15:15:42.0"], dtype="datetime64[ms]", ) ), ) print( "Works:", pd.DatetimeIndex( np.array( ["1970-01-01 00:00:00.401501601701801901", "1970-01-01 00:00:00"], dtype="datetime64[as]", ) ), ) print( "Works:", pd.DatetimeIndex( np.array( ["-10000-01-01 00:00:00.401501", "1970-01-01 00:00:00"], dtype="datetime64[us]", ) ), ) except Exception as err: print("Errors:", err) CF Conventions Time Handling ---------------------------- Xarray tries to adhere to the latest version of the `CF Conventions`_. Relevant is the section on `Time Coordinate`_ and the `Calendar`_ subsection. .. _CF Conventions: https://cfconventions.org .. _Time Coordinate: https://cfconventions.org/Data/cf-conventions/cf-conventions-1.11/cf-conventions.html#time-coordinate .. _Calendar: https://cfconventions.org/Data/cf-conventions/cf-conventions-1.11/cf-conventions.html#calendar CF time decoding ~~~~~~~~~~~~~~~~ Decoding of ``values`` with a time unit specification like ``"seconds since 1992-10-8 15:15:42.5 -6:00"`` into datetimes using the CF conventions is a multistage process. 1. If we have a non-standard calendar (e.g. ``"noleap"``) decoding is done with the ``cftime`` package, which is not covered in this section. For the ``"standard"``/``"gregorian"`` calendar as well as the ``"proleptic_gregorian"`` calendar the above outlined pandas functionality is used. 2. The ``"standard"``/``"gregorian"`` calendar and the ``"proleptic_gregorian"`` are equivalent for any dates and reference times >= ``"1582-10-15"``. First the reference time is checked and any timezone information stripped off. In a second step, the minimum and maximum ``values`` are checked if they can be represented in the current reference time resolution. At the same time integer overflow would be caught. For the ``"standard"``/``"gregorian"`` calendar the dates are checked to be >= ``"1582-10-15"``. If anything fails, the decoding is attempted with ``cftime``. 3. 
As the unit (here ``"seconds"``) and the resolution of the reference time ``"1992-10-8 15:15:42.5 -6:00"`` (here ``"milliseconds"``) might be different, the decoding resolution is aligned to the higher resolution of the two. Users may also specify their wanted target resolution by setting the ``time_unit`` keyword argument to one of ``'s'``, ``'ms'``, ``'us'``, ``'ns'`` (default ``'ns'``). This will be included in the alignment process. This is done by multiplying the ``values`` by the ratio of nanoseconds per time unit and nanoseconds per reference time unit. To retain consistency for ``NaT`` values a mask is kept and re-introduced after the multiplication. 4. Times encoded as floating point values are checked for fractional parts and the resolution is enhanced in an iterative process until a fitting resolution (or ``'ns'``) is found. A ``SerializationWarning`` is issued to make the user aware of the possibly problematic encoding. 5. Finally, the ``values`` (at this point converted to ``int64`` values) are cast to ``datetime64[unit]`` (using the above retrieved unit) and added to the reference time :py:class:`pandas.Timestamp`. .. jupyter-execute:: calendar = "proleptic_gregorian" values = np.array([-1000 * 365, 0, 1000 * 365], dtype="int64") units = "days since 2000-01-01 00:00:00.000001" dt = xr.coding.times.decode_cf_datetime(values, units, calendar, time_unit="s") assert dt.dtype == "datetime64[us]" dt .. jupyter-execute:: units = "microseconds since 2000-01-01 00:00:00" dt = xr.coding.times.decode_cf_datetime(values, units, calendar, time_unit="s") assert dt.dtype == "datetime64[us]" dt .. jupyter-execute:: values = np.array([0, 0.25, 0.5, 0.75, 1.0], dtype="float64") units = "days since 2000-01-01 00:00:00.001" dt = xr.coding.times.decode_cf_datetime(values, units, calendar, time_unit="s") assert dt.dtype == "datetime64[ms]" dt .. jupyter-execute:: values = np.array([0, 0.25, 0.5, 0.75, 1.0], dtype="float64") units = "hours since 2000-01-01" dt = xr.coding.times.decode_cf_datetime(values, units, calendar, time_unit="s") assert dt.dtype == "datetime64[s]" dt .. jupyter-execute:: values = np.array([0, 0.25, 0.5, 0.75, 1.0], dtype="float64") units = "hours since 2000-01-01 00:00:00 03:30" dt = xr.coding.times.decode_cf_datetime(values, units, calendar, time_unit="s") assert dt.dtype == "datetime64[s]" dt .. jupyter-execute:: values = np.array([-2002 * 365 - 121, -366, 365, 2000 * 365 + 119], dtype="int64") units = "days since 0001-01-01 00:00:00" dt = xr.coding.times.decode_cf_datetime(values, units, calendar, time_unit="s") assert dt.dtype == "datetime64[s]" dt CF time encoding ~~~~~~~~~~~~~~~~ For encoding the process is more or less a reversal of the above, but we have to make some decisions on default values. 1. Infer ``data_units`` from the given ``dates``. 2. Infer ``units`` (either cleanup given ``units`` or use ``data_units`` 3. Infer the calendar name from the given ``dates``. 4. If dates are :py:class:`cftime.datetime` objects then encode with ``cftime.date2num`` 5. Retrieve ``time_units`` and ``ref_date`` from ``units`` 6. Check ``ref_date`` >= ``1582-10-15``, otherwise -> ``cftime`` 7. Wrap ``dates`` with pd.DatetimeIndex 8. Subtracting ``ref_date`` (:py:class:`pandas.Timestamp`) from above :py:class:`pandas.DatetimeIndex` will return :py:class:`pandas.TimedeltaIndex` 9. Align resolution of :py:class:`pandas.TimedeltaIndex` with resolution of ``time_units`` 10. Retrieve needed ``units`` and ``delta`` to faithfully encode into int64 11. 
Divide ``time_deltas`` by ``delta``, use floor division (integer) or normal division (float) 12. Return result .. jupyter-execute:: calendar = "proleptic_gregorian" dates = np.array( [ "-2000-01-01T00:00:00", "0000-01-01T00:00:00", "0002-01-01T00:00:00", "2000-01-01T00:00:00", ], dtype="datetime64[s]", ) orig_values = np.array( [-2002 * 365 - 121, -366, 365, 2000 * 365 + 119], dtype="int64" ) units = "days since 0001-01-01 00:00:00" values, _, _ = xr.coding.times.encode_cf_datetime( dates, units, calendar, dtype=np.dtype("int64") ) print(values, units) np.testing.assert_array_equal(values, orig_values) .. jupyter-execute:: :stderr: dates = np.array( [ "-2000-01-01T01:00:00", "0000-01-01T00:00:00", "0002-01-01T00:00:00", "2000-01-01T00:00:00", ], dtype="datetime64[s]", ) orig_values = np.array( [-2002 * 365 - 121, -366, 365, 2000 * 365 + 119], dtype="int64" ) orig_values *= 24 # Convert to hours orig_values[0] += 1 # Adjust for the hour offset in dates above units = "days since 0001-01-01 00:00:00" values, units, _ = xr.coding.times.encode_cf_datetime( dates, units, calendar, dtype=np.dtype("int64") ) print(values, units) np.testing.assert_array_equal(values, orig_values) .. _internals.default_timeunit: Default Time Unit ~~~~~~~~~~~~~~~~~ The current default time unit of xarray is ``'ns'``. When setting keyword argument ``time_unit`` unit to ``'s'`` (the lowest resolution pandas allows) datetimes will be converted to at least ``'s'``-resolution, if possible. The same holds true for ``'ms'`` and ``'us'``. .. jupyter-execute:: datetimes1_filename = "test-datetimes1.nc" .. jupyter-execute:: :hide-code: # Ensure the file is located in a unique temporary directory # so that it doesn't conflict with parallel builds of the # documentation. import tempfile import os.path tempdir = tempfile.TemporaryDirectory() datetimes1_filename = os.path.join(tempdir.name, datetimes1_filename) .. jupyter-execute:: attrs = {"units": "hours since 2000-01-01"} ds = xr.Dataset({"time": ("time", [0, 1, 2, 3], attrs)}) ds.to_netcdf(datetimes1_filename) .. jupyter-execute:: xr.open_dataset(datetimes1_filename) .. jupyter-execute:: coder = xr.coders.CFDatetimeCoder(time_unit="s") xr.open_dataset(datetimes1_filename, decode_times=coder) If a coarser unit is requested the datetimes are decoded into their native on-disk resolution, if possible. .. jupyter-execute:: datetimes2_filename = "test-datetimes2.nc" .. jupyter-execute:: :hide-code: datetimes2_filename = os.path.join(tempdir.name, datetimes2_filename) .. jupyter-execute:: attrs = {"units": "milliseconds since 2000-01-01"} ds = xr.Dataset({"time": ("time", [0, 1, 2, 3], attrs)}) ds.to_netcdf(datetimes2_filename) .. jupyter-execute:: xr.open_dataset(datetimes2_filename) .. jupyter-execute:: coder = xr.coders.CFDatetimeCoder(time_unit="s") xr.open_dataset(datetimes2_filename, decode_times=coder) Similar logic applies for decoding timedelta values. The default resolution is ``"ns"``: .. jupyter-execute:: timedeltas1_filename = "test-timedeltas1.nc" .. jupyter-execute:: :hide-code: timedeltas1_filename = os.path.join(tempdir.name, timedeltas1_filename) .. jupyter-execute:: attrs = {"units": "hours"} ds = xr.Dataset({"time": ("time", [0, 1, 2, 3], attrs)}) ds.to_netcdf(timedeltas1_filename) .. jupyter-execute:: :stderr: xr.open_dataset(timedeltas1_filename) By default, timedeltas will be decoded to the same resolution as datetimes: .. 
jupyter-execute:: coder = xr.coders.CFDatetimeCoder(time_unit="s") xr.open_dataset(timedeltas1_filename, decode_times=coder, decode_timedelta=True) but if one would like to decode timedeltas to a different resolution, one can provide a coder specifically for timedeltas to ``decode_timedelta``: .. jupyter-execute:: timedelta_coder = xr.coders.CFTimedeltaCoder(time_unit="ms") xr.open_dataset( timedeltas1_filename, decode_times=coder, decode_timedelta=timedelta_coder ) As with datetimes, if a coarser unit is requested the timedeltas are decoded into their native on-disk resolution, if possible: .. jupyter-execute:: timedeltas2_filename = "test-timedeltas2.nc" .. jupyter-execute:: :hide-code: timedeltas2_filename = os.path.join(tempdir.name, timedeltas2_filename) .. jupyter-execute:: attrs = {"units": "milliseconds"} ds = xr.Dataset({"time": ("time", [0, 1, 2, 3], attrs)}) ds.to_netcdf(timedeltas2_filename) .. jupyter-execute:: xr.open_dataset(timedeltas2_filename, decode_timedelta=True) .. jupyter-execute:: coder = xr.coders.CFDatetimeCoder(time_unit="s") xr.open_dataset(timedeltas2_filename, decode_times=coder, decode_timedelta=True) To opt-out of timedelta decoding (see issue `Undesired decoding to timedelta64 `_) pass ``False`` to ``decode_timedelta``: .. jupyter-execute:: xr.open_dataset(timedeltas2_filename, decode_timedelta=False) .. note:: Note that in the future the default value of ``decode_timedelta`` will be ``False`` rather than ``None``. .. jupyter-execute:: :hide-code: # Cleanup tempdir.cleanup() python-xarray-2026.01.0/doc/internals/how-to-create-custom-index.rst0000664000175000017500000002256015136607163025441 0ustar alastairalastair.. currentmodule:: xarray .. _internals.custom indexes: How to create a custom index ============================ .. warning:: This feature is highly experimental. Support for custom indexes has been introduced in v2022.06.0 and is still incomplete. API is subject to change without deprecation notice. However we encourage you to experiment and report issues that arise. Xarray's built-in support for label-based indexing (e.g. ``ds.sel(latitude=40, method="nearest")``) and alignment operations relies on :py:class:`pandas.Index` objects. Pandas Indexes are powerful and suitable for many applications but also have some limitations: - it only works with 1-dimensional coordinates where explicit labels are fully loaded in memory - it is hard to reuse it with irregular data for which there exist more efficient, tree-based structures to perform data selection - it doesn't support extra metadata that may be required for indexing and alignment (e.g., a coordinate reference system) Fortunately, Xarray now allows extending this functionality with custom indexes, which can be implemented in 3rd-party libraries. The Index base class -------------------- Every Xarray index must inherit from the :py:class:`Index` base class. It is for example the case of Xarray built-in ``PandasIndex`` and ``PandasMultiIndex`` subclasses, which wrap :py:class:`pandas.Index` and :py:class:`pandas.MultiIndex` respectively. The ``Index`` API closely follows the :py:class:`Dataset` and :py:class:`DataArray` API, e.g., for an index to support :py:meth:`DataArray.sel` it needs to implement :py:meth:`Index.sel`, to support :py:meth:`DataArray.stack` and :py:meth:`DataArray.unstack` it needs to implement :py:meth:`Index.stack` and :py:meth:`Index.unstack`, etc. Some guidelines and examples are given below. More details can be found in the documented :py:class:`Index` API. 
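In outline, a custom index is a subclass that fills in the methods corresponding to the operations it wants to support; the sections below walk through the most important ones. The skeleton here is schematic only (signatures are abbreviated, see the :py:class:`Index` API for the exact ones):

.. code-block:: python

    from xarray import Index


    class MyIndex(Index):
        @classmethod
        def from_variables(cls, variables, *, options):
            # required: build the index from existing coordinate variables
            ...

        def sel(self, labels):
            # enables label-based selection, i.e. ``obj.sel(...)``
            ...

        def isel(self, indexers):
            # optional: propagate the index through ``obj.isel(...)``
            ...

        def equals(self, other):
            # optional: needed (with ``join`` and ``reindex_like``) for alignment
            ...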
Minimal requirements -------------------- Every index must at least implement the :py:meth:`Index.from_variables` class method, which is used by Xarray to build a new index instance from one or more existing coordinates in a Dataset or DataArray. Since any collection of coordinates can be passed to that method (i.e., the number, order and dimensions of the coordinates are all arbitrary), it is the responsibility of the index to check the consistency and validity of those input coordinates. For example, :py:class:`~xarray.indexes.PandasIndex` accepts only one coordinate and :py:class:`~xarray.indexes.PandasMultiIndex` accepts one or more 1-dimensional coordinates that must all share the same dimension. Other, custom indexes need not have the same constraints, e.g., - a georeferenced raster index which only accepts two 1-d coordinates with distinct dimensions - a staggered grid index which takes coordinates with different dimension name suffixes (e.g., "_c" and "_l" for center and left) Optional requirements --------------------- Pretty much everything else is optional. Depending on the method, in the absence of a (re)implementation, an index will either raise a ``NotImplementedError`` or won't do anything specific (just drop, pass or copy itself from/to the resulting Dataset or DataArray). For example, you can just skip re-implementing :py:meth:`Index.rename` if there is no internal attribute or object to rename according to the new desired coordinate or dimension names. In the case of ``PandasIndex``, we rename the underlying ``pandas.Index`` object and/or update the ``PandasIndex.dim`` attribute since the associated dimension name has been changed. Wrap index data as coordinate data ---------------------------------- In some cases it is possible to reuse the index's underlying object or structure as coordinate data and hence avoid data duplication. For ``PandasIndex`` and ``PandasMultiIndex``, we leverage the fact that ``pandas.Index`` objects expose some array-like API. In Xarray we use some wrappers around those underlying objects as a thin compatibility layer to preserve dtypes, handle explicit and n-dimensional indexing, etc. Other structures like tree-based indexes (e.g., kd-tree) may differ too much from arrays to reuse it as coordinate data. If the index data can be reused as coordinate data, the ``Index`` subclass should implement :py:meth:`Index.create_variables`. This method accepts a dictionary of variable names as keys and :py:class:`Variable` objects as values (used for propagating variable metadata) and should return a dictionary of new :py:class:`Variable` or :py:class:`IndexVariable` objects. Data selection -------------- For an index to support label-based selection, it needs to at least implement :py:meth:`Index.sel`. This method accepts a dictionary of labels where the keys are coordinate names (already filtered for the current index) and the values can be pretty much anything (e.g., a slice, a tuple, a list, a numpy array, a :py:class:`Variable` or a :py:class:`DataArray`). It is the responsibility of the index to properly handle those input labels. :py:meth:`Index.sel` must return an instance of :py:class:`IndexSelResult`. The latter is a small data class that holds positional indexers (indices) and that may also hold new variables, new indexes, names of variables or indexes to drop, names of dimensions to rename, etc. 
For example, this is useful in the case of ``PandasMultiIndex`` as it allows Xarray to convert it into a single ``PandasIndex`` when only one level remains after the selection. The :py:class:`IndexSelResult` class is also used to merge results from label-based selection performed by different indexes. Note that it is now possible to have two distinct indexes for two 1-d coordinates sharing the same dimension, but it is not currently possible to use those two indexes in the same call to :py:meth:`Dataset.sel`. Optionally, the index may also implement :py:meth:`Index.isel`. In the case of ``PandasIndex`` we use it to create a new index object by just indexing the underlying ``pandas.Index`` object. In other cases this may not be possible, e.g., a kd-tree object may not be easily indexed. If ``Index.isel()`` is not implemented, the index in just dropped in the DataArray or Dataset resulting from the selection. Alignment --------- For an index to support alignment, it needs to implement: - :py:meth:`Index.equals`, which compares the index with another index and returns either ``True`` or ``False`` - :py:meth:`Index.join`, which combines the index with another index and returns a new Index object - :py:meth:`Index.reindex_like`, which queries the index with another index and returns positional indexers that are used to re-index Dataset or DataArray variables along one or more dimensions Xarray ensures that those three methods are called with an index of the same type as argument. Meta-indexes ------------ Nothing prevents writing a custom Xarray index that itself encapsulates other Xarray index(es). We call such index a "meta-index". Here is a small example of a meta-index for geospatial, raster datasets (i.e., regularly spaced 2-dimensional data) that internally relies on two ``PandasIndex`` instances for the x and y dimensions respectively: .. code-block:: python from xarray import Index from xarray.core.indexes import PandasIndex from xarray.core.indexing import merge_sel_results class RasterIndex(Index): def __init__(self, xy_indexes): assert len(xy_indexes) == 2 # must have two distinct dimensions dim = [idx.dim for idx in xy_indexes.values()] assert dim[0] != dim[1] self._xy_indexes = xy_indexes @classmethod def from_variables(cls, variables): assert len(variables) == 2 xy_indexes = { k: PandasIndex.from_variables({k: v}) for k, v in variables.items() } return cls(xy_indexes) def create_variables(self, variables): idx_variables = {} for index in self._xy_indexes.values(): idx_variables.update(index.create_variables(variables)) return idx_variables def sel(self, labels): results = [] for k, index in self._xy_indexes.items(): if k in labels: results.append(index.sel({k: labels[k]})) return merge_sel_results(results) This basic index only supports label-based selection. Providing a full-featured index by implementing the other ``Index`` methods should be pretty straightforward for this example, though. This example is also not very useful unless we add some extra functionality on top of the two encapsulated ``PandasIndex`` objects, such as a coordinate reference system. How to use a custom index ------------------------- You can use :py:meth:`Dataset.set_xindex` or :py:meth:`DataArray.set_xindex` to assign a custom index to a Dataset or DataArray, e.g., using the ``RasterIndex`` above: .. 
code-block:: python import numpy as np import xarray as xr da = xr.DataArray( np.random.uniform(size=(100, 50)), coords={"x": ("x", np.arange(50)), "y": ("y", np.arange(100))}, dims=("y", "x"), ) # Build a RasterIndex from the 'x' and 'y' coordinates # Xarray creates default indexes for the 'x' and 'y' coordinates # this will automatically drop those indexes da_raster = da.set_xindex(["x", "y"], RasterIndex) # RasterIndex now takes care of label-based selection selected = da_raster.sel(x=10, y=slice(20, 50)) python-xarray-2026.01.0/doc/internals/how-to-add-new-backend.rst0000664000175000017500000004511715136607163024470 0ustar alastairalastair.. _add_a_backend: How to add a new backend ------------------------ Adding a new backend for read support to Xarray does not require one to integrate any code in Xarray; all you need to do is: - Create a class that inherits from Xarray :py:class:`~xarray.backends.BackendEntrypoint` and implements the method ``open_dataset`` see :ref:`RST backend_entrypoint` - Declare this class as an external plugin in your project configuration, see :ref:`RST backend_registration` If you also want to support lazy loading and dask see :ref:`RST lazy_loading`. Note that the new interface for backends is available from Xarray version >= 0.18 onwards. You can see what backends are currently available in your working environment with :py:class:`~xarray.backends.list_engines()`. .. _RST backend_entrypoint: BackendEntrypoint subclassing +++++++++++++++++++++++++++++ Your ``BackendEntrypoint`` sub-class is the primary interface with Xarray, and it should implement the following attributes and methods: - the ``open_dataset`` method (mandatory) - the ``open_dataset_parameters`` attribute (optional) - the ``guess_can_open`` method (optional) - the ``description`` attribute (optional) - the ``url`` attribute (optional). This is what a ``BackendEntrypoint`` subclass should look like: .. code-block:: python from xarray.backends import BackendEntrypoint class MyBackendEntrypoint(BackendEntrypoint): def open_dataset( self, filename_or_obj, *, drop_variables=None, # other backend specific keyword arguments # `chunks` and `cache` DO NOT go here, they are handled by xarray ): return my_open_dataset(filename_or_obj, drop_variables=drop_variables) open_dataset_parameters = ["filename_or_obj", "drop_variables"] def guess_can_open(self, filename_or_obj): try: _, ext = os.path.splitext(filename_or_obj) except TypeError: return False return ext in {".my_format", ".my_fmt"} description = "Use .my_format files in Xarray" url = "https://link_to/your_backend/documentation" ``BackendEntrypoint`` subclass methods and attributes are detailed in the following. .. _RST open_dataset: open_dataset ^^^^^^^^^^^^ The backend ``open_dataset`` shall implement reading from file, the variables decoding and it shall instantiate the output Xarray class :py:class:`~xarray.Dataset`. The following is an example of the high level processing steps: .. 
code-block:: python def open_dataset( self, filename_or_obj, *, drop_variables=None, decode_times=True, decode_timedelta=True, decode_coords=True, my_backend_option=None, ): vars, attrs, coords = my_reader( filename_or_obj, drop_variables=drop_variables, my_backend_option=my_backend_option, ) vars, attrs, coords = my_decode_variables( vars, attrs, decode_times, decode_timedelta, decode_coords ) # see also conventions.decode_cf_variables ds = xr.Dataset(vars, attrs=attrs, coords=coords) ds.set_close(my_close_method) return ds The output :py:class:`~xarray.Dataset` shall implement the additional custom method ``close``, used by Xarray to ensure the related files are eventually closed. This method shall be set by using :py:meth:`~xarray.Dataset.set_close`. The input of ``open_dataset`` method are one argument (``filename_or_obj``) and one keyword argument (``drop_variables``): - ``filename_or_obj``: can be any object but usually it is a string containing a path or an instance of :py:class:`pathlib.Path`. - ``drop_variables``: can be ``None`` or an iterable containing the variable names to be dropped when reading the data. If it makes sense for your backend, your ``open_dataset`` method should implement in its interface the following boolean keyword arguments, called **decoders**, which default to ``None``: - ``mask_and_scale`` - ``decode_times`` - ``decode_timedelta`` - ``use_cftime`` - ``concat_characters`` - ``decode_coords`` Note: all the supported decoders shall be declared explicitly in backend ``open_dataset`` signature and adding a ``**kwargs`` is not allowed. These keyword arguments are explicitly defined in Xarray :py:func:`~xarray.open_dataset` signature. Xarray will pass them to the backend only if the User explicitly sets a value different from ``None``. For more details on decoders see :ref:`RST decoders`. Your backend can also take as input a set of backend-specific keyword arguments. All these keyword arguments can be passed to :py:func:`~xarray.open_dataset` grouped either via the ``backend_kwargs`` parameter or explicitly using the syntax ``**kwargs``. If you don't want to support the lazy loading, then the :py:class:`~xarray.Dataset` shall contain values as a :py:class:`numpy.ndarray` and your work is almost done. .. _RST open_dataset_parameters: open_dataset_parameters ^^^^^^^^^^^^^^^^^^^^^^^ ``open_dataset_parameters`` is the list of backend ``open_dataset`` parameters. It is not a mandatory parameter, and if the backend does not provide it explicitly, Xarray creates a list of them automatically by inspecting the backend signature. If ``open_dataset_parameters`` is not defined, but ``**kwargs`` and ``*args`` are in the backend ``open_dataset`` signature, Xarray raises an error. On the other hand, if the backend provides the ``open_dataset_parameters``, then ``**kwargs`` and ``*args`` can be used in the signature. However, this practice is discouraged unless there is a good reasons for using ``**kwargs`` or ``*args``. .. _RST guess_can_open: guess_can_open ^^^^^^^^^^^^^^ ``guess_can_open`` is used to identify the proper engine to open your data file automatically in case the engine is not specified explicitly. If you are not interested in supporting this feature, you can skip this step since :py:class:`~xarray.backends.BackendEntrypoint` already provides a default :py:meth:`~xarray.backends.BackendEntrypoint.guess_can_open` that always returns ``False``. 
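If you do implement it, the check does not have to rely on file suffixes alone. The sketch below, for example, also recognises already-open binary file objects by sniffing a magic number (``MAGIC`` is a hypothetical format signature; the suffixes are the same illustrative ones used above):

.. code-block:: python

    import io
    import os

    MAGIC = b"MYFMT"  # hypothetical signature at the start of the file


    def guess_can_open(self, filename_or_obj):
        if isinstance(filename_or_obj, io.IOBase):
            position = filename_or_obj.tell()
            header = filename_or_obj.read(len(MAGIC))
            filename_or_obj.seek(position)
            return header == MAGIC
        try:
            _, ext = os.path.splitext(filename_or_obj)
        except TypeError:
            return False
        return ext in {".my_format", ".my_fmt"}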
Backend ``guess_can_open`` takes as input the ``filename_or_obj`` parameter of Xarray :py:meth:`~xarray.open_dataset`, and returns a boolean. .. _RST properties: description and url ^^^^^^^^^^^^^^^^^^^^ ``description`` is used to provide a short text description of the backend. ``url`` is used to include a link to the backend's documentation or code. These attributes are surfaced when a user prints :py:class:`~xarray.backends.BackendEntrypoint`. If ``description`` or ``url`` are not defined, an empty string is returned. .. _RST decoders: Decoders ^^^^^^^^ The decoders implement specific operations to transform data from on-disk representation to Xarray representation. A classic example is the “time” variable decoding operation. In NetCDF, the elements of the “time” variable are stored as integers, and the unit contains an origin (for example: "seconds since 1970-1-1"). In this case, Xarray transforms the pair integer-unit in a :py:class:`numpy.datetime64`. The standard coders implemented in Xarray are: - :py:class:`xarray.coding.strings.CharacterArrayCoder()` - :py:class:`xarray.coding.strings.EncodedStringCoder()` - :py:class:`xarray.coding.variables.UnsignedIntegerCoder()` - :py:class:`xarray.coding.variables.CFMaskCoder()` - :py:class:`xarray.coding.variables.CFScaleOffsetCoder()` - :py:class:`xarray.coding.times.CFTimedeltaCoder()` - :py:class:`xarray.coding.times.CFDatetimeCoder()` Xarray coders all have the same interface. They have two methods: ``decode`` and ``encode``. The method ``decode`` takes a ``Variable`` in on-disk format and returns a ``Variable`` in Xarray format. Variable attributes no more applicable after the decoding, are dropped and stored in the ``Variable.encoding`` to make them available to the ``encode`` method, which performs the inverse transformation. In the following an example on how to use the coders ``decode`` method: .. jupyter-execute:: :hide-code: import xarray as xr import numpy as np .. jupyter-execute:: var = xr.Variable( dims=("x",), data=np.arange(10.0), attrs={"scale_factor": 10, "add_offset": 2} ) var .. jupyter-execute:: coder = xr.coding.variables.CFScaleOffsetCoder() decoded_var = coder.decode(var) decoded_var .. jupyter-execute:: decoded_var.encoding Some of the transformations can be common to more backends, so before implementing a new decoder, be sure Xarray does not already implement that one. The backends can reuse Xarray’s decoders, either instantiating the coders and using the method ``decode`` directly or using the higher-level function :py:func:`~xarray.conventions.decode_cf_variables` that groups Xarray decoders. In some cases, the transformation to apply strongly depends on the on-disk data format. Therefore, you may need to implement your own decoder. An example of such a case is when you have to deal with the time format of a grib file. grib format is very different from the NetCDF one: in grib, the time is stored in two attributes dataDate and dataTime as strings. Therefore, it is not possible to reuse the Xarray time decoder, and implementing a new one is mandatory. Decoders can be activated or deactivated using the boolean keywords of Xarray :py:meth:`~xarray.open_dataset` signature: ``mask_and_scale``, ``decode_times``, ``decode_timedelta``, ``use_cftime``, ``concat_characters``, ``decode_coords``. Such keywords are passed to the backend only if the User sets a value different from ``None``. 
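A backend that reuses xarray's coders can therefore treat ``None`` as "fall back to the default". The following is a hedged sketch (the helper name and the choice of default are illustrative only):

.. code-block:: python

    from xarray.coding.times import CFDatetimeCoder


    def _maybe_decode_times(var, name, decode_times):
        # ``None`` means the user did not set the keyword explicitly
        if decode_times is None or decode_times:
            return CFDatetimeCoder().decode(var, name=name)
        return var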
Note that the backend does not necessarily have to implement all the decoders, but it shall declare in its ``open_dataset`` interface only the boolean keywords related to the supported decoders. .. _RST backend_registration: How to register a backend +++++++++++++++++++++++++ Define a new entrypoint in your ``pyproject.toml`` (or ``setup.cfg/setup.py`` for older configurations), with: - group: ``xarray.backends`` - name: the name to be passed to :py:meth:`~xarray.open_dataset` as ``engine`` - object reference: the reference of the class that you have implemented. You can declare the entrypoint in your project configuration like so: .. tab:: pyproject.toml .. code:: toml [project.entry-points."xarray.backends"] my_engine = "my_package.my_module:MyBackendEntrypoint" .. tab:: pyproject.toml [Poetry] .. code-block:: toml [tool.poetry.plugins."xarray.backends"] my_engine = "my_package.my_module:MyBackendEntrypoint" .. tab:: setup.cfg .. code-block:: cfg [options.entry_points] xarray.backends = my_engine = my_package.my_module:MyBackendEntrypoint .. tab:: setup.py .. code-block:: setuptools.setup( entry_points={ "xarray.backends": [ "my_engine=my_package.my_module:MyBackendEntrypoint" ], }, ) See the `Python Packaging User Guide `_ for more information on entrypoints and details of the syntax. If you're using Poetry, note that table name in ``pyproject.toml`` is slightly different. See `the Poetry docs `_ for more information on plugins. .. _RST lazy_loading: How to support lazy loading +++++++++++++++++++++++++++ If you want to make your backend effective with big datasets, then you should take advantage of xarray's support for lazy loading and indexing. Basically, when your backend constructs the ``Variable`` objects, you need to replace the :py:class:`numpy.ndarray` inside the variables with a custom :py:class:`~xarray.backends.BackendArray` subclass that supports lazy loading and indexing. See the example below: .. code-block:: python backend_array = MyBackendArray() data = indexing.LazilyIndexedArray(backend_array) var = xr.Variable(dims, data, attrs=attrs, encoding=encoding) Where: - :py:class:`~xarray.core.indexing.LazilyIndexedArray` is a wrapper class provided by Xarray that manages the lazy loading and indexing. - ``MyBackendArray`` should be implemented by the backend and must inherit from :py:class:`~xarray.backends.BackendArray`. BackendArray subclassing ^^^^^^^^^^^^^^^^^^^^^^^^ The BackendArray subclass must implement the following method and attributes: - the ``__getitem__`` method that takes an index as an input and returns a `NumPy `__ array, - the ``shape`` attribute, - the ``dtype`` attribute. It may also optionally implement an additional ``async_getitem`` method. Xarray supports different types of :doc:`/user-guide/indexing`, that can be grouped in three types of indexes: :py:class:`~xarray.core.indexing.BasicIndexer`, :py:class:`~xarray.core.indexing.OuterIndexer`, and :py:class:`~xarray.core.indexing.VectorizedIndexer`. This implies that the implementation of the method ``__getitem__`` can be tricky. In order to simplify this task, Xarray provides a helper function, :py:func:`~xarray.core.indexing.explicit_indexing_adapter`, that transforms all the input indexer types (basic, outer, vectorized) in a tuple which is interpreted correctly by your backend. This is an example ``BackendArray`` subclass implementation: .. 
code-block:: python from xarray.backends import BackendArray class MyBackendArray(BackendArray): def __init__( self, shape, dtype, lock, # other backend specific keyword arguments ): self.shape = shape self.dtype = dtype self.lock = lock def __getitem__( self, key: xarray.core.indexing.ExplicitIndexer ) -> np.typing.ArrayLike: return indexing.explicit_indexing_adapter( key, self.shape, indexing.IndexingSupport.BASIC, self._raw_indexing_method, ) def _raw_indexing_method(self, key: tuple) -> np.typing.ArrayLike: # thread safe method that access to data on disk with self.lock: ... return item Note that ``BackendArray.__getitem__`` must be thread safe to support multi-thread processing. The :py:func:`~xarray.core.indexing.explicit_indexing_adapter` method takes in input the ``key``, the array ``shape`` and the following parameters: - ``indexing_support``: the type of index supported by ``raw_indexing_method`` - ``raw_indexing_method``: a method that shall take in input a key in the form of a tuple and return an indexed :py:class:`numpy.ndarray`. For more details see :py:class:`~xarray.core.indexing.IndexingSupport` and :ref:`RST indexing`. Async support ^^^^^^^^^^^^^ Backends can also optionally support loading data asynchronously via xarray's asynchronous loading methods (e.g. ``~xarray.Dataset.load_async``). To support async loading the ``BackendArray`` subclass must additionally implement the ``BackendArray.async_getitem`` method. Note that implementing this method is only necessary if you want to be able to load data from different xarray objects concurrently. Even without this method your ``BackendArray`` implementation is still free to concurrently load chunks of data for a single ``Variable`` itself, so long as it does so behind the synchronous ``__getitem__`` interface. Dask support ^^^^^^^^^^^^ In order to support `Dask Distributed `__ and :py:mod:`multiprocessing`, the ``BackendArray`` subclass should be serializable either with :ref:`io.pickle` or `cloudpickle `__. That implies that all the reference to open files should be dropped. For opening files, we therefore suggest to use the helper class provided by Xarray :py:class:`~xarray.backends.CachingFileManager`. .. _RST indexing: Indexing examples ^^^^^^^^^^^^^^^^^ **BASIC** In the ``BASIC`` indexing support, numbers and slices are supported. Example: .. jupyter-input:: # () shall return the full array backend_array._raw_indexing_method(()) .. jupyter-output:: array([[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]]) .. jupyter-input:: # shall support integers backend_array._raw_indexing_method(1, 1) .. jupyter-output:: 5 .. jupyter-input:: # shall support slices backend_array._raw_indexing_method(slice(0, 3), slice(2, 4)) .. jupyter-output:: array([[2, 3], [6, 7], [10, 11]]) **OUTER** The ``OUTER`` indexing shall support number, slices and in addition it shall support also lists of integers. The outer indexing is equivalent to combining multiple input list with ``itertools.product()``: .. jupyter-input:: backend_array._raw_indexing_method([0, 1], [0, 1, 2]) .. jupyter-output:: array([[0, 1, 2], [4, 5, 6]]) .. jupyter-input:: # shall support integers backend_array._raw_indexing_method(1, 1) .. jupyter-output:: 5 **OUTER_1VECTOR** The ``OUTER_1VECTOR`` indexing shall supports number, slices and at most one list. The behaviour with the list shall be the same as ``OUTER`` indexing. 
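Whichever of these levels your storage layer supports natively is declared through the ``indexing_support`` argument shown earlier, and xarray decomposes any richer indexer for you. A hedged sketch, reusing the ``MyBackendArray`` attributes from the example above:

.. code-block:: python

    from xarray.core import indexing


    def __getitem__(self, key):
        # declare OUTER_1VECTOR support; xarray splits up fancier indexers
        return indexing.explicit_indexing_adapter(
            key,
            self.shape,
            indexing.IndexingSupport.OUTER_1VECTOR,
            self._raw_indexing_method,
        )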
If you support more complex indexing as explicit indexing or numpy indexing, you can have a look to the implementation of Zarr backend and Scipy backend, currently available in :py:mod:`~xarray.backends` module. .. _RST preferred_chunks: Preferred chunk sizes ^^^^^^^^^^^^^^^^^^^^^ To potentially improve performance with lazy loading, the backend may define for each variable the chunk sizes that it prefers---that is, sizes that align with how the variable is stored. (Note that the backend is not directly involved in `Dask `__ chunking, because Xarray internally manages chunking.) To define the preferred chunk sizes, store a mapping within the variable's encoding under the key ``"preferred_chunks"`` (that is, ``var.encoding["preferred_chunks"]``). The mapping's keys shall be the names of dimensions with preferred chunk sizes, and each value shall be the corresponding dimension's preferred chunk sizes expressed as either an integer (such as ``{"dim1": 1000, "dim2": 2000}``) or a tuple of integers (such as ``{"dim1": (1000, 100), "dim2": (2000, 2000, 2000)}``). Xarray uses the preferred chunk sizes in some special cases of the ``chunks`` argument of the :py:func:`~xarray.open_dataset` and :py:func:`~xarray.open_mfdataset` functions. If ``chunks`` is a ``dict``, then for any dimensions missing from the keys or whose value is ``None``, Xarray sets the chunk sizes to the preferred sizes. If ``chunks`` equals ``"auto"``, then Xarray seeks ideal chunk sizes informed by the preferred chunk sizes. Specifically, it determines the chunk sizes using :py:func:`dask.array.core.normalize_chunks` with the ``previous_chunks`` argument set according to the preferred chunk sizes. python-xarray-2026.01.0/doc/internals/extending-xarray.rst0000664000175000017500000001063615136607163023640 0ustar alastairalastair .. _internals.accessors: Extending xarray using accessors ================================ .. jupyter-execute:: :hide-code: import xarray as xr import numpy as np Xarray is designed as a general purpose library and hence tries to avoid including overly domain specific functionality. But inevitably, the need for more domain specific logic arises. .. _internals.accessors.composition: Composition over Inheritance ---------------------------- One potential solution to this problem is to subclass Dataset and/or DataArray to add domain specific functionality. However, inheritance is not very robust. It's easy to inadvertently use internal APIs when subclassing, which means that your code may break when xarray upgrades. Furthermore, many builtin methods will only return native xarray objects. The standard advice is to use :issue:`composition over inheritance <706>`, but reimplementing an API as large as xarray's on your own objects can be an onerous task, even if most methods are only forwarding to xarray implementations. (For an example of a project which took this approach of subclassing see `UXarray `_). If you simply want the ability to call a function with the syntax of a method call, then the builtin :py:meth:`~xarray.DataArray.pipe` method (copied from pandas) may suffice. .. _internals.accessors.writing accessors: Writing Custom Accessors ------------------------ To resolve this issue for more complex cases, xarray has the :py:func:`~xarray.register_dataset_accessor`, :py:func:`~xarray.register_dataarray_accessor` and :py:func:`~xarray.register_datatree_accessor` decorators for adding custom "accessors" on xarray objects, thereby "extending" the functionality of your xarray object. 
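In outline, registering an accessor amounts to decorating a small class whose constructor receives the xarray object. A minimal sketch with a hypothetical accessor name (the full "geo" example follows below):

.. code-block:: python

    import xarray as xr


    @xr.register_dataset_accessor("demo")  # "demo" is a made-up accessor name
    class DemoAccessor:
        def __init__(self, xarray_obj):
            self._obj = xarray_obj  # the Dataset this accessor is attached to

        @property
        def n_vars(self):
            return len(self._obj.data_vars)


    # usage: xr.Dataset({"a": ("x", [1, 2, 3])}).demo.n_vars  -> 1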
Here's how you might use these decorators to write a custom "geo" accessor implementing a geography specific extension to xarray: .. literalinclude:: ../examples/_code/accessor_example.py In general, the only restriction on the accessor class is that the ``__init__`` method must have a single parameter: the ``Dataset`` or ``DataArray`` object it is supposed to work on. This achieves the same result as if the ``Dataset`` class had a cached property defined that returns an instance of your class: .. code-block:: python class Dataset: ... @property def geo(self): return GeoAccessor(self) However, using the register accessor decorators is preferable to simply adding your own ad-hoc property (i.e., ``Dataset.geo = property(...)``), for several reasons: 1. It ensures that the name of your property does not accidentally conflict with any other attributes or methods (including other accessors). 2. Instances of accessor object will be cached on the xarray object that creates them. This means you can save state on them (e.g., to cache computed properties). 3. Using an accessor provides an implicit namespace for your custom functionality that clearly identifies it as separate from built-in xarray methods. .. note:: Accessors are created once per DataArray and Dataset instance. New instances, like those created from arithmetic operations or when accessing a DataArray from a Dataset (ex. ``ds[var_name]``), will have new accessors created. Back in an interactive IPython session, we can use these properties: .. jupyter-execute:: :hide-code: exec(open("examples/_code/accessor_example.py").read()) .. jupyter-execute:: ds = xr.Dataset({"longitude": np.linspace(0, 10), "latitude": np.linspace(0, 20)}) ds.geo.center .. jupyter-execute:: ds.geo.plot() The intent here is that libraries that extend xarray could add such an accessor to implement subclass specific functionality rather than using actual subclasses or patching in a large number of domain specific methods. For further reading on ways to write new accessors and the philosophy behind the approach, see https://github.com/pydata/xarray/issues/1080. To help users keep things straight, please `let us know `_ if you plan to write a new accessor for an open source library. Existing open source accessors and the libraries that implement them are available in the list on the :ref:`ecosystem` page. To make documenting accessors with ``sphinx`` and ``sphinx.ext.autosummary`` easier, you can use `sphinx-autosummary-accessors`_. .. _sphinx-autosummary-accessors: https://sphinx-autosummary-accessors.readthedocs.io/ python-xarray-2026.01.0/doc/internals/chunked-arrays.rst0000664000175000017500000001375515136607163023274 0ustar alastairalastair.. currentmodule:: xarray .. _internals.chunkedarrays: Alternative chunked array types =============================== .. warning:: This is a *highly* experimental feature. Please report any bugs or other difficulties on `xarray's issue tracker `_. In particular see discussion on `xarray issue #6807 `_ Xarray can wrap chunked dask arrays (see :ref:`dask`), but can also wrap any other chunked array type that exposes the correct interface. This allows us to support using other frameworks for distributed and out-of-core processing, with user code still written as xarray commands. In particular xarray also supports wrapping :py:class:`cubed.Array` objects (see `Cubed's documentation `_ and the `cubed-xarray package `_). 
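In practice, end-user code typically looks the same regardless of which chunked array
type sits underneath. The sketch below assumes dask is installed (and, for the
commented-out variant, the optional ``cubed`` and ``cubed-xarray`` packages); the
tutorial dataset additionally needs an internet connection:

.. code-block:: python

    import xarray as xr

    ds = xr.tutorial.open_dataset("air_temperature")  # any dataset works here

    # chunk with the default (dask) chunk manager ...
    chunked = ds.chunk({"time": 100})

    # ... or, equivalently, with another registered chunked array type
    # chunked = ds.chunk({"time": 100}, chunked_array_type="cubed")

    # downstream xarray code is unchanged either way
    chunked["air"].mean("time").compute()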
The basic idea is that by wrapping an array that has an explicit notion of ``.chunks``, xarray can expose control over the choice of chunking scheme to users via methods like :py:meth:`DataArray.chunk` whilst the wrapped array actually implements the handling of processing all of the chunks. Chunked array methods and "core operations" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ A chunked array needs to meet all the :ref:`requirements for normal duck arrays `, but must also implement additional features. Chunked arrays have additional attributes and methods, such as ``.chunks`` and ``.rechunk``. Furthermore, Xarray dispatches chunk-aware computations across one or more chunked arrays using special functions known as "core operations". Examples include ``map_blocks``, ``blockwise``, and ``apply_gufunc``. The core operations are generalizations of functions first implemented in :py:mod:`dask.array`. The implementation of these functions is specific to the type of arrays passed to them. For example, when applying the ``map_blocks`` core operation, :py:class:`dask.array.Array` objects must be processed by :py:func:`dask.array.map_blocks`, whereas :py:class:`cubed.Array` objects must be processed by :py:func:`cubed.map_blocks`. In order to use the correct implementation of a core operation for the array type encountered, xarray dispatches to the corresponding subclass of :py:class:`~xarray.namedarray.parallelcompat.ChunkManagerEntrypoint`, also known as a "Chunk Manager". Therefore **a full list of the operations that need to be defined is set by the API of the** :py:class:`~xarray.namedarray.parallelcompat.ChunkManagerEntrypoint` **abstract base class**. Note that chunked array methods are also currently dispatched using this class. Chunked array creation is also handled by this class. As chunked array objects have a one-to-one correspondence with in-memory numpy arrays, it should be possible to create a chunked array from a numpy array by passing the desired chunking pattern to an implementation of :py:class:`~xarray.namedarray.parallelcompat.ChunkManagerEntrypoint.from_array``. .. note:: The :py:class:`~xarray.namedarray.parallelcompat.ChunkManagerEntrypoint` abstract base class is mostly just acting as a namespace for containing the chunked-aware function primitives. Ideally in the future we would have an API standard for chunked array types which codified this structure, making the entrypoint system unnecessary. .. currentmodule:: xarray.namedarray.parallelcompat .. autoclass:: xarray.namedarray.parallelcompat.ChunkManagerEntrypoint :members: Registering a new ChunkManagerEntrypoint subclass ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Rather than hard-coding various chunk managers to deal with specific chunked array implementations, xarray uses an entrypoint system to allow developers of new chunked array implementations to register their corresponding subclass of :py:class:`~xarray.namedarray.parallelcompat.ChunkManagerEntrypoint`. To register a new entrypoint you need to add an entry to the ``setup.cfg`` like this:: [options.entry_points] xarray.chunkmanagers = dask = xarray.namedarray.daskmanager:DaskManager See also `cubed-xarray `_ for another example. To check that the entrypoint has worked correctly, you may find it useful to display the available chunkmanagers using the internal function :py:func:`~xarray.namedarray.parallelcompat.list_chunkmanagers`. .. 
autofunction:: list_chunkmanagers User interface ~~~~~~~~~~~~~~ Once the chunkmanager subclass has been registered, xarray objects wrapping the desired array type can be created in 3 ways: #. By manually passing the array type to the :py:class:`~xarray.DataArray` constructor, see the examples for :ref:`numpy-like arrays `, #. Calling :py:meth:`~xarray.DataArray.chunk`, passing the keyword arguments ``chunked_array_type`` and ``from_array_kwargs``, #. Calling :py:func:`~xarray.open_dataset`, passing the keyword arguments ``chunked_array_type`` and ``from_array_kwargs``. The latter two methods ultimately call the chunkmanager's implementation of ``.from_array``, to which they pass the ``from_array_kwargs`` dict. The ``chunked_array_type`` kwarg selects which registered chunkmanager subclass to dispatch to. It defaults to ``'dask'`` if Dask is installed, otherwise it defaults to whichever chunkmanager is registered if only one is registered. If multiple chunkmanagers are registered, the ``chunk_manager`` configuration option (which can be set using :py:func:`set_options`) will be used to determine which chunkmanager to use, defaulting to ``'dask'``. Parallel processing without chunks ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ To use a parallel array type that does not expose a concept of chunks explicitly, none of the information on this page is theoretically required. Such an array type (e.g. `Ramba `_ or `Arkouda `_) could be wrapped using xarray's existing support for :ref:`numpy-like "duck" arrays `. python-xarray-2026.01.0/doc/internals/zarr-encoding-spec.rst0000664000175000017500000001667115136607163024046 0ustar alastairalastair.. currentmodule:: xarray .. _zarr_encoding: Zarr Encoding Specification ============================ In implementing support for the `Zarr `_ storage format, Xarray developers made some *ad hoc* choices about how to store NetCDF data in Zarr. Future versions of the Zarr spec will likely include a more formal convention for the storage of the NetCDF data model in Zarr; see `Zarr spec repo `_ for ongoing discussion. First, Xarray can only read and write Zarr groups. There is currently no support for reading / writing individual Zarr arrays. Zarr groups are mapped to Xarray ``Dataset`` objects. Second, from Xarray's point of view, the key difference between NetCDF and Zarr is that all NetCDF arrays have *dimension names* while Zarr arrays do not. In Zarr v2, Xarray uses an ad-hoc convention to encode and decode the name of each array's dimensions. However, starting with Zarr v3, the ``dimension_names`` attribute provides a formal convention for storing the NetCDF data model in Zarr. Dimension Encoding in Zarr Formats ----------------------------------- Xarray encodes array dimensions differently depending on the Zarr format version: **Zarr V2 Format:** Xarray uses a special Zarr array attribute: ``_ARRAY_DIMENSIONS``. The value of this attribute is a list of dimension names (strings), for example ``["time", "lon", "lat"]``. When writing data to Zarr V2, Xarray sets this attribute on all variables based on the variable dimensions. This attribute is visible when accessing arrays directly with zarr-python. **Zarr V3 Format:** Xarray uses the native ``dimension_names`` field in the array metadata. This is part of the official Zarr V3 specification and is not stored as a regular attribute. When accessing arrays with zarr-python, this information is available in the array's metadata but not in the attributes dictionary. 
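The practical difference shows up when inspecting a store directly with zarr-python.
The snippet below is a sketch using illustrative store paths and the variable name from
the executable examples further down this page, which demonstrate the same thing on a
real dataset:

.. code-block:: python

    import zarr

    # Zarr V2: dimension names are stored as a regular attribute
    v2_array = zarr.open("example_v2.zarr")["Tair"]
    v2_array.attrs["_ARRAY_DIMENSIONS"]  # e.g. ["time", "y", "x"]

    # Zarr V3: dimension names live in the array metadata, not in .attrs
    v3_array = zarr.open("example_v3.zarr")["Tair"]
    v3_array.metadata.dimension_names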
When reading a Zarr group, Xarray looks for dimension information in the appropriate location based on the format version, raising an error if it can't be found. The dimension information is used to define the variable dimension names and then (for Zarr V2) removed from the attributes dictionary returned to the user. CF Conventions -------------- Xarray uses its standard CF encoding/decoding functionality for handling metadata (see :py:func:`decode_cf`). This includes encoding concepts such as dimensions and coordinates. The ``coordinates`` attribute, which lists coordinate variables (e.g., ``"yc xc"`` for spatial coordinates), is one part of the broader CF conventions used to describe metadata in NetCDF and Zarr. Compatibility and Reading ------------------------- Because of these encoding choices, Xarray cannot read arbitrary Zarr arrays, but only Zarr data with valid dimension metadata. Xarray supports: - Zarr V2 arrays with ``_ARRAY_DIMENSIONS`` attributes - Zarr V3 arrays with ``dimension_names`` metadata - `NCZarr `_ format (dimension names are defined in the ``.zarray`` file) After decoding the dimension information and assigning the variable dimensions, Xarray proceeds to [optionally] decode each variable using its standard CF decoding machinery used for NetCDF data. Finally, it's worth noting that Xarray writes (and attempts to read) "consolidated metadata" by default (the ``.zmetadata`` file), which is another non-standard Zarr extension, albeit one implemented upstream in Zarr-Python. You do not need to write consolidated metadata to make Zarr stores readable in Xarray, but because Xarray can open these stores much faster, users will see a warning about poor performance when reading non-consolidated stores unless they explicitly set ``consolidated=False``. See :ref:`io.zarr.consolidated_metadata` for more details. Examples: Zarr Format Differences ---------------------------------- The following examples demonstrate how dimension and coordinate encoding differs between Zarr format versions. We'll use the same tutorial dataset but write it in different formats to show what users will see when accessing the files directly with zarr-python. **Example 1: Zarr V2 Format** .. jupyter-execute:: zarr_v2_filename = "example_v2.zarr" .. jupyter-execute:: :hide-code: import tempfile import os.path tempdir = tempfile.TemporaryDirectory() zarr_v2_filename = os.path.join(tempdir.name, zarr_v2_filename) .. jupyter-execute:: import os import xarray as xr import zarr # Load tutorial dataset and write as Zarr V2 ds = xr.tutorial.load_dataset("rasm") ds.to_zarr(zarr_v2_filename, mode="w", consolidated=False, zarr_format=2) # Open with zarr-python and examine attributes zgroup = zarr.open(zarr_v2_filename) print("Zarr V2 - Tair attributes:") tair_attrs = dict(zgroup["Tair"].attrs) for key, value in tair_attrs.items(): print(f" '{key}': {repr(value)}") **Example 2: Zarr V3 Format** .. jupyter-execute:: zarr_v3_filename = "example_v3.zarr" .. jupyter-execute:: :hide-code: zarr_v3_filename = os.path.join(tempdir.name, zarr_v3_filename) .. 
jupyter-execute:: # Write the same dataset as Zarr V3 ds.to_zarr(zarr_v3_filename, mode="w", consolidated=False, zarr_format=3) # Open with zarr-python and examine attributes zgroup = zarr.open(zarr_v3_filename) print("Zarr V3 - Tair attributes:") tair_attrs = dict(zgroup["Tair"].attrs) for key, value in tair_attrs.items(): print(f" '{key}': {repr(value)}") # For Zarr V3, dimension information is in metadata tair_array = zgroup["Tair"] print(f"\nZarr V3 - dimension_names in metadata: {tair_array.metadata.dimension_names}") Chunk Key Encoding ------------------ When writing data to Zarr stores, Xarray supports customizing how chunk keys are encoded through the ``chunk_key_encoding`` parameter in the variable's encoding dictionary. This is particularly useful when working with Zarr V2 arrays and you need to control the dimension separator in chunk keys. For example, to specify a custom separator for chunk keys: .. jupyter-execute:: example_filename = "example.zarr" .. jupyter-execute:: :hide-code: example_filename = os.path.join(tempdir.name, example_filename) .. jupyter-execute:: import xarray as xr import numpy as np from zarr.core.chunk_key_encodings import V2ChunkKeyEncoding # Create a custom chunk key encoding with "/" as separator enc = V2ChunkKeyEncoding(separator="/").to_dict() # Create and write a dataset with custom chunk key encoding arr = np.ones((42, 100)) ds = xr.DataArray(arr, name="var1").to_dataset() ds.to_zarr( example_filename, zarr_format=2, mode="w", encoding={"var1": {"chunks": (42, 50), "chunk_key_encoding": enc}}, ) The ``chunk_key_encoding`` option accepts a dictionary that specifies the encoding configuration. For Zarr V2 arrays, you can use the ``V2ChunkKeyEncoding`` class from ``zarr.core.chunk_key_encodings`` to generate this configuration. This is particularly useful when you need to ensure compatibility with specific Zarr V2 storage layouts or when working with tools that expect a particular chunk key format. .. note:: The ``chunk_key_encoding`` option is only relevant when writing to Zarr stores. When reading Zarr arrays, Xarray automatically detects and uses the appropriate chunk key encoding based on the store's format and configuration. .. jupyter-execute:: :hide-code: tempdir.cleanup() python-xarray-2026.01.0/doc/user-guide/0000775000175000017500000000000015136607163017661 5ustar alastairalastairpython-xarray-2026.01.0/doc/user-guide/index.rst0000664000175000017500000000154415136607163021526 0ustar alastairalastair########### User Guide ########### In this user guide, you will find detailed descriptions and examples that describe many common tasks that you can accomplish with Xarray. .. toctree:: :maxdepth: 2 :caption: Data model terminology data-structures hierarchical-data dask .. toctree:: :maxdepth: 2 :caption: Core operations indexing combining reshaping computation groupby interpolation .. toctree:: :maxdepth: 2 :caption: I/O io complex-numbers .. toctree:: :maxdepth: 2 :caption: Visualization plotting .. toctree:: :maxdepth: 2 :caption: Interoperability pandas duckarrays ecosystem .. toctree:: :maxdepth: 2 :caption: Domain-specific workflows time-series weather-climate .. toctree:: :maxdepth: 2 :caption: Options and Testing options testing python-xarray-2026.01.0/doc/user-guide/terminology.rst0000664000175000017500000003532115136607163022767 0ustar alastairalastair.. currentmodule:: xarray .. 
_terminology: Terminology =========== *Xarray terminology differs slightly from CF, mathematical conventions, and pandas; so we've put together a glossary of its terms. Here,* ``arr`` *refers to an xarray* :py:class:`DataArray` *in the examples. For more complete examples, please consult the relevant documentation.* .. jupyter-execute:: :hide-code: import numpy as np import xarray as xr .. glossary:: DataArray A multi-dimensional array with labeled or named dimensions. ``DataArray`` objects add metadata such as dimension names, coordinates, and attributes (defined below) to underlying "unlabeled" data structures such as numpy and Dask arrays. If its optional ``name`` property is set, it is a *named DataArray*. Dataset A dict-like collection of ``DataArray`` objects with aligned dimensions. Thus, most operations that can be performed on the dimensions of a single ``DataArray`` can be performed on a dataset. Datasets have data variables (see **Variable** below), dimensions, coordinates, and attributes. Variable A `NetCDF-like variable `_ consisting of dimensions, data, and attributes which describe a single array. The main functional difference between variables and numpy arrays is that numerical operations on variables implement array broadcasting by dimension name. Each ``DataArray`` has an underlying variable that can be accessed via ``arr.variable``. However, a variable is not fully described outside of either a ``Dataset`` or a ``DataArray``. .. note:: The :py:class:`Variable` class is low-level interface and can typically be ignored. However, the word "variable" appears often enough in the code and documentation that is useful to understand. Dimension In mathematics, the *dimension* of data is loosely the number of degrees of freedom for it. A *dimension axis* is a set of all points in which all but one of these degrees of freedom is fixed. We can think of each dimension axis as having a name, for example the "x dimension". In xarray, a ``DataArray`` object's *dimensions* are its named dimension axes ``da.dims``, and the name of the ``i``-th dimension is ``da.dims[i]``. If an array is created without specifying dimension names, the default dimension names will be ``dim_0``, ``dim_1``, and so forth. Coordinate An array that labels a dimension or set of dimensions of another ``DataArray``. In the usual one-dimensional case, the coordinate array's values can loosely be thought of as tick labels along a dimension. We distinguish :term:`Dimension coordinate` vs. :term:`Non-dimension coordinate` and :term:`Indexed coordinate` vs. :term:`Non-indexed coordinate`. A coordinate named ``x`` can be retrieved from ``arr.coords["x"]``. A ``DataArray`` can have more coordinates than dimensions because a single dimension can be labeled by multiple coordinate arrays. However, only one coordinate array can be assigned as a particular dimension's dimension coordinate array. Dimension coordinate A one-dimensional coordinate array assigned to ``arr`` with both a name and dimension name in ``arr.dims``. Usually (but not always), a dimension coordinate is also an :term:`Indexed coordinate` so that it can be used for label-based indexing and alignment, like the index found on a :py:class:`pandas.DataFrame` or :py:class:`pandas.Series`. Non-dimension coordinate A coordinate array assigned to ``arr`` with a name in ``arr.coords`` but *not* in ``arr.dims``. These coordinates arrays can be one-dimensional or multidimensional, and they are useful for auxiliary labeling. 
As an example, multidimensional coordinates are often used in geoscience datasets when :doc:`the data's physical coordinates (such as latitude and longitude) differ from their logical coordinates <../examples/multidimensional-coords>`. Printing ``arr.coords`` will print all of ``arr``'s coordinate names, with the corresponding dimension(s) in parentheses. For example, ``coord_name (dim_name) 1 2 3 ...``. Indexed coordinate A coordinate which has an associated :term:`Index`. Generally this means that the coordinate labels can be used for indexing (selection) and/or alignment. An indexed coordinate may have one or more arbitrary dimensions although in most cases it is also a :term:`Dimension coordinate`. It may or may not be grouped with other indexed coordinates depending on whether they share the same index. Indexed coordinates are marked by an asterisk ``*`` when printing a ``DataArray`` or ``Dataset``. Non-indexed coordinate A coordinate which has no associated :term:`Index`. It may still represent fixed labels along one or more dimensions but it cannot be used for label-based indexing and alignment. Index An *index* is a data structure optimized for efficient data selection and alignment within a discrete or continuous space that is defined by coordinate labels (unless it is a functional index). By default, Xarray creates a :py:class:`~xarray.indexes.PandasIndex` object (i.e., a :py:class:`pandas.Index` wrapper) for each :term:`Dimension coordinate`. For more advanced use cases (e.g., staggered or irregular grids, geospatial indexes), Xarray also accepts any instance of a specialized :py:class:`~xarray.indexes.Index` subclass that is associated to one or more arbitrary coordinates. The index associated with the coordinate ``x`` can be retrieved by ``arr.xindexes[x]`` (or ``arr.indexes["x"]`` if the index is convertible to a :py:class:`pandas.Index` object). If two coordinates ``x`` and ``y`` share the same index, ``arr.xindexes[x]`` and ``arr.xindexes[y]`` both return the same :py:class:`~xarray.indexes.Index` object. name The names of dimensions, coordinates, DataArray objects and data variables can be anything as long as they are :term:`hashable`. However, it is preferred to use :py:class:`str` typed names. scalar By definition, a scalar is not an :term:`array` and when converted to one, it has 0 dimensions. That means that, e.g., :py:class:`int`, :py:class:`float`, and :py:class:`str` objects are "scalar" while :py:class:`list` or :py:class:`tuple` are not. duck array `Duck arrays`__ are array implementations that behave like numpy arrays. They have to define the ``shape``, ``dtype`` and ``ndim`` properties. For integration with ``xarray``, the ``__array__``, ``__array_ufunc__`` and ``__array_function__`` protocols are also required. __ https://numpy.org/neps/nep-0022-ndarray-duck-typing-overview.html Aligning Aligning refers to the process of ensuring that two or more DataArrays or Datasets have the same dimensions and coordinates, so that they can be combined or compared properly. .. jupyter-execute:: x = xr.DataArray( [[25, 35], [10, 24]], dims=("lat", "lon"), coords={"lat": [35.0, 40.0], "lon": [100.0, 120.0]}, ) y = xr.DataArray( [[20, 5], [7, 13]], dims=("lat", "lon"), coords={"lat": [35.0, 42.0], "lon": [100.0, 120.0]}, ) a, b = xr.align(x, y) # By default, an "inner join" is performed # so "a" is a copy of "x" where coordinates match "y" a Broadcasting A technique that allows operations to be performed on arrays with different shapes and dimensions. 
When performing operations on arrays with different shapes and dimensions, xarray will automatically attempt to broadcast the arrays to a common shape before the operation is applied. .. jupyter-execute:: # 'a' has shape (3,) and 'b' has shape (4,) a = xr.DataArray(np.array([1, 2, 3]), dims=["x"]) b = xr.DataArray(np.array([4, 5, 6, 7]), dims=["y"]) # 2D array with shape (3, 4) a + b Merging Merging is used to combine two or more Datasets or DataArrays that have different variables or coordinates along the same dimensions. When merging, xarray aligns the variables and coordinates of the different datasets along the specified dimensions and creates a new ``Dataset`` containing all the variables and coordinates. .. jupyter-execute:: # create two 1D arrays with names arr1 = xr.DataArray( [1, 2, 3], dims=["x"], coords={"x": [10, 20, 30]}, name="arr1" ) arr2 = xr.DataArray( [4, 5, 6], dims=["x"], coords={"x": [20, 30, 40]}, name="arr2" ) # merge the two arrays into a new dataset merged_ds = xr.Dataset({"arr1": arr1, "arr2": arr2}) merged_ds Concatenating Concatenating is used to combine two or more Datasets or DataArrays along a dimension. When concatenating, xarray arranges the datasets or dataarrays along a new dimension, and the resulting ``Dataset`` or ``Dataarray`` will have the same variables and coordinates along the other dimensions. .. jupyter-execute:: a = xr.DataArray([[1, 2], [3, 4]], dims=("x", "y")) b = xr.DataArray([[5, 6], [7, 8]], dims=("x", "y")) c = xr.concat([a, b], dim="c") c Combining Combining is the process of arranging two or more DataArrays or Datasets into a single ``DataArray`` or ``Dataset`` using some combination of merging and concatenation operations. .. jupyter-execute:: ds1 = xr.Dataset( {"data": xr.DataArray([[1, 2], [3, 4]], dims=("x", "y"))}, coords={"x": [1, 2], "y": [3, 4]}, ) ds2 = xr.Dataset( {"data": xr.DataArray([[5, 6], [7, 8]], dims=("x", "y"))}, coords={"x": [2, 3], "y": [4, 5]}, ) # combine the datasets combined_ds = xr.combine_by_coords([ds1, ds2], join="outer") combined_ds lazy Lazily-evaluated operations do not load data into memory until necessary. Instead of doing calculations right away, xarray lets you plan what calculations you want to do, like finding the average temperature in a dataset. This planning is called "lazy evaluation." Later, when you're ready to see the final result, you tell xarray, "Okay, go ahead and do those calculations now!" That's when xarray starts working through the steps you planned and gives you the answer you wanted. This lazy approach helps save time and memory because xarray only does the work when you actually need the results. labeled Labeled data has metadata describing the context of the data, not just the raw data values. This contextual information can be labels for array axes (i.e. dimension names) tick labels along axes (stored as Coordinate variables) or unique names for each array. These labels provide context and meaning to the data, making it easier to understand and work with. If you have temperature data for different cities over time. Using xarray, you can label the dimensions: one for cities and another for time. serialization Serialization is the process of converting your data into a format that makes it easy to save and share. When you serialize data in xarray, you're taking all those temperature measurements, along with their labels and other information, and turning them into a format that can be stored in a file or sent over the internet. 
xarray objects can be serialized into formats which store the labels alongside the data. Some supported serialization formats are files that can then be stored or transferred (e.g. netCDF), whilst others are protocols that allow for data access over a network (e.g. Zarr). indexing :ref:`Indexing` is how you select subsets of your data which you are interested in. - Label-based Indexing: Selecting data by passing a specific label and comparing it to the labels stored in the associated coordinates. You can use labels to specify what you want like "Give me the temperature for New York on July 15th." - Positional Indexing: You can use numbers to refer to positions in the data like "Give me the third temperature value" This is useful when you know the order of your data but don't need to remember the exact labels. - Slicing: You can take a "slice" of your data, like you might want all temperatures from July 1st to July 10th. xarray supports slicing for both positional and label-based indexing. DataTree A tree-like collection of ``Dataset`` objects. A *tree* is made up of one or more *nodes*, each of which can store the same information as a single ``Dataset`` (accessed via ``.dataset``). This data is stored in the same way as in a ``Dataset``, i.e. in the form of data :term:`variables`, :term:`dimensions`, :term:`coordinates`, and attributes. The nodes in a tree are linked to one another, and each node is its own instance of ``DataTree`` object. Each node can have zero or more *children* (stored in a dictionary-like manner under their corresponding *names*), and those child nodes can themselves have children. If a node is a child of another node that other node is said to be its *parent*. Nodes can have a maximum of one parent, and if a node has no parent it is said to be the *root* node of that *tree*. Subtree A section of a *tree*, consisting of a *node* along with all the child nodes below it (and the child nodes below them, i.e. all so-called *descendant* nodes). Excludes the parent node and all nodes above. Group Another word for a subtree, reflecting how the hierarchical structure of a ``DataTree`` allows for grouping related data together. Analogous to a single `netCDF group `_ or `Zarr group `_. python-xarray-2026.01.0/doc/user-guide/indexing.rst0000664000175000017500000006701715136607163022233 0ustar alastairalastair.. _indexing: Indexing and selecting data =========================== .. jupyter-execute:: :hide-code: :hide-output: import numpy as np import pandas as pd import xarray as xr np.random.seed(123456) %xmode minimal Xarray offers extremely flexible indexing routines that combine the best features of NumPy and pandas for data selection. The most basic way to access elements of a :py:class:`~xarray.DataArray` object is to use Python's ``[]`` syntax, such as ``array[i, j]``, where ``i`` and ``j`` are both integers. As xarray objects can store coordinates corresponding to each dimension of an array, label-based indexing similar to ``pandas.DataFrame.loc`` is also possible. In label-based indexing, the element position ``i`` is automatically looked-up from the coordinate values. Dimensions of xarray objects have names, so you can also lookup the dimensions by name, instead of remembering their positional order. Quick overview -------------- In total, xarray supports four different kinds of indexing, as described below and summarized in this table: .. |br| raw:: html
    +------------------+--------------+---------------------------------+--------------------------------+ | Dimension lookup | Index lookup | ``DataArray`` syntax | ``Dataset`` syntax | +==================+==============+=================================+================================+ | Positional | By integer | ``da[:, 0]`` | *not available* | +------------------+--------------+---------------------------------+--------------------------------+ | Positional | By label | ``da.loc[:, 'IA']`` | *not available* | +------------------+--------------+---------------------------------+--------------------------------+ | By name | By integer | ``da.isel(space=0)`` or |br| | ``ds.isel(space=0)`` or |br| | | | | ``da[dict(space=0)]`` | ``ds[dict(space=0)]`` | +------------------+--------------+---------------------------------+--------------------------------+ | By name | By label | ``da.sel(space='IA')`` or |br| | ``ds.sel(space='IA')`` or |br| | | | | ``da.loc[dict(space='IA')]`` | ``ds.loc[dict(space='IA')]`` | +------------------+--------------+---------------------------------+--------------------------------+ More advanced indexing is also possible for all the methods by supplying :py:class:`~xarray.DataArray` objects as indexer. See :ref:`vectorized_indexing` for the details. Positional indexing ------------------- Indexing a :py:class:`~xarray.DataArray` directly works (mostly) just like it does for numpy arrays, except that the returned object is always another DataArray: .. jupyter-execute:: da = xr.DataArray( np.random.rand(4, 3), [ ("time", pd.date_range("2000-01-01", periods=4)), ("space", ["IA", "IL", "IN"]), ], ) da[:2] .. jupyter-execute:: da[0, 0] .. jupyter-execute:: da[:, [2, 1]] Attributes are persisted in all indexing operations. .. warning:: Positional indexing deviates from the NumPy when indexing with multiple arrays like ``da[[0, 1], [0, 1]]``, as described in :ref:`vectorized_indexing`. Xarray also supports label-based indexing, just like pandas. Because we use a :py:class:`pandas.Index` under the hood, label based indexing is very fast. To do label based indexing, use the :py:attr:`~xarray.DataArray.loc` attribute: .. jupyter-execute:: da.loc["2000-01-01":"2000-01-02", "IA"] In this example, the selected is a subpart of the array in the range '2000-01-01':'2000-01-02' along the first coordinate ``time`` and with 'IA' value from the second coordinate ``space``. You can perform any of the `label indexing operations supported by pandas`__, including indexing with individual, slices and lists/arrays of labels, as well as indexing with boolean arrays. Like pandas, label based indexing in xarray is *inclusive* of both the start and stop bounds. __ https://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-label Setting values with label based indexing is also supported: .. jupyter-execute:: da.loc["2000-01-01", ["IL", "IN"]] = -10 da Indexing with dimension names ----------------------------- With the dimension names, we do not have to rely on dimension order and can use them explicitly to slice data. There are two ways to do this: 1. Use the :py:meth:`~xarray.DataArray.sel` and :py:meth:`~xarray.DataArray.isel` convenience methods: .. jupyter-execute:: # index by integer array indices da.isel(space=0, time=slice(None, 2)) .. jupyter-execute:: # index by dimension coordinate labels da.sel(time=slice("2000-01-01", "2000-01-02")) 2. Use a dictionary as the argument for array positional or label based array indexing: .. 
jupyter-execute:: # index by integer array indices da[dict(space=0, time=slice(None, 2))] .. jupyter-execute:: # index by dimension coordinate labels da.loc[dict(time=slice("2000-01-01", "2000-01-02"))] The arguments to these methods can be any objects that could index the array along the dimension given by the keyword, e.g., labels for an individual value, :py:class:`Python slice` objects or 1-dimensional arrays. .. note:: We would love to be able to do indexing with labeled dimension names inside brackets, but unfortunately, `Python does not yet support indexing with keyword arguments`__ like ``da[space=0]`` __ https://legacy.python.org/dev/peps/pep-0472/ .. _nearest neighbor lookups: Nearest neighbor lookups ------------------------ The label based selection methods :py:meth:`~xarray.Dataset.sel`, :py:meth:`~xarray.Dataset.reindex` and :py:meth:`~xarray.Dataset.reindex_like` all support ``method`` and ``tolerance`` keyword argument. The method parameter allows for enabling nearest neighbor (inexact) lookups by use of the methods ``'pad'``, ``'backfill'`` or ``'nearest'``: .. jupyter-execute:: da = xr.DataArray([1, 2, 3], [("x", [0, 1, 2])]) da.sel(x=[1.1, 1.9], method="nearest") .. jupyter-execute:: da.sel(x=0.1, method="backfill") .. jupyter-execute:: da.reindex(x=[0.5, 1, 1.5, 2, 2.5], method="pad") Tolerance limits the maximum distance for valid matches with an inexact lookup: .. jupyter-execute:: da.reindex(x=[1.1, 1.5], method="nearest", tolerance=0.2) The method parameter is not yet supported if any of the arguments to ``.sel()`` is a ``slice`` object: .. jupyter-execute:: :raises: da.sel(x=slice(1, 3), method="nearest") However, you don't need to use ``method`` to do inexact slicing. Slicing already returns all values inside the range (inclusive), as long as the index labels are monotonic increasing: .. jupyter-execute:: da.sel(x=slice(0.9, 3.1)) Indexing axes with monotonic decreasing labels also works, as long as the ``slice`` or ``.loc`` arguments are also decreasing: .. jupyter-execute:: reversed_da = da[::-1] reversed_da.loc[3.1:0.9] .. note:: If you want to interpolate along coordinates rather than looking up the nearest neighbors, use :py:meth:`~xarray.Dataset.interp` and :py:meth:`~xarray.Dataset.interp_like`. See :ref:`interpolation ` for the details. Dataset indexing ---------------- We can also use these methods to index all variables in a dataset simultaneously, returning a new dataset: .. jupyter-execute:: da = xr.DataArray( np.random.rand(4, 3), [ ("time", pd.date_range("2000-01-01", periods=4)), ("space", ["IA", "IL", "IN"]), ], ) ds = da.to_dataset(name="foo") ds.isel(space=[0], time=[0]) .. jupyter-execute:: ds.sel(time="2000-01-01") Positional indexing on a dataset is not supported because the ordering of dimensions in a dataset is somewhat ambiguous (it can vary between different arrays). However, you can do normal indexing with dimension names: .. jupyter-execute:: ds[dict(space=[0], time=[0])] .. jupyter-execute:: ds.loc[dict(time="2000-01-01")] Dropping labels and dimensions ------------------------------ The :py:meth:`~xarray.Dataset.drop_sel` method returns a new object with the listed index labels along a dimension dropped: .. jupyter-execute:: ds.drop_sel(space=["IN", "IL"]) ``drop_sel`` is both a ``Dataset`` and ``DataArray`` method. Use :py:meth:`~xarray.Dataset.drop_dims` to drop a full dimension from a Dataset. Any variables with these dimensions are also dropped: .. jupyter-execute:: ds.drop_dims("time") .. 
_masking with where: Masking with ``where`` ---------------------- Indexing methods on xarray objects generally return a subset of the original data. However, it is sometimes useful to select an object with the same shape as the original data, but with some elements masked. To do this type of selection in xarray, use :py:meth:`~xarray.DataArray.where`: .. jupyter-execute:: da = xr.DataArray(np.arange(16).reshape(4, 4), dims=["x", "y"]) da.where(da.x + da.y < 4) This is particularly useful for ragged indexing of multi-dimensional data, e.g., to apply a 2D mask to an image. Note that ``where`` follows all the usual xarray broadcasting and alignment rules for binary operations (e.g., ``+``) between the object being indexed and the condition, as described in :ref:`compute`: .. jupyter-execute:: da.where(da.y < 2) By default ``where`` maintains the original size of the data. For cases where the selected data size is much smaller than the original data, use of the option ``drop=True`` clips coordinate elements that are fully masked: .. jupyter-execute:: da.where(da.y < 2, drop=True) .. _selecting values with isin: Selecting values with ``isin`` ------------------------------ To check whether elements of an xarray object contain a single object, you can compare with the equality operator ``==`` (e.g., ``arr == 3``). To check multiple values, use :py:meth:`~xarray.DataArray.isin`: .. jupyter-execute:: da = xr.DataArray([1, 2, 3, 4, 5], dims=["x"]) da.isin([2, 4]) :py:meth:`~xarray.DataArray.isin` works particularly well with :py:meth:`~xarray.DataArray.where` to support indexing by arrays that are not already labels of an array: .. jupyter-execute:: lookup = xr.DataArray([-1, -2, -3, -4, -5], dims=["x"]) da.where(lookup.isin([-2, -4]), drop=True) However, some caution is in order: when done repeatedly, this type of indexing is significantly slower than using :py:meth:`~xarray.DataArray.sel`. .. _vectorized_indexing: Vectorized Indexing ------------------- Like numpy and pandas, xarray supports indexing many array elements at once in a vectorized manner. If you only provide integers, slices, or unlabeled arrays (array without dimension names, such as ``np.ndarray``, ``list``, but not :py:meth:`~xarray.DataArray` or :py:meth:`~xarray.Variable`) indexing can be understood as orthogonally. Each indexer component selects independently along the corresponding dimension, similar to how vector indexing works in Fortran or MATLAB, or after using the :py:func:`numpy.ix_` helper: .. jupyter-execute:: da = xr.DataArray( np.arange(12).reshape((3, 4)), dims=["x", "y"], coords={"x": [0, 1, 2], "y": ["a", "b", "c", "d"]}, ) da .. jupyter-execute:: da[[0, 2, 2], [1, 3]] For more flexibility, you can supply :py:meth:`~xarray.DataArray` objects as indexers. Dimensions on resultant arrays are given by the ordered union of the indexers' dimensions: .. jupyter-execute:: ind_x = xr.DataArray([0, 1], dims=["x"]) ind_y = xr.DataArray([0, 1], dims=["y"]) da[ind_x, ind_y] # orthogonal indexing Slices or sequences/arrays without named-dimensions are treated as if they have the same dimension which is indexed along: .. jupyter-execute:: # Because [0, 1] is used to index along dimension 'x', # it is assumed to have dimension 'x' da[[0, 1], ind_x] Furthermore, you can use multi-dimensional :py:meth:`~xarray.DataArray` as indexers, where the resultant array dimension is also determined by indexers' dimension: .. 
jupyter-execute:: ind = xr.DataArray([[0, 1], [0, 1]], dims=["a", "b"]) da[ind] Similar to how `NumPy's advanced indexing`_ works, vectorized indexing for xarray is based on our :ref:`broadcasting rules `. See :ref:`indexing.rules` for the complete specification. .. _NumPy's advanced indexing: https://numpy.org/doc/stable/user/basics.indexing.html#advanced-indexing Vectorized indexing also works with ``isel``, ``loc``, and ``sel``: .. jupyter-execute:: ind = xr.DataArray([[0, 1], [0, 1]], dims=["a", "b"]) da.isel(y=ind) # same as da[:, ind] .. jupyter-execute:: ind = xr.DataArray([["a", "b"], ["b", "a"]], dims=["a", "b"]) da.loc[:, ind] # same as da.sel(y=ind) These methods may also be applied to ``Dataset`` objects .. jupyter-execute:: ds = da.to_dataset(name="bar") ds.isel(x=xr.DataArray([0, 1, 2], dims=["points"])) Vectorized indexing may be used to extract information from the nearest grid cells of interest, for example, the nearest climate model grid cells to a collection specified weather station latitudes and longitudes. To trigger vectorized indexing behavior you will need to provide the selection dimensions with a new shared output dimension name. In the example below, the selections of the closest latitude and longitude are renamed to an output dimension named "points": .. jupyter-execute:: ds = xr.tutorial.open_dataset("air_temperature") # Define target latitude and longitude (where weather stations might be) target_lon = xr.DataArray([200, 201, 202, 205], dims="points") target_lat = xr.DataArray([31, 41, 42, 42], dims="points") # Retrieve data at the grid cells nearest to the target latitudes and longitudes da = ds["air"].sel(lon=target_lon, lat=target_lat, method="nearest") da .. tip:: If you are lazily loading your data from disk, not every form of vectorized indexing is supported (or if supported, may not be supported efficiently). You may find increased performance by loading your data into memory first, e.g., with :py:meth:`~xarray.Dataset.load`. .. note:: If an indexer is a :py:meth:`~xarray.DataArray`, its coordinates should not conflict with the selected subpart of the target array (except for the explicitly indexed dimensions with ``.loc``/``.sel``). Otherwise, ``IndexError`` will be raised. .. _assigning_values: Assigning values with indexing ------------------------------ To select and assign values to a portion of a :py:meth:`~xarray.DataArray` you can use indexing with ``.loc`` : .. jupyter-execute:: ds = xr.tutorial.open_dataset("air_temperature") # add an empty 2D dataarray ds["empty"] = xr.full_like(ds.air.mean("time"), fill_value=0) # modify one grid point using loc() ds["empty"].loc[dict(lon=260, lat=30)] = 100 # modify a 2D region using loc() lc = ds.coords["lon"] la = ds.coords["lat"] ds["empty"].loc[ dict(lon=lc[(lc > 220) & (lc < 260)], lat=la[(la > 20) & (la < 60)]) ] = 100 or :py:meth:`~xarray.where`: .. jupyter-execute:: # modify one grid point using xr.where() ds["empty"] = xr.where( (ds.coords["lat"] == 20) & (ds.coords["lon"] == 260), 100, ds["empty"] ) # or modify a 2D region using xr.where() mask = ( (ds.coords["lat"] > 20) & (ds.coords["lat"] < 60) & (ds.coords["lon"] > 220) & (ds.coords["lon"] < 260) ) ds["empty"] = xr.where(mask, 100, ds["empty"]) Vectorized indexing can also be used to assign values to xarray object. .. jupyter-execute:: da = xr.DataArray( np.arange(12).reshape((3, 4)), dims=["x", "y"], coords={"x": [0, 1, 2], "y": ["a", "b", "c", "d"]}, ) da .. jupyter-execute:: da[0] = -1 # assignment with broadcasting da .. 
jupyter-execute:: ind_x = xr.DataArray([0, 1], dims=["x"]) ind_y = xr.DataArray([0, 1], dims=["y"]) da[ind_x, ind_y] = -2 # assign -2 to (ix, iy) = (0, 0) and (1, 1) da .. jupyter-execute:: da[ind_x, ind_y] += 100 # increment is also possible da Like ``numpy.ndarray``, value assignment sometimes works differently from what one may expect. .. jupyter-execute:: da = xr.DataArray([0, 1, 2, 3], dims=["x"]) ind = xr.DataArray([0, 0, 0], dims=["x"]) da[ind] -= 1 da Where the 0th element will be subtracted 1 only once. This is because ``v[0] = v[0] - 1`` is called three times, rather than ``v[0] = v[0] - 1 - 1 - 1``. See `Assigning values to indexed arrays`__ for the details. __ https://numpy.org/doc/stable/user/basics.indexing.html#assigning-values-to-indexed-arrays .. note:: Dask array does not support value assignment (see :ref:`dask` for the details). .. note:: Coordinates in both the left- and right-hand-side arrays should not conflict with each other. Otherwise, ``IndexError`` will be raised. .. warning:: Do not try to assign values when using any of the indexing methods ``isel`` or ``sel``:: # DO NOT do this da.isel(space=0) = 0 Instead, values can be assigned using dictionary-based indexing:: da[dict(space=0)] = 0 Assigning values with the chained indexing using ``.sel`` or ``.isel`` fails silently. .. jupyter-execute:: da = xr.DataArray([0, 1, 2, 3], dims=["x"]) # DO NOT do this da.isel(x=[0, 1, 2])[1] = -1 da You can also assign values to all variables of a :py:class:`Dataset` at once: .. jupyter-execute:: :stderr: ds_org = xr.tutorial.open_dataset("eraint_uvz").isel( latitude=slice(56, 59), longitude=slice(255, 258), level=0 ) # set all values to 0 ds = xr.zeros_like(ds_org) ds .. jupyter-execute:: # by integer ds[dict(latitude=2, longitude=2)] = 1 ds["u"] .. jupyter-execute:: ds["v"] .. jupyter-execute:: # by label ds.loc[dict(latitude=47.25, longitude=[11.25, 12])] = 100 ds["u"] .. jupyter-execute:: # dataset as new values new_dat = ds_org.loc[dict(latitude=48, longitude=[11.25, 12])] new_dat .. jupyter-execute:: ds.loc[dict(latitude=47.25, longitude=[11.25, 12])] = new_dat ds["u"] The dimensions can differ between the variables in the dataset, but all variables need to have at least the dimensions specified in the indexer dictionary. The new values must be either a scalar, a :py:class:`DataArray` or a :py:class:`Dataset` itself that contains all variables that also appear in the dataset to be modified. .. _more_advanced_indexing: More advanced indexing ----------------------- The use of :py:meth:`~xarray.DataArray` objects as indexers enables very flexible indexing. The following is an example of the pointwise indexing: .. jupyter-execute:: da = xr.DataArray(np.arange(56).reshape((7, 8)), dims=["x", "y"]) da .. jupyter-execute:: da.isel(x=xr.DataArray([0, 1, 6], dims="z"), y=xr.DataArray([0, 1, 0], dims="z")) where three elements at ``(ix, iy) = ((0, 0), (1, 1), (6, 0))`` are selected and mapped along a new dimension ``z``. If you want to add a coordinate to the new dimension ``z``, you can supply a :py:class:`~xarray.DataArray` with a coordinate, .. jupyter-execute:: da.isel( x=xr.DataArray([0, 1, 6], dims="z", coords={"z": ["a", "b", "c"]}), y=xr.DataArray([0, 1, 0], dims="z"), ) Analogously, label-based pointwise-indexing is also possible by the ``.sel`` method: .. 
jupyter-execute:: da = xr.DataArray( np.random.rand(4, 3), [ ("time", pd.date_range("2000-01-01", periods=4)), ("space", ["IA", "IL", "IN"]), ], ) times = xr.DataArray( pd.to_datetime(["2000-01-03", "2000-01-02", "2000-01-01"]), dims="new_time" ) da.sel(space=xr.DataArray(["IA", "IL", "IN"], dims=["new_time"]), time=times) .. _align and reindex: Align and reindex ----------------- Xarray's ``reindex``, ``reindex_like`` and ``align`` impose a ``DataArray`` or ``Dataset`` onto a new set of coordinates corresponding to dimensions. The original values are subset to the index labels still found in the new labels, and values corresponding to new labels not found in the original object are in-filled with ``NaN``. Xarray operations that combine multiple objects generally automatically align their arguments to share the same indexes. However, manual alignment can be useful for greater control and for increased performance. To reindex a particular dimension, use :py:meth:`~xarray.DataArray.reindex`: .. jupyter-execute:: da.reindex(space=["IA", "CA"]) The :py:meth:`~xarray.DataArray.reindex_like` method is a useful shortcut. To demonstrate, we will make a subset DataArray with new values: .. jupyter-execute:: foo = da.rename("foo") baz = (10 * da[:2, :2]).rename("baz") baz Reindexing ``foo`` with ``baz`` selects out the first two values along each dimension: .. jupyter-execute:: foo.reindex_like(baz) The opposite operation asks us to reindex to a larger shape, so we fill in the missing values with ``NaN``: .. jupyter-execute:: baz.reindex_like(foo) The :py:func:`~xarray.align` function lets us perform more flexible database-like ``'inner'``, ``'outer'``, ``'left'`` and ``'right'`` joins: .. jupyter-execute:: xr.align(foo, baz, join="inner") .. jupyter-execute:: xr.align(foo, baz, join="outer") Both ``reindex_like`` and ``align`` work interchangeably between :py:class:`~xarray.DataArray` and :py:class:`~xarray.Dataset` objects, and with any number of matching dimension names: .. jupyter-execute:: ds .. jupyter-execute:: ds.reindex_like(baz) .. jupyter-execute:: other = xr.DataArray(["a", "b", "c"], dims="other") # this is a no-op, because there are no shared dimension names ds.reindex_like(other) .. _indexing.missing_coordinates: Missing coordinate labels ------------------------- Coordinate labels for each dimension are optional (as of xarray v0.9). Label based indexing with ``.sel`` and ``.loc`` uses standard positional, integer-based indexing as a fallback for dimensions without a coordinate label: .. jupyter-execute:: da = xr.DataArray([1, 2, 3], dims="x") da.sel(x=[0, -1]) Alignment between xarray objects where one or both do not have coordinate labels succeeds only if all dimensions of the same name have the same length. Otherwise, it raises an informative error: .. jupyter-execute:: :raises: xr.align(da, da[:2]) Underlying Indexes ------------------ Xarray uses the :py:class:`pandas.Index` internally to perform indexing operations. If you need to access the underlying indexes, they are available through the :py:attr:`~xarray.DataArray.indexes` attribute. .. jupyter-execute:: da = xr.DataArray( np.random.rand(4, 3), [ ("time", pd.date_range("2000-01-01", periods=4)), ("space", ["IA", "IL", "IN"]), ], ) da .. jupyter-execute:: da.indexes .. jupyter-execute:: da.indexes["time"] Use :py:meth:`~xarray.DataArray.get_index` to get an index for a dimension, falling back to a default :py:class:`pandas.RangeIndex` if it has no coordinate labels: .. 
jupyter-execute:: da = xr.DataArray([1, 2, 3], dims="x") da .. jupyter-execute:: da.get_index("x") .. _copies_vs_views: Copies vs. Views ---------------- Whether array indexing returns a view or a copy of the underlying data depends on the nature of the labels. For positional (integer) indexing, xarray follows the same `rules`_ as NumPy: * Positional indexing with only integers and slices returns a view. * Positional indexing with arrays or lists returns a copy. The rules for label based indexing are more complex: * Label-based indexing with only slices returns a view. * Label-based indexing with arrays returns a copy. * Label-based indexing with scalars returns a view or a copy, depending upon if the corresponding positional indexer can be represented as an integer or a slice object. The exact rules are determined by pandas. Whether data is a copy or a view is more predictable in xarray than in pandas, so unlike pandas, xarray does not produce `SettingWithCopy warnings`_. However, you should still avoid assignment with chained indexing. Note that other operations (such as :py:meth:`~xarray.DataArray.values`) may also return views rather than copies. .. _SettingWithCopy warnings: https://pandas.pydata.org/pandas-docs/stable/indexing.html#returning-a-view-versus-a-copy .. _rules: https://numpy.org/doc/stable/user/basics.copies.html .. _multi-level indexing: Multi-level indexing -------------------- Just like pandas, advanced indexing on multi-level indexes is possible with ``loc`` and ``sel``. You can slice a multi-index by providing multiple indexers, i.e., a tuple of slices, labels, list of labels, or any selector allowed by pandas: .. jupyter-execute:: midx = pd.MultiIndex.from_product([list("abc"), [0, 1]], names=("one", "two")) mda = xr.DataArray(np.random.rand(6, 3), [("x", midx), ("y", range(3))]) mda .. jupyter-execute:: mda.sel(x=(list("ab"), [0])) You can also select multiple elements by providing a list of labels or tuples or a slice of tuples: .. jupyter-execute:: mda.sel(x=[("a", 0), ("b", 1)]) Additionally, xarray supports dictionaries: .. jupyter-execute:: mda.sel(x={"one": "a", "two": 0}) For convenience, ``sel`` also accepts multi-index levels directly as keyword arguments: .. jupyter-execute:: mda.sel(one="a", two=0) Note that using ``sel`` it is not possible to mix a dimension indexer with level indexers for that dimension (e.g., ``mda.sel(x={'one': 'a'}, two=0)`` will raise a ``ValueError``). Like pandas, xarray handles partial selection on multi-index (level drop). As shown below, it also renames the dimension / coordinate when the multi-index is reduced to a single index. .. jupyter-execute:: mda.loc[{"one": "a"}, ...] Unlike pandas, xarray does not guess whether you provide index levels or dimensions when using ``loc`` in some ambiguous cases. For example, for ``mda.loc[{'one': 'a', 'two': 0}]`` and ``mda.loc['a', 0]`` xarray always interprets ('one', 'two') and ('a', 0) as the names and labels of the 1st and 2nd dimension, respectively. You must specify all dimensions or use the ellipsis in the ``loc`` specifier, e.g. in the example above, ``mda.loc[{'one': 'a', 'two': 0}, :]`` or ``mda.loc[('a', 0), ...]``. .. _indexing.rules: Indexing rules -------------- Here we describe the full rules xarray uses for vectorized indexing. Note that this is for the purposes of explanation: for the sake of efficiency and to support various backends, the actual implementation is different. 0. (Only for label based indexing.) 
Look up positional indexes along each dimension from the corresponding :py:class:`pandas.Index`. 1. A full slice object ``:`` is inserted for each dimension without an indexer. 2. ``slice`` objects are converted into arrays, given by ``np.arange(*slice.indices(...))``. 3. Assume dimension names for array indexers without dimensions, such as ``np.ndarray`` and ``list``, from the dimensions to be indexed along. For example, ``v.isel(x=[0, 1])`` is understood as ``v.isel(x=xr.DataArray([0, 1], dims=['x']))``. 4. For each variable in a ``Dataset`` or ``DataArray`` (the array and its coordinates): a. Broadcast all relevant indexers based on their dimension names (see :ref:`compute.broadcasting` for full details). b. Index the underling array by the broadcast indexers, using NumPy's advanced indexing rules. 5. If any indexer DataArray has coordinates and no coordinate with the same name exists, attach them to the indexed object. .. note:: Only 1-dimensional boolean arrays can be used as indexers. python-xarray-2026.01.0/doc/user-guide/ecosystem.rst0000664000175000017500000002534115136607163022433 0ustar alastairalastair.. _ecosystem: Xarray related projects ----------------------- Below is a list of existing open source projects that build functionality upon xarray. See also section :ref:`internals` for more details on how to build xarray extensions. We also maintain the `xarray-contrib `_ GitHub organization as a place to curate projects that build upon xarray. Geosciences ~~~~~~~~~~~ - `aospy `_: Automated analysis and management of gridded climate data. - `argopy `_: xarray-based Argo data access, manipulation and visualisation for standard users as well as Argo experts. - `cf_xarray `_: Provides an accessor (DataArray.cf or Dataset.cf) that allows you to interpret Climate and Forecast metadata convention attributes present on xarray objects. - `climpred `_: Analysis of ensemble forecast models for climate prediction. - `geocube `_: Tool to convert geopandas vector data into rasterized xarray data. - `GeoWombat `_: Utilities for analysis of remotely sensed and gridded raster data at scale (easily tame Landsat, Sentinel, Quickbird, and PlanetScope). - `grib2io `_: Utility to work with GRIB2 files including an xarray backend, DASK support for parallel reading in open_mfdataset, lazy loading of data, editing of GRIB2 attributes and GRIB2IO DataArray attrs, and spatial interpolation and reprojection of GRIB2 messages and GRIB2IO Datasets/DataArrays for both grid to grid and grid to stations. - `gsw-xarray `_: a wrapper around `gsw `_ that adds CF compliant attributes when possible, units, name. - `infinite-diff `_: xarray-based finite-differencing, focused on gridded climate/meteorology data - `marc_analysis `_: Analysis package for CESM/MARC experiments and output. - `MetPy `_: A collection of tools in Python for reading, visualizing, and performing calculations with weather data. - `MPAS-Analysis `_: Analysis for simulations produced with Model for Prediction Across Scales (MPAS) components and the Accelerated Climate Model for Energy (ACME). - `OGGM `_: Open Global Glacier Model - `Oocgcm `_: Analysis of large gridded geophysical datasets - `Open Data Cube `_: Analysis toolkit of continental scale Earth Observation data from satellites. - `Pangaea `_: xarray extension for gridded land surface & weather model output). - `Pangeo `_: A community effort for big data geoscience in the cloud. - `PyGDX `_: Python 3 package for accessing data stored in GAMS Data eXchange (GDX) files. 
Also uses a custom subclass. - `pyinterp `_: Python 3 package for interpolating geo-referenced data used in the field of geosciences. - `pyXpcm `_: xarray-based Profile Classification Modelling (PCM), mostly for ocean data. - `Regionmask `_: plotting and creation of masks of spatial regions - `rioxarray `_: geospatial xarray extension powered by rasterio - `salem `_: Adds geolocalised subsetting, masking, and plotting operations to xarray's data structures via accessors. - `SatPy `_ : Library for reading and manipulating meteorological remote sensing data and writing it to various image and data file formats. - `SARXarray `_: xarray extension for reading and processing large Synthetic Aperture Radar (SAR) data stacks. - `shxarray `_: Convert, filter,and map geodesy related spherical harmonic representations of gravity and terrestrial water storage through an xarray extension. - `Spyfit `_: FTIR spectroscopy of the atmosphere - `windspharm `_: Spherical harmonic wind analysis in Python. - `wradlib `_: An Open Source Library for Weather Radar Data Processing. - `wrf-python `_: A collection of diagnostic and interpolation routines for use with output of the Weather Research and Forecasting (WRF-ARW) Model. - `xarray-eopf `_: An xarray backend implementation for opening ESA EOPF data products in Zarr format. - `xarray-regrid `_: xarray extension for regridding rectilinear data. - `xarray-simlab `_: xarray extension for computer model simulations. - `xarray-spatial `_: Numba-accelerated raster-based spatial processing tools (NDVI, curvature, zonal-statistics, proximity, hillshading, viewshed, etc.) - `xarray-topo `_: xarray extension for topographic analysis and modelling. - `xbpch `_: xarray interface for bpch files. - `xCDAT `_: An extension of xarray for climate data analysis on structured grids. - `xclim `_: A library for calculating climate science indices with unit handling built from xarray and dask. - `xESMF `_: Universal regridder for geospatial data. - `xgcm `_: Extends the xarray data model to understand finite volume grid cells (common in General Circulation Models) and provides interpolation and difference operations for such grids. - `xmitgcm `_: a python package for reading `MITgcm `_ binary MDS files into xarray data structures. - `xnemogcm `_: a package to read `NEMO `_ output files and add attributes to interface with xgcm. Machine Learning ~~~~~~~~~~~~~~~~ - `ArviZ `_: Exploratory analysis of Bayesian models, built on top of xarray. - `Darts `_: User-friendly modern machine learning for time series in Python. - `Elm `_: Parallel machine learning on xarray data structures - `sklearn-xarray (1) `_: Combines scikit-learn and xarray (1). - `sklearn-xarray (2) `_: Combines scikit-learn and xarray (2). - `xbatcher `_: Batch Generation from Xarray Datasets. Other domains ~~~~~~~~~~~~~ - `ptsa `_: EEG Time Series Analysis - `pycalphad `_: Computational Thermodynamics in Python - `pyomeca `_: Python framework for biomechanical analysis - `movement `_: A Python toolbox for analysing animal body movements Extend xarray capabilities ~~~~~~~~~~~~~~~~~~~~~~~~~~ - `Collocate `_: Collocate xarray trajectories in arbitrary physical dimensions - `eofs `_: EOF analysis in Python. - `hypothesis-gufunc `_: Extension to hypothesis. Makes it easy to write unit tests with xarray objects as input. - `ntv-pandas `_ : A tabular analyzer and a semantic, compact and reversible converter for multidimensional and tabular data - `nxarray `_: NeXus input/output capability for xarray. 
- `xarray-compare `_: xarray extension for data comparison. - `xarray-dataclasses `_: xarray extension for typed DataArray and Dataset creation. - `xarray_einstats `_: Statistics, linear algebra and einops for xarray - `xarray_extras `_: Advanced algorithms for xarray objects (e.g. integrations/interpolations). - `xeofs `_: PCA/EOF analysis and related techniques, integrated with xarray and Dask for efficient handling of large-scale data. - `xpublish `_: Publish Xarray Datasets via a Zarr compatible REST API. - `xrft `_: Fourier transforms for xarray data. - `xr-scipy `_: A lightweight scipy wrapper for xarray. - `X-regression `_: Multiple linear regression from Statsmodels library coupled with Xarray library. - `xskillscore `_: Metrics for verifying forecasts. - `xyzpy `_: Easily generate high dimensional data, including parallelization. - `xarray-lmfit `_: xarray extension for curve fitting using `lmfit `_. Visualization ~~~~~~~~~~~~~ - `datashader `_, `geoviews `_, `holoviews `_, : visualization packages for large data. - `hvplot `_ : A high-level plotting API for the PyData ecosystem built on HoloViews. - `psyplot `_: Interactive data visualization with python. - `xarray-leaflet `_: An xarray extension for tiled map plotting based on ipyleaflet. - `xtrude `_: An xarray extension for 3D terrain visualization based on pydeck. - `pyvista-xarray `_: xarray DataArray accessor for 3D visualization with `PyVista `_ and DataSet engines for reading VTK data formats. Non-Python projects ~~~~~~~~~~~~~~~~~~~ - `xframe `_: C++ data structures inspired by xarray. - `AxisArrays `_, `NamedArrays `_ and `YAXArrays.jl `_: similar data structures for Julia. More projects can be found at the `"xarray" Github topic `_. python-xarray-2026.01.0/doc/user-guide/data-structures.rst0000664000175000017500000010406115136607163023547 0ustar alastairalastair.. _data structures: Data Structures =============== .. jupyter-execute:: :hide-code: :hide-output: import numpy as np import pandas as pd import xarray as xr import matplotlib.pyplot as plt np.random.seed(123456) np.set_printoptions(threshold=10) %xmode minimal DataArray --------- :py:class:`xarray.DataArray` is xarray's implementation of a labeled, multi-dimensional array. It has several key properties: - ``values``: a :py:class:`numpy.ndarray` or :ref:`numpy-like array ` holding the array's values - ``dims``: dimension names for each axis (e.g., ``('x', 'y', 'z')``) - ``coords``: a dict-like container of arrays (*coordinates*) that label each point (e.g., 1-dimensional arrays of numbers, datetime objects or strings) - ``attrs``: :py:class:`dict` to hold arbitrary metadata (*attributes*) Xarray uses ``dims`` and ``coords`` to enable its core metadata aware operations. Dimensions provide names that xarray uses instead of the ``axis`` argument found in many numpy functions. Coordinates enable fast label based indexing and alignment, building on the functionality of the ``index`` found on a pandas :py:class:`~pandas.DataFrame` or :py:class:`~pandas.Series`. DataArray objects also can have a ``name`` and can hold arbitrary metadata in the form of their ``attrs`` property. Names and attributes are strictly for users and user-written code: xarray makes no attempt to interpret them, and propagates them only in unambiguous cases. For reading and writing attributes xarray relies on the capabilities of the supported backends. (see FAQ, :ref:`approach to metadata`). .. 
_creating a dataarray: Creating a DataArray ~~~~~~~~~~~~~~~~~~~~ The :py:class:`~xarray.DataArray` constructor takes: - ``data``: a multi-dimensional array of values (e.g., a numpy ndarray, a :ref:`numpy-like array `, :py:class:`~pandas.Series`, :py:class:`~pandas.DataFrame` or ``pandas.Panel``) - ``coords``: a list or dictionary of coordinates. If a list, it should be a list of tuples where the first element is the dimension name and the second element is the corresponding coordinate array_like object. - ``dims``: a list of dimension names. If omitted and ``coords`` is a list of tuples, dimension names are taken from ``coords``. - ``attrs``: a dictionary of attributes to add to the instance - ``name``: a string that names the instance .. jupyter-execute:: data = np.random.rand(4, 3) locs = ["IA", "IL", "IN"] times = pd.date_range("2000-01-01", periods=4) foo = xr.DataArray(data, coords=[times, locs], dims=["time", "space"]) foo Only ``data`` is required; all of other arguments will be filled in with default values: .. jupyter-execute:: xr.DataArray(data) As you can see, dimension names are always present in the xarray data model: if you do not provide them, defaults of the form ``dim_N`` will be created. However, coordinates are always optional, and dimensions do not have automatic coordinate labels. .. note:: This is different from pandas, where axes always have tick labels, which default to the integers ``[0, ..., n-1]``. Prior to xarray v0.9, xarray copied this behavior: default coordinates for each dimension would be created if coordinates were not supplied explicitly. This is no longer the case. Coordinates can be specified in the following ways: - A list of values with length equal to the number of dimensions, providing coordinate labels for each dimension. Each value must be of one of the following forms: * A :py:class:`~xarray.DataArray` or :py:class:`~xarray.Variable` * A tuple of the form ``(dims, data[, attrs])``, which is converted into arguments for :py:class:`~xarray.Variable` * A pandas object or scalar value, which is converted into a ``DataArray`` * A 1D array or list, which is interpreted as values for a one dimensional coordinate variable along the same dimension as its name - A dictionary of ``{coord_name: coord}`` where values are of the same form as the list. Supplying coordinates as a dictionary allows other coordinates than those corresponding to dimensions (more on these later). If you supply ``coords`` as a dictionary, you must explicitly provide ``dims``. As a list of tuples: .. jupyter-execute:: xr.DataArray(data, coords=[("time", times), ("space", locs)]) As a dictionary: .. jupyter-execute:: xr.DataArray( data, coords={ "time": times, "space": locs, "const": 42, "ranking": ("space", [1, 2, 3]), }, dims=["time", "space"], ) As a dictionary with coords across multiple dimensions: .. jupyter-execute:: xr.DataArray( data, coords={ "time": times, "space": locs, "const": 42, "ranking": (("time", "space"), np.arange(12).reshape(4, 3)), }, dims=["time", "space"], ) If you create a ``DataArray`` by supplying a pandas :py:class:`~pandas.Series`, :py:class:`~pandas.DataFrame` or ``pandas.Panel``, any non-specified arguments in the ``DataArray`` constructor will be filled in from the pandas object: .. jupyter-execute:: df = pd.DataFrame({"x": [0, 1], "y": [2, 3]}, index=["a", "b"]) df.index.name = "abc" df.columns.name = "xyz" df .. jupyter-execute:: xr.DataArray(df) DataArray properties ~~~~~~~~~~~~~~~~~~~~ Let's take a look at the important properties on our array: .. 
jupyter-execute:: foo.values .. jupyter-execute:: foo.dims .. jupyter-execute:: foo.coords .. jupyter-execute:: foo.attrs .. jupyter-execute:: print(foo.name) You can modify ``values`` inplace: .. jupyter-execute:: foo.values = 1.0 * foo.values .. note:: The array values in a :py:class:`~xarray.DataArray` have a single (homogeneous) data type. To work with heterogeneous or structured data types in xarray, use coordinates, or put separate ``DataArray`` objects in a single :py:class:`~xarray.Dataset` (see below). Now fill in some of that missing metadata: .. jupyter-execute:: foo.name = "foo" foo.attrs["units"] = "meters" foo The :py:meth:`~xarray.DataArray.rename` method is another option, returning a new data array: .. jupyter-execute:: foo.rename("bar") DataArray Coordinates ~~~~~~~~~~~~~~~~~~~~~ The ``coords`` property is ``dict`` like. Individual coordinates can be accessed from the coordinates by name, or even by indexing the data array itself: .. jupyter-execute:: foo.coords["time"] .. jupyter-execute:: foo["time"] These are also :py:class:`~xarray.DataArray` objects, which contain tick-labels for each dimension. Coordinates can also be set or removed by using the dictionary like syntax: .. jupyter-execute:: foo["ranking"] = ("space", [1, 2, 3]) foo.coords .. jupyter-execute:: del foo["ranking"] foo.coords For more details, see :ref:`coordinates` below. Dataset ------- :py:class:`xarray.Dataset` is xarray's multi-dimensional equivalent of a :py:class:`~pandas.DataFrame`. It is a dict-like container of labeled arrays (:py:class:`~xarray.DataArray` objects) with aligned dimensions. It is designed as an in-memory representation of the data model from the `netCDF`__ file format. __ https://www.unidata.ucar.edu/software/netcdf/ In addition to the dict-like interface of the dataset itself, which can be used to access any variable in a dataset, datasets have four key properties: - ``dims``: a dictionary mapping from dimension names to the fixed length of each dimension (e.g., ``{'x': 6, 'y': 6, 'time': 8}``) - ``data_vars``: a dict-like container of DataArrays corresponding to variables - ``coords``: another dict-like container of DataArrays intended to label points used in ``data_vars`` (e.g., arrays of numbers, datetime objects or strings) - ``attrs``: :py:class:`dict` to hold arbitrary metadata The distinction between whether a variable falls in data or coordinates (borrowed from `CF conventions`_) is mostly semantic, and you can probably get away with ignoring it if you like: dictionary like access on a dataset will supply variables found in either category. However, xarray does make use of the distinction for indexing and computations. Coordinates indicate constant/fixed/independent quantities, unlike the varying/measured/dependent quantities that belong in data. .. _CF conventions: https://cfconventions.org/ Here is an example of how we might structure a dataset for a weather forecast: .. image:: ../_static/dataset-diagram.png In this example, it would be natural to call ``temperature`` and ``precipitation`` "data variables" and all the other arrays "coordinate variables" because they label the points along the dimensions. (see [1]_ for more background on this example). Creating a Dataset ~~~~~~~~~~~~~~~~~~ To make an :py:class:`~xarray.Dataset` from scratch, supply dictionaries for any variables (``data_vars``), coordinates (``coords``) and attributes (``attrs``). 
- ``data_vars`` should be a dictionary with each key as the name of the variable and each value as one of: * A :py:class:`~xarray.DataArray` or :py:class:`~xarray.Variable` * A tuple of the form ``(dims, data[, attrs])``, which is converted into arguments for :py:class:`~xarray.Variable` * A pandas object, which is converted into a ``DataArray`` * A 1D array or list, which is interpreted as values for a one dimensional coordinate variable along the same dimension as its name - ``coords`` should be a dictionary of the same form as ``data_vars``. - ``attrs`` should be a dictionary. Let's create some fake data for the example we show above. In this example dataset, we will represent measurements of the temperature and pressure that were made under various conditions: * the measurements were made on four different days; * they were made at two separate locations, which we will represent using their latitude and longitude; and * they were made using instruments by three different manufacturers, which we will refer to as ``'manufac1'``, ``'manufac2'``, and ``'manufac3'``. .. jupyter-execute:: np.random.seed(0) temperature = 15 + 8 * np.random.randn(2, 3, 4) precipitation = 10 * np.random.rand(2, 3, 4) lon = [-99.83, -99.32] lat = [42.25, 42.21] instruments = ["manufac1", "manufac2", "manufac3"] time = pd.date_range("2014-09-06", periods=4) reference_time = pd.Timestamp("2014-09-05") # for real use cases, its good practice to supply array attributes such as # units, but we won't bother here for the sake of brevity ds = xr.Dataset( { "temperature": (["loc", "instrument", "time"], temperature), "precipitation": (["loc", "instrument", "time"], precipitation), }, coords={ "lon": (["loc"], lon), "lat": (["loc"], lat), "instrument": instruments, "time": time, "reference_time": reference_time, }, ) ds Here we pass :py:class:`xarray.DataArray` objects or a pandas object as values in the dictionary: .. jupyter-execute:: xr.Dataset(dict(bar=foo)) .. jupyter-execute:: xr.Dataset(dict(bar=foo.to_pandas())) Where a pandas object is supplied as a value, the names of its indexes are used as dimension names, and its data is aligned to any existing dimensions. You can also create a dataset from: - A :py:class:`pandas.DataFrame` or ``pandas.Panel`` along its columns and items respectively, by passing it into the :py:class:`~xarray.Dataset` directly - A :py:class:`pandas.DataFrame` with :py:meth:`Dataset.from_dataframe `, which will additionally handle MultiIndexes See :ref:`pandas` - A netCDF file on disk with :py:func:`~xarray.open_dataset`. See :ref:`io`. Dataset contents ~~~~~~~~~~~~~~~~ :py:class:`~xarray.Dataset` implements the Python mapping interface, with values given by :py:class:`xarray.DataArray` objects: .. jupyter-execute:: print("temperature" in ds) ds["temperature"] Valid keys include each listed coordinate and data variable. Data and coordinate variables are also contained separately in the :py:attr:`~xarray.Dataset.data_vars` and :py:attr:`~xarray.Dataset.coords` dictionary-like attributes: .. jupyter-execute:: ds.data_vars .. jupyter-execute:: ds.coords Finally, like data arrays, datasets also store arbitrary metadata in the form of ``attributes``: .. jupyter-execute:: print(ds.attrs) ds.attrs["title"] = "example attribute" ds Xarray does not enforce any restrictions on attributes, but serialization to some file formats may fail if you use objects that are not strings, numbers or :py:class:`numpy.ndarray` objects. 
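For example, if you plan to write the dataset to netCDF or Zarr, it is safest to keep attribute values as plain strings, numbers, or arrays of those. The sketch below is illustrative only (the attribute names are arbitrary examples, and the exact failure mode depends on the backend used for writing):

.. jupyter-input::

    ds.attrs["institution"] = "Example Institute"  # a plain string serializes cleanly
    ds.attrs["version"] = 2  # so does a plain number

    # A richer Python object, e.g. {"step1": "regrid", "step2": "mask"}, is not
    # guaranteed to survive a round trip through every backend; flattening it
    # into a single string is a more portable choice:
    ds.attrs["processing"] = "step1: regrid; step2: mask"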
As a useful shortcut, you can use attribute style access for reading (but not setting) variables and attributes: .. jupyter-execute:: ds.temperature This is particularly useful in an exploratory context, because you can tab-complete these variable names with tools like IPython. .. _dictionary_like_methods: Dictionary like methods ~~~~~~~~~~~~~~~~~~~~~~~ We can update a dataset in-place using Python's standard dictionary syntax. For example, to create this example dataset from scratch, we could have written: .. jupyter-execute:: ds = xr.Dataset() ds["temperature"] = (("loc", "instrument", "time"), temperature) ds["temperature_double"] = (("loc", "instrument", "time"), temperature * 2) ds["precipitation"] = (("loc", "instrument", "time"), precipitation) ds.coords["lat"] = (("loc",), lat) ds.coords["lon"] = (("loc",), lon) ds.coords["time"] = pd.date_range("2014-09-06", periods=4) ds.coords["reference_time"] = pd.Timestamp("2014-09-05") To change the variables in a ``Dataset``, you can use all the standard dictionary methods, including ``values``, ``items``, ``__delitem__``, ``get`` and :py:meth:`~xarray.Dataset.update`. Note that assigning a ``DataArray`` or pandas object to a ``Dataset`` variable using ``__setitem__`` or ``update`` will :ref:`automatically align` the array(s) to the original dataset's indexes. You can copy a ``Dataset`` by calling the :py:meth:`~xarray.Dataset.copy` method. By default, the copy is shallow, so only the container will be copied: the arrays in the ``Dataset`` will still be stored in the same underlying :py:class:`numpy.ndarray` objects. You can copy all data by calling ``ds.copy(deep=True)``. .. _transforming datasets: Transforming datasets ~~~~~~~~~~~~~~~~~~~~~ In addition to dictionary-like methods (described above), xarray has additional methods (like pandas) for transforming datasets into new objects. For removing variables, you can select and drop an explicit list of variables by indexing with a list of names or using the :py:meth:`~xarray.Dataset.drop_vars` methods to return a new ``Dataset``. These operations keep around coordinates: .. jupyter-execute:: ds[["temperature"]] .. jupyter-execute:: ds[["temperature", "temperature_double"]] .. jupyter-execute:: ds.drop_vars("temperature") To remove a dimension, you can use :py:meth:`~xarray.Dataset.drop_dims` method. Any variables using that dimension are dropped: .. jupyter-execute:: ds.drop_dims("time") As an alternate to dictionary-like modifications, you can use :py:meth:`~xarray.Dataset.assign` and :py:meth:`~xarray.Dataset.assign_coords`. These methods return a new dataset with additional (or replaced) values: .. jupyter-execute:: ds.assign(temperature2=2 * ds.temperature) There is also the :py:meth:`~xarray.Dataset.pipe` method that allows you to use a method call with an external function (e.g., ``ds.pipe(func)``) instead of simply calling it (e.g., ``func(ds)``). This allows you to write pipelines for transforming your data (using "method chaining") instead of writing hard to follow nested function calls: .. jupyter-input:: # these lines are equivalent, but with pipe we can make the logic flow # entirely from left to right plt.plot((2 * ds.temperature.sel(loc=0)).mean("instrument")) (ds.temperature.sel(loc=0).pipe(lambda x: 2 * x).mean("instrument").pipe(plt.plot)) Both ``pipe`` and ``assign`` replicate the pandas methods of the same names (:py:meth:`DataFrame.pipe ` and :py:meth:`DataFrame.assign `). 
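Extra positional and keyword arguments given to ``pipe`` are forwarded to the function, which keeps parameterized processing steps chainable. Here is a small sketch using a hypothetical helper (``scale_var`` is not part of xarray):

.. jupyter-input::

    def scale_var(ds, name, factor=1.0):
        # return a new Dataset with one variable multiplied by ``factor``
        return ds.assign({name: ds[name] * factor})

    # arguments after the callable are passed through to ``scale_var``
    ds.pipe(scale_var, "temperature", factor=2.0).pipe(
        lambda d: d.temperature.mean("time")
    )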
With xarray, there is no performance penalty for creating new datasets, even if variables are lazily loaded from a file on disk. Creating new objects instead of mutating existing objects often results in easier to understand code, so we encourage using this approach.

Renaming variables
~~~~~~~~~~~~~~~~~~

Another useful option is the :py:meth:`~xarray.Dataset.rename` method to rename dataset variables:

.. jupyter-execute::

    ds.rename({"temperature": "temp", "precipitation": "precip"})

The related :py:meth:`~xarray.Dataset.swap_dims` method allows you to swap dimension and non-dimension variables:

.. jupyter-execute::

    ds.coords["day"] = ("time", [6, 7, 8, 9])
    ds.swap_dims({"time": "day"})

DataTree
--------

:py:class:`~xarray.DataTree` is ``xarray``'s highest-level data structure, able to organise heterogeneous data which could not be stored inside a single :py:class:`~xarray.Dataset` object. This includes representing the recursive structure of multiple `groups`_ within a netCDF file or `Zarr Store`_.

.. _groups: https://www.unidata.ucar.edu/software/netcdf/workshops/2011/groups-types/GroupsIntro.html
.. _Zarr Store: https://zarr.readthedocs.io/en/stable/tutorial.html#groups

Each :py:class:`~xarray.DataTree` object (or "node") contains the same data that a single :py:class:`xarray.Dataset` would (i.e. :py:class:`~xarray.DataArray` objects stored under hashable keys), and so has the same key properties:

- ``dims``: a dictionary mapping dimension names to lengths, for the variables in this node and this node's ancestors,
- ``data_vars``: a dict-like container of DataArrays corresponding to variables in this node,
- ``coords``: another dict-like container of DataArrays, corresponding to coordinate variables in this node and this node's ancestors,
- ``attrs``: dict to hold arbitrary metadata relevant to data in this node.

A single :py:class:`~xarray.DataTree` object acts much like a single :py:class:`~xarray.Dataset` object, and has a similar set of dict-like methods defined upon it. However, :py:class:`~xarray.DataTree`\s can also contain other :py:class:`~xarray.DataTree` objects, so they can be thought of as nested dict-like containers of both :py:class:`xarray.DataArray`\s and :py:class:`~xarray.DataTree`\s.

A single datatree object is known as a "node", and its position relative to other nodes is defined by two more key properties:

- ``children``: A dictionary mapping from names to other :py:class:`~xarray.DataTree` objects, known as its "child nodes".
- ``parent``: The single :py:class:`~xarray.DataTree` object whose children this datatree is a member of, known as its "parent node".

Each child automatically knows about its parent node, and a node without a parent is known as a "root" node (represented by the ``parent`` attribute pointing to ``None``). Nodes can have multiple children, but as each child node has at most one parent, there can only ever be one root node in a given tree. The overall structure is technically a connected acyclic undirected rooted graph, otherwise known as a `"Tree" `_.

:py:class:`~xarray.DataTree` objects can also optionally have a ``name`` as well as ``attrs``, just like a :py:class:`~xarray.DataArray`. Again, these are not normally used unless explicitly accessed by the user.

.. _creating a datatree:

Creating a DataTree
~~~~~~~~~~~~~~~~~~~

One way to create a :py:class:`~xarray.DataTree` from scratch is to create each node individually, specifying the nodes' relationship to one another as you create each one.
The :py:class:`~xarray.DataTree` constructor takes: - ``dataset``: The data that will be stored in this node, represented by a single :py:class:`xarray.Dataset`. - ``children``: The various child nodes (if there are any), given as a mapping from string keys to :py:class:`~xarray.DataTree` objects. - ``name``: A string to use as the name of this node. Let's make a single datatree node with some example data in it: .. jupyter-execute:: ds1 = xr.Dataset({"foo": "orange"}) dt = xr.DataTree(name="root", dataset=ds1) dt At this point we have created a single node datatree with no parent and no children. .. jupyter-execute:: print(dt.parent is None) dt.children We can add a second node to this tree, assigning it to the parent node ``dt``: .. jupyter-execute:: dataset2 = xr.Dataset({"bar": 0}, coords={"y": ("y", [0, 1, 2])}) dt2 = xr.DataTree(name="a", dataset=dataset2) # Add the child Datatree to the root node dt.children = {"child-node": dt2} dt More idiomatically you can create a tree from a dictionary of ``Datasets`` and ``DataTrees``. In this case we add a new node under ``dt["child-node"]`` by providing the explicit path under ``"child-node"`` as the dictionary key: .. jupyter-execute:: # create a third Dataset ds3 = xr.Dataset({"zed": np.nan}) # create a tree from a dictionary of DataTrees and Datasets dt = xr.DataTree.from_dict({"/": dt, "/child-node/new-zed-node": ds3}) We have created a tree with three nodes in it: .. jupyter-execute:: dt Consistency checks are enforced. For instance, if we try to create a cycle, where the root node is also a child of a descendant, the constructor will raise an (:py:class:`~xarray.InvalidTreeError`): .. jupyter-execute:: :raises: dt["child-node"].children = {"new-child": dt} Alternatively you can also create a :py:class:`~xarray.DataTree` object from: - A dictionary mapping directory-like paths to either :py:class:`~xarray.DataTree` nodes or data, using :py:meth:`xarray.DataTree.from_dict()`, - A well formed netCDF or Zarr file on disk with :py:func:`~xarray.open_datatree()`. See :ref:`reading and writing files `. For data files with groups that do not align see :py:func:`xarray.open_groups` or target each group individually :py:func:`xarray.open_dataset(group='groupname') `. For more information about coordinate alignment see :ref:`datatree-inheritance` DataTree Contents ~~~~~~~~~~~~~~~~~ Like :py:class:`~xarray.Dataset`, :py:class:`~xarray.DataTree` implements the python mapping interface, but with values given by either :py:class:`~xarray.DataArray` objects or other :py:class:`~xarray.DataTree` objects. .. jupyter-execute:: dt["child-node"] .. jupyter-execute:: dt["foo"] Iterating over keys will iterate over both the names of variables and child nodes. We can also access all the data in a single node, and its inherited coordinates, through a dataset-like view .. jupyter-execute:: dt["child-node"].dataset This demonstrates the fact that the data in any one node is equivalent to the contents of a single :py:class:`~xarray.Dataset` object. The :py:attr:`DataTree.dataset ` property returns an immutable view, but we can instead extract the node's data contents as a new and mutable :py:class:`~xarray.Dataset` object via :py:meth:`DataTree.to_dataset() `: .. jupyter-execute:: dt["child-node"].to_dataset() Like with :py:class:`~xarray.Dataset`, you can access the data and coordinate variables of a node separately via the :py:attr:`~xarray.DataTree.data_vars` and :py:attr:`~xarray.DataTree.coords` attributes: .. jupyter-execute:: dt["child-node"].data_vars .. 
jupyter-execute:: dt["child-node"].coords Dictionary-like methods ~~~~~~~~~~~~~~~~~~~~~~~ We can update a datatree in-place using Python's standard dictionary syntax, similar to how we can for Dataset objects. For example, to create this example DataTree from scratch, we could have written: .. jupyter-execute:: dt = xr.DataTree(name="root") dt["foo"] = "orange" dt["child-node"] = xr.DataTree( dataset=xr.Dataset({"bar": 0}, coords={"y": ("y", [0, 1, 2])}) ) dt["child-node/new-zed-node/zed"] = np.nan dt To change the variables in a node of a :py:class:`~xarray.DataTree`, you can use all the standard dictionary methods, including ``values``, ``items``, ``__delitem__``, ``get`` and :py:meth:`xarray.DataTree.update`. Note that assigning a :py:class:`~xarray.DataTree` object to a :py:class:`~xarray.DataTree` variable using ``__setitem__`` or :py:meth:`~xarray.DataTree.update` will :ref:`automatically align ` the array(s) to the original node's indexes. If you copy a :py:class:`~xarray.DataTree` using the :py:func:`copy` function or the :py:meth:`xarray.DataTree.copy` method it will copy the subtree, meaning that node and children below it, but no parents above it. Like for :py:class:`~xarray.Dataset`, this copy is shallow by default, but you can copy all the underlying data arrays by calling ``dt.copy(deep=True)``. .. _datatree-inheritance: DataTree Inheritance ~~~~~~~~~~~~~~~~~~~~ DataTree implements a simple inheritance mechanism. Coordinates, dimensions and their associated indices are propagated from downward starting from the root node to all descendent nodes. Coordinate inheritance was inspired by the NetCDF-CF inherited dimensions, but DataTree's inheritance is slightly stricter yet easier to reason about. The constraint that this puts on a DataTree is that dimensions and indices that are inherited must be aligned with any direct descendant node's existing dimension or index. This allows descendants to use dimensions defined in ancestor nodes, without duplicating that information. But as a consequence, if a dimension-name is defined in on a node and that same dimension-name exists in one of its ancestors, they must align (have the same index and size). Some examples: .. 
jupyter-execute:: # Set up coordinates time = xr.DataArray(data=["2022-01", "2023-01"], dims="time") stations = xr.DataArray(data=list("abcdef"), dims="station") lon = [-100, -80, -60] lat = [10, 20, 30] # Set up fake data wind_speed = xr.DataArray(np.ones((2, 6)) * 2, dims=("time", "station")) pressure = xr.DataArray(np.ones((2, 6)) * 3, dims=("time", "station")) air_temperature = xr.DataArray(np.ones((2, 6)) * 4, dims=("time", "station")) dewpoint = xr.DataArray(np.ones((2, 6)) * 5, dims=("time", "station")) infrared = xr.DataArray(np.ones((2, 3, 3)) * 6, dims=("time", "lon", "lat")) true_color = xr.DataArray(np.ones((2, 3, 3)) * 7, dims=("time", "lon", "lat")) dt2 = xr.DataTree.from_dict( { "/": xr.Dataset( coords={"time": time}, ), "/weather": xr.Dataset( coords={"station": stations}, data_vars={ "wind_speed": wind_speed, "pressure": pressure, }, ), "/weather/temperature": xr.Dataset( data_vars={ "air_temperature": air_temperature, "dewpoint": dewpoint, }, ), "/satellite": xr.Dataset( coords={"lat": lat, "lon": lon}, data_vars={ "infrared": infrared, "true_color": true_color, }, ), }, ) dt2 Here there are four different coordinate variables, which apply to variables in the DataTree in different ways: ``time`` is a shared coordinate used by both ``weather`` and ``satellite`` variables ``station`` is used only for ``weather`` variables ``lat`` and ``lon`` are only use for ``satellite`` images Coordinate variables are inherited to descendent nodes, which is only possible because variables at different levels of a hierarchical DataTree are always aligned. Placing the ``time`` variable at the root node automatically indicates that it applies to all descendent nodes. Similarly, ``station`` is in the base ``weather`` node, because it applies to all weather variables, both directly in ``weather`` and in the ``temperature`` sub-tree. Notice the inherited coordinates are explicitly shown in the tree representation under ``Inherited coordinates:``. .. jupyter-execute:: dt2["/weather"] Accessing any of the lower level trees through the :py:func:`.dataset ` property automatically includes coordinates from higher levels (e.g., ``time`` and ``station``): .. jupyter-execute:: dt2["/weather/temperature"].dataset Similarly, when you retrieve a Dataset through :py:func:`~xarray.DataTree.to_dataset` , the inherited coordinates are included by default unless you exclude them with the ``inherit`` flag: .. jupyter-execute:: dt2["/weather/temperature"].to_dataset() .. jupyter-execute:: dt2["/weather/temperature"].to_dataset(inherit=False) For more examples and further discussion see :ref:`alignment and coordinate inheritance `. .. _coordinates: Coordinates ----------- Coordinates are ancillary variables stored for ``DataArray`` and ``Dataset`` objects in the ``coords`` attribute: .. jupyter-execute:: ds.coords Unlike attributes, xarray *does* interpret and persist coordinates in operations that transform xarray objects. There are two types of coordinates in xarray: - **dimension coordinates** are one dimensional coordinates with a name equal to their sole dimension (marked by ``*`` when printing a dataset or data array). They are used for label based indexing and alignment, like the ``index`` found on a pandas :py:class:`~pandas.DataFrame` or :py:class:`~pandas.Series`. Indeed, these "dimension" coordinates use a :py:class:`pandas.Index` internally to store their values. - **non-dimension coordinates** are variables that contain coordinate data, but are not a dimension coordinate. 
They can be multidimensional (see :ref:`/examples/multidimensional-coords.ipynb`), and there is no relationship between the name of a non-dimension coordinate and the name(s) of its dimension(s). Non-dimension coordinates can be useful for indexing or plotting; otherwise, xarray does not make any direct use of the values associated with them. They are not used for alignment or automatic indexing, nor are they required to match when doing arithmetic (see :ref:`coordinates math`).

.. note::

    Xarray's terminology differs from the `CF terminology`_, where the "dimension coordinates" are called "coordinate variables", and the "non-dimension coordinates" are called "auxiliary coordinate variables" (see :issue:`1295` for more details).

.. _CF terminology: https://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#terminology

Modifying coordinates
~~~~~~~~~~~~~~~~~~~~~

To entirely add or remove coordinate arrays, you can use dictionary-like syntax, as shown above.

To convert back and forth between data and coordinates, you can use the :py:meth:`~xarray.Dataset.set_coords` and :py:meth:`~xarray.Dataset.reset_coords` methods:

.. jupyter-execute::

    ds.reset_coords()

.. jupyter-execute::

    ds.set_coords(["temperature", "precipitation"])

.. jupyter-execute::

    ds["temperature"].reset_coords(drop=True)

Notice that these operations skip coordinates with names given by dimensions, as used for indexing. This is mostly because we are not entirely sure how to design the interface around the fact that xarray cannot store a coordinate and a variable with the same name but different values in the same dictionary. But we do recognize that supporting something like this would be useful.

Coordinates methods
~~~~~~~~~~~~~~~~~~~

``Coordinates`` objects also have a few useful methods, mostly for converting them into dataset objects:

.. jupyter-execute::

    ds.coords.to_dataset()

The merge method is particularly interesting, because it implements the same logic used for merging coordinates in arithmetic operations (see :ref:`compute`):

.. jupyter-execute::

    alt = xr.Dataset(coords={"z": [10], "lat": 0, "lon": 0})
    ds.coords.merge(alt.coords)

The ``coords.merge`` method may be useful if you want to implement your own binary operations that act on xarray objects. In the future, we hope to write more helper functions so that you can easily make your functions act like xarray's built-in arithmetic.

Indexes
~~~~~~~

To convert a coordinate (or any ``DataArray``) into an actual :py:class:`pandas.Index`, use the :py:meth:`~xarray.DataArray.to_index` method:

.. jupyter-execute::

    ds["time"].to_index()

A useful shortcut is the ``indexes`` property (on both ``DataArray`` and ``Dataset``), which lazily constructs a dictionary whose keys are given by each dimension and whose values are ``Index`` objects:

.. jupyter-execute::

    ds.indexes

MultiIndex coordinates
~~~~~~~~~~~~~~~~~~~~~~

Xarray supports labeling coordinate values with a :py:class:`pandas.MultiIndex`:

.. jupyter-execute::

    midx = pd.MultiIndex.from_arrays(
        [["R", "R", "V", "V"], [0.1, 0.2, 0.7, 0.9]], names=("band", "wn")
    )
    mda = xr.DataArray(np.random.rand(4), coords={"spec": midx}, dims="spec")
    mda

For convenience, multi-index levels are directly accessible as "virtual" or "derived" coordinates (marked by ``-`` when printing a dataset or data array):

.. jupyter-execute::

    mda["band"]

.. jupyter-execute::

    mda.wn

Indexing with multi-index levels is also possible using the ``sel`` method (see :ref:`multi-level indexing`).
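For example, selecting on a single level of the ``spec`` multi-index defined above looks like this (a brief preview; the full set of rules is described in the section linked above):

.. jupyter-input::

    mda.sel(band="R")  # everything whose "band" level equals "R"
    mda.sel(wn=0.2)    # or select on the "wn" level instead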
Unlike other coordinates, "virtual" level coordinates are not stored in the ``coords`` attribute of ``DataArray`` and ``Dataset`` objects (although they are shown when printing the ``coords`` attribute). Consequently, most of the coordinates related methods don't apply for them. It also can't be used to replace one particular level. Because in a ``DataArray`` or ``Dataset`` object each multi-index level is accessible as a "virtual" coordinate, its name must not conflict with the names of the other levels, coordinates and data variables of the same object. Even though xarray sets default names for multi-indexes with unnamed levels, it is recommended that you explicitly set the names of the levels. .. [1] Latitude and longitude are 2D arrays because the dataset uses `projected coordinates`__. ``reference_time`` refers to the reference time at which the forecast was made, rather than ``time`` which is the valid time for which the forecast applies. __ https://en.wikipedia.org/wiki/Map_projection python-xarray-2026.01.0/doc/user-guide/plotting.rst0000664000175000017500000006661015136607163022264 0ustar alastairalastair.. currentmodule:: xarray .. _plotting: Plotting ======== Introduction ------------ Labeled data enables expressive computations. These same labels can also be used to easily create informative plots. Xarray's plotting capabilities are centered around :py:class:`DataArray` objects. To plot :py:class:`Dataset` objects simply access the relevant DataArrays, i.e. ``dset['var1']``. Dataset specific plotting routines are also available (see :ref:`plot-dataset`). Here we focus mostly on arrays 2d or larger. If your data fits nicely into a pandas DataFrame then you're better off using one of the more developed tools there. Xarray plotting functionality is a thin wrapper around the popular `matplotlib `_ library. Matplotlib syntax and function names were copied as much as possible, which makes for an easy transition between the two. Matplotlib must be installed before xarray can plot. To use xarray's plotting capabilities with time coordinates containing ``cftime.datetime`` objects `nc-time-axis `_ v1.3.0 or later needs to be installed. For more extensive plotting applications consider the following projects: - `Seaborn `_: "provides a high-level interface for drawing attractive statistical graphics." Integrates well with pandas. - `HoloViews `_ and `GeoViews `_: "Composable, declarative data structures for building even complex visualizations easily." Includes native support for xarray objects. - `hvplot `_: ``hvplot`` makes it very easy to produce dynamic plots (backed by ``Holoviews`` or ``Geoviews``) by adding a ``hvplot`` accessor to DataArrays. - `Cartopy `_: Provides cartographic tools. Imports ~~~~~~~ .. jupyter-execute:: :hide-code: # Use defaults so we don't get gridlines in generated docs import matplotlib as mpl mpl.rcdefaults() The following imports are necessary for all of the examples. .. jupyter-execute:: import cartopy.crs as ccrs import matplotlib.pyplot as plt import numpy as np import pandas as pd import xarray as xr For these examples we'll use the North American air temperature dataset. .. jupyter-execute:: airtemps = xr.tutorial.open_dataset("air_temperature") airtemps .. jupyter-execute:: # Convert to celsius air = airtemps.air - 273.15 # copy attributes to get nice figure labels and change Kelvin to Celsius air.attrs = airtemps.air.attrs air.attrs["units"] = "deg C" .. 
note:: Until :issue:`1614` is solved, you might need to copy over the metadata in ``attrs`` to get informative figure labels (as was done above). DataArrays ---------- One Dimension ~~~~~~~~~~~~~ ================ Simple Example ================ The simplest way to make a plot is to call the :py:func:`DataArray.plot()` method. .. jupyter-execute:: air1d = air.isel(lat=10, lon=10) air1d.plot(); Xarray uses the coordinate name along with metadata ``attrs.long_name``, ``attrs.standard_name``, ``DataArray.name`` and ``attrs.units`` (if available) to label the axes. The names ``long_name``, ``standard_name`` and ``units`` are copied from the `CF-conventions spec `_. When choosing names, the order of precedence is ``long_name``, ``standard_name`` and finally ``DataArray.name``. The y-axis label in the above plot was constructed from the ``long_name`` and ``units`` attributes of ``air1d``. .. jupyter-execute:: air1d.attrs ====================== Additional Arguments ====================== Additional arguments are passed directly to the matplotlib function which does the work. For example, :py:func:`xarray.plot.line` calls matplotlib.pyplot.plot_ passing in the index and the array values as x and y, respectively. So to make a line plot with blue triangles a matplotlib format string can be used: .. _matplotlib.pyplot.plot: https://matplotlib.org/api/pyplot_api.html#matplotlib.pyplot.plot .. jupyter-execute:: air1d[:200].plot.line("b-^"); .. note:: Not all xarray plotting methods support passing positional arguments to the wrapped matplotlib functions, but they do all support keyword arguments. Keyword arguments work the same way, and are more explicit. .. jupyter-execute:: air1d[:200].plot.line(color="purple", marker="o"); ========================= Adding to Existing Axis ========================= To add the plot to an existing axis pass in the axis as a keyword argument ``ax``. This works for all xarray plotting methods. In this example ``axs`` is an array consisting of the left and right axes created by ``plt.subplots``. .. jupyter-execute:: fig, axs = plt.subplots(ncols=2) print(axs) air1d.plot(ax=axs[0]) air1d.plot.hist(ax=axs[1]); On the right is a histogram created by :py:func:`xarray.plot.hist`. .. _plotting.figsize: ============================= Controlling the figure size ============================= You can pass a ``figsize`` argument to all xarray's plotting methods to control the figure size. For convenience, xarray's plotting methods also support the ``aspect`` and ``size`` arguments which control the size of the resulting image via the formula ``figsize = (aspect * size, size)``: .. jupyter-execute:: air1d.plot(aspect=2, size=3); This feature also works with :ref:`plotting.faceting`. For facet plots, ``size`` and ``aspect`` refer to a single panel (so that ``aspect * size`` gives the width of each facet in inches), while ``figsize`` refers to the entire figure (as for matplotlib's ``figsize`` argument). .. note:: If ``figsize`` or ``size`` are used, a new figure is created, so this is mutually exclusive with the ``ax`` argument. .. note:: The convention used by xarray (``figsize = (aspect * size, size)``) is borrowed from seaborn: it is therefore `not equivalent to matplotlib's`_. .. _not equivalent to matplotlib's: https://github.com/mwaskom/seaborn/issues/746 .. _plotting.multiplelines: ========================= Determine x-axis values ========================= Per default dimension coordinates are used for the x-axis (here the time coordinates). 
However, you can also use non-dimension coordinates, MultiIndex levels, and dimensions without coordinates along the x-axis. To illustrate this, let's calculate a 'decimal day' (epoch) from the time and assign it as a non-dimension coordinate: .. jupyter-execute:: decimal_day = (air1d.time - air1d.time[0]) / pd.Timedelta("1d") air1d_multi = air1d.assign_coords(decimal_day=("time", decimal_day.data)) air1d_multi To use ``'decimal_day'`` as x coordinate it must be explicitly specified: .. jupyter-execute:: air1d_multi.plot(x="decimal_day"); Creating a new MultiIndex named ``'date'`` from ``'time'`` and ``'decimal_day'``, it is also possible to use a MultiIndex level as x-axis: .. jupyter-execute:: air1d_multi = air1d_multi.set_index(date=("time", "decimal_day")) air1d_multi.plot(x="decimal_day"); Finally, if a dataset does not have any coordinates it enumerates all data points: .. jupyter-execute:: air1d_multi = air1d_multi.drop_vars(["date", "time", "decimal_day"]) air1d_multi.plot(); The same applies to 2D plots below. ==================================================== Multiple lines showing variation along a dimension ==================================================== It is possible to make line plots of two-dimensional data by calling :py:func:`xarray.plot.line` with appropriate arguments. Consider the 3D variable ``air`` defined above. We can use line plots to check the variation of air temperature at three different latitudes along a longitude line: .. jupyter-execute:: air.isel(lon=10, lat=[19, 21, 22]).plot.line(x="time"); It is required to explicitly specify either 1. ``x``: the dimension to be used for the x-axis, or 2. ``hue``: the dimension you want to represent by multiple lines. Thus, we could have made the previous plot by specifying ``hue='lat'`` instead of ``x='time'``. If required, the automatic legend can be turned off using ``add_legend=False``. Alternatively, ``hue`` can be passed directly to :py:func:`xarray.plot.line` as ``air.isel(lon=10, lat=[19,21,22]).plot.line(hue='lat')``. ======================== Dimension along y-axis ======================== It is also possible to make line plots such that the data are on the x-axis and a dimension is on the y-axis. This can be done by specifying the appropriate ``y`` keyword argument. .. jupyter-execute:: air.isel(time=10, lon=[10, 11]).plot(y="lat", hue="lon"); ============ Step plots ============ As an alternative, also a step plot similar to matplotlib's ``plt.step`` can be made using 1D data. .. jupyter-execute:: air1d[:20].plot.step(where="mid"); The argument ``where`` defines where the steps should be placed, options are ``'pre'`` (default), ``'post'``, and ``'mid'``. This is particularly handy when plotting data grouped with :py:meth:`Dataset.groupby_bins`. .. jupyter-execute:: air_grp = air.mean(["time", "lon"]).groupby_bins("lat", [0, 23.5, 66.5, 90]) air_mean = air_grp.mean() air_std = air_grp.std() air_mean.plot.step() (air_mean + air_std).plot.step(ls=":") (air_mean - air_std).plot.step(ls=":") plt.ylim(-20, 30) plt.title("Zonal mean temperature"); In this case, the actual boundaries of the bins are used and the ``where`` argument is ignored. Other axes kwargs ~~~~~~~~~~~~~~~~~ The keyword arguments ``xincrease`` and ``yincrease`` let you control the axes direction. .. 
jupyter-execute:: air.isel(time=10, lon=[10, 11]).plot.line( y="lat", hue="lon", xincrease=False, yincrease=False ); In addition, one can use ``xscale, yscale`` to set axes scaling; ``xticks, yticks`` to set axes ticks and ``xlim, ylim`` to set axes limits. These accept the same values as the matplotlib methods ``ax.set_(x,y)scale()``, ``ax.set_(x,y)ticks()``, ``ax.set_(x,y)lim()``, respectively. Two Dimensions ~~~~~~~~~~~~~~ ================ Simple Example ================ The default method :py:meth:`DataArray.plot` calls :py:func:`xarray.plot.pcolormesh` by default when the data is two-dimensional. .. jupyter-execute:: air2d = air.isel(time=500) air2d.plot(); All 2d plots in xarray allow the use of the keyword arguments ``yincrease`` and ``xincrease``. .. jupyter-execute:: air2d.plot(yincrease=False); .. note:: We use :py:func:`xarray.plot.pcolormesh` as the default two-dimensional plot method because it is more flexible than :py:func:`xarray.plot.imshow`. However, for large arrays, ``imshow`` can be much faster than ``pcolormesh``. If speed is important to you and you are plotting a regular mesh, consider using ``imshow``. ================ Missing Values ================ Xarray plots data with :ref:`missing_values`. .. jupyter-execute:: bad_air2d = air2d.copy() bad_air2d[dict(lat=slice(0, 10), lon=slice(0, 25))] = np.nan bad_air2d.plot(); ======================== Nonuniform Coordinates ======================== It's not necessary for the coordinates to be evenly spaced. Both :py:func:`xarray.plot.pcolormesh` (default) and :py:func:`xarray.plot.contourf` can produce plots with nonuniform coordinates. .. jupyter-execute:: b = air2d.copy() # Apply a nonlinear transformation to one of the coords b.coords["lat"] = np.log(b.coords["lat"]) b.plot(); ==================== Other types of plot ==================== There are several other options for plotting 2D data. Contour plot using :py:meth:`DataArray.plot.contour()` .. jupyter-execute:: air2d.plot.contour(); Filled contour plot using :py:meth:`DataArray.plot.contourf()` .. jupyter-execute:: air2d.plot.contourf(); Surface plot using :py:meth:`DataArray.plot.surface()` .. jupyter-execute:: # transpose just to make the example look a bit nicer air2d.T.plot.surface(); ==================== Calling Matplotlib ==================== Since this is a thin wrapper around matplotlib, all the functionality of matplotlib is available. .. jupyter-execute:: air2d.plot(cmap=plt.cm.Blues) plt.title("These colors prove North America\nhas fallen in the ocean") plt.ylabel("latitude") plt.xlabel("longitude"); .. note:: Xarray methods update label information and generally play around with the axes. So any kind of updates to the plot should be done *after* the call to the xarray's plot. In the example below, ``plt.xlabel`` effectively does nothing, since ``d_ylog.plot()`` updates the xlabel. .. jupyter-execute:: plt.xlabel("Never gonna see this.") air2d.plot(); =========== Colormaps =========== Xarray borrows logic from Seaborn to infer what kind of color map to use. For example, consider the original data in Kelvins rather than Celsius: .. jupyter-execute:: airtemps.air.isel(time=0).plot(); The Celsius data contain 0, so a diverging color map was used. The Kelvins do not have 0, so the default color map was used. .. _robust-plotting: ======== Robust ======== Outliers often have an extreme effect on the output of the plot. Here we add two bad data points. This affects the color scale, washing out the plot. .. 
jupyter-execute:: air_outliers = airtemps.air.isel(time=0).copy() air_outliers[0, 0] = 100 air_outliers[-1, -1] = 400 air_outliers.plot(); This plot shows that we have outliers. The easy way to visualize the data without the outliers is to pass the parameter ``robust=True``. This will use the 2nd and 98th percentiles of the data to compute the color limits. .. jupyter-execute:: air_outliers.plot(robust=True); Observe that the ranges of the color bar have changed. The arrows on the color bar indicate that the colors include data points outside the bounds. ==================== Discrete Colormaps ==================== It is often useful, when visualizing 2d data, to use a discrete colormap, rather than the default continuous colormaps that matplotlib uses. The ``levels`` keyword argument can be used to generate plots with discrete colormaps. For example, to make a plot with 8 discrete color intervals: .. jupyter-execute:: air2d.plot(levels=8); It is also possible to use a list of levels to specify the boundaries of the discrete colormap: .. jupyter-execute:: air2d.plot(levels=[0, 12, 18, 30]); You can also specify a list of discrete colors through the ``colors`` argument: .. jupyter-execute:: flatui = ["#9b59b6", "#3498db", "#95a5a6", "#e74c3c", "#34495e", "#2ecc71"] air2d.plot(levels=[0, 12, 18, 30], colors=flatui); Finally, if you have `Seaborn `_ installed, you can also specify a seaborn color palette to the ``cmap`` argument. Note that ``levels`` *must* be specified with seaborn color palettes if using ``imshow`` or ``pcolormesh`` (but not with ``contour`` or ``contourf``, since levels are chosen automatically). .. jupyter-execute:: air2d.plot(levels=10, cmap="husl"); .. _plotting.faceting: Faceting ~~~~~~~~ Faceting here refers to splitting an array along one or two dimensions and plotting each group. Xarray's basic plotting is useful for plotting two dimensional arrays. What about three or four dimensional arrays? That's where facets become helpful. The general approach to plotting here is called “small multiples”, where the same kind of plot is repeated multiple times, and the specific use of small multiples to display the same relationship conditioned on one or more other variables is often called a “trellis plot”. Consider the temperature data set. There are 4 observations per day for two years which makes for 2920 values along the time dimension. One way to visualize this data is to make a separate plot for each time period. The faceted dimension should not have too many values; faceting on the time dimension will produce 2920 plots. That's too much to be helpful. To handle this situation try performing an operation that reduces the size of the data in some way. For example, we could compute the average air temperature for each month and reduce the size of this dimension from 2920 -> 12. A simpler way is to just take a slice on that dimension. So let's use a slice to pick 6 times throughout the first year. .. jupyter-execute:: t = air.isel(time=slice(0, 365 * 4, 250)) t.coords ================ Simple Example ================ The easiest way to create faceted plots is to pass in ``row`` or ``col`` arguments to the xarray plotting methods/functions. This returns a :py:class:`xarray.plot.FacetGrid` object. .. jupyter-execute:: g_simple = t.plot(x="lon", y="lat", col="time", col_wrap=3); Faceting also works for line plots. .. 
jupyter-execute:: g_simple_line = t.isel(lat=slice(0, None, 4)).plot( x="lon", hue="lat", col="time", col_wrap=3 ); =============== 4 dimensional =============== For 4 dimensional arrays we can use the rows and columns of the grids. Here we create a 4 dimensional array by taking the original data and adding a fixed amount. Now we can see how the temperature maps would compare if one were much hotter. .. jupyter-execute:: t2 = t.isel(time=slice(0, 2)) t4d = xr.concat([t2, t2 + 40], pd.Index(["normal", "hot"], name="fourth_dim")) # This is a 4d array t4d.coords t4d.plot(x="lon", y="lat", col="time", row="fourth_dim"); ================ Other features ================ Faceted plotting supports other arguments common to xarray 2d plots. .. jupyter-execute:: hasoutliers = t.isel(time=slice(0, 5)).copy() hasoutliers[0, 0, 0] = -100 hasoutliers[-1, -1, -1] = 400 g = hasoutliers.plot.pcolormesh( x="lon", y="lat", col="time", col_wrap=3, robust=True, cmap="viridis", cbar_kwargs={"label": "this has outliers"}, ) =================== FacetGrid Objects =================== The object returned, ``g`` in the above examples, is a :py:class:`~xarray.plot.FacetGrid` object that links a :py:class:`DataArray` to a matplotlib figure with a particular structure. This object can be used to control the behavior of the multiple plots. It borrows an API and code from `Seaborn's FacetGrid `_. The structure is contained within the ``axs`` and ``name_dicts`` attributes, both 2d NumPy object arrays. .. jupyter-execute:: g.axs .. jupyter-execute:: g.name_dicts It's possible to select the :py:class:`xarray.DataArray` or :py:class:`xarray.Dataset` corresponding to the FacetGrid through the ``name_dicts``. .. jupyter-execute:: g.data.loc[g.name_dicts[0, 0]] Here is an example of using the lower level API and then modifying the axes after they have been plotted. .. jupyter-execute:: g = t.plot.imshow(x="lon", y="lat", col="time", col_wrap=3, robust=True) for i, ax in enumerate(g.axs.flat): ax.set_title("Air Temperature %d" % i) bottomright = g.axs[-1, -1] bottomright.annotate("bottom right", (240, 40)); :py:class:`~xarray.plot.FacetGrid` objects have methods that let you customize the automatically generated axis labels, axis ticks and plot titles. See :py:meth:`~xarray.plot.FacetGrid.set_titles`, :py:meth:`~xarray.plot.FacetGrid.set_xlabels`, :py:meth:`~xarray.plot.FacetGrid.set_ylabels` and :py:meth:`~xarray.plot.FacetGrid.set_ticks` for more information. Plotting functions can be applied to each subset of the data by calling :py:meth:`~xarray.plot.FacetGrid.map_dataarray` or to each subplot by calling :py:meth:`~xarray.plot.FacetGrid.map`. TODO: add an example of using the ``map`` method to plot dataset variables (e.g., with ``plt.quiver``). .. _plot-dataset: Datasets -------- Xarray has limited support for plotting Dataset variables against each other. Consider this dataset .. jupyter-execute:: ds = xr.tutorial.scatter_example_dataset(seed=42) ds Scatter ~~~~~~~ Let's plot the ``A`` DataArray as a function of the ``y`` coord .. jupyter-execute:: with xr.set_options(display_expand_data=False): display(ds.A) .. jupyter-execute:: ds.A.plot.scatter(x="y"); Same plot can be displayed using the dataset: .. jupyter-execute:: ds.plot.scatter(x="y", y="A"); Now suppose we want to scatter the ``A`` DataArray against the ``B`` DataArray .. jupyter-execute:: ds.plot.scatter(x="A", y="B"); The ``hue`` kwarg lets you vary the color by variable value .. 
jupyter-execute:: ds.plot.scatter(x="A", y="B", hue="w"); You can force a legend instead of a colorbar by setting ``add_legend=True, add_colorbar=False``. .. jupyter-execute:: ds.plot.scatter(x="A", y="B", hue="w", add_legend=True, add_colorbar=False); .. jupyter-execute:: ds.plot.scatter(x="A", y="B", hue="w", add_legend=False, add_colorbar=True); The ``markersize`` kwarg lets you vary the point's size by variable value. You can additionally pass ``size_norm`` to control how the variable's values are mapped to point sizes. .. jupyter-execute:: ds.plot.scatter(x="A", y="B", hue="y", markersize="z"); The ``z`` kwarg lets you plot the data along the z-axis as well. .. jupyter-execute:: ds.plot.scatter(x="A", y="B", z="z", hue="y", markersize="x"); Faceting is also possible .. jupyter-execute:: ds.plot.scatter(x="A", y="B", hue="y", markersize="x", row="x", col="w"); And adding the z-axis .. jupyter-execute:: ds.plot.scatter(x="A", y="B", z="z", hue="y", markersize="x", row="x", col="w"); For more advanced scatter plots, we recommend converting the relevant data variables to a pandas DataFrame and using the extensive plotting capabilities of ``seaborn``. Quiver ~~~~~~ Visualizing vector fields is supported with quiver plots: .. jupyter-execute:: ds.isel(w=1, z=1).plot.quiver(x="x", y="y", u="A", v="B"); where ``u`` and ``v`` denote the x and y direction components of the arrow vectors. Again, faceting is also possible: .. jupyter-execute:: ds.plot.quiver(x="x", y="y", u="A", v="B", col="w", row="z", scale=4); ``scale`` is required for faceted quiver plots. The scale determines the number of data units per arrow length unit, i.e. a smaller scale parameter makes the arrow longer. Streamplot ~~~~~~~~~~ Visualizing vector fields is also supported with streamline plots: .. jupyter-execute:: ds.isel(w=1, z=1).plot.streamplot(x="x", y="y", u="A", v="B"); where ``u`` and ``v`` denote the x and y direction components of the vectors tangent to the streamlines. Again, faceting is also possible: .. jupyter-execute:: ds.plot.streamplot(x="x", y="y", u="A", v="B", col="w", row="z"); .. _plot-maps: Maps ---- To follow this section you'll need to have Cartopy installed and working. This script will plot the air temperature on a map. .. jupyter-execute:: :stderr: air = xr.tutorial.open_dataset("air_temperature").air p = air.isel(time=0).plot( subplot_kws=dict(projection=ccrs.Orthographic(-80, 35), facecolor="gray"), transform=ccrs.PlateCarree(), ) p.axes.set_global() p.axes.coastlines(); When faceting on maps, the projection can be transferred to the ``plot`` function using the ``subplot_kws`` keyword. The axes for the subplots created by faceting are accessible in the object returned by ``plot``: .. jupyter-execute:: p = air.isel(time=[0, 4]).plot( transform=ccrs.PlateCarree(), col="time", subplot_kws={"projection": ccrs.Orthographic(-80, 35)}, ) for ax in p.axs.flat: ax.coastlines() ax.gridlines() Details ------- Ways to Use ~~~~~~~~~~~ There are three ways to use the xarray plotting functionality: 1. Use ``plot`` as a convenience method for a DataArray. 2. Access a specific plotting method from the ``plot`` attribute of a DataArray. 3. Directly from the xarray plot submodule. These are provided for user convenience; they all call the same code. .. jupyter-execute:: da = xr.DataArray(range(5)) fig, axs = plt.subplots(ncols=2, nrows=2) da.plot(ax=axs[0, 0]) da.plot.line(ax=axs[0, 1]) xr.plot.plot(da, ax=axs[1, 0]) xr.plot.line(da, ax=axs[1, 1]); Here the output is the same. 
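As a quick check (a hedged sketch, not one of the executed examples in this guide), you can confirm that the different entry points draw the same thing by comparing the matplotlib artists they return:

.. code-block:: python

    import matplotlib.pyplot as plt
    import xarray as xr

    da = xr.DataArray(range(5))
    fig, axs = plt.subplots(ncols=2)
    (line_a,) = da.plot(ax=axs[0])  # convenience method
    (line_b,) = xr.plot.line(da, ax=axs[1])  # plot submodule
    # both calls return matplotlib Line2D objects drawn from the same values
    assert (line_a.get_ydata() == line_b.get_ydata()).all()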
Since the data is 1 dimensional the line plot was used. The convenience method :py:meth:`xarray.DataArray.plot` dispatches to an appropriate plotting function based on the dimensions of the ``DataArray`` and whether the coordinates are sorted and uniformly spaced. This table describes what gets plotted: =============== =========================== Dimensions Plotting function --------------- --------------------------- 1 :py:func:`xarray.plot.line` 2 :py:func:`xarray.plot.pcolormesh` Anything else :py:func:`xarray.plot.hist` =============== =========================== Coordinates ~~~~~~~~~~~ If you'd like to find out what's really going on in the coordinate system, read on. .. jupyter-execute:: a0 = xr.DataArray(np.zeros((4, 3, 2)), dims=("y", "x", "z"), name="temperature") a0[0, 0, 0] = 1 a = a0.isel(z=0) a The plot will produce an image corresponding to the values of the array. Hence the top left pixel will be a different color than the others. Before reading on, you may want to look at the coordinates and think carefully about what the limits, labels, and orientation for each of the axes should be. .. jupyter-execute:: a.plot(); It may seem strange that the values on the y axis are decreasing with -0.5 on the top. This is because the pixels are centered over their coordinates, and the axis labels and ranges correspond to the values of the coordinates. Multidimensional coordinates ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ See also: :ref:`/examples/multidimensional-coords.ipynb`. You can plot irregular grids defined by multidimensional coordinates with xarray, but you'll have to tell the plot function to use these coordinates instead of the default ones: .. jupyter-execute:: lon, lat = np.meshgrid(np.linspace(-20, 20, 5), np.linspace(0, 30, 4)) lon += lat / 10 lat += lon / 10 da = xr.DataArray( np.arange(20).reshape(4, 5), dims=["y", "x"], coords={"lat": (("y", "x"), lat), "lon": (("y", "x"), lon)}, ) da.plot.pcolormesh(x="lon", y="lat"); Note that in this case, xarray still follows the pixel centered convention. This might be undesirable in some cases, for example when your data is defined on a polar projection (:issue:`781`). This is why the default is to not follow this convention when plotting on a map: .. jupyter-execute:: :stderr: ax = plt.subplot(projection=ccrs.PlateCarree()) da.plot.pcolormesh(x="lon", y="lat", ax=ax) ax.scatter(lon, lat, transform=ccrs.PlateCarree()) ax.coastlines() ax.gridlines(draw_labels=True); You can however decide to infer the cell boundaries and use the ``infer_intervals`` keyword: .. jupyter-execute:: ax = plt.subplot(projection=ccrs.PlateCarree()) da.plot.pcolormesh(x="lon", y="lat", ax=ax, infer_intervals=True) ax.scatter(lon, lat, transform=ccrs.PlateCarree()) ax.coastlines() ax.gridlines(draw_labels=True); .. note:: The data model of xarray does not support datasets with `cell boundaries`_ yet. If you want to use these coordinates, you'll have to make the plots outside the xarray framework. .. _cell boundaries: https://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#cell-boundaries One can also make line plots with multidimensional coordinates. In this case, ``hue`` must be a dimension name, not a coordinate name. .. jupyter-execute:: f, ax = plt.subplots(2, 1) da.plot.line(x="lon", hue="y", ax=ax[0]) da.plot.line(x="lon", hue="x", ax=ax[1]); python-xarray-2026.01.0/doc/user-guide/reshaping.rst0000664000175000017500000002641515136607163022403 0ustar alastairalastair.. 
_reshape: ############################### Reshaping and reorganizing data ############################### Reshaping and reorganizing data refers to the process of changing the structure or organization of data by modifying dimensions, array shapes, order of values, or indexes. Xarray provides several methods to accomplish these tasks. These methods are particularly useful for reshaping xarray objects for use in machine learning packages, such as scikit-learn, that usually require two-dimensional numpy arrays as inputs. Reshaping can also be required before passing data to external visualization tools, for example geospatial data might expect input organized into a particular format corresponding to stacks of satellite images. Importing the library --------------------- .. jupyter-execute:: :hide-code: import numpy as np import pandas as pd import xarray as xr np.random.seed(123456) # Use defaults so we don't get gridlines in generated docs import matplotlib as mpl mpl.rcdefaults() Reordering dimensions --------------------- To reorder dimensions on a :py:class:`~xarray.DataArray` or across all variables on a :py:class:`~xarray.Dataset`, use :py:meth:`~xarray.DataArray.transpose`. An ellipsis (`...`) can be used to represent all other dimensions: .. jupyter-execute:: ds = xr.Dataset({"foo": (("x", "y", "z"), [[[42]]]), "bar": (("y", "z"), [[24]])}) ds.transpose("y", "z", "x") # equivalent to ds.transpose(..., "x") .. jupyter-execute:: ds.transpose() # reverses all dimensions Expand and squeeze dimensions ----------------------------- To expand a :py:class:`~xarray.DataArray` or all variables on a :py:class:`~xarray.Dataset` along a new dimension, use :py:meth:`~xarray.DataArray.expand_dims` .. jupyter-execute:: expanded = ds.expand_dims("w") expanded This method attaches a new dimension with size 1 to all data variables. To remove such a size-1 dimension from the :py:class:`~xarray.DataArray` or :py:class:`~xarray.Dataset`, use :py:meth:`~xarray.DataArray.squeeze` .. jupyter-execute:: expanded.squeeze("w") Converting between datasets and arrays -------------------------------------- To convert from a Dataset to a DataArray, use :py:meth:`~xarray.Dataset.to_dataarray`: .. jupyter-execute:: arr = ds.to_dataarray() arr This method broadcasts all data variables in the dataset against each other, then concatenates them along a new dimension into a new array while preserving coordinates. To convert back from a DataArray to a Dataset, use :py:meth:`~xarray.DataArray.to_dataset`: .. jupyter-execute:: arr.to_dataset(dim="variable") The broadcasting behavior of ``to_dataarray`` means that the resulting array includes the union of data variable dimensions: .. jupyter-execute:: ds2 = xr.Dataset({"a": 0, "b": ("x", [3, 4, 5])}) # the input dataset has 4 elements ds2 .. jupyter-execute:: # the resulting array has 6 elements ds2.to_dataarray() Otherwise, the result could not be represented as an orthogonal array. If you use ``to_dataset`` without supplying the ``dim`` argument, the DataArray will be converted into a Dataset of one variable: .. jupyter-execute:: arr.to_dataset(name="combined") .. _reshape.stack: Stack and unstack ----------------- As part of xarray's nascent support for :py:class:`pandas.MultiIndex`, we have implemented :py:meth:`~xarray.DataArray.stack` and :py:meth:`~xarray.DataArray.unstack` method, for combining or splitting dimensions: .. 
jupyter-execute:: array = xr.DataArray( np.random.randn(2, 3), coords=[("x", ["a", "b"]), ("y", [0, 1, 2])] ) stacked = array.stack(z=("x", "y")) stacked .. jupyter-execute:: stacked.unstack("z") As elsewhere in xarray, an ellipsis (`...`) can be used to represent all unlisted dimensions: .. jupyter-execute:: stacked = array.stack(z=[..., "x"]) stacked These methods are modeled on the :py:class:`pandas.DataFrame` methods of the same name, although in xarray they always create new dimensions rather than adding to the existing index or columns. Like :py:meth:`DataFrame.unstack`, xarray's ``unstack`` always succeeds, even if the multi-index being unstacked does not contain all possible levels. Missing levels are filled in with ``NaN`` in the resulting object: .. jupyter-execute:: stacked2 = stacked[::2] stacked2 .. jupyter-execute:: stacked2.unstack("z") However, xarray's ``stack`` has an important difference from pandas: unlike pandas, it does not automatically drop missing values. Compare: .. jupyter-execute:: array = xr.DataArray([[np.nan, 1], [2, 3]], dims=["x", "y"]) array.stack(z=("x", "y")) .. jupyter-execute:: array.to_pandas().stack() We departed from pandas's behavior here because predictable shapes for new array dimensions is necessary for :ref:`dask`. .. _reshape.stacking_different: Stacking different variables together ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ These stacking and unstacking operations are particularly useful for reshaping xarray objects for use in machine learning packages, such as `scikit-learn `_, that usually require two-dimensional numpy arrays as inputs. For datasets with only one variable, we only need ``stack`` and ``unstack``, but combining multiple variables in a :py:class:`xarray.Dataset` is more complicated. If the variables in the dataset have matching numbers of dimensions, we can call :py:meth:`~xarray.Dataset.to_dataarray` and then stack along the the new coordinate. But :py:meth:`~xarray.Dataset.to_dataarray` will broadcast the dataarrays together, which will effectively tile the lower dimensional variable along the missing dimensions. The method :py:meth:`xarray.Dataset.to_stacked_array` allows combining variables of differing dimensions without this wasteful copying while :py:meth:`xarray.DataArray.to_unstacked_dataset` reverses this operation. Just as with :py:meth:`xarray.Dataset.stack` the stacked coordinate is represented by a :py:class:`pandas.MultiIndex` object. These methods are used like this: .. jupyter-execute:: data = xr.Dataset( data_vars={"a": (("x", "y"), [[0, 1, 2], [3, 4, 5]]), "b": ("x", [6, 7])}, coords={"y": ["u", "v", "w"]}, ) data .. jupyter-execute:: stacked = data.to_stacked_array("z", sample_dims=["x"]) stacked .. jupyter-execute:: unstacked = stacked.to_unstacked_dataset("z") unstacked In this example, ``stacked`` is a two dimensional array that we can easily pass to a scikit-learn or another generic numerical method. .. note:: Unlike with ``stack``, in ``to_stacked_array``, the user specifies the dimensions they **do not** want stacked. For a machine learning task, these unstacked dimensions can be interpreted as the dimensions over which samples are drawn, whereas the stacked coordinates are the features. Naturally, all variables should possess these sampling dimensions. .. 
_reshape.set_index: Set and reset index ------------------- Complementary to stack / unstack, xarray's ``.set_index``, ``.reset_index`` and ``.reorder_levels`` allow easy manipulation of ``DataArray`` or ``Dataset`` multi-indexes without modifying the data and its dimensions. You can create a multi-index from several 1-dimensional variables and/or coordinates using :py:meth:`~xarray.DataArray.set_index`: .. jupyter-execute:: da = xr.DataArray( np.random.rand(4), coords={ "band": ("x", ["a", "a", "b", "b"]), "wavenumber": ("x", np.linspace(200, 400, 4)), }, dims="x", ) da .. jupyter-execute:: mda = da.set_index(x=["band", "wavenumber"]) mda These coordinates can now be used for indexing, e.g., .. jupyter-execute:: mda.sel(band="a") Conversely, you can use :py:meth:`~xarray.DataArray.reset_index` to extract multi-index levels as coordinates (this is mainly useful for serialization): .. jupyter-execute:: mda.reset_index("x") :py:meth:`~xarray.DataArray.reorder_levels` allows changing the order of multi-index levels: .. jupyter-execute:: mda.reorder_levels(x=["wavenumber", "band"]) As of xarray v0.9 coordinate labels for each dimension are optional. You can also use ``.set_index`` / ``.reset_index`` to add / remove labels for one or several dimensions: .. jupyter-execute:: array = xr.DataArray([1, 2, 3], dims="x") array .. jupyter-execute:: array["c"] = ("x", ["a", "b", "c"]) array.set_index(x="c") .. jupyter-execute:: array = array.set_index(x="c") array = array.reset_index("x", drop=True) .. _reshape.shift_and_roll: Shift and roll -------------- To adjust coordinate labels, you can use the :py:meth:`~xarray.Dataset.shift` and :py:meth:`~xarray.Dataset.roll` methods: .. jupyter-execute:: array = xr.DataArray([1, 2, 3, 4], dims="x") array.shift(x=2) .. jupyter-execute:: array.roll(x=2, roll_coords=True) .. _reshape.sort: Sort ---- One may sort a DataArray/Dataset via :py:meth:`~xarray.DataArray.sortby` and :py:meth:`~xarray.Dataset.sortby`. The input can be an individual or list of 1D ``DataArray`` objects: .. jupyter-execute:: ds = xr.Dataset( { "A": (("x", "y"), [[1, 2], [3, 4]]), "B": (("x", "y"), [[5, 6], [7, 8]]), }, coords={"x": ["b", "a"], "y": [1, 0]}, ) dax = xr.DataArray([100, 99], [("x", [0, 1])]) day = xr.DataArray([90, 80], [("y", [0, 1])]) ds.sortby([day, dax]) As a shortcut, you can refer to existing coordinates by name: .. jupyter-execute:: ds.sortby("x") .. jupyter-execute:: ds.sortby(["y", "x"]) .. jupyter-execute:: ds.sortby(["y", "x"], ascending=False) .. _reshape.coarsen: Reshaping via coarsen --------------------- Whilst :py:class:`~xarray.DataArray.coarsen` is normally used for reducing your data's resolution by applying a reduction function (see the :ref:`page on computation`), it can also be used to reorganise your data without applying a computation via :py:meth:`~xarray.computation.rolling.DataArrayCoarsen.construct`. Taking our example tutorial air temperature dataset over the Northern US .. jupyter-execute:: air = xr.tutorial.open_dataset("air_temperature")["air"] air.isel(time=0).plot(x="lon", y="lat"); we can split this up into sub-regions of size ``(9, 18)`` points using :py:meth:`~xarray.computation.rolling.DataArrayCoarsen.construct`: .. jupyter-execute:: regions = air.coarsen(lat=9, lon=18, boundary="pad").construct( lon=("x_coarse", "x_fine"), lat=("y_coarse", "y_fine") ) with xr.set_options(display_expand_data=False): regions 9 new regions have been created, each of size 9 by 18 points. 
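As a rough sanity check (a sketch reusing the ``regions`` object built above; the exact numbers assume the tutorial dataset's 25 x 53 latitude/longitude grid), the coarse/fine split shows up directly in the sizes:

.. code-block:: python

    # regions carries both the coarse (block index) and fine (within-block)
    # dimensions created by construct()
    print(regions.sizes)
    # expected to look roughly like:
    # Frozen({'time': 2920, 'y_coarse': 3, 'y_fine': 9, 'x_coarse': 3, 'x_fine': 18})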
The ``boundary="pad"`` kwarg ensured that all regions are the same size even though the data does not evenly divide into these sizes. By plotting these 9 regions together via :ref:`faceting` we can see how they relate to the original data. .. jupyter-execute:: regions.isel(time=0).plot( x="x_fine", y="y_fine", col="x_coarse", row="y_coarse", yincrease=False ); We are now free to easily apply any custom computation to each coarsened region of our new dataarray. This would involve specifying that applied functions should act over the ``"x_fine"`` and ``"y_fine"`` dimensions, but broadcast over the ``"x_coarse"`` and ``"y_coarse"`` dimensions. python-xarray-2026.01.0/doc/user-guide/computation.rst0000664000175000017500000007152315136607163022765 0ustar alastairalastair.. currentmodule:: xarray .. _compute: ########### Computation ########### The labels associated with :py:class:`~xarray.DataArray` and :py:class:`~xarray.Dataset` objects enables some powerful shortcuts for computation, notably including aggregation and broadcasting by dimension names. Basic array math ================ Arithmetic operations with a single DataArray automatically vectorize (like numpy) over all array values: .. jupyter-execute:: :hide-code: :hide-output: import numpy as np import pandas as pd import xarray as xr np.random.seed(123456) %xmode minimal .. jupyter-execute:: arr = xr.DataArray( np.random.default_rng(0).random((2, 3)), [("x", ["a", "b"]), ("y", [10, 20, 30])], ) arr - 3 .. jupyter-execute:: abs(arr) You can also use any of numpy's or scipy's many `ufunc`__ functions directly on a DataArray: __ https://numpy.org/doc/stable/reference/ufuncs.html .. jupyter-execute:: np.sin(arr) Use :py:func:`~xarray.where` to conditionally switch between values: .. jupyter-execute:: xr.where(arr > 0, "positive", "negative") Use ``@`` to compute the :py:func:`~xarray.dot` product: .. jupyter-execute:: arr @ arr Data arrays also implement many :py:class:`numpy.ndarray` methods: .. jupyter-execute:: arr.round(2) .. jupyter-execute:: arr.T .. jupyter-execute:: intarr = xr.DataArray([0, 1, 2, 3, 4, 5]) intarr << 2 # only supported for int types .. jupyter-execute:: intarr >> 1 .. _missing_values: Missing values ============== Xarray represents missing values using the "NaN" (Not a Number) value from NumPy, which is a special floating-point value that indicates a value that is undefined or unrepresentable. There are several methods for handling missing values in xarray: Xarray objects borrow the :py:meth:`~xarray.DataArray.isnull`, :py:meth:`~xarray.DataArray.notnull`, :py:meth:`~xarray.DataArray.count`, :py:meth:`~xarray.DataArray.dropna`, :py:meth:`~xarray.DataArray.fillna`, :py:meth:`~xarray.DataArray.ffill`, and :py:meth:`~xarray.DataArray.bfill` methods for working with missing data from pandas: :py:meth:`~xarray.DataArray.isnull` is a method in xarray that can be used to check for missing or null values in an xarray object. It returns a new xarray object with the same dimensions as the original object, but with boolean values indicating where **missing values** are present. .. jupyter-execute:: x = xr.DataArray([0, 1, np.nan, np.nan, 2], dims=["x"]) x.isnull() In this example, the third and fourth elements of 'x' are NaN, so the resulting :py:class:`~xarray.DataArray` object has 'True' values in the third and fourth positions and 'False' values in the other positions. :py:meth:`~xarray.DataArray.notnull` is a method in xarray that can be used to check for non-missing or non-null values in an xarray object. 
It returns a new xarray object with the same dimensions as the original object, but with boolean values indicating where **non-missing values** are present. .. jupyter-execute:: x = xr.DataArray([0, 1, np.nan, np.nan, 2], dims=["x"]) x.notnull() In this example, the first two and the last elements of x are not NaN, so the resulting :py:class:`~xarray.DataArray` object has 'True' values in these positions, and 'False' values in the third and fourth positions where NaN is located. :py:meth:`~xarray.DataArray.count` is a method in xarray that can be used to count the number of non-missing values along one or more dimensions of an xarray object. It returns a new xarray object with the same dimensions as the original object, but with each element replaced by the count of non-missing values along the specified dimensions. .. jupyter-execute:: x = xr.DataArray([0, 1, np.nan, np.nan, 2], dims=["x"]) x.count() In this example, 'x' has five elements, but two of them are NaN, so the resulting :py:class:`~xarray.DataArray` object having a single element containing the value '3', which represents the number of non-null elements in x. :py:meth:`~xarray.DataArray.dropna` is a method in xarray that can be used to remove missing or null values from an xarray object. It returns a new xarray object with the same dimensions as the original object, but with missing values removed. .. jupyter-execute:: x = xr.DataArray([0, 1, np.nan, np.nan, 2], dims=["x"]) x.dropna(dim="x") In this example, on calling x.dropna(dim="x") removes any missing values and returns a new :py:class:`~xarray.DataArray` object with only the non-null elements [0, 1, 2] of 'x', in the original order. :py:meth:`~xarray.DataArray.fillna` is a method in xarray that can be used to fill missing or null values in an xarray object with a specified value or method. It returns a new xarray object with the same dimensions as the original object, but with missing values filled. .. jupyter-execute:: x = xr.DataArray([0, 1, np.nan, np.nan, 2], dims=["x"]) x.fillna(-1) In this example, there are two NaN values in 'x', so calling x.fillna(-1) replaces these values with -1 and returns a new :py:class:`~xarray.DataArray` object with five elements, containing the values [0, 1, -1, -1, 2] in the original order. :py:meth:`~xarray.DataArray.ffill` is a method in xarray that can be used to forward fill (or fill forward) missing values in an xarray object along one or more dimensions. It returns a new xarray object with the same dimensions as the original object, but with missing values replaced by the last non-missing value along the specified dimensions. .. jupyter-execute:: x = xr.DataArray([0, 1, np.nan, np.nan, 2], dims=["x"]) x.ffill("x") In this example, there are two NaN values in 'x', so calling x.ffill("x") fills these values with the last non-null value in the same dimension, which are 0 and 1, respectively. The resulting :py:class:`~xarray.DataArray` object has five elements, containing the values [0, 1, 1, 1, 2] in the original order. :py:meth:`~xarray.DataArray.bfill` is a method in xarray that can be used to backward fill (or fill backward) missing values in an xarray object along one or more dimensions. It returns a new xarray object with the same dimensions as the original object, but with missing values replaced by the next non-missing value along the specified dimensions. .. 
jupyter-execute:: x = xr.DataArray([0, 1, np.nan, np.nan, 2], dims=["x"]) x.bfill("x") In this example, there are two NaN values in 'x', so calling x.bfill("x") fills these values with the next non-null value in the same dimension, which are 2 and 2, respectively. The resulting :py:class:`~xarray.DataArray` object has five elements, containing the values [0, 1, 2, 2, 2] in the original order. Like pandas, xarray uses the float value ``np.nan`` (not-a-number) to represent missing values. Xarray objects also have an :py:meth:`~xarray.DataArray.interpolate_na` method for filling missing values via 1D interpolation. It returns a new xarray object with the same dimensions as the original object, but with missing values interpolated. .. jupyter-execute:: x = xr.DataArray( [0, 1, np.nan, np.nan, 2], dims=["x"], coords={"xx": xr.Variable("x", [0, 1, 1.1, 1.9, 3])}, ) x.interpolate_na(dim="x", method="linear", use_coordinate="xx") In this example, there are two NaN values in 'x', so calling x.interpolate_na(dim="x", method="linear", use_coordinate="xx") fills these values with interpolated values along the "x" dimension using linear interpolation based on the values of the xx coordinate. The resulting :py:class:`~xarray.DataArray` object has five elements, containing the values [0., 1., 1.05, 1.45, 2.] in the original order. Note that the interpolated values are calculated based on the values of the 'xx' coordinate, which has non-integer values, resulting in non-integer interpolated values. Note that xarray slightly diverges from the pandas ``interpolate`` syntax by providing the ``use_coordinate`` keyword which facilitates a clear specification of which values to use as the index in the interpolation. Xarray also provides the ``max_gap`` keyword argument to limit the interpolation to data gaps of length ``max_gap`` or smaller. See :py:meth:`~xarray.DataArray.interpolate_na` for more. .. _agg: Aggregation =========== Aggregation methods have been updated to take a ``dim`` argument instead of ``axis``. This allows for very intuitive syntax for aggregation methods that are applied along particular dimension(s): .. jupyter-execute:: arr.sum(dim="x") .. jupyter-execute:: arr.std(["x", "y"]) .. jupyter-execute:: arr.min() If you need to figure out the axis number for a dimension yourself (say, for wrapping code designed to work with numpy arrays), you can use the :py:meth:`~xarray.DataArray.get_axis_num` method: .. jupyter-execute:: arr.get_axis_num("y") These operations automatically skip missing values, like in pandas: .. jupyter-execute:: xr.DataArray([1, 2, np.nan, 3]).mean() If desired, you can disable this behavior by invoking the aggregation method with ``skipna=False``. .. _compute.rolling: Rolling window operations ========================= ``DataArray`` objects include a :py:meth:`~xarray.DataArray.rolling` method. This method supports rolling window aggregation: .. jupyter-execute:: arr = xr.DataArray(np.arange(0, 7.5, 0.5).reshape(3, 5), dims=("x", "y")) arr :py:meth:`~xarray.DataArray.rolling` is applied along one dimension using the name of the dimension as a key (e.g. ``y``) and the window size as the value (e.g. ``3``). We get back a ``Rolling`` object: .. jupyter-execute:: arr.rolling(y=3) Aggregation and summary methods can be applied directly to the ``Rolling`` object: .. jupyter-execute:: r = arr.rolling(y=3) r.reduce(np.std) .. 
jupyter-execute:: r.mean() Aggregation results are assigned the coordinate at the end of each window by default, but can be centered by passing ``center=True`` when constructing the ``Rolling`` object: .. jupyter-execute:: r = arr.rolling(y=3, center=True) r.mean() As can be seen above, aggregations of windows which overlap the border of the array produce ``nan``\s. Setting ``min_periods`` in the call to ``rolling`` changes the minimum number of observations within the window required to have a value when aggregating: .. jupyter-execute:: r = arr.rolling(y=3, min_periods=2) r.mean() .. jupyter-execute:: r = arr.rolling(y=3, center=True, min_periods=2) r.mean() From version 0.17, xarray supports multidimensional rolling, .. jupyter-execute:: r = arr.rolling(x=2, y=3, min_periods=2) r.mean() .. tip:: Note that rolling window aggregations are faster and use less memory when bottleneck_ is installed. This only applies to numpy-backed xarray objects with 1d-rolling. .. _bottleneck: https://github.com/pydata/bottleneck We can also manually iterate through ``Rolling`` objects: .. code:: python for label, arr_window in r: # arr_window is a view of x ... .. _compute.rolling_exp: While ``rolling`` provides a simple moving average, ``DataArray`` also supports an exponential moving average with :py:meth:`~xarray.DataArray.rolling_exp`. This is similar to pandas' ``ewm`` method. numbagg_ is required. .. _numbagg: https://github.com/numbagg/numbagg .. code:: python arr.rolling_exp(y=3).mean() The ``rolling_exp`` method takes a ``window_type`` kwarg, which can be ``'alpha'``, ``'com'`` (for ``center-of-mass``), ``'span'``, and ``'halflife'``. The default is ``span``. Finally, the rolling object has a ``construct`` method which returns a view of the original ``DataArray`` with the windowed dimension in the last position. You can use this for more advanced rolling operations such as strided rolling, windowed rolling, convolution, short-time FFT etc. .. jupyter-execute:: # rolling with 2-point stride rolling_da = r.construct(x="x_win", y="y_win", stride=2) rolling_da .. jupyter-execute:: rolling_da.mean(["x_win", "y_win"], skipna=False) Because the ``DataArray`` given by ``r.construct('window_dim')`` is a view of the original array, it is memory efficient. You can also use ``construct`` to compute a weighted rolling sum: .. jupyter-execute:: weight = xr.DataArray([0.25, 0.5, 0.25], dims=["window"]) arr.rolling(y=3).construct(y="window").dot(weight) .. note:: numpy's Nan-aggregation functions such as ``nansum`` copy the original array. In xarray, we internally use these functions in our aggregation methods (such as ``.sum()``) if ``skipna`` argument is not specified or set to True. This means ``rolling_da.mean('window_dim')`` is memory inefficient. To avoid this, use ``skipna=False`` as the above example. .. _compute.weighted: Weighted array reductions ========================= :py:class:`DataArray` and :py:class:`Dataset` objects include :py:meth:`DataArray.weighted` and :py:meth:`Dataset.weighted` array reduction methods. They currently support weighted ``sum``, ``mean``, ``std``, ``var`` and ``quantile``. .. jupyter-execute:: coords = dict(month=("month", [1, 2, 3])) prec = xr.DataArray([1.1, 1.0, 0.9], dims=("month",), coords=coords) weights = xr.DataArray([31, 28, 31], dims=("month",), coords=coords) Create a weighted object: .. jupyter-execute:: weighted_prec = prec.weighted(weights) weighted_prec Calculate the weighted sum: .. jupyter-execute:: weighted_prec.sum() Calculate the weighted mean: .. 
jupyter-execute:: weighted_prec.mean(dim="month") Calculate the weighted quantile: .. jupyter-execute:: weighted_prec.quantile(q=0.5, dim="month") The weighted sum corresponds to: .. jupyter-execute:: weighted_sum = (prec * weights).sum() weighted_sum the weighted mean to: .. jupyter-execute:: weighted_mean = weighted_sum / weights.sum() weighted_mean the weighted variance to: .. jupyter-execute:: weighted_var = weighted_prec.sum_of_squares() / weights.sum() weighted_var and the weighted standard deviation to: .. jupyter-execute:: weighted_std = np.sqrt(weighted_var) weighted_std However, the functions also take missing values in the data into account: .. jupyter-execute:: data = xr.DataArray([np.nan, 2, 4]) weights = xr.DataArray([8, 1, 1]) data.weighted(weights).mean() Using ``(data * weights).sum() / weights.sum()`` would (incorrectly) result in 0.6. If the weights add up to to 0, ``sum`` returns 0: .. jupyter-execute:: data = xr.DataArray([1.0, 1.0]) weights = xr.DataArray([-1.0, 1.0]) data.weighted(weights).sum() and ``mean``, ``std`` and ``var`` return ``nan``: .. jupyter-execute:: data.weighted(weights).mean() .. note:: ``weights`` must be a :py:class:`DataArray` and cannot contain missing values. Missing values can be replaced manually by ``weights.fillna(0)``. .. _compute.coarsen: Coarsen large arrays ==================== :py:class:`DataArray` and :py:class:`Dataset` objects include a :py:meth:`~xarray.DataArray.coarsen` and :py:meth:`~xarray.Dataset.coarsen` methods. This supports block aggregation along multiple dimensions, .. jupyter-execute:: x = np.linspace(0, 10, 300) t = pd.date_range("1999-12-15", periods=364) da = xr.DataArray( np.sin(x) * np.cos(np.linspace(0, 1, 364)[:, np.newaxis]), dims=["time", "x"], coords={"time": t, "x": x}, ) da In order to take a block mean for every 7 days along ``time`` dimension and every 2 points along ``x`` dimension, .. jupyter-execute:: da.coarsen(time=7, x=2).mean() :py:meth:`~xarray.DataArray.coarsen` raises a ``ValueError`` if the data length is not a multiple of the corresponding window size. You can choose ``boundary='trim'`` or ``boundary='pad'`` options for trimming the excess entries or padding ``nan`` to insufficient entries, .. jupyter-execute:: da.coarsen(time=30, x=2, boundary="trim").mean() If you want to apply a specific function to coordinate, you can pass the function or method name to ``coord_func`` option, .. jupyter-execute:: da.coarsen(time=7, x=2, coord_func={"time": "min"}).mean() You can also :ref:`use coarsen to reshape` without applying a computation. .. _compute.using_coordinates: Computation using Coordinates ============================= Xarray objects have some handy methods for the computation with their coordinates. :py:meth:`~xarray.DataArray.differentiate` computes derivatives by central finite differences using their coordinates, .. jupyter-execute:: a = xr.DataArray([0, 1, 2, 3], dims=["x"], coords=[[0.1, 0.11, 0.2, 0.3]]) a.differentiate("x") This method can be used also for multidimensional arrays, .. jupyter-execute:: a = xr.DataArray( np.arange(8).reshape(4, 2), dims=["x", "y"], coords={"x": [0.1, 0.11, 0.2, 0.3]} ) a.differentiate("x") :py:meth:`~xarray.DataArray.integrate` computes integration based on trapezoidal rule using their coordinates, .. jupyter-execute:: a.integrate("x") .. note:: These methods are limited to simple cartesian geometry. Differentiation and integration along multidimensional coordinate are not supported. .. 
_compute.polyfit: Fitting polynomials =================== Xarray objects provide an interface for performing linear or polynomial regressions using the least-squares method. :py:meth:`~xarray.DataArray.polyfit` computes the best fitting coefficients along a given dimension and for a given order, .. jupyter-execute:: x = xr.DataArray(np.arange(10), dims=["x"], name="x") a = xr.DataArray(3 + 4 * x, dims=["x"], coords={"x": x}) out = a.polyfit(dim="x", deg=1, full=True) out The method outputs a dataset containing the coefficients (and more if ``full=True``). The inverse operation is done with :py:meth:`~xarray.polyval`, .. jupyter-execute:: xr.polyval(coord=x, coeffs=out.polyfit_coefficients) .. note:: These methods replicate the behaviour of :py:func:`numpy.polyfit` and :py:func:`numpy.polyval`. .. _compute.curvefit: Fitting arbitrary functions =========================== Xarray objects also provide an interface for fitting more complex functions using :py:func:`scipy.optimize.curve_fit`. :py:meth:`~xarray.DataArray.curvefit` accepts user-defined functions and can fit along multiple coordinates. For example, we can fit a relationship between two ``DataArray`` objects, maintaining a unique fit at each spatial coordinate but aggregating over the time dimension: .. jupyter-execute:: def exponential(x, a, xc): return np.exp((x - xc) / a) x = np.arange(-5, 5, 0.1) t = np.arange(-5, 5, 0.1) X, T = np.meshgrid(x, t) Z1 = np.random.uniform(low=-5, high=5, size=X.shape) Z2 = exponential(Z1, 3, X) Z3 = exponential(Z1, 1, -X) ds = xr.Dataset( data_vars=dict( var1=(["t", "x"], Z1), var2=(["t", "x"], Z2), var3=(["t", "x"], Z3) ), coords={"t": t, "x": x}, ) ds[["var2", "var3"]].curvefit( coords=ds.var1, func=exponential, reduce_dims="t", bounds={"a": (0.5, 5), "xc": (-5, 5)}, ) We can also fit multi-dimensional functions, and even use a wrapper function to simultaneously fit a summation of several functions, such as this field containing two gaussian peaks: .. jupyter-execute:: def gaussian_2d(coords, a, xc, yc, xalpha, yalpha): x, y = coords z = a * np.exp( -np.square(x - xc) / 2 / np.square(xalpha) - np.square(y - yc) / 2 / np.square(yalpha) ) return z def multi_peak(coords, *args): z = np.zeros(coords[0].shape) for i in range(len(args) // 5): z += gaussian_2d(coords, *args[i * 5 : i * 5 + 5]) return z x = np.arange(-5, 5, 0.1) y = np.arange(-5, 5, 0.1) X, Y = np.meshgrid(x, y) n_peaks = 2 names = ["a", "xc", "yc", "xalpha", "yalpha"] names = [f"{name}{i}" for i in range(n_peaks) for name in names] Z = gaussian_2d((X, Y), 3, 1, 1, 2, 1) + gaussian_2d((X, Y), 2, -1, -2, 1, 1) Z += np.random.normal(scale=0.1, size=Z.shape) da = xr.DataArray(Z, dims=["y", "x"], coords={"y": y, "x": x}) da.curvefit( coords=["x", "y"], func=multi_peak, param_names=names, kwargs={"maxfev": 10000}, ) .. note:: This method replicates the behavior of :py:func:`scipy.optimize.curve_fit`. .. _compute.broadcasting: Broadcasting by dimension name ============================== ``DataArray`` objects automatically align themselves ("broadcasting" in the numpy parlance) by dimension name instead of axis order. With xarray, you do not need to transpose arrays or insert dimensions of length 1 to get array operations to work, as commonly done in numpy with :py:func:`numpy.reshape` or :py:data:`numpy.newaxis`. This is best illustrated by a few examples. Consider two one-dimensional arrays with different sizes aligned along different dimensions: .. jupyter-execute:: a = xr.DataArray([1, 2], [("x", ["a", "b"])]) a .. 
jupyter-execute:: b = xr.DataArray([-1, -2, -3], [("y", [10, 20, 30])]) b With xarray, we can apply binary mathematical operations to these arrays, and their dimensions are expanded automatically: .. jupyter-execute:: a * b Moreover, dimensions are always reordered to the order in which they first appeared: .. jupyter-execute:: c = xr.DataArray(np.arange(6).reshape(3, 2), [b["y"], a["x"]]) c .. jupyter-execute:: a + c This means, for example, that you always subtract an array from its transpose: .. jupyter-execute:: c - c.T You can explicitly broadcast xarray data structures by using the :py:func:`~xarray.broadcast` function: .. jupyter-execute:: a2, b2 = xr.broadcast(a, b) a2 .. jupyter-execute:: b2 .. _math automatic alignment: Automatic alignment =================== Xarray enforces alignment between *index* :ref:`coordinates` (that is, coordinates with the same name as a dimension, marked by ``*``) on objects used in binary operations. Similarly to pandas, this alignment is automatic for arithmetic on binary operations. The default result of a binary operation is by the *intersection* (not the union) of coordinate labels: .. jupyter-execute:: arr = xr.DataArray(np.arange(3), [("x", range(3))]) arr + arr[:-1] If coordinate values for a dimension are missing on either argument, all matching dimensions must have the same size: .. jupyter-execute:: :raises: arr + xr.DataArray([1, 2], dims="x") However, one can explicitly change this default automatic alignment type ("inner") via :py:func:`~xarray.set_options()` in context manager: .. jupyter-execute:: with xr.set_options(arithmetic_join="outer"): arr + arr[:1] arr + arr[:1] Before loops or performance critical code, it's a good idea to align arrays explicitly (e.g., by putting them in the same Dataset or using :py:func:`~xarray.align`) to avoid the overhead of repeated alignment with each operation. See :ref:`align and reindex` for more details. .. note:: There is no automatic alignment between arguments when performing in-place arithmetic operations such as ``+=``. You will need to use :ref:`manual alignment`. This ensures in-place arithmetic never needs to modify data types. .. _coordinates math: Coordinates =========== Although index coordinates are aligned, other coordinates are not, and if their values conflict, they will be dropped. This is necessary, for example, because indexing turns 1D coordinates into scalar coordinates: .. jupyter-execute:: arr[0] .. jupyter-execute:: arr[1] .. jupyter-execute:: # notice that the scalar coordinate 'x' is silently dropped arr[1] - arr[0] Still, xarray will persist other coordinates in arithmetic, as long as there are no conflicting values: .. jupyter-execute:: # only one argument has the 'x' coordinate arr[0] + 1 .. jupyter-execute:: # both arguments have the same 'x' coordinate arr[0] - arr[0] Math with datasets ================== Datasets support arithmetic operations by automatically looping over all data variables: .. jupyter-execute:: ds = xr.Dataset( { "x_and_y": (("x", "y"), np.random.randn(3, 5)), "x_only": ("x", np.random.randn(3)), }, coords=arr.coords, ) ds > 0 Datasets support most of the same methods found on data arrays: .. jupyter-execute:: ds.mean(dim="x") .. jupyter-execute:: abs(ds) Datasets also support NumPy ufuncs (requires NumPy v1.13 or newer), or alternatively you can use :py:meth:`~xarray.Dataset.map` to map a function to each variable in a dataset: .. 
jupyter-execute:: np.sin(ds) # equivalent to ds.map(np.sin) Datasets also use looping over variables for *broadcasting* in binary arithmetic. You can do arithmetic between any ``DataArray`` and a dataset: .. jupyter-execute:: ds + arr Arithmetic between two datasets matches data variables of the same name: .. jupyter-execute:: ds2 = xr.Dataset({"x_and_y": 0, "x_only": 100}) ds - ds2 Similarly to index based alignment, the result has the intersection of all matching data variables. .. _compute.wrapping-custom: Wrapping custom computation =========================== It doesn't always make sense to do computation directly with xarray objects: - In the inner loop of performance limited code, using xarray can add considerable overhead compared to using NumPy or native Python types. This is particularly true when working with scalars or small arrays (less than ~1e6 elements). Keeping track of labels and ensuring their consistency adds overhead, and xarray's core itself is not especially fast, because it's written in Python rather than a compiled language like C. Also, xarray's high level label-based APIs removes low-level control over how operations are implemented. - Even if speed doesn't matter, it can be important to wrap existing code, or to support alternative interfaces that don't use xarray objects. For these reasons, it is often well-advised to write low-level routines that work with NumPy arrays, and to wrap these routines to work with xarray objects. However, adding support for labels on both :py:class:`~xarray.Dataset` and :py:class:`~xarray.DataArray` can be a bit of a chore. To make this easier, xarray supplies the :py:func:`~xarray.apply_ufunc` helper function, designed for wrapping functions that support broadcasting and vectorization on unlabeled arrays in the style of a NumPy `universal function `_ ("ufunc" for short). ``apply_ufunc`` takes care of everything needed for an idiomatic xarray wrapper, including alignment, broadcasting, looping over ``Dataset`` variables (if needed), and merging of coordinates. In fact, many internal xarray functions/methods are written using ``apply_ufunc``. Simple functions that act independently on each value should work without any additional arguments: .. jupyter-execute:: squared_error = lambda x, y: (x - y) ** 2 arr1 = xr.DataArray([0, 1, 2, 3], dims="x") xr.apply_ufunc(squared_error, arr1, 1) For using more complex operations that consider some array values collectively, it's important to understand the idea of "core dimensions" from NumPy's `generalized ufuncs `_. Core dimensions are defined as dimensions that should *not* be broadcast over. Usually, they correspond to the fundamental dimensions over which an operation is defined, e.g., the summed axis in ``np.sum``. A good clue that core dimensions are needed is the presence of an ``axis`` argument on the corresponding NumPy function. With ``apply_ufunc``, core dimensions are recognized by name, and then moved to the last dimension of any input arguments before applying the given function. This means that for functions that accept an ``axis`` argument, you usually need to set ``axis=-1``. As an example, here is how we would wrap :py:func:`numpy.linalg.norm` to calculate the vector norm: .. code-block:: python def vector_norm(x, dim, ord=None): return xr.apply_ufunc( np.linalg.norm, x, input_core_dims=[[dim]], kwargs={"ord": ord, "axis": -1} ) .. 
jupyter-execute:: :hide-code: def vector_norm(x, dim, ord=None): return xr.apply_ufunc( np.linalg.norm, x, input_core_dims=[[dim]], kwargs={"ord": ord, "axis": -1} ) .. jupyter-execute:: vector_norm(arr1, dim="x") Because ``apply_ufunc`` follows a standard convention for ufuncs, it plays nicely with tools for building vectorized functions, like :py:func:`numpy.broadcast_arrays` and :py:class:`numpy.vectorize`. For high performance needs, consider using :doc:`Numba's vectorize and guvectorize `. In addition to wrapping functions, ``apply_ufunc`` can automatically parallelize many functions when using dask by setting ``dask='parallelized'``. See :ref:`dask.automatic-parallelization` for details. :py:func:`~xarray.apply_ufunc` also supports some advanced options for controlling alignment of variables and the form of the result. See the docstring for full details and more examples. python-xarray-2026.01.0/doc/user-guide/duckarrays.rst0000664000175000017500000002527415136607163022575 0ustar alastairalastair.. currentmodule:: xarray .. _userguide.duckarrays: Working with numpy-like arrays ============================== NumPy-like arrays (often known as :term:`duck array`\s) are drop-in replacements for the :py:class:`numpy.ndarray` class but with different features, such as propagating physical units or a different layout in memory. Xarray can often wrap these array types, allowing you to use labelled dimensions and indexes whilst benefiting from the additional features of these array libraries. Some numpy-like array types that xarray already has some support for: * `Cupy `_ - GPU support (see `cupy-xarray `_), * `Sparse `_ - for performant arrays with many zero elements, * `Pint `_ - for tracking the physical units of your data (see `pint-xarray `_), * `Dask `_ - parallel computing on larger-than-memory arrays (see :ref:`using dask with xarray `), * `Cubed `_ - another parallel computing framework that emphasises reliability (see `cubed-xarray `_). .. warning:: This feature should be considered somewhat experimental. Please report any bugs you find on `xarray’s issue tracker `_. .. note:: For information on wrapping dask arrays see :ref:`dask`. Whilst xarray wraps dask arrays in a similar way to that described on this page, chunked array types like :py:class:`dask.array.Array` implement additional methods that require slightly different user code (e.g. calling ``.chunk`` or ``.compute``). See the docs on :ref:`wrapping chunked arrays `. Why "duck"? ----------- Why is it also called a "duck" array? This comes from a common statement of object-oriented programming - "If it walks like a duck, and quacks like a duck, treat it like a duck". In other words, a library like xarray that is capable of using multiple different types of arrays does not have to explicitly check that each one it encounters is permitted (e.g. ``if dask``, ``if numpy``, ``if sparse`` etc.). Instead xarray can take the more permissive approach of simply treating the wrapped array as valid, attempting to call the relevant methods (e.g. ``.mean()``) and only raising an error if a problem occurs (e.g. the method is not found on the wrapped class). This is much more flexible, and allows objects and classes from different libraries to work together more easily. What is a numpy-like array? --------------------------- A "numpy-like array" (also known as a "duck array") is a class that contains array-like data, and implements key numpy-like functionality such as indexing, broadcasting, and computation methods. 
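In duck-typing terms, code written against that shared interface runs unchanged on any conforming array type. The helper below is a hypothetical illustration of the idea and is not part of xarray's API:

.. code-block:: python

    import numpy as np


    def normalize(arr):
        # works for any array-like object that implements mean(), std() and
        # elementwise arithmetic (e.g. numpy, dask or cupy arrays)
        return (arr - arr.mean()) / arr.std()


    normalize(np.arange(5.0))  # a plain numpy array is just one valid input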
For example, the `sparse `_ library provides a sparse array type which is useful for representing nD array objects like sparse matrices in a memory-efficient manner. We can create a sparse array object (of the :py:class:`sparse.COO` type) from a numpy array like this: .. jupyter-execute:: from sparse import COO import xarray as xr import numpy as np %xmode minimal .. jupyter-execute:: x = np.eye(4, dtype=np.uint8) # create diagonal identity matrix s = COO.from_numpy(x) s This sparse object does not attempt to explicitly store every element in the array, only the non-zero elements. This approach is much more efficient for large arrays with only a few non-zero elements (such as tri-diagonal matrices). Sparse array objects can be converted back to a "dense" numpy array by calling :py:meth:`sparse.COO.todense`. Just like :py:class:`numpy.ndarray` objects, :py:class:`sparse.COO` arrays support indexing .. jupyter-execute:: s[1, 1] # diagonal elements should be ones .. jupyter-execute:: s[2, 3] # off-diagonal elements should be zero broadcasting, .. jupyter-execute:: x2 = np.zeros( (4, 1), dtype=np.uint8 ) # create second sparse array of different shape s2 = COO.from_numpy(x2) (s * s2) # multiplication requires broadcasting and various computation methods .. jupyter-execute:: s.sum(axis=1) This numpy-like array also supports calling so-called `numpy ufuncs `_ ("universal functions") on it directly: .. jupyter-execute:: np.sum(s, axis=1) Notice that in each case the API for calling the operation on the sparse array is identical to that of calling it on the equivalent numpy array - this is the sense in which the sparse array is "numpy-like". .. note:: For discussion on exactly which methods a class needs to implement to be considered "numpy-like", see :ref:`internals.duckarrays`. Wrapping numpy-like arrays in xarray ------------------------------------ :py:class:`DataArray`, :py:class:`Dataset`, and :py:class:`Variable` objects can wrap these numpy-like arrays. Constructing xarray objects which wrap numpy-like arrays ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The primary way to create an xarray object which wraps a numpy-like array is to pass that numpy-like array instance directly to the constructor of the xarray class. The :ref:`page on xarray data structures ` shows how :py:class:`DataArray` and :py:class:`Dataset` both accept data in various forms through their ``data`` argument, but in fact this data can also be any wrappable numpy-like array. For example, we can wrap the sparse array we created earlier inside a new DataArray object: .. jupyter-execute:: s_da = xr.DataArray(s, dims=["i", "j"]) s_da We can see what's inside - the printable representation of our xarray object (the repr) automatically uses the printable representation of the underlying wrapped array. Of course our sparse array object is still there underneath - it's stored under the ``.data`` attribute of the dataarray: .. jupyter-execute:: s_da.data Array methods ~~~~~~~~~~~~~ We saw above that numpy-like arrays provide numpy methods. Xarray automatically uses these when you call the corresponding xarray method: .. jupyter-execute:: s_da.sum(dim="j") Converting wrapped types ~~~~~~~~~~~~~~~~~~~~~~~~ If you want to change the type inside your xarray object you can use :py:meth:`DataArray.as_numpy`: .. jupyter-execute:: s_da.as_numpy() This returns a new :py:class:`DataArray` object, but now wrapping a normal numpy array. 
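One way to see the effect (a small sketch reusing the ``s_da`` object from above) is to compare the wrapped types before and after the conversion:

.. code-block:: python

    import numpy as np

    converted = s_da.as_numpy()
    type(s_da.data)  # sparse.COO -- the original object is unchanged
    type(converted.data)  # numpy.ndarray
    assert isinstance(converted.data, np.ndarray)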
If instead you want to convert to numpy and return that numpy array you can use either :py:meth:`DataArray.to_numpy` or :py:meth:`DataArray.values`, where the former is strongly preferred. The difference is in the way they coerce to numpy - :py:meth:`~DataArray.values` always uses :py:func:`numpy.asarray` which will fail for some array types (e.g. ``cupy``), whereas :py:meth:`~DataArray.to_numpy` uses the correct method depending on the array type. .. jupyter-execute:: s_da.to_numpy() .. jupyter-execute:: :raises: s_da.values This illustrates the difference between :py:meth:`~DataArray.data` and :py:meth:`~DataArray.values`, which is sometimes a point of confusion for new xarray users. Explicitly: :py:meth:`DataArray.data` returns the underlying numpy-like array, regardless of type, whereas :py:meth:`DataArray.values` converts the underlying array to a numpy array before returning it. (This is another reason to use :py:meth:`~DataArray.to_numpy` over :py:meth:`~DataArray.values` - the intention is clearer.) Conversion to numpy as a fallback ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If a wrapped array does not implement the corresponding array method then xarray will often attempt to convert the underlying array to a numpy array so that the operation can be performed. You may want to watch out for this behavior, and report any instances in which it causes problems. Most of xarray's API does support using :term:`duck array` objects, but there are a few areas where the code will still convert to ``numpy`` arrays: - Dimension coordinates, and thus all indexing operations: * :py:meth:`Dataset.sel` and :py:meth:`DataArray.sel` * :py:meth:`Dataset.loc` and :py:meth:`DataArray.loc` * :py:meth:`Dataset.drop_sel` and :py:meth:`DataArray.drop_sel` * :py:meth:`Dataset.reindex`, :py:meth:`Dataset.reindex_like`, :py:meth:`DataArray.reindex` and :py:meth:`DataArray.reindex_like`: duck arrays in data variables and non-dimension coordinates won't be casted - Functions and methods that depend on external libraries or features of ``numpy`` not covered by ``__array_function__`` / ``__array_ufunc__``: * :py:meth:`Dataset.ffill` and :py:meth:`DataArray.ffill` (uses ``bottleneck``) * :py:meth:`Dataset.bfill` and :py:meth:`DataArray.bfill` (uses ``bottleneck``) * :py:meth:`Dataset.interp`, :py:meth:`Dataset.interp_like`, :py:meth:`DataArray.interp` and :py:meth:`DataArray.interp_like` (uses ``scipy``): duck arrays in data variables and non-dimension coordinates will be casted in addition to not supporting duck arrays in dimension coordinates * :py:meth:`Dataset.rolling` and :py:meth:`DataArray.rolling` (requires ``numpy>=1.20``) * :py:meth:`Dataset.rolling_exp` and :py:meth:`DataArray.rolling_exp` (uses ``numbagg``) * :py:meth:`Dataset.interpolate_na` and :py:meth:`DataArray.interpolate_na` (uses :py:class:`numpy.vectorize`) * :py:func:`apply_ufunc` with ``vectorize=True`` (uses :py:class:`numpy.vectorize`) - Incompatibilities between different :term:`duck array` libraries: * :py:meth:`Dataset.chunk` and :py:meth:`DataArray.chunk`: this fails if the data was not already chunked and the :term:`duck array` (e.g. a ``pint`` quantity) should wrap the new ``dask`` array; changing the chunk sizes works however. Extensions using duck arrays ---------------------------- Whilst the features above allow many numpy-like array libraries to be used pretty seamlessly with xarray, it often also makes sense to use an interfacing package to make certain tasks easier. 
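As a rough, hedged sketch of the kind of convenience such interfacing packages add (the accessor names below follow pint-xarray's documentation and are assumptions here; the package must be installed for this to run):

.. code-block:: python

    import xarray as xr
    import pint_xarray  # noqa: F401  (importing registers the .pint accessor)

    da = xr.DataArray([0.0, 1.0, 2.0], dims="x", attrs={"units": "metre"})
    quantified = da.pint.quantify()  # wrap the data in a pint.Quantity
    quantified.pint.units  # the parsed unit object
    plain = quantified.pint.dequantify()  # back to numpy plus a units attribute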
For example the `pint-xarray package `_ offers a custom ``.pint`` accessor (see :ref:`internals.accessors`) which provides convenient access to information stored within the wrapped array (e.g. ``.units`` and ``.magnitude``), and makes creating wrapped pint arrays (and especially xarray-wrapping-pint-wrapping-dask arrays) simpler for the user. We maintain a list of libraries extending ``xarray`` to make working with particular wrapped duck arrays easier. If you know of more that aren't on this list please raise an issue to add them! - `pint-xarray `_ - `cupy-xarray `_ - `cubed-xarray `_ python-xarray-2026.01.0/doc/user-guide/options.rst0000664000175000017500000000154515136607163022113 0ustar alastairalastair.. currentmodule:: xarray .. _options: Configuration ============= Xarray offers a small number of configuration options through :py:func:`set_options`. With these, you can 1. Control the ``repr``: - ``display_expand_attrs`` - ``display_expand_coords`` - ``display_expand_data`` - ``display_expand_data_vars`` - ``display_max_rows`` - ``display_style`` 2. Control behaviour during operations: ``arithmetic_join``, ``keep_attrs``, ``use_bottleneck``. 3. Control colormaps for plots:``cmap_divergent``, ``cmap_sequential``. 4. Aspects of file reading: ``file_cache_maxsize``, ``netcdf_engine_order``, ``warn_on_unclosed_files``. You can set these options either globally :: xr.set_options(arithmetic_join="exact") or locally as a context manager: :: with xr.set_options(arithmetic_join="exact"): # do operation here pass python-xarray-2026.01.0/doc/user-guide/time-series.rst0000664000175000017500000003013215136607163022640 0ustar alastairalastair.. currentmodule:: xarray .. _time-series: ================ Time series data ================ A major use case for xarray is multi-dimensional time-series data. Accordingly, we've copied many of features that make working with time-series data in pandas such a joy to xarray. In most cases, we rely on pandas for the core functionality. .. jupyter-execute:: :hide-code: import numpy as np import pandas as pd import xarray as xr np.random.seed(123456) Creating datetime64 data ------------------------ Xarray uses the numpy dtypes :py:class:`numpy.datetime64` and :py:class:`numpy.timedelta64` with specified units (one of ``"s"``, ``"ms"``, ``"us"`` and ``"ns"``) to represent datetime data, which offer vectorized operations with numpy and smooth integration with pandas. To convert to or create regular arrays of :py:class:`numpy.datetime64` data, we recommend using :py:func:`pandas.to_datetime`, :py:class:`pandas.DatetimeIndex`, or :py:func:`xarray.date_range`: .. jupyter-execute:: pd.to_datetime(["2000-01-01", "2000-02-02"]) .. jupyter-execute:: pd.DatetimeIndex( ["2000-01-01 00:00:00", "2000-02-02 00:00:00"], dtype="datetime64[s]" ) .. jupyter-execute:: xr.date_range("2000-01-01", periods=365) .. jupyter-execute:: xr.date_range("2000-01-01", periods=365, unit="s") .. note:: Care has to be taken to create the output with the wanted resolution. For :py:func:`pandas.date_range` the ``unit``-kwarg has to be specified and for :py:func:`pandas.to_datetime` the selection of the resolution isn't possible at all. For that :py:class:`pd.DatetimeIndex` can be used directly. There is more in-depth information in section :ref:`internals.timecoding`. Alternatively, you can supply arrays of Python ``datetime`` objects. These get converted automatically when used as arguments in xarray objects (with us-resolution): .. 
jupyter-execute:: import datetime xr.Dataset({"time": datetime.datetime(2000, 1, 1)}) When reading or writing netCDF files, xarray automatically decodes datetime and timedelta arrays using `CF conventions`_ (that is, by using a ``units`` attribute like ``'days since 2000-01-01'``). .. _CF conventions: https://cfconventions.org .. note:: When decoding/encoding datetimes for non-standard calendars or for dates before `1582-10-15`_, xarray uses the `cftime`_ library by default. It was previously packaged with the ``netcdf4-python`` package under the name ``netcdftime`` but is now distributed separately. ``cftime`` is an :ref:`optional dependency` of xarray. .. _cftime: https://unidata.github.io/cftime .. _1582-10-15: https://en.wikipedia.org/wiki/Gregorian_calendar You can manually decode arrays in this form by passing a dataset to :py:func:`decode_cf`: .. jupyter-execute:: attrs = {"units": "hours since 2000-01-01"} ds = xr.Dataset({"time": ("time", [0, 1, 2, 3], attrs)}) # Default decoding to 'ns'-resolution xr.decode_cf(ds) .. jupyter-execute:: # Decoding to 's'-resolution coder = xr.coders.CFDatetimeCoder(time_unit="s") xr.decode_cf(ds, decode_times=coder) From xarray 2025.01.2, the resolution of the dates can be one of ``"s"``, ``"ms"``, ``"us"`` or ``"ns"``. One limitation of using ``datetime64[ns]`` is that it limits the native representation of dates to those that fall between the years 1678 and 2262, which gets increased significantly with lower resolutions. When a store contains dates outside of these bounds (or dates < `1582-10-15`_ with a Gregorian, also known as standard, calendar), dates will be returned as arrays of :py:class:`cftime.datetime` objects and a :py:class:`CFTimeIndex` will be used for indexing. :py:class:`CFTimeIndex` enables most of the indexing functionality of a :py:class:`pandas.DatetimeIndex`. See :ref:`CFTimeIndex` for more information. Datetime indexing ----------------- Xarray borrows powerful indexing machinery from pandas (see :ref:`indexing`). This allows for several useful and succinct forms of indexing, particularly for ``datetime64`` data. For example, we support indexing with strings for single items and with the ``slice`` object: .. jupyter-execute:: time = pd.date_range("2000-01-01", freq="h", periods=365 * 24) ds = xr.Dataset({"foo": ("time", np.arange(365 * 24)), "time": time}) ds.sel(time="2000-01") .. jupyter-execute:: ds.sel(time=slice("2000-06-01", "2000-06-10")) You can also select a particular time by indexing with a :py:class:`datetime.time` object: .. jupyter-execute:: ds.sel(time=datetime.time(12)) For more details, read the pandas documentation and the section on :ref:`datetime_component_indexing` (i.e. using the ``.dt`` accessor). .. _dt_accessor: Datetime components ------------------- Similar to `pandas accessors`_, the components of datetime objects contained in a given ``DataArray`` can be quickly computed using a special ``.dt`` accessor. .. _pandas accessors: https://pandas.pydata.org/pandas-docs/stable/basics.html#basics-dt-accessors .. jupyter-execute:: time = pd.date_range("2000-01-01", freq="6h", periods=365 * 4) ds = xr.Dataset({"foo": ("time", np.arange(365 * 4)), "time": time}) ds.time.dt.hour .. jupyter-execute:: ds.time.dt.dayofweek The ``.dt`` accessor works on both coordinate dimensions and multi-dimensional data.
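As a quick illustration of the multi-dimensional case, here is a minimal sketch (the reshaped array and the dimension names ``"row"`` and ``"col"`` are purely illustrative, not part of the example data used elsewhere in this section):

.. code-block:: python

    # Hypothetical example: apply ``.dt`` to a two-dimensional array of datetimes.
    # ``time`` is the 6-hourly index created above; the dims are illustrative.
    times_2d = xr.DataArray(np.asarray(time[:6]).reshape(2, 3), dims=("row", "col"))
    times_2d.dt.hour  # a 2D DataArray of integer hours with the same dims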
Xarray also supports a notion of "virtual" or "derived" coordinates for `datetime components`__ implemented by pandas, including "year", "month", "day", "hour", "minute", "second", "dayofyear", "week", "dayofweek", "weekday" and "quarter": __ https://pandas.pydata.org/pandas-docs/stable/api.html#time-date-components .. jupyter-execute:: ds["time.month"] .. jupyter-execute:: ds["time.dayofyear"] For use as a derived coordinate, xarray adds ``'season'`` to the list of datetime components supported by pandas: .. jupyter-execute:: ds["time.season"] .. jupyter-execute:: ds["time"].dt.season The set of valid seasons consists of 'DJF', 'MAM', 'JJA' and 'SON', labeled by the first letters of the corresponding months. You can use these shortcuts with both Datasets and DataArray coordinates. In addition, xarray supports rounding operations ``floor``, ``ceil``, and ``round``. These operations require that you supply a `rounding frequency as a string argument.`__ __ https://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases .. jupyter-execute:: ds["time"].dt.floor("D") The ``.dt`` accessor can also be used to generate formatted datetime strings for arrays utilising the same formatting as the standard `datetime.strftime`_. .. _datetime.strftime: https://docs.python.org/3/library/datetime.html#strftime-strptime-behavior .. jupyter-execute:: ds["time"].dt.strftime("%a, %b %d %H:%M") .. _datetime_component_indexing: Indexing Using Datetime Components ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ You can use the ``.dt`` accessor when subsetting your data as well. For example, we can subset for the month of January using the following: .. jupyter-execute:: ds.isel(time=(ds.time.dt.month == 1)) You can also search for multiple months (in this case January through March), using ``isin``: .. jupyter-execute:: ds.isel(time=ds.time.dt.month.isin([1, 2, 3])) .. _resampling: Resampling and grouped operations --------------------------------- .. seealso:: For more generic documentation on grouping, see :ref:`groupby`. Datetime components couple particularly well with grouped operations for analyzing features that repeat over time. Here's how to calculate the mean by time of day: .. jupyter-execute:: ds.groupby("time.hour").mean() For upsampling or downsampling temporal resolutions, xarray offers a :py:meth:`Dataset.resample` method building on the core functionality offered by the pandas method of the same name. Resample uses essentially the same API as :py:meth:`pandas.DataFrame.resample` `in pandas`_. .. _in pandas: https://pandas.pydata.org/pandas-docs/stable/timeseries.html#up-and-downsampling For example, we can downsample our dataset from hourly to 6-hourly: .. jupyter-execute:: ds.resample(time="6h") This will create a specialized :py:class:`~xarray.core.resample.DatasetResample` or :py:class:`~xarray.core.resample.DataArrayResample` object which saves information necessary for resampling. All of the reduction methods which work with :py:class:`Dataset` or :py:class:`DataArray` objects can also be used for resampling: .. jupyter-execute:: ds.resample(time="6h").mean() You can also supply an arbitrary reduction function to aggregate over each resampling group: .. jupyter-execute:: ds.resample(time="6h").reduce(np.mean) You can also resample on the time dimension while reducing along other dimensions at the same time by specifying the ``dim`` keyword argument ..
code-block:: python ds.resample(time="6h").mean(dim=["time", "latitude", "longitude"]) For upsampling, xarray provides six methods: ``asfreq``, ``ffill``, ``bfill``, ``pad``, ``nearest`` and ``interpolate``. ``interpolate`` extends :py:func:`scipy.interpolate.interp1d` and supports all of its schemes. All of these resampling operations work on both Dataset and DataArray objects with an arbitrary number of dimensions. In order to limit the scope of the methods ``ffill``, ``bfill``, ``pad`` and ``nearest`` the ``tolerance`` argument can be set in coordinate units. Data that has indices outside of the given ``tolerance`` are set to ``NaN``. .. jupyter-execute:: ds.resample(time="1h").nearest(tolerance="1h") It is often desirable to center the time values after a resampling operation. That can be accomplished by updating the resampled dataset time coordinate values using time offset arithmetic via the :py:func:`pandas.tseries.frequencies.to_offset` function. .. jupyter-execute:: resampled_ds = ds.resample(time="6h").mean() offset = pd.tseries.frequencies.to_offset("6h") / 2 resampled_ds["time"] = resampled_ds.get_index("time") + offset resampled_ds .. seealso:: For more examples of using grouped operations on a time dimension, see :doc:`../examples/weather-data`. .. _seasonal_grouping: Handling Seasons ~~~~~~~~~~~~~~~~ Two extremely common time series operations are to group by seasons, and resample to a seasonal frequency. Xarray has historically supported some simple versions of these computations. For example, ``.groupby("time.season")`` (where the seasons are DJF, MAM, JJA, SON) and resampling to a seasonal frequency using Pandas syntax: ``.resample(time="QS-DEC")``. Quite commonly one wants more flexibility in defining seasons. For these use-cases, Xarray provides :py:class:`groupers.SeasonGrouper` and :py:class:`groupers.SeasonResampler`. .. currentmodule:: xarray.groupers .. jupyter-execute:: from xarray.groupers import SeasonGrouper ds.groupby(time=SeasonGrouper(["DJF", "MAM", "JJA", "SON"])).mean() Note how the seasons are in the specified order, unlike ``.groupby("time.season")`` where the seasons are sorted alphabetically. .. jupyter-execute:: ds.groupby("time.season").mean() :py:class:`SeasonGrouper` supports overlapping seasons: .. jupyter-execute:: ds.groupby(time=SeasonGrouper(["DJFM", "MAMJ", "JJAS", "SOND"])).mean() Skipping months is allowed: .. jupyter-execute:: ds.groupby(time=SeasonGrouper(["JJAS"])).mean() Use :py:class:`SeasonResampler` to specify custom seasons. .. jupyter-execute:: from xarray.groupers import SeasonResampler ds.resample(time=SeasonResampler(["DJF", "MAM", "JJA", "SON"])).mean() :py:class:`SeasonResampler` is smart enough to correctly handle years for seasons that span the end of the year (e.g. DJF). By default :py:class:`SeasonResampler` will skip any season that is incomplete (e.g. the first DJF season for a time series that starts in Jan). Pass the ``drop_incomplete=False`` kwarg to :py:class:`SeasonResampler` to disable this behaviour. .. jupyter-execute:: from xarray.groupers import SeasonResampler ds.resample( time=SeasonResampler(["DJF", "MAM", "JJA", "SON"], drop_incomplete=False) ).mean() Seasons need not be of the same length: .. jupyter-execute:: ds.resample(time=SeasonResampler(["JF", "MAM", "JJAS", "OND"])).mean() python-xarray-2026.01.0/doc/user-guide/testing.rst0000664000175000017500000002640115136607163022073 0ustar alastairalastair.. _testing: Testing your code ================= .. 
jupyter-execute:: :hide-code: import numpy as np import pandas as pd import xarray as xr np.random.seed(123456) .. _testing.hypothesis: Hypothesis testing ------------------ .. note:: Testing with hypothesis is a fairly advanced topic. Before reading this section it is recommended that you take a look at our guide to xarray's :ref:`data structures`, are familiar with conventional unit testing in `pytest `_, and have seen the `hypothesis library documentation `_. `The hypothesis library `_ is a powerful tool for property-based testing. Instead of writing tests for one example at a time, it allows you to write tests parameterized by a source of many dynamically generated examples. For example you might have written a test which you wish to be parameterized by the set of all possible integers via :py:func:`hypothesis.strategies.integers()`. Property-based testing is extremely powerful, because (unlike more conventional example-based testing) it can find bugs that you did not even think to look for! Strategies ~~~~~~~~~~ Each source of examples is called a "strategy", and xarray provides a range of custom strategies which produce xarray data structures containing arbitrary data. You can use these to efficiently test downstream code, quickly ensuring that your code can handle xarray objects of all possible structures and contents. These strategies are accessible in the :py:mod:`xarray.testing.strategies` module, which provides .. currentmodule:: xarray .. autosummary:: testing.strategies.supported_dtypes testing.strategies.names testing.strategies.dimension_names testing.strategies.dimension_sizes testing.strategies.attrs testing.strategies.variables testing.strategies.unique_subset_of These build upon the numpy and array API strategies offered in :py:mod:`hypothesis.extra.numpy` and :py:mod:`hypothesis.extra.array_api`: .. jupyter-execute:: import hypothesis.extra.numpy as npst Generating Examples ~~~~~~~~~~~~~~~~~~~ To see an example of what each of these strategies might produce, you can call one followed by the ``.example()`` method, which is a general hypothesis method valid for all strategies. .. jupyter-execute:: import xarray.testing.strategies as xrst xrst.variables().example() .. jupyter-execute:: xrst.variables().example() .. jupyter-execute:: xrst.variables().example() You can see that calling ``.example()`` multiple times will generate different examples, giving you an idea of the wide range of data that the xarray strategies can generate. In your tests however you should not use ``.example()`` - instead you should parameterize your tests with the :py:func:`hypothesis.given` decorator: .. jupyter-execute:: from hypothesis import given .. jupyter-execute:: @given(xrst.variables()) def test_function_that_acts_on_variables(var): assert func(var) == ... Chaining Strategies ~~~~~~~~~~~~~~~~~~~ Xarray's strategies can accept other strategies as arguments, allowing you to customise the contents of the generated examples. .. jupyter-execute:: # generate a Variable containing an array with a complex number dtype, but all other details still arbitrary from hypothesis.extra.numpy import complex_number_dtypes xrst.variables(dtype=complex_number_dtypes()).example() This also works with custom strategies, or strategies defined in other packages. For example you could imagine creating a ``chunks`` strategy to specify particular chunking patterns for a dask-backed array. 
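As a concrete (and purely hypothetical) sketch of chaining a hand-written strategy, the following draws dimension names only from a fixed, domain-specific vocabulary and feeds them into ``variables`` via its ``dims`` argument; the vocabulary and names are illustrative:

.. code-block:: python

    import hypothesis.strategies as st

    import xarray.testing.strategies as xrst

    # Hypothetical custom strategy: lists of dimension names drawn from a
    # fixed vocabulary, chained into ``variables`` through ``dims``.
    physical_dims = st.lists(
        st.sampled_from(["time", "lat", "lon"]), min_size=1, max_size=3, unique=True
    )
    xrst.variables(dims=physical_dims).example()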
Fixing Arguments ~~~~~~~~~~~~~~~~ If you want to fix one aspect of the data structure, whilst allowing variation in the generated examples over all other aspects, then use :py:func:`hypothesis.strategies.just()`. .. jupyter-execute:: import hypothesis.strategies as st # Generates only variable objects with dimensions ["x", "y"] xrst.variables(dims=st.just(["x", "y"])).example() (This is technically another example of chaining strategies - :py:func:`hypothesis.strategies.just()` is simply a special strategy that just contains a single example.) To fix the length of dimensions you can instead pass ``dims`` as a mapping of dimension names to lengths (i.e. following xarray objects' ``.sizes()`` property), e.g. .. jupyter-execute:: # Generates only variables with dimensions ["x", "y"], of lengths 2 & 3 respectively xrst.variables(dims=st.just({"x": 2, "y": 3})).example() You can also use this to specify that you want examples which are missing some part of the data structure, for instance .. jupyter-execute:: # Generates a Variable with no attributes xrst.variables(attrs=st.just({})).example() Through a combination of chaining strategies and fixing arguments, you can specify quite complicated requirements on the objects your chained strategy will generate. .. jupyter-execute:: fixed_x_variable_y_maybe_z = st.fixed_dictionaries( {"x": st.just(2), "y": st.integers(3, 4)}, optional={"z": st.just(2)} ) fixed_x_variable_y_maybe_z.example() .. jupyter-execute:: special_variables = xrst.variables(dims=fixed_x_variable_y_maybe_z) special_variables.example() .. jupyter-execute:: special_variables.example() Here we have used one of hypothesis' built-in strategies :py:func:`hypothesis.strategies.fixed_dictionaries` to create a strategy which generates mappings of dimension names to lengths (i.e. the ``size`` of the xarray object we want). This particular strategy will always generate an ``x`` dimension of length 2, and a ``y`` dimension of length either 3 or 4, and will sometimes also generate a ``z`` dimension of length 2. By feeding this strategy for dictionaries into the ``dims`` argument of xarray's :py:func:`~st.variables` strategy, we can generate arbitrary :py:class:`~xarray.Variable` objects whose dimensions will always match these specifications. Generating Duck-type Arrays ~~~~~~~~~~~~~~~~~~~~~~~~~~~ Xarray objects don't have to wrap numpy arrays, in fact they can wrap any array type which presents the same API as a numpy array (so-called "duck array wrapping", see :ref:`wrapping numpy-like arrays `). Imagine we want to write a strategy which generates arbitrary ``Variable`` objects, each of which wraps a :py:class:`sparse.COO` array instead of a ``numpy.ndarray``. How could we do that? There are two ways: 1. Create an xarray object with numpy data and use the hypothesis' ``.map()`` method to convert the underlying array to a different type: .. jupyter-execute:: import sparse .. jupyter-execute:: def convert_to_sparse(var): return var.copy(data=sparse.COO.from_numpy(var.to_numpy())) .. jupyter-execute:: sparse_variables = xrst.variables(dims=xrst.dimension_names(min_dims=1)).map( convert_to_sparse ) sparse_variables.example() .. jupyter-execute:: sparse_variables.example() 2. Pass a function which returns a strategy which generates the duck-typed arrays directly to the ``array_strategy_fn`` argument of the xarray strategies: .. 
jupyter-execute:: def sparse_random_arrays(shape: tuple[int, ...]) -> sparse._coo.core.COO: """Strategy which generates random sparse.COO arrays""" if shape is None: shape = npst.array_shapes() else: shape = st.just(shape) density = st.integers(min_value=0, max_value=1) # note sparse.random does not accept a dtype kwarg return st.builds(sparse.random, shape=shape, density=density) def sparse_random_arrays_fn( *, shape: tuple[int, ...], dtype: np.dtype ) -> st.SearchStrategy[sparse._coo.core.COO]: return sparse_random_arrays(shape=shape) .. jupyter-execute:: sparse_random_variables = xrst.variables( array_strategy_fn=sparse_random_arrays_fn, dtype=st.just(np.dtype("float64")) ) sparse_random_variables.example() Either approach is fine, but one may be more convenient than the other depending on the type of the duck array which you want to wrap. Compatibility with the Python Array API Standard ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Xarray aims to be compatible with any duck-array type that conforms to the `Python Array API Standard `_ (see our :ref:`docs on Array API Standard support `). .. warning:: The strategies defined in :py:mod:`testing.strategies` are **not** guaranteed to use array API standard-compliant dtypes by default. For example arrays with the dtype ``np.dtype('float16')`` may be generated by :py:func:`testing.strategies.variables` (assuming the ``dtype`` kwarg was not explicitly passed), despite ``np.dtype('float16')`` not being in the array API standard. If the array type you want to generate has an array API-compliant top-level namespace (e.g. that which is conventionally imported as ``xp`` or similar), you can use this neat trick: .. jupyter-execute:: import numpy as xp # compatible in numpy 2.0 # use `import numpy.array_api as xp` in numpy>=1.23,<2.0 from hypothesis.extra.array_api import make_strategies_namespace xps = make_strategies_namespace(xp) xp_variables = xrst.variables( array_strategy_fn=xps.arrays, dtype=xps.scalar_dtypes(), ) xp_variables.example() Another array API-compliant duck array library would replace the import, e.g. ``import cupy as cp`` instead. Testing over Subsets of Dimensions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ A common task when testing xarray user code is checking that your function works for all valid input dimensions. We can chain strategies to achieve this, for which the helper strategy :py:func:`~testing.strategies.unique_subset_of` is useful. It works for lists of dimension names .. jupyter-execute:: dims = ["x", "y", "z"] xrst.unique_subset_of(dims).example() .. jupyter-execute:: xrst.unique_subset_of(dims).example() as well as for mappings of dimension names to sizes .. jupyter-execute:: dim_sizes = {"x": 2, "y": 3, "z": 4} xrst.unique_subset_of(dim_sizes).example() .. jupyter-execute:: xrst.unique_subset_of(dim_sizes).example() This is useful because operations like reductions can be performed over any subset of the xarray object's dimensions. For example we can write a pytest test that tests that a reduction gives the expected result when applying that reduction along any possible valid subset of the Variable's dimensions. .. 
code-block:: python import numpy.testing as npt @given(st.data(), xrst.variables(dims=xrst.dimension_names(min_dims=1))) def test_mean(data, var): """Test that the mean of an xarray Variable is always equal to the mean of the underlying array.""" # specify arbitrary reduction along at least one dimension reduction_dims = data.draw(xrst.unique_subset_of(var.dims, min_size=1)) # create expected result (using nanmean because arrays with Nans will be generated) reduction_axes = tuple(var.get_axis_num(dim) for dim in reduction_dims) expected = np.nanmean(var.data, axis=reduction_axes) # assert property is always satisfied result = var.mean(dim=reduction_dims).data npt.assert_equal(expected, result) python-xarray-2026.01.0/doc/user-guide/io.rst0000664000175000017500000017275315136607163021041 0ustar alastairalastair.. currentmodule:: xarray .. _io: Reading and writing files ========================= Xarray supports direct serialization and IO to several file formats, from simple :ref:`io.pickle` files to the more flexible :ref:`io.netcdf` format (recommended). .. jupyter-execute:: :hide-code: import os import iris import ncdata.iris_xarray import numpy as np import pandas as pd import xarray as xr np.random.seed(123456) You can read different types of files in ``xr.open_dataset`` by specifying the engine to be used: .. code:: python xr.open_dataset("example.nc", engine="netcdf4") The "engine" provides a set of instructions that tells xarray how to read the data and pack them into a ``Dataset`` (or ``Dataarray``). These instructions are stored in an underlying "backend". Xarray comes with several backends that cover many common data formats. Many more backends are available via external libraries, or you can `write your own `_. This diagram aims to help you determine - based on the format of the file you'd like to read - which type of backend you're using and how to use it. Text and boxes are clickable for more information. Following the diagram is detailed information on many popular backends. You can learn more about using and developing backends in the `Xarray tutorial JupyterBook `_. .. _comment: mermaid Flowcharg "link" text gets secondary color background, SVG icon fill gets primary color .. raw:: html .. mermaid:: :config: {"theme":"base","themeVariables":{"fontSize":"20px","primaryColor":"#fff","primaryTextColor":"#fff","primaryBorderColor":"#59c7d6","lineColor":"#e28126","secondaryColor":"#767985"}} :alt: Flowchart illustrating how to choose the right backend engine to read your data flowchart LR built-in-eng["`**Is your data stored in one of these formats?** - netCDF4 - netCDF3 - Zarr - DODS/OPeNDAP - HDF5 `"] built-in("`**You're in luck!** Xarray bundles a backend to automatically read these formats. Open data using xr.open_dataset(). We recommend explicitly setting engine='xxxx' for faster loading.`") installed-eng["""One of these formats? - GRIB - TileDB - GeoTIFF, JPEG-2000, etc. (via GDAL) - Sentinel-1 SAFE """] installed("""Install the linked backend library and use it with xr.open_dataset(file, engine='xxxx').""") other["`**Options:** - Look around to see if someone has created an Xarray backend for your format! 
- Create your own backend - Convert your data to a supported format `"] built-in-eng -->|Yes| built-in built-in-eng -->|No| installed-eng installed-eng -->|Yes| installed installed-eng -->|No| other click built-in-eng "https://docs.xarray.dev/en/stable/get-help/faq.html#how-do-i-open-format-x-file-as-an-xarray-dataset" classDef quesNodefmt font-size:12pt,fill:#0e4666,stroke:#59c7d6,stroke-width:3 class built-in-eng,installed-eng quesNodefmt classDef ansNodefmt font-size:12pt,fill:#4a4a4a,stroke:#17afb4,stroke-width:3 class built-in,installed,other ansNodefmt linkStyle default font-size:18pt,stroke-width:4 .. _io.backend_resolution: Backend Selection ----------------- When opening a file or URL without explicitly specifying the ``engine`` parameter, xarray automatically selects an appropriate backend based on the file path or URL. The backends are tried in order: **netcdf4 → h5netcdf → scipy → pydap → zarr**. .. note:: You can customize the order in which netCDF backends are tried using the ``netcdf_engine_order`` option in :py:func:`~xarray.set_options`: .. code-block:: python # Prefer h5netcdf over netcdf4 xr.set_options(netcdf_engine_order=["h5netcdf", "netcdf4", "scipy"]) See :ref:`options` for more details on configuration options. The following tables show which backend will be selected for different types of URLs and files. .. important:: ✅ means the backend will **guess it can open** the URL or file based on its path, extension, or magic number, but this doesn't guarantee success. For example, not all Zarr stores are xarray-compatible. ❌ means the backend will not attempt to open it. Remote URL Resolution ~~~~~~~~~~~~~~~~~~~~~ .. list-table:: :header-rows: 1 :widths: 50 10 10 10 10 10 * - URL - :ref:`netcdf4 ` - :ref:`h5netcdf ` - :ref:`scipy ` - :ref:`pydap ` - :ref:`zarr ` * - ``https://example.com/store.zarr`` - ❌ - ❌ - ❌ - ❌ - ✅ * - ``https://example.com/data.nc`` - ✅ - ✅ - ❌ - ❌ - ❌ * - ``http://example.com/data.nc?var=temp`` - ✅ - ❌ - ❌ - ❌ - ❌ * - ``http://example.com/dap4/data.nc?var=x`` - ✅ - ❌ - ❌ - ✅ - ❌ * - ``dap2://opendap.nasa.gov/dataset`` - ❌ - ❌ - ❌ - ✅ - ❌ * - ``https://example.com/DAP4/data`` - ❌ - ❌ - ❌ - ✅ - ❌ * - ``http://test.opendap.org/dap4/file.nc4`` - ✅ - ✅ - ❌ - ✅ - ❌ * - ``https://example.com/DAP4/data.nc`` - ✅ - ✅ - ❌ - ✅ - ❌ Local File Resolution ~~~~~~~~~~~~~~~~~~~~~ For local files, backends first try to read the file's **magic number** (first few bytes). If the magic number **cannot be read** (e.g., file doesn't exist, no permissions), they fall back to checking the file **extension**. If the magic number is readable but invalid, the backend returns False (does not fall back to extension). .. list-table:: :header-rows: 1 :widths: 40 20 10 10 10 10 * - File Path - Magic Number - :ref:`netcdf4 ` - :ref:`h5netcdf ` - :ref:`scipy ` - :ref:`zarr ` * - ``/path/to/file.nc`` - ``CDF\x01`` (netCDF3) - ✅ - ❌ - ✅ - ❌ * - ``/path/to/file.nc4`` - ``\x89HDF\r\n\x1a\n`` (HDF5/netCDF4) - ✅ - ✅ - ❌ - ❌ * - ``/path/to/file.nc.gz`` - ``\x1f\x8b`` + ``CDF`` inside - ❌ - ❌ - ✅ - ❌ * - ``/path/to/store.zarr/`` - (directory) - ❌ - ❌ - ❌ - ✅ * - ``/path/to/file.nc`` - *(no magic number)* - ✅ - ✅ - ✅ - ❌ * - ``/path/to/file.xyz`` - ``CDF\x01`` (netCDF3) - ✅ - ❌ - ✅ - ❌ * - ``/path/to/file.xyz`` - ``\x89HDF\r\n\x1a\n`` (HDF5/netCDF4) - ✅ - ✅ - ❌ - ❌ * - ``/path/to/file.xyz`` - *(no magic number)* - ❌ - ❌ - ❌ - ❌ .. 
note:: Remote URLs ending in ``.nc`` are **ambiguous**: - They could be netCDF files stored on a remote HTTP server (readable by ``netcdf4`` or ``h5netcdf``) - They could be OPeNDAP/DAP endpoints (readable by ``netcdf4`` with DAP support or ``pydap``) These interpretations are fundamentally incompatible. If xarray's automatic selection chooses the wrong backend, you must explicitly specify the ``engine`` parameter: .. code-block:: python # Force interpretation as a DAP endpoint ds = xr.open_dataset("http://example.com/data.nc", engine="pydap") # Force interpretation as a remote netCDF file ds = xr.open_dataset("https://example.com/data.nc", engine="netcdf4") .. _io.netcdf: netCDF ------ The recommended way to store xarray data structures is `netCDF`__, which is a binary file format for self-described datasets that originated in the geosciences. Xarray is based on the netCDF data model, so netCDF files on disk directly correspond to :py:class:`Dataset` objects (more accurately, a group in a netCDF file directly corresponds to a :py:class:`Dataset` object. See :ref:`io.netcdf_groups` for more.) NetCDF is supported on almost all platforms, and parsers exist for the vast majority of scientific programming languages. Recent versions of netCDF are based on the even more widely used HDF5 file-format. __ https://www.unidata.ucar.edu/software/netcdf/ .. tip:: If you aren't familiar with this data format, the `netCDF FAQ`_ is a good place to start. .. _netCDF FAQ: https://www.unidata.ucar.edu/software/netcdf/docs/faq.html#What-Is-netCDF Reading and writing netCDF files with xarray requires scipy, h5netcdf, or the `netCDF4-Python`__ library to be installed. SciPy only supports reading and writing of netCDF V3 files. __ https://github.com/Unidata/netcdf4-python We can save a Dataset to disk using the :py:meth:`Dataset.to_netcdf` method: .. jupyter-execute:: nc_filename = "saved_on_disk.nc" .. jupyter-execute:: :hide-code: # Ensure the file is located in a unique temporary directory # so that it doesn't conflict with parallel builds of the # documentation. import tempfile import os.path tempdir = tempfile.TemporaryDirectory() nc_filename = os.path.join(tempdir.name, nc_filename) .. jupyter-execute:: ds = xr.Dataset( {"foo": (("x", "y"), np.random.rand(4, 5))}, coords={ "x": [10, 20, 30, 40], "y": pd.date_range("2000-01-01", periods=5), "z": ("x", list("abcd")), }, ) ds.to_netcdf(nc_filename) By default, the file is saved as netCDF4 (assuming netCDF4-Python is installed). You can control the format and engine used to write the file with the ``format`` and ``engine`` arguments. .. tip:: Using the `h5netcdf `_ package by passing ``engine='h5netcdf'`` to :py:meth:`open_dataset` can sometimes be quicker than the default ``engine='netcdf4'`` that uses the `netCDF4 `_ package. We can load netCDF files to create a new Dataset using :py:func:`open_dataset`: .. jupyter-execute:: ds_disk = xr.open_dataset(nc_filename) ds_disk .. jupyter-execute:: :hide-code: # Close "saved_on_disk.nc", but retain the file until after closing or deleting other # datasets that will refer to it. ds_disk.close() Similarly, a DataArray can be saved to disk using the :py:meth:`DataArray.to_netcdf` method, and loaded from disk using the :py:func:`open_dataarray` function. 
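For example, a minimal sketch of such a round trip (the file name is illustrative and, as above, would normally live in a temporary directory):

.. code-block:: python

    # Hypothetical example: save a single DataArray and read it back.
    da = xr.DataArray(np.arange(5), dims="x", name="foo")
    da.to_netcdf("foo_on_disk.nc")
    da_roundtripped = xr.open_dataarray("foo_on_disk.nc")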
As netCDF files correspond to :py:class:`Dataset` objects, these functions internally convert the ``DataArray`` to a ``Dataset`` before saving, and then convert back when loading, ensuring that the ``DataArray`` that is loaded is always exactly the same as the one that was saved. A dataset can also be loaded or written to a specific group within a netCDF file. To load from a group, pass a ``group`` keyword argument to the ``open_dataset`` function. The group can be specified as a path-like string, e.g., to access subgroup 'bar' within group 'foo' pass '/foo/bar' as the ``group`` argument. When writing multiple groups in one file, pass ``mode='a'`` to ``to_netcdf`` to ensure that each call does not delete the file. .. tip:: It is recommended to use :py:class:`~xarray.DataTree` to represent hierarchical data, and to use the :py:meth:`xarray.DataTree.to_netcdf` method when writing hierarchical data to a netCDF file. Data is *always* loaded lazily from netCDF files. You can manipulate, slice and subset Dataset and DataArray objects, and no array values are loaded into memory until you try to perform some sort of actual computation. For an example of how these lazy arrays work, see the OPeNDAP section below. There may be minor differences in the :py:class:`Dataset` object returned when reading a NetCDF file with different engines. It is important to note that when you modify values of a Dataset, even one linked to files on disk, only the in-memory copy you are manipulating in xarray is modified: the original file on disk is never touched. .. tip:: Xarray's lazy loading of remote or on-disk datasets is often but not always desirable. Before performing computationally intense operations, it is often a good idea to load a Dataset (or DataArray) entirely into memory by invoking the :py:meth:`Dataset.load` method. Datasets have a :py:meth:`Dataset.close` method to close the associated netCDF file. However, it's often cleaner to use a ``with`` statement: .. jupyter-execute:: # this automatically closes the dataset after use with xr.open_dataset(nc_filename) as ds: print(ds.keys()) Although xarray provides reasonable support for incremental reads of files on disk, it does not support incremental writes, which can be a useful strategy for dealing with datasets too big to fit into memory. Instead, xarray integrates with dask.array (see :ref:`dask`), which provides a fully featured engine for streaming computation. It is possible to append or overwrite netCDF variables using the ``mode='a'`` argument. When using this option, all variables in the dataset will be written to the original netCDF file, regardless if they exist in the original dataset. .. _io.netcdf_groups: Groups ~~~~~~ Whilst netCDF groups can only be loaded individually as ``Dataset`` objects, a whole file of many nested groups can be loaded as a single :py:class:`xarray.DataTree` object. To open a whole netCDF file as a tree of groups use the :py:func:`xarray.open_datatree` function. To save a DataTree object as a netCDF file containing many groups, use the :py:meth:`xarray.DataTree.to_netcdf` method. .. _netcdf.root_group.note: .. note:: Due to file format specifications the on-disk root group name is always ``"/"``, overriding any given ``DataTree`` root node name. .. _netcdf.group.warning: .. warning:: ``DataTree`` objects do not follow the exact same data model as netCDF files, which means that perfect round-tripping is not always possible. 
In particular in the netCDF data model dimensions are entities that can exist regardless of whether any variable possesses them. This is in contrast to `xarray's data model `_ (and hence :ref:`DataTree's data model `) in which the dimensions of a (Dataset/Tree) object are simply the set of dimensions present across all variables in that dataset. This means that if a netCDF file contains dimensions but no variables which possess those dimensions, these dimensions will not be present when that file is opened as a DataTree object. Saving this DataTree object to file will therefore not preserve these "unused" dimensions. .. _io.encoding: Reading encoded data ~~~~~~~~~~~~~~~~~~~~ NetCDF files follow some conventions for encoding datetime arrays (as numbers with a "units" attribute) and for packing and unpacking data (as described by the "scale_factor" and "add_offset" attributes). If the argument ``decode_cf=True`` (default) is given to :py:func:`open_dataset`, xarray will attempt to automatically decode the values in the netCDF objects according to `CF conventions`_. Sometimes this will fail, for example, if a variable has an invalid "units" or "calendar" attribute. For these cases, you can turn this decoding off manually. .. _CF conventions: https://cfconventions.org/ You can view this encoding information (among others) in the :py:attr:`DataArray.encoding` and :py:attr:`DataArray.encoding` attributes: .. jupyter-execute:: ds_disk["y"].encoding .. jupyter-execute:: ds_disk.encoding Note that all operations that manipulate variables other than indexing will remove encoding information. In some cases it is useful to intentionally reset a dataset's original encoding values. This can be done with either the :py:meth:`Dataset.drop_encoding` or :py:meth:`DataArray.drop_encoding` methods. .. jupyter-execute:: ds_no_encoding = ds_disk.drop_encoding() ds_no_encoding.encoding .. _combining multiple files: Reading multi-file datasets ........................... NetCDF files are often encountered in collections, e.g., with different files corresponding to different model runs or one file per timestamp. Xarray can straightforwardly combine such files into a single Dataset by making use of :py:func:`concat`, :py:func:`merge`, :py:func:`combine_nested` and :py:func:`combine_by_coords`. For details on the difference between these functions see :ref:`combining data`. Xarray includes support for manipulating datasets that don't fit into memory with dask_. If you have dask installed, you can open multiple files simultaneously in parallel using :py:func:`open_mfdataset`:: xr.open_mfdataset('my/files/*.nc', parallel=True) This function automatically concatenates and merges multiple files into a single xarray dataset. It is the recommended way to open multiple files with xarray. For more details on parallel reading, see :ref:`combining.multi`, :ref:`dask.io` and a `blog post`_ by Stephan Hoyer. :py:func:`open_mfdataset` takes many kwargs that allow you to control its behaviour (for e.g. ``parallel``, ``combine``, ``compat``, ``join``, ``concat_dim``). See its docstring for more details. .. note:: A common use-case involves a dataset distributed across a large number of files with each file containing a large number of variables. Commonly, a few of these variables need to be concatenated along a dimension (say ``"time"``), while the rest are equal across the datasets (ignoring floating point differences). 
The following command with suitable modifications (such as ``parallel=True``) works well with such datasets:: xr.open_mfdataset('my/files/*.nc', concat_dim="time", combine="nested", data_vars='minimal', coords='minimal', compat='override') This command concatenates variables along the ``"time"`` dimension, but only those that already contain the ``"time"`` dimension (``data_vars='minimal', coords='minimal'``). Variables that lack the ``"time"`` dimension are taken from the first dataset (``compat='override'``). .. _dask: https://www.dask.org .. _blog post: https://stephanhoyer.com/2015/06/11/xray-dask-out-of-core-labeled-arrays/ Sometimes multi-file datasets are not conveniently organized for easy use of :py:func:`open_mfdataset`. One can use the ``preprocess`` argument to provide a function that takes a dataset and returns a modified Dataset. :py:func:`open_mfdataset` will call ``preprocess`` on every dataset (corresponding to each file) prior to combining them. If :py:func:`open_mfdataset` does not meet your needs, other approaches are possible. The general pattern for parallel reading of multiple files using dask, modifying those datasets and then combining into a single ``Dataset`` is:: def modify(ds): # modify ds here return ds # this is basically what open_mfdataset does open_kwargs = dict(decode_cf=True, decode_times=False) open_tasks = [dask.delayed(xr.open_dataset)(f, **open_kwargs) for f in file_names] tasks = [dask.delayed(modify)(task) for task in open_tasks] datasets = dask.compute(tasks) # get a list of xarray.Datasets combined = xr.combine_nested(datasets) # or some combination of concat, merge As an example, here's how we could approximate ``MFDataset`` from the netCDF4 library:: from glob import glob import xarray as xr def read_netcdfs(files, dim): # glob expands paths with * to a list of files, like the unix shell paths = sorted(glob(files)) datasets = [xr.open_dataset(p) for p in paths] combined = xr.concat(datasets, dim) return combined combined = read_netcdfs('/all/my/files/*.nc', dim='time') This function will work in many cases, but it's not very robust. First, it never closes files, which means it will fail if you need to load more than a few thousand files. Second, it assumes that you want all the data from each file and that it can all fit into memory. In many situations, you only need a small subset or an aggregated summary of the data from each file. Here's a slightly more sophisticated example of how to remedy these deficiencies:: def read_netcdfs(files, dim, transform_func=None): def process_one_path(path): # use a context manager, to ensure the file gets closed after use with xr.open_dataset(path) as ds: # transform_func should do some sort of selection or # aggregation if transform_func is not None: ds = transform_func(ds) # load all data from the transformed dataset, to ensure we can # use it after closing each original file ds.load() return ds paths = sorted(glob(files)) datasets = [process_one_path(p) for p in paths] combined = xr.concat(datasets, dim) return combined # here we suppose we only care about the combined mean of each file; # you might also use indexing operations like .sel to subset datasets combined = read_netcdfs('/all/my/files/*.nc', dim='time', transform_func=lambda ds: ds.mean()) This pattern works well and is very robust. We've used similar code to process tens of thousands of files constituting 100s of GB of data. .. 
_io.netcdf.writing_encoded: Writing encoded data ~~~~~~~~~~~~~~~~~~~~ Conversely, you can customize how xarray writes netCDF files on disk by providing explicit encodings for each dataset variable. The ``encoding`` argument takes a dictionary with variable names as keys and variable specific encodings as values. These encodings are saved as attributes on the netCDF variables on disk, which allows xarray to faithfully read encoded data back into memory. It is important to note that using encodings is entirely optional: if you do not supply any of these encoding options, xarray will write data to disk using a default encoding, or the options in the ``encoding`` attribute, if set. This works perfectly fine in most cases, but encoding can be useful for additional control, especially for enabling compression. In the file on disk, these encodings are saved as attributes on each variable, which allow xarray and other CF-compliant tools for working with netCDF files to correctly read the data. Scaling and type conversions ............................ These encoding options (based on `CF Conventions on packed data`_) work on any version of the netCDF file format: - ``dtype``: Any valid NumPy dtype or string convertible to a dtype, e.g., ``'int16'`` or ``'float32'``. This controls the type of the data written on disk. - ``_FillValue``: Values of ``NaN`` in xarray variables are remapped to this value when saved on disk. This is important when converting floating point with missing values to integers on disk, because ``NaN`` is not a valid value for integer dtypes. By default, variables with float types are attributed a ``_FillValue`` of ``NaN`` in the output file, unless explicitly disabled with an encoding ``{'_FillValue': None}``. - ``scale_factor`` and ``add_offset``: Used to convert from encoded data on disk to to the decoded data in memory, according to the formula ``decoded = scale_factor * encoded + add_offset``. Please note that ``scale_factor`` and ``add_offset`` must be of same type and determine the type of the decoded data. These parameters can be fruitfully combined to compress discretized data on disk. For example, to save the variable ``foo`` with a precision of 0.1 in 16-bit integers while converting ``NaN`` to ``-9999``, we would use ``encoding={'foo': {'dtype': 'int16', 'scale_factor': 0.1, '_FillValue': -9999}}``. Compression and decompression with such discretization is extremely fast. .. _CF Conventions on packed data: https://cfconventions.org/cf-conventions/cf-conventions.html#packed-data .. _io.string-encoding: String encoding ............... Xarray can write unicode strings to netCDF files in two ways: - As variable length strings. This is only supported on netCDF4 (HDF5) files. - By encoding strings into bytes, and writing encoded bytes as a character array. The default encoding is UTF-8. By default, we use variable length strings for compatible files and fall-back to using encoded character arrays. Character arrays can be selected even for netCDF4 files by setting the ``dtype`` field in ``encoding`` to ``S1`` (corresponding to NumPy's single-character bytes dtype). If character arrays are used: - The string encoding that was used is stored on disk in the ``_Encoding`` attribute, which matches an ad-hoc convention `adopted by the netCDF4-Python library `_. At the time of this writing (October 2017), a standard convention for indicating string encoding for character arrays in netCDF files was `still under discussion `_. 
Technically, you can use `any string encoding recognized by Python `_ if you feel the need to deviate from UTF-8, by setting the ``_Encoding`` field in ``encoding``. But `we don't recommend it `_. - The character dimension name can be specified by the ``char_dim_name`` field of a variable's ``encoding``. If the name of the character dimension is not specified, the default is ``f'string{data.shape[-1]}'``. When decoding character arrays from existing files, the ``char_dim_name`` is added to the variables ``encoding`` to preserve if encoding happens, but the field can be edited by the user. .. warning:: Missing values in bytes or unicode string arrays (represented by ``NaN`` in xarray) are currently written to disk as empty strings ``''``. This means missing values will not be restored when data is loaded from disk. This behavior is likely to change in the future (:issue:`1647`). Unfortunately, explicitly setting a ``_FillValue`` for string arrays to handle missing values doesn't work yet either, though we also hope to fix this in the future. Chunk based compression ....................... ``zlib``, ``complevel``, ``fletcher32``, ``contiguous`` and ``chunksizes`` can be used for enabling netCDF4/HDF5's chunk based compression, as described in the `documentation for createVariable`_ for netCDF4-Python. This only works for netCDF4 files and thus requires using ``format='netCDF4'`` and either ``engine='netcdf4'`` or ``engine='h5netcdf'``. .. _documentation for createVariable: https://unidata.github.io/netcdf4-python/#netCDF4.Dataset.createVariable Chunk based gzip compression can yield impressive space savings, especially for sparse data, but it comes with significant performance overhead. HDF5 libraries can only read complete chunks back into memory, and maximum decompression speed is in the range of 50-100 MB/s. Worse, HDF5's compression and decompression currently cannot be parallelized with dask. For these reasons, we recommend trying discretization based compression (described above) first. Time units .......... The ``units`` and ``calendar`` attributes control how xarray serializes ``datetime64`` and ``timedelta64`` arrays to datasets on disk as numeric values. The ``units`` encoding should be a string like ``'days since 1900-01-01'`` for ``datetime64`` data or a string like ``'days'`` for ``timedelta64`` data. ``calendar`` should be one of the calendar types supported by netCDF4-python: ``'standard'``, ``'gregorian'``, ``'proleptic_gregorian'``, ``'noleap'``, ``'365_day'``, ``'360_day'``, ``'julian'``, ``'all_leap'``, ``'366_day'``. By default, xarray uses the ``'proleptic_gregorian'`` calendar and units of the smallest time difference between values, with a reference time of the first time value. .. _io.coordinates: Coordinates ........... You can control the ``coordinates`` attribute written to disk by specifying ``DataArray.encoding["coordinates"]``. If not specified, xarray automatically sets ``DataArray.encoding["coordinates"]`` to a space-delimited list of names of coordinate variables that share dimensions with the ``DataArray`` being written. This allows perfect roundtripping of xarray datasets but may not be desirable. When an xarray ``Dataset`` contains non-dimensional coordinates that do not share dimensions with any of the variables, these coordinate variable names are saved under a "global" ``"coordinates"`` attribute. This is not CF-compliant but again facilitates roundtripping of xarray datasets. 
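As a minimal sketch of controlling this attribute explicitly (the dataset, coordinate names and file name below are illustrative):

.. code-block:: python

    # Hypothetical example: list only "lat" in the "coordinates" attribute of
    # "foo" on disk, instead of the default space-delimited list xarray builds.
    ds = xr.Dataset(
        {"foo": ("x", np.arange(4.0))},
        coords={
            "lat": ("x", np.linspace(0.0, 3.0, 4)),
            "lon": ("x", np.linspace(10.0, 13.0, 4)),
        },
    )
    ds["foo"].encoding["coordinates"] = "lat"
    ds.to_netcdf("explicit_coordinates.nc")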
Invalid netCDF files ~~~~~~~~~~~~~~~~~~~~ The library ``h5netcdf`` allows writing some dtypes that aren't allowed in netCDF4 (see `h5netcdf documentation `_). This feature is available through :py:meth:`DataArray.to_netcdf` and :py:meth:`Dataset.to_netcdf` when used with ``engine="h5netcdf"``, only if ``invalid_netcdf=True`` is explicitly set. .. warning:: Note that this produces a file that is likely to be unreadable by other netCDF libraries! .. _io.hdf5: HDF5 ---- `HDF5`_ is both a file format and a data model for storing information. HDF5 stores data hierarchically, using groups to create a nested structure. HDF5 is a more general version of the netCDF4 data model, so the nested structure is one of many similarities between the two data formats. Reading HDF5 files in xarray requires the ``h5netcdf`` engine, which can be installed with ``conda install h5netcdf``. Once installed we can use xarray to open HDF5 files: .. code:: python xr.open_dataset("/path/to/my/file.h5") The similarities between HDF5 and netCDF4 mean that HDF5 data can be written with the same :py:meth:`Dataset.to_netcdf` method as used for netCDF4 data: .. jupyter-execute:: ds = xr.Dataset( {"foo": (("x", "y"), np.random.rand(4, 5))}, coords={ "x": [10, 20, 30, 40], "y": pd.date_range("2000-01-01", periods=5), "z": ("x", list("abcd")), }, ) .. jupyter-execute:: :hide-code: # Check if the file exists and if not, create it if not os.path.exists("saved_on_disk.h5"): ds.to_netcdf("saved_on_disk.h5") .. code:: python ds.to_netcdf("saved_on_disk.h5") Groups ~~~~~~ If you have multiple or highly nested groups, xarray by default may not read the group that you want. A particular group of an HDF5 file can be specified using the ``group`` argument: .. code:: python xr.open_dataset("/path/to/my/file.h5", group="/my/group") While xarray cannot interrogate an HDF5 file to determine which groups are available, the HDF5 Python reader `h5py`_ can be used instead. Natively the xarray data structures can only handle one level of nesting, organized as DataArrays inside of Datasets. If your HDF5 file has additional levels of hierarchy you can only access one group at a time and will need to specify group names. .. _HDF5: https://hdfgroup.github.io/hdf5/index.html .. _h5py: https://www.h5py.org/ .. _io.zarr: Zarr ---- `Zarr`_ is a Python package that provides an implementation of chunked, compressed, N-dimensional arrays. Zarr has the ability to store arrays in a range of ways, including in memory, in files, and in cloud-based object storage such as `Amazon S3`_ and `Google Cloud Storage`_. Xarray's Zarr backend allows xarray to leverage these capabilities, including the ability to store and analyze datasets far too large to fit onto disk (particularly :ref:`in combination with dask `). Xarray can't open just any zarr dataset, because xarray requires special metadata (attributes) describing the dataset dimensions and coordinates. At this time, xarray can only open zarr datasets with these special attributes, such as zarr datasets written by xarray, `netCDF `_, or `GDAL `_. For implementation details, see :ref:`zarr_encoding`. To write a dataset with zarr, we use the :py:meth:`Dataset.to_zarr` method. To write to a local directory, we pass a path to a directory: .. jupyter-execute:: zarr_filename = "example.zarr" .. jupyter-execute:: :hide-code: import os.path import tempfile tempdir = tempfile.TemporaryDirectory() zarr_filename = os.path.join(tempdir.name, zarr_filename) ..
jupyter-execute:: :stderr: ds = xr.Dataset( {"foo": (("x", "y"), np.random.rand(4, 5))}, coords={ "x": [10, 20, 30, 40], "y": pd.date_range("2000-01-01", periods=5), "z": ("x", list("abcd")), }, ) ds.to_zarr(zarr_filename, zarr_format=2, consolidated=False) (The suffix ``.zarr`` is optional--just a reminder that a zarr store lives there.) If the directory does not exist, it will be created. If a zarr store is already present at that path, an error will be raised, preventing it from being overwritten. To override this behavior and overwrite an existing store, add ``mode='w'`` when invoking :py:meth:`~Dataset.to_zarr`. DataArrays can also be saved to disk using the :py:meth:`DataArray.to_zarr` method, and loaded from disk using the :py:func:`open_dataarray` function with ``engine='zarr'``. Similar to :py:meth:`DataArray.to_netcdf`, :py:meth:`DataArray.to_zarr` will convert the ``DataArray`` to a ``Dataset`` before saving, and then convert back when loading, ensuring that the ``DataArray`` that is loaded is always exactly the same as the one that was saved. .. note:: xarray does not write `NCZarr `_ attributes. Therefore, NCZarr data must be opened in read-only mode. To store variable length strings, convert them to object arrays first with ``dtype=object``. To read back a zarr dataset that has been created this way, we use the :py:func:`open_zarr` method: .. jupyter-execute:: ds_zarr = xr.open_zarr(zarr_filename, consolidated=False) ds_zarr Cloud Storage Buckets ~~~~~~~~~~~~~~~~~~~~~ It is possible to read and write xarray datasets directly from / to cloud storage buckets using zarr. This example uses the `gcsfs`_ package to provide an interface to `Google Cloud Storage`_. General `fsspec`_ URLs, those that begin with ``s3://`` or ``gcs://`` for example, are parsed and the store set up for you automatically when reading. You should include any arguments to the storage backend as the key ```storage_options``, part of ``backend_kwargs``. .. code:: python ds_gcs = xr.open_dataset( "gcs:///path.zarr", backend_kwargs={ "storage_options": {"project": "", "token": None} }, engine="zarr", ) This also works with ``open_mfdataset``, allowing you to pass a list of paths or a URL to be interpreted as a glob string. For writing, you may either specify a bucket URL or explicitly set up a ``zarr.abc.store.Store`` instance, as follows: .. tab:: URL .. code:: python # write to the bucket via GCS URL ds.to_zarr("gs://") # read it back ds_gcs = xr.open_zarr("gs://") .. tab:: fsspec .. code:: python import gcsfs import zarr # manually manage the cloud filesystem connection -- useful, for example, # when you need to manage permissions to cloud resources fs = gcsfs.GCSFileSystem(project="", token=None) zstore = zarr.storage.FsspecStore(fs, path="") # write to the bucket ds.to_zarr(store=zstore) # read it back ds_gcs = xr.open_zarr(zstore) .. tab:: obstore .. code:: python import obstore import zarr # alternatively, obstore offers a modern, performant interface for # cloud buckets gcsstore = obstore.store.GCSStore( "", prefix="", skip_signature=True ) zstore = zarr.store.ObjectStore(gcsstore) # write to the bucket ds.to_zarr(store=zstore) # read it back ds_gcs = xr.open_zarr(zstore) .. _fsspec: https://filesystem-spec.readthedocs.io/en/latest/ .. _obstore: https://developmentseed.org/obstore/latest/ .. _Zarr: https://zarr.readthedocs.io/ .. _Amazon S3: https://aws.amazon.com/s3/ .. _Google Cloud Storage: https://cloud.google.com/storage/ .. _gcsfs: https://github.com/fsspec/gcsfs .. 
_io.zarr.distributed_writes: Distributed writes ~~~~~~~~~~~~~~~~~~ Xarray will natively use dask to write in parallel to a zarr store, which should satisfy most moderately sized datasets. For more flexible parallelization, we can use ``region`` to write to limited regions of arrays in an existing Zarr store. To scale this up to writing large datasets, first create an initial Zarr store without writing all of its array data. This can be done by first creating a ``Dataset`` with dummy values stored in :ref:`dask `, and then calling ``to_zarr`` with ``compute=False`` to write only metadata (including ``attrs``) to Zarr: .. jupyter-execute:: :hide-code: tempdir.cleanup() .. jupyter-execute:: import dask.array # The values of this dask array are entirely irrelevant; only the dtype, # shape and chunks are used dummies = dask.array.zeros(30, chunks=10) ds = xr.Dataset({"foo": ("x", dummies)}, coords={"x": np.arange(30)}) # Now we write the metadata without computing any array values ds.to_zarr(zarr_filename, compute=False, consolidated=False) Now, a Zarr store with the correct variable shapes and attributes exists that can be filled out by subsequent calls to ``to_zarr``. Setting ``region="auto"`` will open the existing store and determine the correct alignment of the new data with the existing dimensions, or as an explicit mapping from dimension names to Python ``slice`` objects indicating where the data should be written (in index space, not label space), e.g., .. jupyter-execute:: # For convenience, we'll slice a single dataset, but in the real use-case # we would create them separately possibly even from separate processes. ds = xr.Dataset({"foo": ("x", np.arange(30))}, coords={"x": np.arange(30)}) # Any of the following region specifications are valid ds.isel(x=slice(0, 10)).to_zarr(zarr_filename, region="auto", consolidated=False) ds.isel(x=slice(10, 20)).to_zarr(zarr_filename, region={"x": "auto"}, consolidated=False) ds.isel(x=slice(20, 30)).to_zarr(zarr_filename, region={"x": slice(20, 30)}, consolidated=False) Concurrent writes with ``region`` are safe as long as they modify distinct chunks in the underlying Zarr arrays (or use an appropriate ``lock``). As a safety check to make it harder to inadvertently override existing values, if you set ``region`` then *all* variables included in a Dataset must have dimensions included in ``region``. Other variables (typically coordinates) need to be explicitly dropped and/or written in a separate calls to ``to_zarr`` with ``mode='a'``. Zarr Compressors and Filters ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ There are many different `options for compression and filtering possible with zarr `_. These options can be passed to the ``to_zarr`` method as variable encoding. For example: .. jupyter-execute:: zarr_filename = "foo.zarr" .. jupyter-execute:: :hide-code: import os.path import tempfile tempdir = tempfile.TemporaryDirectory() zarr_filename = os.path.join(tempdir.name, zarr_filename) .. jupyter-execute:: import zarr from zarr.codecs import BloscCodec compressor = BloscCodec(cname="zstd", clevel=3, shuffle="shuffle") ds.to_zarr(zarr_filename, consolidated=False, encoding={"foo": {"compressors": [compressor]}}) .. note:: Not all native zarr compression and filtering options have been tested with xarray. .. _io.zarr.appending: Modifying existing Zarr stores ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Xarray supports several ways of incrementally writing variables to a Zarr store. 
These options are useful for scenarios when it is infeasible or undesirable to write your entire dataset at once. 1. Use ``mode='a'`` to add or overwrite entire variables, 2. Use ``append_dim`` to resize and append to existing variables, and 3. Use ``region`` to write to limited regions of existing arrays. .. tip:: For ``Dataset`` objects containing dask arrays, a single call to ``to_zarr()`` will write all of your data in parallel. .. warning:: Alignment of coordinates is currently not checked when modifying an existing Zarr store. It is up to the user to ensure that coordinates are consistent. To add or overwrite entire variables, simply call :py:meth:`~Dataset.to_zarr` with ``mode='a'`` on a Dataset containing the new variables, passing in an existing Zarr store or path to a Zarr store. To resize and then append values along an existing dimension in a store, set ``append_dim``. This is a good option if data always arrives in a particular order, e.g., for time-stepping a simulation: .. jupyter-execute:: :hide-code: tempdir.cleanup() .. jupyter-execute:: ds1 = xr.Dataset( {"foo": (("x", "y", "t"), np.random.rand(4, 5, 2))}, coords={ "x": [10, 20, 30, 40], "y": [1, 2, 3, 4, 5], "t": pd.date_range("2001-01-01", periods=2), }, ) ds1.to_zarr(zarr_filename, consolidated=False) .. jupyter-execute:: ds2 = xr.Dataset( {"foo": (("x", "y", "t"), np.random.rand(4, 5, 2))}, coords={ "x": [10, 20, 30, 40], "y": [1, 2, 3, 4, 5], "t": pd.date_range("2001-01-03", periods=2), }, ) ds2.to_zarr(zarr_filename, append_dim="t", consolidated=False) .. _io.zarr.writing_chunks: Specifying chunks in a zarr store ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Chunk sizes may be specified in one of three ways when writing to a zarr store: 1. Manual chunk sizing through the use of the ``encoding`` argument in :py:meth:`Dataset.to_zarr`: 2. Automatic chunking based on chunks in dask arrays 3. Default chunk behavior determined by the zarr library The resulting chunks will be determined based on the order of the above list; dask chunks will be overridden by manually-specified chunks in the encoding argument, and the presence of either dask chunks or chunks in the ``encoding`` attribute will supersede the default chunking heuristics in zarr. Importantly, this logic applies to every array in the zarr store individually, including coordinate arrays. Therefore, if a dataset contains one or more dask arrays, it may still be desirable to specify a chunk size for the coordinate arrays (for example, with a chunk size of ``-1`` to include the full coordinate). To specify chunks manually using the ``encoding`` argument, provide a nested dictionary with the structure ``{'variable_or_coord_name': {'chunks': chunks_tuple}}``. .. note:: The positional ordering of the chunks in the encoding argument must match the positional ordering of the dimensions in each array. Watch out for arrays with differently-ordered dimensions within a single Dataset. For example, let's say we're working with a dataset with dimensions ``('time', 'x', 'y')``, a variable ``Tair`` which is chunked in ``x`` and ``y``, and two multi-dimensional coordinates ``xc`` and ``yc``: .. jupyter-execute:: ds = xr.tutorial.open_dataset("rasm") ds["Tair"] = ds["Tair"].chunk({"x": 100, "y": 100}) ds These multi-dimensional coordinates are only two-dimensional and take up very little space on disk or in memory, yet when writing to disk the default zarr behavior is to split them into chunks: .. 
jupyter-execute:: ds.to_zarr(zarr_filename, consolidated=False, mode="w") !tree -I zarr.json $zarr_filename This may cause unwanted overhead on some systems, such as when reading from a cloud storage provider. To disable this chunking, we can specify a chunk size equal to the shape of each coordinate array in the ``encoding`` argument: .. jupyter-execute:: ds.to_zarr( zarr_filename, encoding={"xc": {"chunks": ds.xc.shape}, "yc": {"chunks": ds.yc.shape}}, consolidated=False, mode="w", ) !tree -I zarr.json $zarr_filename The number of chunks on Tair matches our dask chunks, while there is now only a single chunk in the directory stores of each coordinate. Groups ~~~~~~ Nested groups in zarr stores can be represented by loading the store as a :py:class:`xarray.DataTree` object, similarly to netCDF. To open a whole zarr store as a tree of groups use the :py:func:`open_datatree` function. To save a ``DataTree`` object as a zarr store containing many groups, use the :py:meth:`xarray.DataTree.to_zarr()` method. .. note:: Note that perfect round-tripping should always be possible with a zarr store (:ref:`unlike for netCDF files `), as zarr does not support "unused" dimensions. For the root group the same restrictions (:ref:`as for netCDF files `) apply. Due to file format specifications, the on-disk root group name is always ``"/"``, overriding any given ``DataTree`` root node name. .. _io.zarr.consolidated_metadata: Consolidated Metadata ~~~~~~~~~~~~~~~~~~~~~ Xarray needs to read all of the zarr metadata when it opens a dataset. In some storage mediums, such as with cloud object storage (e.g. `Amazon S3`_), this can introduce significant overhead, because two separate HTTP calls to the object store must be made for each variable in the dataset. By default Xarray uses a feature called *consolidated metadata*, storing all metadata for the entire dataset with a single key (by default called ``.zmetadata``). This typically drastically speeds up opening the store. (For more information on this feature, consult the `zarr docs on consolidating metadata `_.) By default, xarray writes consolidated metadata and attempts to read stores with consolidated metadata, falling back to use non-consolidated metadata for reads. Because this fall-back option is so much slower, xarray issues a ``RuntimeWarning`` with guidance when reading with consolidated metadata fails: Failed to open Zarr store with consolidated metadata, falling back to try reading non-consolidated metadata. This is typically much slower for opening a dataset. To silence this warning, consider: 1. Consolidating metadata in this existing store with :py:func:`zarr.consolidate_metadata`. 2. Explicitly setting ``consolidated=False``, to avoid trying to read consolidated metadata. 3. Explicitly setting ``consolidated=True``, to raise an error in this case instead of falling back to try reading non-consolidated metadata. Fill Values ~~~~~~~~~~~ Zarr arrays have a ``fill_value`` that is used for chunks that were never written to disk. For the Zarr version 2 format, Xarray will set ``fill_value`` to be equal to the CF/NetCDF ``"_FillValue"``. This is ``np.nan`` by default for floats, and unset otherwise. Note that the Zarr library will set a default ``fill_value`` if not specified (usually ``0``). For the Zarr version 3 format, ``_FillValue`` and ``fill_value`` are decoupled. So you can set ``fill_value`` in ``encoding`` as usual. 
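For example, a minimal sketch of doing so (the store name, the small example dataset and the variable name ``foo`` below are illustrative assumptions, not the dataset used elsewhere on this page):

.. code:: python

    import numpy as np
    import xarray as xr

    # write a small dataset to a version 3 store, giving the variable "foo"
    # an explicit on-disk fill_value through its per-variable encoding
    ds_v3 = xr.Dataset({"foo": ("x", np.arange(5.0))})
    ds_v3.to_zarr(
        "example_v3.zarr",
        zarr_format=3,
        consolidated=False,
        encoding={"foo": {"fill_value": -9999.0}},
    )
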
Note that at read-time, you can control whether ``_FillValue`` is masked using the ``mask_and_scale`` kwarg; and whether Zarr's ``fill_value`` is treated as synonymous with ``_FillValue`` using the ``use_zarr_fill_value_as_mask`` kwarg to :py:func:`xarray.open_zarr`. .. _io.kerchunk: Kerchunk -------- `Kerchunk `_ is a Python library that allows you to access chunked and compressed data formats (such as NetCDF3, NetCDF4, HDF5, GRIB2, TIFF & FITS), many of which are primary data formats for many data archives, by viewing the whole archive as an ephemeral `Zarr`_ dataset which allows for parallel, chunk-specific access. Instead of creating a new copy of the dataset in the Zarr spec/format or downloading the files locally, Kerchunk reads through the data archive and extracts the byte range and compression information of each chunk and saves it as a ``reference``. These references are then saved as ``json`` files or ``parquet`` (more efficient) for later use. You can view some of these stored in the ``references`` directory `here `_. .. note:: These references follow this `specification `_. Packages like `kerchunk`_ and `virtualizarr `_ help in creating and reading these references. Reading these data archives becomes really easy with ``kerchunk`` in combination with ``xarray``, especially when these archives are large in size. A single combined reference can refer to thousands of the original data files present in these archives. You can view the whole dataset from this combined reference using the above packages. The following example shows opening a single ``json`` reference to the ``saved_on_disk.h5`` file created above. If the file were instead stored remotely (e.g. ``s3://saved_on_disk.h5``) you can use ``storage_options`` that are used to `configure fsspec `_: .. jupyter-execute:: ds_kerchunked = xr.open_dataset( "./combined.json", engine="kerchunk", storage_options={}, ) ds_kerchunked .. note:: You can refer to the `project pythia kerchunk cookbook `_ and the `pangeo guide on kerchunk `_ for more information. .. _io.iris: Iris ---- The Iris_ tool allows easy reading of common meteorological and climate model formats (including GRIB and UK MetOffice PP files) into ``Cube`` objects which are in many ways very similar to ``DataArray`` objects, while enforcing a CF-compliant data model. DataArray ``to_iris`` and ``from_iris`` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If iris is installed, xarray can convert a ``DataArray`` into a ``Cube`` using :py:meth:`DataArray.to_iris`: .. jupyter-execute:: da = xr.DataArray( np.random.rand(4, 5), dims=["x", "y"], coords=dict(x=[10, 20, 30, 40], y=pd.date_range("2000-01-01", periods=5)), ) cube = da.to_iris() print(cube) Conversely, we can create a new ``DataArray`` object from a ``Cube`` using :py:meth:`DataArray.from_iris`: .. jupyter-execute:: da_cube = xr.DataArray.from_iris(cube) da_cube Ncdata ~~~~~~ Ncdata_ provides more sophisticated means of transferring data, including entire datasets. It uses the file saving and loading functions in both projects to provide a more "correct" translation between them, but still with very low overhead and not using actual disk files. Here we load an xarray dataset and convert it to Iris cubes: .. jupyter-execute:: :stderr: ds = xr.tutorial.open_dataset("air_temperature_gradient") cubes = ncdata.iris_xarray.cubes_from_xarray(ds) print(cubes) .. jupyter-execute:: print(cubes[1]) And we can convert the cubes back to an xarray dataset: .. 
jupyter-execute:: # ensure dataset-level and variable-level attributes loaded correctly iris.FUTURE.save_split_attrs = True ds = ncdata.iris_xarray.cubes_to_xarray(cubes) ds Ncdata can also adjust file data within load and save operations, to fix data loading problems or provide exact save formatting without needing to modify files on disk. See for example : `ncdata usage examples`_ .. _Iris: https://scitools-iris.readthedocs.io .. _Ncdata: https://ncdata.readthedocs.io/en/latest/index.html .. _ncdata usage examples: https://github.com/pp-mo/ncdata/tree/v0.1.2?tab=readme-ov-file#correct-a-miscoded-attribute-in-iris-input .. _io.opendap: OPeNDAP ------- Xarray includes support for `OPeNDAP`__ (via the netCDF4 library or Pydap), which lets us access large datasets over HTTP. __ https://www.opendap.org/ For example, we can open a connection to GBs of weather data produced by the `PRISM`__ project, and hosted by `IRI`__ at Columbia: __ https://www.prism.oregonstate.edu/ __ https://iri.columbia.edu/ .. jupyter-input:: remote_data = xr.open_dataset( "http://iridl.ldeo.columbia.edu/SOURCES/.OSU/.PRISM/.monthly/dods", decode_times=False, ) remote_data .. jupyter-output:: Dimensions: (T: 1422, X: 1405, Y: 621) Coordinates: * X (X) float32 -125.0 -124.958 -124.917 -124.875 -124.833 -124.792 -124.75 ... * T (T) float32 -779.5 -778.5 -777.5 -776.5 -775.5 -774.5 -773.5 -772.5 -771.5 ... * Y (Y) float32 49.9167 49.875 49.8333 49.7917 49.75 49.7083 49.6667 49.625 ... Data variables: ppt (T, Y, X) float64 ... tdmean (T, Y, X) float64 ... tmax (T, Y, X) float64 ... tmin (T, Y, X) float64 ... Attributes: Conventions: IRIDL expires: 1375315200 .. TODO: update this example to show off decode_cf? .. note:: Like many real-world datasets, this dataset does not entirely follow `CF conventions`_. Unexpected formats will usually cause xarray's automatic decoding to fail. The way to work around this is to either set ``decode_cf=False`` in ``open_dataset`` to turn off all use of CF conventions, or by only disabling the troublesome parser. In this case, we set ``decode_times=False`` because the time axis here provides the calendar attribute in a format that xarray does not expect (the integer ``360`` instead of a string like ``'360_day'``). We can select and slice this data any number of times, and nothing is loaded over the network until we look at particular values: .. jupyter-input:: tmax = remote_data["tmax"][:500, ::3, ::3] tmax .. jupyter-output:: [48541500 values with dtype=float64] Coordinates: * Y (Y) float32 49.9167 49.7917 49.6667 49.5417 49.4167 49.2917 ... * X (X) float32 -125.0 -124.875 -124.75 -124.625 -124.5 -124.375 ... * T (T) float32 -779.5 -778.5 -777.5 -776.5 -775.5 -774.5 -773.5 ... Attributes: pointwidth: 120 standard_name: air_temperature units: Celsius_scale expires: 1443657600 .. jupyter-input:: # the data is downloaded automatically when we make the plot tmax[0].plot() .. image:: ../_static/opendap-prism-tmax.png Some servers require authentication before we can access the data. Pydap uses a `Requests`__ session object (which the user can pre-define), and this session object can recover `authentication`__` credentials from a locally stored ``.netrc`` file. 
For example, to connect to a server that requires NASA's URS authentication, with the username/password credentials stored on a locally accessible ``.netrc``, access to OPeNDAP data should be as simple as this:: import xarray as xr import requests my_session = requests.Session() ds_url = 'https://gpm1.gesdisc.eosdis.nasa.gov/opendap/hyrax/example.nc' ds = xr.open_dataset(ds_url, session=my_session, engine="pydap") Moreover, a bearer token header can be included in a `Requests`__ session object, allowing for token-based authentication which OPeNDAP servers can use to avoid some redirects. Lastly, OPeNDAP servers may provide endpoint URLs for different OPeNDAP protocols, DAP2 and DAP4. To specify which protocol between the two options to use, you can replace the scheme of the url with the name of the protocol. For example:: # dap2 url ds_url = 'dap2://gpm1.gesdisc.eosdis.nasa.gov/opendap/hyrax/example.nc' # dap4 url ds_url = 'dap4://gpm1.gesdisc.eosdis.nasa.gov/opendap/hyrax/example.nc' While most OPeNDAP servers implement DAP2, not all servers implement DAP4. It is recommended to check if the URL you are using `supports DAP4`__ by checking the URL on a browser. __ https://docs.python-requests.org __ https://pydap.github.io/pydap/en/notebooks/Authentication.html __ https://pydap.github.io/pydap/en/faqs/dap2_or_dap4_url.html .. _io.pickle: Pickle ------ The simplest way to serialize an xarray object is to use Python's built-in pickle module: .. jupyter-execute:: import pickle # use the highest protocol (-1) because it is way faster than the default # text based pickle format pkl = pickle.dumps(ds, protocol=-1) pickle.loads(pkl) Pickling is important because it doesn't require any external libraries and lets you use xarray objects with Python modules like :py:mod:`multiprocessing` or :ref:`Dask `. However, pickling is **not recommended for long-term storage**. Restoring a pickle requires that the internal structure of the types for the pickled data remain unchanged. Because the internal design of xarray is still being refined, we make no guarantees (at this point) that objects pickled with this version of xarray will work in future versions. .. note:: When pickling an object opened from a NetCDF file, the pickle file will contain a reference to the file on disk. If you want to store the actual array values, load it into memory first with :py:meth:`Dataset.load` or :py:meth:`Dataset.compute`. .. _dictionary io: Dictionary ---------- We can convert a ``Dataset`` (or a ``DataArray``) to a dict using :py:meth:`Dataset.to_dict`: .. jupyter-execute:: ds = xr.Dataset({"foo": ("x", np.arange(30))}) d = ds.to_dict() d We can create a new xarray object from a dict using :py:meth:`Dataset.from_dict`: .. jupyter-execute:: ds_dict = xr.Dataset.from_dict(d) ds_dict Dictionary support allows for flexible use of xarray objects. It doesn't require external libraries and dicts can easily be pickled, or converted to json, or geojson. All the values are converted to lists, so dicts might be quite large. To export just the dataset schema without the data itself, use the ``data=False`` option: .. jupyter-execute:: ds.to_dict(data=False) .. jupyter-execute:: :hide-code: # We're now done with the dataset named `ds`. Although the `with` statement closed # the dataset, displaying the unpickled pickle of `ds` re-opened "saved_on_disk.nc". # However, `ds` (rather than the unpickled dataset) refers to the open file. Delete # `ds` to close the file. 
del ds tempdir.cleanup() This can be useful for generating indices of dataset contents to expose to search indices or other automated data discovery tools. .. _io.rasterio: Rasterio -------- GDAL readable raster data using `rasterio`_ such as GeoTIFFs can be opened using the `rioxarray`_ extension. `rioxarray`_ can also handle geospatial related tasks such as re-projecting and clipping. .. jupyter-input:: import rioxarray rds = rioxarray.open_rasterio("RGB.byte.tif") rds .. jupyter-output:: [1703814 values with dtype=uint8] Coordinates: * band (band) int64 1 2 3 * y (y) float64 2.827e+06 2.826e+06 ... 2.612e+06 2.612e+06 * x (x) float64 1.021e+05 1.024e+05 ... 3.389e+05 3.392e+05 spatial_ref int64 0 Attributes: STATISTICS_MAXIMUM: 255 STATISTICS_MEAN: 29.947726688477 STATISTICS_MINIMUM: 0 STATISTICS_STDDEV: 52.340921626611 transform: (300.0379266750948, 0.0, 101985.0, 0.0, -300.0417827... _FillValue: 0.0 scale_factor: 1.0 add_offset: 0.0 grid_mapping: spatial_ref .. jupyter-input:: rds.rio.crs # CRS.from_epsg(32618) rds4326 = rds.rio.reproject("epsg:4326") rds4326.rio.crs # CRS.from_epsg(4326) rds4326.rio.to_raster("RGB.byte.4326.tif") .. _rasterio: https://rasterio.readthedocs.io/en/latest/ .. _rioxarray: https://corteva.github.io/rioxarray/stable/ .. _test files: https://github.com/rasterio/rasterio/blob/master/tests/data/RGB.byte.tif .. _pyproj: https://github.com/pyproj4/pyproj .. _io.cfgrib: .. jupyter-execute:: :hide-code: tempdir.cleanup() GRIB format via cfgrib ---------------------- Xarray supports reading GRIB files via ECMWF cfgrib_ python driver, if it is installed. To open a GRIB file supply ``engine='cfgrib'`` to :py:func:`open_dataset` after installing cfgrib_: .. jupyter-input:: ds_grib = xr.open_dataset("example.grib", engine="cfgrib") We recommend installing cfgrib via conda:: conda install -c conda-forge cfgrib .. _cfgrib: https://github.com/ecmwf/cfgrib CSV and other formats supported by pandas ----------------------------------------- For more options (tabular formats and CSV files in particular), consider exporting your objects to pandas and using its broad range of `IO tools`_. For CSV files, one might also consider `xarray_extras`_. .. _xarray_extras: https://xarray-extras.readthedocs.io/en/latest/api/csv.html .. _IO tools: https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html Third party libraries --------------------- More formats are supported by extension libraries: - `xarray-mongodb `_: Store xarray objects on MongoDB python-xarray-2026.01.0/doc/user-guide/combining.rst0000664000175000017500000002742115136607163022366 0ustar alastairalastair.. _combining data: Combining data -------------- .. jupyter-execute:: :hide-code: :hide-output: import numpy as np import pandas as pd import xarray as xr np.random.seed(123456) %xmode minimal * For combining datasets or data arrays along a single dimension, see concatenate_. * For combining datasets with different variables, see merge_. * For combining datasets or data arrays with different indexes or missing values, see combine_. * For combining datasets or data arrays along multiple dimensions see combining.multi_. .. _concatenate: Concatenate ~~~~~~~~~~~ To combine :py:class:`~xarray.Dataset` / :py:class:`~xarray.DataArray` objects along an existing or new dimension into a larger object, you can use :py:func:`~xarray.concat`. ``concat`` takes an iterable of ``DataArray`` or ``Dataset`` objects, as well as a dimension name, and concatenates along that dimension: .. 
jupyter-execute:: da = xr.DataArray( np.arange(6).reshape(2, 3), [("x", ["a", "b"]), ("y", [10, 20, 30])] ) da.isel(y=slice(0, 1)) # same as da[:, :1] .. jupyter-execute:: # This resembles how you would use np.concatenate: xr.concat([da[:, :1], da[:, 1:]], dim="y") .. jupyter-execute:: # For more friendly pandas-like indexing you can use: xr.concat([da.isel(y=slice(0, 1)), da.isel(y=slice(1, None))], dim="y") In addition to combining along an existing dimension, ``concat`` can create a new dimension by stacking lower dimensional arrays together: .. jupyter-execute:: da.sel(x="a") .. jupyter-execute:: xr.concat([da.isel(x=0), da.isel(x=1)], "x") If the second argument to ``concat`` is a new dimension name, the arrays will be concatenated along that new dimension, which is always inserted as the first dimension: .. jupyter-execute:: da0 = da.isel(x=0, drop=True) da1 = da.isel(x=1, drop=True) xr.concat([da0, da1], "new_dim") The second argument to ``concat`` can also be an :py:class:`~pandas.Index` or :py:class:`~xarray.DataArray` object as well as a string, in which case it is used to label the values along the new dimension: .. jupyter-execute:: xr.concat([da0, da1], pd.Index([-90, -100], name="new_dim")) Of course, ``concat`` also works on ``Dataset`` objects: .. jupyter-execute:: ds = da.to_dataset(name="foo") xr.concat([ds.sel(x="a"), ds.sel(x="b")], "x") :py:func:`~xarray.concat` has a number of options which provide deeper control over which variables are concatenated and how it handles conflicting variables between datasets. With the default parameters, xarray will load some coordinate variables into memory to compare them between datasets. This may be prohibitively expensive if you are manipulating your dataset lazily using :ref:`dask`. .. note:: In a future version of xarray the default values for many of these options will change. You can opt into the new default values early using ``xr.set_options(use_new_combine_kwarg_defaults=True)``. .. _merge: Merge ~~~~~ To combine variables and coordinates between multiple ``DataArray`` and/or ``Dataset`` objects, use :py:func:`~xarray.merge`. It can merge a list of ``Dataset``, ``DataArray`` or dictionaries of objects convertible to ``DataArray`` objects: .. jupyter-execute:: xr.merge([ds, ds.rename({"foo": "bar"})]) .. jupyter-execute:: xr.merge([xr.DataArray(n, name="var%d" % n) for n in range(5)]) If you merge another dataset (or a dictionary including data array objects), by default the resulting dataset will be aligned on the **union** of all index coordinates: .. note:: In a future version of xarray the default value for ``join`` and ``compat`` will change. This change will mean that xarray will no longer attempt to align the indices of the merged dataset. You can opt into the new default values early using ``xr.set_options(use_new_combine_kwarg_defaults=True)``. Or explicitly set ``join='outer'`` to preserve old behavior. .. jupyter-execute:: other = xr.Dataset({"bar": ("x", [1, 2, 3, 4]), "x": list("abcd")}) xr.merge([ds, other], join="outer") This ensures that ``merge`` is non-destructive. ``xarray.MergeError`` is raised if you attempt to merge two variables with the same name but different values: .. jupyter-execute:: :raises: xr.merge([ds, ds + 1]) .. note:: In a future version of xarray the default value for ``compat`` will change from ``compat='no_conflicts'`` to ``compat='override'``. In this scenario the values in the first object override all the values in other objects. .. 
jupyter-execute:: xr.merge([ds, ds + 1], compat="override") The same non-destructive merging between ``DataArray`` index coordinates is used in the :py:class:`~xarray.Dataset` constructor: .. jupyter-execute:: xr.Dataset({"a": da.isel(x=slice(0, 1)), "b": da.isel(x=slice(1, 2))}) .. _combine: Combine ~~~~~~~ The instance method :py:meth:`~xarray.DataArray.combine_first` combines two datasets/data arrays and defaults to non-null values in the calling object, using values from the called object to fill holes. The resulting coordinates are the union of coordinate labels. Vacant cells as a result of the outer-join are filled with ``NaN``. For example: .. jupyter-execute:: ar0 = xr.DataArray([[0, 0], [0, 0]], [("x", ["a", "b"]), ("y", [-1, 0])]) ar1 = xr.DataArray([[1, 1], [1, 1]], [("x", ["b", "c"]), ("y", [0, 1])]) ar0.combine_first(ar1) .. jupyter-execute:: ar1.combine_first(ar0) For datasets, ``ds0.combine_first(ds1)`` works similarly to ``xr.merge([ds0, ds1])``, except that ``xr.merge`` raises ``MergeError`` when there are conflicting values in variables to be merged, whereas ``.combine_first`` defaults to the calling object's values. .. note:: In a future version of xarray the default options for ``xr.merge`` will change such that the behavior matches ``combine_first``. .. _update: Update ~~~~~~ In contrast to ``merge``, :py:meth:`~xarray.Dataset.update` modifies a dataset in-place without checking for conflicts, and will overwrite any existing variables with new values: .. jupyter-execute:: ds.update({"space": ("space", [10.2, 9.4, 3.9])}) However, dimensions are still required to be consistent between different Dataset variables, so you cannot change the size of a dimension unless you replace all dataset variables that use it. ``update`` also performs automatic alignment if necessary. Unlike ``merge``, it maintains the alignment of the original array instead of merging indexes: .. jupyter-execute:: ds.update(other) The exact same alignment logic when setting a variable with ``__setitem__`` syntax: .. jupyter-execute:: ds["baz"] = xr.DataArray([9, 9, 9, 9, 9], coords=[("x", list("abcde"))]) ds.baz Equals and identical ~~~~~~~~~~~~~~~~~~~~ Xarray objects can be compared by using the :py:meth:`~xarray.Dataset.equals`, :py:meth:`~xarray.Dataset.identical` and :py:meth:`~xarray.Dataset.broadcast_equals` methods. These methods are used by the optional ``compat`` argument on ``concat`` and ``merge``. :py:attr:`~xarray.Dataset.equals` checks dimension names, indexes and array values: .. jupyter-execute:: da.equals(da.copy()) :py:attr:`~xarray.Dataset.identical` also checks attributes, and the name of each object: .. jupyter-execute:: da.identical(da.rename("bar")) :py:attr:`~xarray.Dataset.broadcast_equals` does a more relaxed form of equality check that allows variables to have different dimensions, as long as values are constant along those new dimensions: .. jupyter-execute:: left = xr.Dataset(coords={"x": 0}) right = xr.Dataset({"x": [0, 0, 0]}) left.broadcast_equals(right) Like pandas objects, two xarray objects are still equal or identical if they have missing values marked by ``NaN`` in the same locations. In contrast, the ``==`` operation performs element-wise comparison (like numpy): .. jupyter-execute:: da == da.copy() Note that ``NaN`` does not compare equal to ``NaN`` in element-wise comparison; you may need to deal with missing values explicitly. .. 
_combining.no_conflicts: Merging with 'no_conflicts' ~~~~~~~~~~~~~~~~~~~~~~~~~~~ The ``compat`` argument ``'no_conflicts'`` is only available when combining xarray objects with ``merge``. In addition to the above comparison methods it allows the merging of xarray objects with locations where *either* have ``NaN`` values. This can be used to combine data with overlapping coordinates as long as any non-missing values agree or are disjoint: .. jupyter-execute:: ds1 = xr.Dataset({"a": ("x", [10, 20, 30, np.nan])}, {"x": [1, 2, 3, 4]}) ds2 = xr.Dataset({"a": ("x", [np.nan, 30, 40, 50])}, {"x": [2, 3, 4, 5]}) xr.merge([ds1, ds2], join="outer", compat="no_conflicts") Note that due to the underlying representation of missing values as floating point numbers (``NaN``), variable data type is not always preserved when merging in this manner. .. _combining.multi: Combining along multiple dimensions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ For combining many objects along multiple dimensions xarray provides :py:func:`~xarray.combine_nested` and :py:func:`~xarray.combine_by_coords`. These functions use a combination of ``concat`` and ``merge`` across different variables to combine many objects into one. :py:func:`~xarray.combine_nested` requires specifying the order in which the objects should be combined, while :py:func:`~xarray.combine_by_coords` attempts to infer this ordering automatically from the coordinates in the data. :py:func:`~xarray.combine_nested` is useful when you know the spatial relationship between each object in advance. The datasets must be provided in the form of a nested list, which specifies their relative position and ordering. A common task is collecting data from a parallelized simulation where each processor wrote out data to a separate file. A domain which was decomposed into 4 parts, 2 each along both the x and y axes, requires organising the datasets into a doubly-nested list, e.g: .. jupyter-execute:: arr = xr.DataArray( name="temperature", data=np.random.randint(5, size=(2, 2)), dims=["x", "y"] ) arr .. jupyter-execute:: ds_grid = [[arr, arr], [arr, arr]] xr.combine_nested(ds_grid, concat_dim=["x", "y"]) :py:func:`~xarray.combine_nested` can also be used to explicitly merge datasets with different variables. For example if we have 4 datasets, which are divided along two times, and contain two different variables, we can pass ``None`` to ``'concat_dim'`` to specify the dimension of the nested list over which we wish to use ``merge`` instead of ``concat``: .. jupyter-execute:: temp = xr.DataArray(name="temperature", data=np.random.randn(2), dims=["t"]) precip = xr.DataArray(name="precipitation", data=np.random.randn(2), dims=["t"]) ds_grid = [[temp, precip], [temp, precip]] xr.combine_nested(ds_grid, concat_dim=["t", None]) :py:func:`~xarray.combine_by_coords` is for combining objects which have dimension coordinates which specify their relationship to and order relative to one another, for example a linearly-increasing 'time' dimension coordinate. Here we combine two datasets using their common dimension coordinates. Notice they are concatenated in order based on the values in their dimension coordinates, not on their position in the list passed to ``combine_by_coords``. .. jupyter-execute:: x1 = xr.DataArray(name="foo", data=np.random.randn(3), coords=[("x", [0, 1, 2])]) x2 = xr.DataArray(name="foo", data=np.random.randn(3), coords=[("x", [3, 4, 5])]) xr.combine_by_coords([x2, x1]) These functions are used by :py:func:`~xarray.open_mfdataset` to open many files as one dataset. 
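For example, a minimal sketch (the glob pattern below is purely hypothetical, and the ``combine`` argument it uses is explained next):

.. code:: python

    import xarray as xr

    # open many files as one dataset, letting xarray order the pieces by the
    # values of their dimension coordinates; combine="nested" would instead
    # use the order of an explicitly nested list of paths
    combined = xr.open_mfdataset("simulation/output_*.nc", combine="by_coords")
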
The particular function used is specified by setting the argument ``'combine'`` to ``'by_coords'`` or ``'nested'``. This is useful for situations where your data is split across many files in multiple locations, which have some known relationship between one another. python-xarray-2026.01.0/doc/user-guide/hierarchical-data.rst0000664000175000017500000007604515136607163023744 0ustar alastairalastair.. _userguide.hierarchical-data: Hierarchical data ================= .. jupyter-execute:: :hide-code: :hide-output: import numpy as np import pandas as pd import xarray as xr np.random.seed(123456) np.set_printoptions(threshold=10) %xmode minimal .. _why: Why Hierarchical Data? ---------------------- Many real-world datasets are composed of multiple differing components, and it can often be useful to think of these in terms of a hierarchy of related groups of data. Examples of data which one might want to organise in a grouped or hierarchical manner include: - Simulation data at multiple resolutions, - Observational data about the same system but from multiple different types of sensors, - Mixed experimental and theoretical data, - A systematic study recording the same experiment but with different parameters, - Heterogeneous data, such as demographic and meteorological data, or even any combination of the above. Often datasets like this cannot easily fit into a single :py:class:`~xarray.Dataset` object, or are more usefully thought of as groups of related :py:class:`~xarray.Dataset` objects. For this purpose we provide the :py:class:`xarray.DataTree` class. This page explains in detail how to understand and use the different features of the :py:class:`~xarray.DataTree` class for your own hierarchical data needs. .. _node relationships: Node Relationships ------------------ .. _creating a family tree: Creating a Family Tree ~~~~~~~~~~~~~~~~~~~~~~ The three main ways of creating a :py:class:`~xarray.DataTree` object are described briefly in :ref:`creating a datatree`. Here we go into more detail about how to create a tree node-by-node, using a famous family tree from the Simpsons cartoon as an example. Let's start by defining nodes representing the two siblings, Bart and Lisa Simpson: .. jupyter-execute:: bart = xr.DataTree(name="Bart") lisa = xr.DataTree(name="Lisa") Each of these node objects knows its own :py:class:`~xarray.DataTree.name`, but they currently have no relationship to one another. We can connect them by creating another node representing a common parent, Homer Simpson: .. jupyter-execute:: homer = xr.DataTree(name="Homer", children={"Bart": bart, "Lisa": lisa}) Here we set the children of Homer in the node's constructor. We now have a small family tree where we can see how these individual Simpson family members are related to one another: .. jupyter-execute:: print(homer) .. note:: We use ``print()`` above to show the compact tree hierarchy. :py:class:`~xarray.DataTree` objects also have an interactive HTML representation that is enabled by default in editors such as JupyterLab and VSCode. The HTML representation is especially helpful for larger trees and exploring new datasets, as it allows you to expand and collapse nodes. If you prefer the text representations you can also set ``xr.set_options(display_style="text")``. .. 
Comment:: may remove note and print()s after upstream theme changes https://github.com/pydata/pydata-sphinx-theme/pull/2187 The nodes representing Bart and Lisa are now connected - we can confirm their sibling rivalry by examining the :py:class:`~xarray.DataTree.siblings` property: .. jupyter-execute:: list(homer["Bart"].siblings) But oops, we forgot Homer's third daughter, Maggie! Let's add her by updating Homer's :py:class:`~xarray.DataTree.children` property to include her: .. jupyter-execute:: maggie = xr.DataTree(name="Maggie") homer.children = {"Bart": bart, "Lisa": lisa, "Maggie": maggie} print(homer) Let's check that Maggie knows who her Dad is: .. jupyter-execute:: maggie.parent.name That's good - updating the properties of our nodes does not break the internal consistency of our tree, as changes of parentage are automatically reflected on both nodes. These children obviously have another parent, Marge Simpson, but :py:class:`~xarray.DataTree` nodes can only have a maximum of one parent. Genealogical `family trees are not even technically trees `_ in the mathematical sense - the fact that distant relatives can mate makes them directed acyclic graphs. Trees of :py:class:`~xarray.DataTree` objects cannot represent this. Homer is currently listed as having no parent (the so-called "root node" of this tree), but we can update his :py:class:`~xarray.DataTree.parent` property: .. jupyter-execute:: abe = xr.DataTree(name="Abe") abe.children = {"Homer": homer} Abe is now the "root" of this tree, which we can see by examining the :py:class:`~xarray.DataTree.root` property of any node in the tree .. jupyter-execute:: maggie.root.name We can see the whole tree by printing Abe's node or just part of the tree by printing Homer's node: .. jupyter-execute:: print(abe) .. jupyter-execute:: print(abe["Homer"]) In episode 28, Abe Simpson reveals that he had another son, Herbert "Herb" Simpson. We can add Herbert to the family tree without displacing Homer by :py:meth:`~xarray.DataTree.assign`-ing another child to Abe: .. jupyter-execute:: herbert = xr.DataTree(name="Herb") abe = abe.assign({"Herbert": herbert}) print(abe) .. jupyter-execute:: print(abe["Herbert"].name) print(herbert.name) .. note:: This example shows a subtlety - the returned tree has Homer's brother listed as ``"Herbert"``, but the original node was named "Herb". Not only are names overridden when stored as keys like this, but the new node is a copy, so that the original node that was referenced is unchanged (i.e. ``herbert.name == "Herb"`` still). In other words, nodes are copied into trees, not inserted into them. This is intentional, and mirrors the behaviour when storing named :py:class:`~xarray.DataArray` objects inside datasets. Certain manipulations of our tree are forbidden, if they would create an inconsistent result. In episode 51 of the show Futurama, Philip J. Fry travels back in time and accidentally becomes his own Grandfather. If we try similar time-travelling hijinks with Homer, we get a :py:class:`~xarray.InvalidTreeError` raised: .. jupyter-execute:: :raises: abe["Homer"].children = {"Abe": abe} .. _evolutionary tree: Ancestry in an Evolutionary Tree ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Let's use a different example of a tree to discuss more complex relationships between nodes - the phylogenetic tree, or tree of life. .. 
jupyter-execute:: vertebrates = xr.DataTree.from_dict( { "/Sharks": None, "/Bony Skeleton/Ray-finned Fish": None, "/Bony Skeleton/Four Limbs/Amphibians": None, "/Bony Skeleton/Four Limbs/Amniotic Egg/Hair/Primates": None, "/Bony Skeleton/Four Limbs/Amniotic Egg/Hair/Rodents & Rabbits": None, "/Bony Skeleton/Four Limbs/Amniotic Egg/Two Fenestrae/Dinosaurs": None, "/Bony Skeleton/Four Limbs/Amniotic Egg/Two Fenestrae/Birds": None, }, name="Vertebrae", ) primates = vertebrates["/Bony Skeleton/Four Limbs/Amniotic Egg/Hair/Primates"] dinosaurs = vertebrates[ "/Bony Skeleton/Four Limbs/Amniotic Egg/Two Fenestrae/Dinosaurs" ] We have used the :py:meth:`~xarray.DataTree.from_dict` constructor method as a preferred way to quickly create a whole tree, and :ref:`filesystem paths` (to be explained shortly) to select two nodes of interest. .. jupyter-execute:: print(vertebrates) This tree shows various families of species, grouped by their common features (making it technically a `"Cladogram" `_, rather than an evolutionary tree). Here both the species and the features used to group them are represented by :py:class:`~xarray.DataTree` node objects - there is no distinction in types of node. We can however get a list of only the nodes we used to represent species by using the fact that all those nodes have no children - they are "leaf nodes". We can check if a node is a leaf with :py:meth:`~xarray.DataTree.is_leaf`, and get a list of all leaves with the :py:class:`~xarray.DataTree.leaves` property: .. jupyter-execute:: print(primates.is_leaf) [node.name for node in vertebrates.leaves] Pretending that this is a true evolutionary tree for a moment, we can find the features of the evolutionary ancestors (so-called "ancestor" nodes), the distinguishing feature of the common ancestor of all vertebrate life (the root node), and even the distinguishing feature of the common ancestor of any two species (the common ancestor of two nodes): .. jupyter-execute:: print([node.name for node in reversed(primates.parents)]) print(primates.root.name) print(primates.find_common_ancestor(dinosaurs).name) We can only find a common ancestor between two nodes that lie in the same tree. If we try to find the common evolutionary ancestor between primates and an Alien species that has no relationship to Earth's evolutionary tree, an error will be raised. .. jupyter-execute:: :raises: alien = xr.DataTree(name="Xenomorph") primates.find_common_ancestor(alien) .. _navigating trees: Navigating Trees ---------------- There are various ways to access the different nodes in a tree. Properties ~~~~~~~~~~ We can navigate trees using the :py:class:`~xarray.DataTree.parent` and :py:class:`~xarray.DataTree.children` properties of each node, for example: .. jupyter-execute:: lisa.parent.children["Bart"].name but there are also more convenient ways to access nodes. Dictionary-like interface ~~~~~~~~~~~~~~~~~~~~~~~~~ Children are stored on each node as a key-value mapping from name to child node. They can be accessed and altered via the :py:class:`~xarray.DataTree.__getitem__` and :py:class:`~xarray.DataTree.__setitem__` syntax. In general :py:class:`~xarray.DataTree.DataTree` objects support almost the entire set of dict-like methods, including :py:meth:`~xarray.DataTree.keys`, :py:class:`~xarray.DataTree.values`, :py:class:`~xarray.DataTree.items`, :py:meth:`~xarray.DataTree.__delitem__` and :py:meth:`~xarray.DataTree.update`. .. 
jupyter-execute:: print(vertebrates["Bony Skeleton"]["Ray-finned Fish"]) Note that the dict-like interface combines access to child :py:class:`~xarray.DataTree` nodes and stored :py:class:`~xarray.DataArrays`, so if we have a node that contains both children and data, calling :py:meth:`~xarray.DataTree.keys` will list both names of child nodes and names of data variables: .. jupyter-execute:: dt = xr.DataTree( dataset=xr.Dataset({"foo": 0, "bar": 1}), children={"a": xr.DataTree(), "b": xr.DataTree()}, ) print(dt) list(dt.keys()) This also means that the names of variables and of child nodes must be different to one another. Attribute-like access ~~~~~~~~~~~~~~~~~~~~~ You can also select both variables and child nodes through dot indexing .. jupyter-execute:: print(dt.foo) print(dt.a) .. _filesystem paths: Filesystem-like Paths ~~~~~~~~~~~~~~~~~~~~~ Hierarchical trees can be thought of as analogous to file systems. Each node is like a directory, and each directory can contain both more sub-directories and data. .. note:: Future development will allow you to make the filesystem analogy concrete by using :py:func:`~xarray.DataTree.open_mfdatatree` or :py:func:`~xarray.DataTree.save_mfdatatree`. (`See related issue in GitHub `_) Datatree objects support a syntax inspired by unix-like filesystems, where the "path" to a node is specified by the keys of each intermediate node in sequence, separated by forward slashes. This is an extension of the conventional dictionary ``__getitem__`` syntax to allow navigation across multiple levels of the tree. Like with filepaths, paths within the tree can either be relative to the current node, e.g. .. jupyter-execute:: print(abe["Homer/Bart"].name) print(abe["./Homer/Bart"].name) # alternative syntax or relative to the root node. A path specified from the root (as opposed to being specified relative to an arbitrary node in the tree) is sometimes also referred to as a `"fully qualified name" `_, or as an "absolute path". The root node is referred to by ``"/"``, so the path from the root node to its grand-child would be ``"/child/grandchild"``, e.g. .. jupyter-execute:: # access lisa's sibling by a relative path. print(lisa["../Bart"]) # or from absolute path print(lisa["/Homer/Bart"]) Relative paths between nodes also support the ``"../"`` syntax to mean the parent of the current node. We can use this with ``__setitem__`` to add a missing entry to our evolutionary tree, but add it relative to a more familiar node of interest: .. jupyter-execute:: primates["../../Two Fenestrae/Crocodiles"] = xr.DataTree() print(vertebrates) Given two nodes in a tree, we can also find their relative path: .. jupyter-execute:: bart.relative_to(lisa) You can use this filepath feature to build a nested tree from a dictionary of filesystem-like paths and corresponding :py:class:`~xarray.Dataset` objects in a single step. If we have a dictionary where each key is a valid path, and each value is either valid data or ``None``, we can construct a complex tree quickly using the alternative constructor :py:meth:`~xarray.DataTree.from_dict()`: .. jupyter-execute:: d = { "/": xr.Dataset({"foo": "orange"}), "/a": xr.Dataset({"bar": 0}, coords={"y": ("y", [0, 1, 2])}), "/a/b": xr.Dataset({"zed": np.nan}), "a/c/d": None, } dt = xr.DataTree.from_dict(d) print(dt) .. note:: Notice that using the path-like syntax will also create any intermediate empty nodes necessary to reach the end of the specified path (i.e. the node labelled ``"/a/c"`` in this case.) 
This is to help avoid lots of redundant entries when creating deeply-nested trees using :py:meth:`xarray.DataTree.from_dict`. .. _iterating over trees: Iterating over trees ~~~~~~~~~~~~~~~~~~~~ You can iterate over every node in a tree using the subtree :py:class:`~xarray.DataTree.subtree` property. This returns an iterable of nodes, which yields them in depth-first order. .. jupyter-execute:: for node in vertebrates.subtree: print(node.path) Similarly, :py:class:`~xarray.DataTree.subtree_with_keys` returns an iterable of relative paths and corresponding nodes. A very useful pattern is to iterate over :py:class:`~xarray.DataTree.subtree_with_keys` to manipulate nodes however you wish, then rebuild a new tree using :py:meth:`xarray.DataTree.from_dict()`. For example, we could keep only the nodes containing data by looping over all nodes, checking if they contain any data using :py:class:`~xarray.DataTree.has_data`, then rebuilding a new tree using only the paths of those nodes: .. jupyter-execute:: non_empty_nodes = { path: node.dataset for path, node in dt.subtree_with_keys if node.has_data } print(xr.DataTree.from_dict(non_empty_nodes)) You can see this tree is similar to the ``dt`` object above, except that it is missing the empty nodes ``a/c`` and ``a/c/d``. (If you want to keep the name of the root node, you will need to add the ``name`` kwarg to :py:class:`~xarray.DataTree.from_dict`, i.e. ``DataTree.from_dict(non_empty_nodes, name=dt.name)``.) .. _manipulating trees: Manipulating Trees ------------------ Subsetting Tree Nodes ~~~~~~~~~~~~~~~~~~~~~ We can subset our tree to select only nodes of interest in various ways. Similarly to on a real filesystem, matching nodes by common patterns in their paths is often useful. We can use :py:meth:`xarray.DataTree.match` for this: .. jupyter-execute:: dt = xr.DataTree.from_dict( { "/a/A": None, "/a/B": None, "/b/A": None, "/b/B": None, } ) result = dt.match("*/B") print(result) We can also subset trees by the contents of the nodes. :py:meth:`xarray.DataTree.filter` retains only the nodes of a tree that meet a certain condition. For example, we could recreate the Simpson's family tree with the ages of each individual, then filter for only the adults: First let's recreate the tree but with an ``age`` data variable in every node: .. jupyter-execute:: simpsons = xr.DataTree.from_dict( { "/": xr.Dataset({"age": 83}), "/Herbert": xr.Dataset({"age": 40}), "/Homer": xr.Dataset({"age": 39}), "/Homer/Bart": xr.Dataset({"age": 10}), "/Homer/Lisa": xr.Dataset({"age": 8}), "/Homer/Maggie": xr.Dataset({"age": 1}), }, name="Abe", ) print(simpsons) Now let's filter out the minors: .. jupyter-execute:: print(simpsons.filter(lambda node: node["age"] > 18)) The result is a new tree, containing only the nodes matching the condition. (Yes, under the hood :py:meth:`~xarray.DataTree.filter` is just syntactic sugar for the pattern we showed you in :ref:`iterating over trees` !) If you want to filter out empty nodes you can use :py:meth:`~xarray.DataTree.prune`. .. _Tree Contents: Tree Contents ------------- Hollow Trees ~~~~~~~~~~~~ A concept that can sometimes be useful is that of a "Hollow Tree", which means a tree with data stored only at the leaf nodes. This is useful because certain useful tree manipulation operations only make sense for hollow trees. You can check if a tree is a hollow tree by using the :py:class:`~xarray.DataTree.is_hollow` property. 
We can see that the Simpson's family is not hollow because the data variable ``"age"`` is present at some nodes which have children (i.e. Abe and Homer). .. jupyter-execute:: simpsons.is_hollow .. _tree computation: Computation ----------- :py:class:`~xarray.DataTree` objects are also useful for performing computations, not just for organizing data. Operations and Methods on Trees ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ To show how applying operations across a whole tree at once can be useful, let's first create an example scientific dataset. .. jupyter-execute:: def time_stamps(n_samples, T): """Create an array of evenly-spaced time stamps""" return xr.DataArray( data=np.linspace(0, 2 * np.pi * T, n_samples), dims=["time"] ) def signal_generator(t, f, A, phase): """Generate an example electrical-like waveform""" return A * np.sin(f * t.data + phase) time_stamps1 = time_stamps(n_samples=15, T=1.5) time_stamps2 = time_stamps(n_samples=10, T=1.0) voltages = xr.DataTree.from_dict( { "/oscilloscope1": xr.Dataset( { "potential": ( "time", signal_generator(time_stamps1, f=2, A=1.2, phase=0.5), ), "current": ( "time", signal_generator(time_stamps1, f=2, A=1.2, phase=1), ), }, coords={"time": time_stamps1}, ), "/oscilloscope2": xr.Dataset( { "potential": ( "time", signal_generator(time_stamps2, f=1.6, A=1.6, phase=0.2), ), "current": ( "time", signal_generator(time_stamps2, f=1.6, A=1.6, phase=0.7), ), }, coords={"time": time_stamps2}, ), } ) print(voltages) Most xarray computation methods also exist as methods on datatree objects, so you can for example take the mean value of these two timeseries at once: .. jupyter-execute:: print(voltages.mean(dim="time")) This works by mapping the standard :py:meth:`xarray.Dataset.mean()` method over the dataset stored in each node of the tree one-by-one. The arguments passed to the method are used for every node, so the values of the arguments you pass might be valid for one node and invalid for another .. jupyter-execute:: :raises: voltages.isel(time=12) Notice that the error raised helpfully indicates which node of the tree the operation failed on. Arithmetic Methods on Trees ~~~~~~~~~~~~~~~~~~~~~~~~~~~ Arithmetic methods are also implemented, so you can e.g. add a scalar to every dataset in the tree at once. For example, we can advance the timeline of the Simpsons by a decade just by .. jupyter-execute:: print(simpsons + 10) See that the same change (fast-forwarding by adding 10 years to the age of each character) has been applied to every node. Mapping Custom Functions Over Trees ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ You can map custom computation over each node in a tree using :py:meth:`xarray.DataTree.map_over_datasets`. You can map any function, so long as it takes :py:class:`xarray.Dataset` objects as one (or more) of the input arguments, and returns one (or more) xarray datasets. .. note:: Functions passed to :py:func:`~xarray.DataTree.map_over_datasets` cannot alter nodes in-place. Instead they must return new :py:class:`xarray.Dataset` objects. For example, we can define a function to calculate the Root Mean Square of a timeseries .. jupyter-execute:: def rms(signal): return np.sqrt(np.mean(signal**2)) Then calculate the RMS value of these signals: .. jupyter-execute:: print(voltages.map_over_datasets(rms)) .. _multiple trees: We can also use :py:func:`~xarray.map_over_datasets` to apply a function over the data in multiple trees, by passing the trees as positional arguments. 
Operating on Multiple Trees --------------------------- The examples so far have involved mapping functions or methods over the nodes of a single tree, but we can generalize this to mapping functions over multiple trees at once. Iterating Over Multiple Trees ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ To iterate over the corresponding nodes in multiple trees, use :py:func:`~xarray.group_subtrees` instead of :py:class:`~xarray.DataTree.subtree_with_keys`. This combines well with :py:meth:`xarray.DataTree.from_dict()` to build a new tree: .. jupyter-execute:: dt1 = xr.DataTree.from_dict({"a": xr.Dataset({"x": 1}), "b": xr.Dataset({"x": 2})}) dt2 = xr.DataTree.from_dict( {"a": xr.Dataset({"x": 10}), "b": xr.Dataset({"x": 20})} ) result = {} for path, (node1, node2) in xr.group_subtrees(dt1, dt2): result[path] = node1.dataset + node2.dataset dt3 = xr.DataTree.from_dict(result) print(dt3) Alternatively, you apply a function directly to paired datasets at every node using :py:func:`xarray.map_over_datasets`: .. jupyter-execute:: dt3 = xr.map_over_datasets(lambda x, y: x + y, dt1, dt2) print(dt3) Comparing Trees for Isomorphism ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ For it to make sense to map a single non-unary function over the nodes of multiple trees at once, each tree needs to have the same structure. Specifically two trees can only be considered similar, or "isomorphic", if the full paths to all of their descendent nodes are the same. Applying :py:func:`~xarray.group_subtrees` to trees with different structures raises :py:class:`~xarray.TreeIsomorphismError`: .. jupyter-execute:: :raises: tree = xr.DataTree.from_dict({"a": None, "a/b": None, "a/c": None}) simple_tree = xr.DataTree.from_dict({"a": None}) for _ in xr.group_subtrees(tree, simple_tree): ... We can explicitly also check if any two trees are isomorphic using the :py:meth:`~xarray.DataTree.isomorphic` method: .. jupyter-execute:: tree.isomorphic(simple_tree) Corresponding tree nodes do not need to have the same data in order to be considered isomorphic: .. jupyter-execute:: tree_with_data = xr.DataTree.from_dict({"a": xr.Dataset({"foo": 1})}) simple_tree.isomorphic(tree_with_data) They also do not need to define child nodes in the same order: .. jupyter-execute:: reordered_tree = xr.DataTree.from_dict({"a": None, "a/c": None, "a/b": None}) tree.isomorphic(reordered_tree) Arithmetic Between Multiple Trees ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Arithmetic operations like multiplication are binary operations, so as long as we have two isomorphic trees, we can do arithmetic between them. .. jupyter-execute:: currents = xr.DataTree.from_dict( { "/oscilloscope1": xr.Dataset( { "current": ( "time", signal_generator(time_stamps1, f=2, A=1.2, phase=1), ), }, coords={"time": time_stamps1}, ), "/oscilloscope2": xr.Dataset( { "current": ( "time", signal_generator(time_stamps2, f=1.6, A=1.6, phase=0.7), ), }, coords={"time": time_stamps2}, ), } ) print(currents) .. jupyter-execute:: currents.isomorphic(voltages) We could use this feature to quickly calculate the electrical power in our signal, P=IV. .. jupyter-execute:: power = currents * voltages print(power) .. _hierarchical-data.alignment-and-coordinate-inheritance: Alignment and Coordinate Inheritance ------------------------------------ .. _data-alignment: Data Alignment ~~~~~~~~~~~~~~ The data in different datatree nodes are not totally independent. In particular dimensions (and indexes) in child nodes must be exactly aligned with those in their parent nodes. 
Exact alignment means that shared dimensions must be the same length, and indexes along those dimensions must be equal. .. note:: If you were a previous user of the prototype `xarray-contrib/datatree `_ package, this is different from what you're used to! In that package the data model was that the data stored in each node actually was completely unrelated. The data model is now slightly stricter. This allows us to provide features like :ref:`coordinate-inheritance`. To demonstrate, let's first generate some example datasets which are not aligned with one another: .. jupyter-execute:: # (drop the attributes just to make the printed representation shorter) ds = xr.tutorial.open_dataset("air_temperature").drop_attrs() ds_daily = ds.resample(time="D").mean("time") ds_weekly = ds.resample(time="W").mean("time") ds_monthly = ds.resample(time="ME").mean("time") These datasets have different lengths along the ``time`` dimension, and are therefore not aligned along that dimension. .. jupyter-execute:: print(ds_daily.sizes) print(ds_weekly.sizes) print(ds_monthly.sizes) We cannot store these non-alignable variables on a single :py:class:`~xarray.Dataset` object, because they do not exactly align: .. jupyter-execute:: :raises: xr.align(ds_daily, ds_weekly, ds_monthly, join="exact") But we :ref:`previously said ` that multi-resolution data is a good use case for :py:class:`~xarray.DataTree`, so surely we should be able to store these in a single :py:class:`~xarray.DataTree`? If we first try to create a :py:class:`~xarray.DataTree` with these different-length time dimensions present in both parents and children, we will still get an alignment error: .. jupyter-execute:: :raises: xr.DataTree.from_dict({"daily": ds_daily, "daily/weekly": ds_weekly}) This is because DataTree checks that data in child nodes align exactly with their parents. .. note:: This requirement of aligned dimensions is similar to netCDF's concept of `inherited dimensions `_, as in netCDF-4 files dimensions are `visible to all child groups `_. This alignment check is performed up through the tree, all the way to the root, and so is therefore equivalent to requiring that this :py:func:`~xarray.align` command succeeds: .. code:: python xr.align(child.dataset, *(parent.dataset for parent in child.parents), join="exact") To represent our unalignable data in a single :py:class:`~xarray.DataTree`, we must instead place all variables which are a function of these different-length dimensions into nodes that are not direct descendents of one another, e.g. organize them as siblings. .. jupyter-execute:: dt = xr.DataTree.from_dict( {"daily": ds_daily, "weekly": ds_weekly, "monthly": ds_monthly} ) print(dt) Now we have a valid :py:class:`~xarray.DataTree` structure which contains all the data at each different time frequency, stored in a separate group. This is a useful way to organise our data because we can still operate on all the groups at once. For example we can extract all three timeseries at a specific lat-lon location: .. jupyter-execute:: dt_sel = dt.sel(lat=75, lon=300) print(dt_sel) or compute the standard deviation of each timeseries to find out how it varies with sampling frequency: .. jupyter-execute:: dt_std = dt.std(dim="time") print(dt_std) .. _coordinate-inheritance: Coordinate Inheritance ~~~~~~~~~~~~~~~~~~~~~~ Notice that in the trees we constructed above there is some redundancy - the ``lat`` and ``lon`` variables appear in each sibling group, but are identical across the groups. .. 
jupyter-execute:: dt We can use "Coordinate Inheritance" to define them only once in a parent group and remove this redundancy, whilst still being able to access those coordinate variables from the child groups. .. note:: This is also a new feature relative to the prototype `xarray-contrib/datatree `_ package. Let's instead place only the time-dependent variables in the child groups, and put the non-time-dependent ``lat`` and ``lon`` variables in the parent (root) group: .. jupyter-execute:: dt = xr.DataTree.from_dict( { "/": ds.drop_dims("time"), "daily": ds_daily.drop_vars(["lat", "lon"]), "weekly": ds_weekly.drop_vars(["lat", "lon"]), "monthly": ds_monthly.drop_vars(["lat", "lon"]), } ) dt This is preferred to the previous representation because it now makes it clear that all of these datasets share common spatial grid coordinates. Defining the common coordinates just once also ensures that the spatial coordinates for each group cannot become out of sync with one another during operations. We can still access the coordinates defined in the parent groups from any of the child groups as if they were actually present on the child groups: .. jupyter-execute:: dt.daily.coords .. jupyter-execute:: dt["daily/lat"] As we can still access them, we say that the ``lat`` and ``lon`` coordinates in the child groups have been "inherited" from their common parent group. If we print just one of the child nodes, it will still display inherited coordinates, but explicitly mark them as such: .. jupyter-execute:: dt["/daily"] This helps to differentiate which variables are defined on the datatree node that you are currently looking at, and which were defined somewhere above it. We can also still perform all the same operations on the whole tree: .. jupyter-execute:: dt.sel(lat=[75], lon=[300]) .. jupyter-execute:: dt.std(dim="time") python-xarray-2026.01.0/doc/user-guide/pandas.rst0000664000175000017500000002326115136607163021665 0ustar alastairalastair.. currentmodule:: xarray .. _pandas: =================== Working with pandas =================== One of the most important features of xarray is the ability to convert to and from :py:mod:`pandas` objects to interact with the rest of the PyData ecosystem. For example, for plotting labeled data, we highly recommend using the `visualization built in to pandas itself`__ or provided by the pandas aware libraries such as `Seaborn`__. __ https://pandas.pydata.org/pandas-docs/stable/visualization.html __ https://seaborn.pydata.org/ .. jupyter-execute:: :hide-code: import numpy as np import pandas as pd import xarray as xr np.random.seed(123456) Hierarchical and tidy data ~~~~~~~~~~~~~~~~~~~~~~~~~~ Tabular data is easiest to work with when it meets the criteria for `tidy data`__: * Each column holds a different variable. * Each rows holds a different observation. __ https://www.jstatsoft.org/v59/i10/ In this "tidy data" format, we can represent any :py:class:`Dataset` and :py:class:`DataArray` in terms of :py:class:`~pandas.DataFrame` and :py:class:`~pandas.Series`, respectively (and vice-versa). The representation works by flattening non-coordinates to 1D, and turning the tensor product of coordinate indexes into a :py:class:`pandas.MultiIndex`. Dataset and DataFrame --------------------- To convert any dataset to a ``DataFrame`` in tidy form, use the :py:meth:`Dataset.to_dataframe()` method: .. 
jupyter-execute:: ds = xr.Dataset( {"foo": (("x", "y"), np.random.randn(2, 3))}, coords={ "x": [10, 20], "y": ["a", "b", "c"], "along_x": ("x", np.random.randn(2)), "scalar": 123, }, ) ds .. jupyter-execute:: df = ds.to_dataframe() df We see that each variable and coordinate in the Dataset is now a column in the DataFrame, with the exception of indexes which are in the index. To convert the ``DataFrame`` to any other convenient representation, use ``DataFrame`` methods like :py:meth:`~pandas.DataFrame.reset_index`, :py:meth:`~pandas.DataFrame.stack` and :py:meth:`~pandas.DataFrame.unstack`. For datasets containing dask arrays where the data should be lazily loaded, see the :py:meth:`Dataset.to_dask_dataframe()` method. To create a ``Dataset`` from a ``DataFrame``, use the :py:meth:`Dataset.from_dataframe` class method or the equivalent :py:meth:`pandas.DataFrame.to_xarray` method: .. jupyter-execute:: xr.Dataset.from_dataframe(df) Notice that the dimensions of variables in the ``Dataset`` have now expanded after the round-trip conversion to a ``DataFrame``. This is because every object in a ``DataFrame`` must have the same indices, so we need to broadcast the data of each array to the full size of the new ``MultiIndex``. Likewise, all the coordinates (other than indexes) ended up as variables, because pandas does not distinguish non-index coordinates. DataArray and Series -------------------- ``DataArray`` objects have a complementary representation in terms of a :py:class:`~pandas.Series`. Using a Series preserves the ``Dataset`` to ``DataArray`` relationship, because ``DataFrames`` are dict-like containers of ``Series``. The methods are very similar to those for working with DataFrames: .. jupyter-execute:: s = ds["foo"].to_series() s .. jupyter-execute:: # or equivalently, with Series.to_xarray() xr.DataArray.from_series(s) Both the ``from_series`` and ``from_dataframe`` methods use reindexing, so they work even if the hierarchical index is not a full tensor product: .. jupyter-execute:: s[::2] .. jupyter-execute:: s[::2].to_xarray() Lossless and reversible conversion ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The previous ``Dataset`` example shows that the conversion is not reversible (lossy roundtrip) and that the size of the ``Dataset`` increases. Particularly after a roundtrip, the following deviations are noted: - a non-dimension Dataset ``coordinate`` is converted into a ``variable`` - a non-dimension DataArray ``coordinate`` is not converted - ``dtype`` is not always the same (e.g. "str" is converted to "object") - ``attrs`` metadata is not conserved To avoid these problems, the third-party `ntv-pandas `__ library offers lossless and reversible conversions between ``Dataset`` / ``DataArray`` and pandas ``DataFrame`` objects. This solution is particularly interesting for converting any ``DataFrame`` into a ``Dataset`` (the converter finds the multidimensional structure hidden by the tabular structure). The `ntv-pandas examples `__ show how to improve the conversion for the previous ``Dataset`` example and for more complex examples. Multi-dimensional data ~~~~~~~~~~~~~~~~~~~~~~ Tidy data is great, but sometimes you want to preserve dimensions instead of automatically stacking them into a ``MultiIndex``. :py:meth:`DataArray.to_pandas()` is a shortcut that lets you convert a DataArray directly into a pandas object with the same dimensionality, if available in pandas (i.e., a 1D array is converted to a :py:class:`~pandas.Series` and 2D to :py:class:`~pandas.DataFrame`): ..
jupyter-execute:: arr = xr.DataArray( np.random.randn(2, 3), coords=[("x", [10, 20]), ("y", ["a", "b", "c"])] ) df = arr.to_pandas() df To perform the inverse operation of converting any pandas objects into a data array with the same shape, simply use the :py:class:`DataArray` constructor: .. jupyter-execute:: xr.DataArray(df) Both the ``DataArray`` and ``Dataset`` constructors directly convert pandas objects into xarray objects with the same shape. This means that they preserve all use of multi-indexes: .. jupyter-execute:: index = pd.MultiIndex.from_arrays( [["a", "a", "b"], [0, 1, 2]], names=["one", "two"] ) df = pd.DataFrame({"x": 1, "y": 2}, index=index) ds = xr.Dataset(df) ds However, you will need to set dimension names explicitly, either with the ``dims`` argument in the ``DataArray`` constructor or by calling :py:meth:`~Dataset.rename` on the new object. .. _panel transition: Transitioning from pandas.Panel to xarray ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ``Panel``, pandas' data structure for 3D arrays, was always a second class data structure compared to the Series and DataFrame. To allow pandas developers to focus more on its core functionality built around the DataFrame, pandas removed ``Panel`` in favor of directing users who use multi-dimensional arrays to xarray. Xarray has most of ``Panel``'s features, a more explicit API (particularly around indexing), and the ability to scale to >3 dimensions with the same interface. As discussed in the :ref:`data structures section of the docs `, there are two primary data structures in xarray: ``DataArray`` and ``Dataset``. You can imagine a ``DataArray`` as an n-dimensional pandas ``Series`` (i.e. a single typed array), and a ``Dataset`` as the ``DataFrame`` equivalent (i.e. a dict of aligned ``DataArray`` objects). So you can represent a Panel in two ways: - As a 3-dimensional ``DataArray``, - Or as a ``Dataset`` containing a number of 2-dimensional DataArray objects. Let's take a look: .. jupyter-execute:: data = np.random.default_rng(0).random((2, 3, 4)) items = list("ab") major_axis = list("mno") minor_axis = pd.date_range(start="2000", periods=4, name="date") With old versions of pandas (prior to 0.25), this could be stored in a ``Panel``: .. jupyter-input:: pd.Panel(data, items, major_axis, minor_axis) .. jupyter-output:: Dimensions: 2 (items) x 3 (major_axis) x 4 (minor_axis) Items axis: a to b Major_axis axis: m to o Minor_axis axis: 2000-01-01 00:00:00 to 2000-01-04 00:00:00 To put this data in a ``DataArray``, write: .. jupyter-execute:: array = xr.DataArray(data, [items, major_axis, minor_axis]) array As you can see, there are three dimensions (each is also a coordinate). Two of the axes were unnamed, so have been assigned ``dim_0`` and ``dim_1`` respectively, while the third retains its name ``date``. You can also easily convert this data into a ``Dataset``: .. jupyter-execute:: array.to_dataset(dim="dim_0") Here, there are two data variables, each representing a DataFrame on the panel's ``items`` axis, and labeled as such. Each variable is a 2D array of the respective values along the ``items`` dimension. While the xarray docs are relatively complete, a few items stand out for Panel users: - A DataArray's data is stored as a numpy array, and so can only contain a single type. As a result, a Panel that contains :py:class:`~pandas.DataFrame` objects with multiple types will be converted to ``dtype=object``. A ``Dataset`` of multiple ``DataArray`` objects each with its own dtype will allow original types to be preserved.
- :ref:`Indexing ` is similar to pandas, but more explicit and leverages xarray's naming of dimensions. - Because of those features, working with much higher dimensional data is very practical. - Variables in a ``Dataset`` object can use a subset of its dimensions. For example, you can have one dataset with Person x Score x Time, and another with Person x Score. - You can use coordinates for both dimensions and for variables which *label* the data variables, so you could have a coordinate ``Age`` that labels the Person dimension of a Dataset of Person x Score x Time. While xarray may take some getting used to, it's worth it! If anything is unclear, please `post an issue on GitHub `__ or `StackOverflow `__, and we'll endeavor to respond to the specific case or improve the general docs. python-xarray-2026.01.0/doc/user-guide/groupby.rst0000664000175000017500000002513315136607163022106 0ustar alastairalastair.. currentmodule:: xarray .. _groupby: GroupBy: Group and Bin Data --------------------------- Often we want to bin or group data, produce statistics (mean, variance) on the groups, and then return a reduced data set. To do this, Xarray supports `"group by"`__ operations with the same API as pandas to implement the `split-apply-combine`__ strategy: __ https://pandas.pydata.org/pandas-docs/stable/groupby.html __ https://www.jstatsoft.org/v40/i01/paper - Split your data into multiple independent groups. - Apply some function to each group. - Combine your groups back into a single data object. Group by operations work on both :py:class:`Dataset` and :py:class:`DataArray` objects. Most of the examples focus on grouping by a single one-dimensional variable, although support for grouping over a multi-dimensional variable has recently been implemented. Note that for one-dimensional data, it is usually faster to rely on pandas' implementation of the same pipeline. .. tip:: `Install the flox package `_ to substantially improve the performance of GroupBy operations, particularly with dask. flox `extends Xarray's in-built GroupBy capabilities `_ by allowing grouping by multiple variables, and lazy grouping by dask arrays. If installed, Xarray will automatically use flox by default. Split ~~~~~ Let's create a simple example dataset: .. jupyter-execute:: :hide-code: import numpy as np import pandas as pd import xarray as xr np.random.seed(123456) .. jupyter-execute:: ds = xr.Dataset( {"foo": (("x", "y"), np.random.rand(4, 3))}, coords={"x": [10, 20, 30, 40], "letters": ("x", list("abba"))}, ) arr = ds["foo"] ds If we groupby the name of a variable or coordinate in a dataset (we can also use a DataArray directly), we get back a ``GroupBy`` object: .. jupyter-execute:: ds.groupby("letters") This object works very similarly to a pandas GroupBy object. You can view the group indices with the ``groups`` attribute: .. jupyter-execute:: ds.groupby("letters").groups You can also iterate over groups in ``(label, group)`` pairs: .. jupyter-execute:: list(ds.groupby("letters")) You can index out a particular group: .. jupyter-execute:: ds.groupby("letters")["b"] To group by multiple variables, see :ref:`this section `. Binning ~~~~~~~ Sometimes you don't want to use all the unique values to determine the groups but instead want to "bin" the data into coarser groups. You could always create a customized coordinate, but xarray facilitates this via the :py:meth:`Dataset.groupby_bins` method. ..
jupyter-execute:: x_bins = [0, 25, 50] ds.groupby_bins("x", x_bins).groups The binning is implemented via :func:`pandas.cut`, whose documentation details how the bins are assigned. As seen in the example above, by default, the bins are labeled with strings using set notation to precisely identify the bin limits. To override this behavior, you can specify the bin labels explicitly. Here we choose ``float`` labels which identify the bin centers: .. jupyter-execute:: x_bin_labels = [12.5, 37.5] ds.groupby_bins("x", x_bins, labels=x_bin_labels).groups Apply ~~~~~ To apply a function to each group, you can use the flexible :py:meth:`core.groupby.DatasetGroupBy.map` method. The resulting objects are automatically concatenated back together along the group axis: .. jupyter-execute:: def standardize(x): return (x - x.mean()) / x.std() arr.groupby("letters").map(standardize) GroupBy objects also have a :py:meth:`core.groupby.DatasetGroupBy.reduce` method and methods like :py:meth:`core.groupby.DatasetGroupBy.mean` as shortcuts for applying an aggregation function: .. jupyter-execute:: arr.groupby("letters").mean(dim="x") Using a groupby is thus also a convenient shortcut for aggregating over all dimensions *other than* the provided one: .. jupyter-execute:: ds.groupby("x").std(...) .. note:: We use an ellipsis (`...`) here to indicate we want to reduce over all other dimensions First and last ~~~~~~~~~~~~~~ There are two special aggregation operations that are currently only found on groupby objects: first and last. These provide the first or last example of values for group along the grouped dimension: .. jupyter-execute:: ds.groupby("letters").first(...) By default, they skip missing values (control this with ``skipna``). Grouped arithmetic ~~~~~~~~~~~~~~~~~~ GroupBy objects also support a limited set of binary arithmetic operations, as a shortcut for mapping over all unique labels. Binary arithmetic is supported for ``(GroupBy, Dataset)`` and ``(GroupBy, DataArray)`` pairs, as long as the dataset or data array uses the unique grouped values as one of its index coordinates. For example: .. jupyter-execute:: alt = arr.groupby("letters").mean(...) alt .. jupyter-execute:: ds.groupby("letters") - alt This last line is roughly equivalent to the following:: results = [] for label, group in ds.groupby('letters'): results.append(group - alt.sel(letters=label)) xr.concat(results, dim='x') .. _groupby.multidim: Multidimensional Grouping ~~~~~~~~~~~~~~~~~~~~~~~~~ Many datasets have a multidimensional coordinate variable (e.g. longitude) which is different from the logical grid dimensions (e.g. nx, ny). Such variables are valid under the `CF conventions`__. Xarray supports groupby operations over multidimensional coordinate variables: __ https://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#_two_dimensional_latitude_longitude_coordinate_variables .. jupyter-execute:: da = xr.DataArray( [[0, 1], [2, 3]], coords={ "lon": (["ny", "nx"], [[30, 40], [40, 50]]), "lat": (["ny", "nx"], [[10, 10], [20, 20]]), }, dims=["ny", "nx"], ) da .. jupyter-execute:: da.groupby("lon").sum(...) .. jupyter-execute:: da.groupby("lon").map(lambda x: x - x.mean(), shortcut=False) Because multidimensional groups have the ability to generate a very large number of bins, coarse-binning via :py:meth:`Dataset.groupby_bins` may be desirable: .. jupyter-execute:: da.groupby_bins("lon", [0, 45, 50]).sum() These methods group by ``lon`` values. 
It is also possible to groupby each cell in a grid, regardless of value, by stacking multiple dimensions, applying your function, and then unstacking the result: .. jupyter-execute:: stacked = da.stack(gridcell=["ny", "nx"]) stacked.groupby("gridcell").sum(...).unstack("gridcell") Alternatively, you can groupby both ``lat`` and ``lon`` at the :ref:`same time `. .. _groupby.groupers: Grouper Objects ~~~~~~~~~~~~~~~ Both ``groupby_bins`` and ``resample`` are specializations of the core ``groupby`` operation for binning, and time resampling. Many problems demand more complex GroupBy application: for example, grouping by multiple variables with a combination of categorical grouping, binning, and resampling; or more specializations like spatial resampling; or more complex time grouping like special handling of seasons, or the ability to specify custom seasons. To handle these use-cases and more, Xarray is evolving to providing an extension point using ``Grouper`` objects. .. tip:: See the `grouper design`_ doc for more detail on the motivation and design ideas behind Grouper objects. .. _grouper design: https://github.com/pydata/xarray/blob/main/design_notes/grouper_objects.md For now Xarray provides three specialized Grouper objects: 1. :py:class:`groupers.UniqueGrouper` for categorical grouping 2. :py:class:`groupers.BinGrouper` for binned grouping 3. :py:class:`groupers.TimeResampler` for resampling along a datetime coordinate These provide functionality identical to the existing ``groupby``, ``groupby_bins``, and ``resample`` methods. That is, .. code-block:: python ds.groupby("x") is identical to .. code-block:: python from xarray.groupers import UniqueGrouper ds.groupby(x=UniqueGrouper()) Similarly, .. code-block:: python ds.groupby_bins("x", bins=bins) is identical to .. code-block:: python from xarray.groupers import BinGrouper ds.groupby(x=BinGrouper(bins)) and .. code-block:: python ds.resample(time="ME") is identical to .. code-block:: python from xarray.groupers import TimeResampler ds.resample(time=TimeResampler("ME")) The :py:class:`groupers.UniqueGrouper` accepts an optional ``labels`` kwarg that is not present in :py:meth:`DataArray.groupby` or :py:meth:`Dataset.groupby`. Specifying ``labels`` is required when grouping by a lazy array type (e.g. dask or cubed). The ``labels`` are used to construct the output coordinate (say for a reduction), and aggregations will only be run over the specified labels. You may use ``labels`` to also specify the ordering of groups to be used during iteration. The order will be preserved in the output. .. _groupby.multiple: Grouping by multiple variables ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Use grouper objects to group by multiple dimensions: .. jupyter-execute:: from xarray.groupers import UniqueGrouper da.groupby(["lat", "lon"]).sum() The above is sugar for using ``UniqueGrouper`` objects directly: .. jupyter-execute:: da.groupby(lat=UniqueGrouper(), lon=UniqueGrouper()).sum() Different groupers can be combined to construct sophisticated GroupBy operations. .. jupyter-execute:: from xarray.groupers import BinGrouper ds.groupby(x=BinGrouper(bins=[5, 15, 25]), letters=UniqueGrouper()).sum() Time Grouping and Resampling ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. seealso:: See :ref:`resampling`. Shuffling ~~~~~~~~~ Shuffling is a generalization of sorting a DataArray or Dataset by another DataArray, named ``label`` for example, that follows from the idea of grouping by ``label``. 
Shuffling reorders the DataArray or the DataArrays in a Dataset such that all members of a group occur sequentially. For example, Shuffle the object using either :py:class:`DatasetGroupBy` or :py:class:`DataArrayGroupBy` as appropriate. .. jupyter-execute:: da = xr.DataArray( dims="x", data=[1, 2, 3, 4, 5, 6], coords={"label": ("x", "a b c a b c".split(" "))}, ) da.groupby("label").shuffle_to_chunks() For chunked array types (e.g. dask or cubed), shuffle may result in a more optimized communication pattern when compared to direct indexing by the appropriate indexer. Shuffling also makes GroupBy operations on chunked arrays an embarrassingly parallel problem, and may significantly improve workloads that use :py:meth:`DatasetGroupBy.map` or :py:meth:`DataArrayGroupBy.map`. python-xarray-2026.01.0/doc/user-guide/complex-numbers.rst0000664000175000017500000001064315136607163023537 0ustar alastairalastair.. currentmodule:: xarray .. _complex: Complex Numbers =============== .. jupyter-execute:: :hide-code: import numpy as np import xarray as xr .. jupyter-execute:: :hide-code: # Ensure the file is located in a unique temporary directory # so that it doesn't conflict with parallel builds of the # documentation. import tempfile import os.path tempdir = tempfile.TemporaryDirectory() Xarray leverages NumPy to seamlessly handle complex numbers in :py:class:`~xarray.DataArray` and :py:class:`~xarray.Dataset` objects. In the examples below, we are using a DataArray named ``da`` with complex elements (of :math:`\mathbb{C}`): .. jupyter-execute:: data = np.array([[1 + 2j, 3 + 4j], [5 + 6j, 7 + 8j]]) da = xr.DataArray( data, dims=["x", "y"], coords={"x": ["a", "b"], "y": [1, 2]}, name="complex_nums", ) Operations on Complex Data -------------------------- You can access real and imaginary components using the ``.real`` and ``.imag`` attributes. Most NumPy universal functions (ufuncs) like :py:doc:`numpy.abs ` or :py:doc:`numpy.angle ` work directly. .. jupyter-execute:: da.real .. jupyter-execute:: np.abs(da) .. note:: Like NumPy, ``.real`` and ``.imag`` typically return *views*, not copies, of the original data. Reading and Writing Complex Data -------------------------------- Writing complex data to NetCDF files (see :ref:`io.netcdf`) is supported via :py:meth:`~xarray.DataArray.to_netcdf` using specific backend engines that handle complex types: .. tab:: h5netcdf This requires the `h5netcdf `_ library to be installed. .. jupyter-execute:: complex_nums_h5_filename = "complex_nums_h5.nc" .. jupyter-execute:: :hide-code: complex_nums_h5_filename = os.path.join(tempdir.name, complex_nums_h5_filename) .. jupyter-execute:: # write the data to disk da.to_netcdf(complex_nums_h5_filename, engine="h5netcdf") # read the file back into memory ds_h5 = xr.open_dataset(complex_nums_h5_filename, engine="h5netcdf") # check the dtype ds_h5[da.name].dtype .. tab:: netcdf4 Requires the `netcdf4-python (>= 1.7.1) `_ library and you have to enable ``auto_complex=True``. .. jupyter-execute:: complex_nums_nc4_filename = "complex_nums_nc4.nc" .. jupyter-execute:: :hide-code: complex_nums_nc4_filename = os.path.join(tempdir.name, complex_nums_nc4_filename) .. jupyter-execute:: # write the data to disk da.to_netcdf(complex_nums_nc4_filename, engine="netcdf4", auto_complex=True) # read the file back into memory ds_nc4 = xr.open_dataset( complex_nums_nc4_filename, engine="netcdf4", auto_complex=True ) # check the dtype ds_nc4[da.name].dtype .. 
warning:: The ``scipy`` engine only supports NetCDF V3 and does *not* support complex arrays; writing with ``engine="scipy"`` raises a ``TypeError``. Alternative: Manual Handling ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ If direct writing is not supported (e.g., targeting NetCDF3), you can manually split the complex array into separate real and imaginary variables before saving: .. jupyter-execute:: complex_manual_filename = "complex_manual.nc" .. jupyter-execute:: :hide-code: complex_manual_filename = os.path.join(tempdir.name, complex_manual_filename) .. jupyter-execute:: # Write data to file ds_manual = xr.Dataset( { f"{da.name}_real": da.real, f"{da.name}_imag": da.imag, } ) ds_manual.to_netcdf(complex_manual_filename, engine="scipy") # Example # Read data from file ds = xr.open_dataset(complex_manual_filename, engine="scipy") reconstructed = ds[f"{da.name}_real"] + 1j * ds[f"{da.name}_imag"] Recommendations ^^^^^^^^^^^^^^^ - Use ``engine="netcdf4"`` with ``auto_complex=True`` for full compliance and ease. - Use ``h5netcdf`` for HDF5-based storage when interoperability with HDF5 is desired. - For maximum legacy support (NetCDF3), manually handle real/imaginary components. .. jupyter-execute:: :hide-code: # Cleanup tempdir.cleanup() See also -------- - :ref:`io.netcdf` — full NetCDF I/O guide - `NumPy complex numbers `__ python-xarray-2026.01.0/doc/user-guide/weather-climate.rst0000664000175000017500000003175615136607163023502 0ustar alastairalastair.. currentmodule:: xarray .. _weather-climate: Weather and climate data ======================== .. jupyter-execute:: :hide-code: import xarray as xr import numpy as np Xarray can leverage metadata that follows the `Climate and Forecast (CF) conventions`_ if present. Examples include :ref:`automatic labelling of plots` with descriptive names and units if proper metadata is present and support for non-standard calendars used in climate science through the ``cftime`` module (explained in the :ref:`CFTimeIndex` section). There are also a number of :ref:`geosciences-focused projects that build on xarray`. .. _Climate and Forecast (CF) conventions: https://cfconventions.org .. _cf_variables: Related Variables ----------------- Several CF variable attributes contain lists of other variables associated with the variable with the attribute. A few of these are now parsed by xarray, with the attribute value popped to encoding on read and the variables in that value interpreted as non-dimension coordinates: - ``coordinates`` - ``bounds`` - ``grid_mapping`` - ``climatology`` - ``geometry`` - ``node_coordinates`` - ``node_count`` - ``part_node_count`` - ``interior_ring`` - ``cell_measures`` - ``formula_terms`` This decoding is controlled by the ``decode_coords`` kwarg to :py:func:`open_dataset` and :py:func:`open_mfdataset`. The CF attribute ``ancillary_variables`` was not included in the list due to the variables listed there being associated primarily with the variable with the attribute, rather than with the dimensions. .. _metpy_accessor: CF-compliant coordinate variables --------------------------------- `MetPy`_ adds a ``metpy`` accessor that allows accessing coordinates with appropriate CF metadata using generic names ``x``, ``y``, ``vertical`` and ``time``. There is also a ``cartopy_crs`` attribute that provides projection information, parsed from the appropriate CF metadata, as a `Cartopy`_ projection object. See the `metpy documentation`_ for more information. .. _`MetPy`: https://unidata.github.io/MetPy/dev/index.html .. 
_`metpy documentation`: https://unidata.github.io/MetPy/dev/tutorials/xarray_tutorial.html#coordinates .. _`Cartopy`: https://cartopy.readthedocs.io/stable/reference/crs.html .. _CFTimeIndex: Non-standard calendars and dates outside the precision range ------------------------------------------------------------ Through the standalone ``cftime`` library and a custom subclass of :py:class:`pandas.Index`, xarray supports a subset of the indexing functionality enabled through the standard :py:class:`pandas.DatetimeIndex` for dates from non-standard calendars commonly used in climate science or dates using a standard calendar, but outside the `precision range`_ and dates prior to `1582-10-15`_. .. note:: As of xarray version 0.11, by default, :py:class:`cftime.datetime` objects will be used to represent times (either in indexes, as a :py:class:`~xarray.CFTimeIndex`, or in data arrays with dtype object) if any of the following are true: - The dates are from a non-standard calendar - Any dates are outside the nanosecond-precision range (prior xarray version 2025.01.2) - Any dates are outside the time span limited by the resolution (from xarray version 2025.01.2) Otherwise pandas-compatible dates from a standard calendar will be represented with the ``np.datetime64[unit]`` data type (where unit can be one of ``"s"``, ``"ms"``, ``"us"``, ``"ns"``), enabling the use of a :py:class:`pandas.DatetimeIndex` or arrays with dtype ``np.datetime64[unit]`` and their full set of associated features. As of pandas version 2.0.0, pandas supports non-nanosecond precision datetime values. From xarray version 2025.01.2 on, non-nanosecond precision datetime values are also supported in xarray (this can be parameterized via :py:class:`~xarray.coders.CFDatetimeCoder` and ``decode_times`` kwarg). See also :ref:`internals.timecoding`. For example, you can create a DataArray indexed by a time coordinate with dates from a no-leap calendar and a :py:class:`~xarray.CFTimeIndex` will automatically be used: .. jupyter-execute:: from itertools import product from cftime import DatetimeNoLeap dates = [ DatetimeNoLeap(year, month, 1) for year, month in product(range(1, 3), range(1, 13)) ] da = xr.DataArray(np.arange(24), coords=[dates], dims=["time"], name="foo") Xarray also includes a :py:func:`~xarray.date_range` function, which enables creating a :py:class:`~xarray.CFTimeIndex` with regularly-spaced dates. For instance, we can create the same dates and DataArray we created above using (note that ``use_cftime=True`` is not mandatory to return a :py:class:`~xarray.CFTimeIndex` for non-standard calendars, but can be nice to use to be explicit): .. jupyter-execute:: dates = xr.date_range( start="0001", periods=24, freq="MS", calendar="noleap", use_cftime=True ) da = xr.DataArray(np.arange(24), coords=[dates], dims=["time"], name="foo") Mirroring pandas' method with the same name, :py:meth:`~xarray.infer_freq` allows one to infer the sampling frequency of a :py:class:`~xarray.CFTimeIndex` or a 1-D :py:class:`~xarray.DataArray` containing cftime objects. It also works transparently with ``np.datetime64`` and ``np.timedelta64`` data (with "s", "ms", "us" or "ns" resolution). .. jupyter-execute:: xr.infer_freq(dates) With :py:meth:`~xarray.CFTimeIndex.strftime` we can also easily generate formatted strings from the datetime values of a :py:class:`~xarray.CFTimeIndex` directly or through the ``dt`` accessor for a :py:class:`~xarray.DataArray` using the same formatting as the standard `datetime.strftime`_ convention . .. 
_datetime.strftime: https://docs.python.org/3/library/datetime.html#strftime-strptime-behavior .. jupyter-execute:: dates.strftime("%c") .. jupyter-execute:: da["time"].dt.strftime("%Y%m%d") Conversion between non-standard calendar and to/from pandas DatetimeIndexes is facilitated with the :py:meth:`xarray.Dataset.convert_calendar` method (also available as :py:meth:`xarray.DataArray.convert_calendar`). Here, like elsewhere in xarray, the ``use_cftime`` argument controls which datetime backend is used in the output. The default (``None``) is to use ``pandas`` when possible, i.e. when the calendar is ``standard``/``gregorian`` and dates starting with `1582-10-15`_. There is no such restriction when converting to a ``proleptic_gregorian`` calendar. .. _1582-10-15: https://en.wikipedia.org/wiki/Gregorian_calendar .. jupyter-execute:: dates = xr.date_range( start="2001", periods=24, freq="MS", calendar="noleap", use_cftime=True ) da_nl = xr.DataArray(np.arange(24), coords=[dates], dims=["time"], name="foo") da_std = da.convert_calendar("standard", use_cftime=True) The data is unchanged, only the timestamps are modified. Further options are implemented for the special ``"360_day"`` calendar and for handling missing dates. There is also :py:meth:`xarray.Dataset.interp_calendar` (and :py:meth:`xarray.DataArray.interp_calendar`) for interpolating data between calendars. For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: - `Partial datetime string indexing`_: .. jupyter-execute:: da.sel(time="0001") .. jupyter-execute:: da.sel(time=slice("0001-05", "0002-02")) .. note:: For specifying full or partial datetime strings in cftime indexing, xarray supports two versions of the `ISO 8601 standard`_, the basic pattern (YYYYMMDDhhmmss) or the extended pattern (YYYY-MM-DDThh:mm:ss), as well as the default cftime string format (YYYY-MM-DD hh:mm:ss). This is somewhat more restrictive than pandas; in other words, some datetime strings that would be valid for a :py:class:`pandas.DatetimeIndex` are not valid for an :py:class:`~xarray.CFTimeIndex`. - Access of basic datetime components via the ``dt`` accessor (in this case just "year", "month", "day", "hour", "minute", "second", "microsecond", "season", "dayofyear", "dayofweek", and "days_in_month") with the addition of "calendar", absent from pandas: .. jupyter-execute:: da.time.dt.year .. jupyter-execute:: da.time.dt.month .. jupyter-execute:: da.time.dt.season .. jupyter-execute:: da.time.dt.dayofyear .. jupyter-execute:: da.time.dt.dayofweek .. jupyter-execute:: da.time.dt.days_in_month .. jupyter-execute:: da.time.dt.calendar - Rounding of datetimes to fixed frequencies via the ``dt`` accessor: .. jupyter-execute:: da.time.dt.ceil("3D").head() .. jupyter-execute:: da.time.dt.floor("5D").head() .. jupyter-execute:: da.time.dt.round("2D").head() - Group-by operations based on datetime accessor attributes (e.g. by month of the year): .. jupyter-execute:: da.groupby("time.month").sum() - Interpolation using :py:class:`cftime.datetime` objects: .. jupyter-execute:: da.interp(time=[DatetimeNoLeap(1, 1, 15), DatetimeNoLeap(1, 2, 15)]) - Interpolation using datetime strings: .. jupyter-execute:: da.interp(time=["0001-01-15", "0001-02-15"]) - Differentiation: .. jupyter-execute:: da.differentiate("time") - Serialization: .. jupyter-execute:: filename = "example-no-leap.nc" .. 
jupyter-execute:: :hide-code: # Ensure the file is located in a unique temporary directory # so that it doesn't conflict with parallel builds of the # documentation. import tempfile import os.path tempdir = tempfile.TemporaryDirectory() filename = os.path.join(tempdir.name, filename) .. jupyter-execute:: da.to_netcdf(filename) reopened = xr.open_dataset(filename) reopened .. jupyter-execute:: :hide-code: reopened.close() tempdir.cleanup() - And resampling along the time dimension for data indexed by a :py:class:`~xarray.CFTimeIndex`: .. jupyter-execute:: da.resample(time="81min", closed="right", label="right", offset="3min").mean() .. _precision range: https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#timestamp-limitations .. _ISO 8601 standard: https://en.wikipedia.org/wiki/ISO_8601 .. _partial datetime string indexing: https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#partial-string-indexing .. _cftime_arithmetic_limitations: Arithmetic limitations with ``cftime`` objects ---------------------------------------------- A current limitation when working with non-standard calendars and :py:class:`cftime.datetime` objects is that they support arithmetic with :py:class:`datetime.timedelta`, but **not** with :py:class:`numpy.timedelta64`. This means that certain xarray operations (such as :py:meth:`~xarray.DataArray.diff`) may produce ``timedelta64`` results that cannot be directly combined with ``cftime`` coordinates. For example, let's define a time axis using ``cftime`` objects: .. jupyter-execute:: import xarray as xr import numpy as np import pandas as pd import cftime time = xr.DataArray( xr.date_range("2000", periods=3, freq="MS", use_cftime=True), dims="time", ) If you want to compute, e.g., midpoints in the time intervals, this will not work: .. code-block:: python # Attempt to compute midpoints time[:-1] + 0.5 * time.diff("time") and result in an error like this: .. code-block:: none UFuncTypeError: ufunc 'add' cannot use operands with types dtype('O') and dtype('`. .. note:: ``interp`` requires ``scipy`` installed. Scalar and 1-dimensional interpolation -------------------------------------- Interpolating a :py:class:`~xarray.DataArray` works mostly like labeled indexing of a :py:class:`~xarray.DataArray`, .. jupyter-execute:: da = xr.DataArray( np.sin(0.3 * np.arange(12).reshape(4, 3)), [("time", np.arange(4)), ("space", [0.1, 0.2, 0.3])], ) # label lookup da.sel(time=3) .. jupyter-execute:: # interpolation da.interp(time=2.5) Similar to the indexing, :py:meth:`~xarray.DataArray.interp` also accepts an array-like, which gives the interpolated result as an array. .. jupyter-execute:: # label lookup da.sel(time=[2, 3]) .. jupyter-execute:: # interpolation da.interp(time=[2.5, 3.5]) To interpolate data with a :py:doc:`numpy.datetime64 ` coordinate you can pass a string. .. jupyter-execute:: da_dt64 = xr.DataArray( [1, 3], [("time", pd.date_range("1/1/2000", "1/3/2000", periods=2))] ) da_dt64.interp(time="2000-01-02") The interpolated data can be merged into the original :py:class:`~xarray.DataArray` by specifying the time periods required. .. jupyter-execute:: da_dt64.interp(time=pd.date_range("1/1/2000", "1/3/2000", periods=3)) Interpolation of data indexed by a :py:class:`~xarray.CFTimeIndex` is also allowed. See :ref:`CFTimeIndex` for examples. .. note:: Currently, our interpolation only works for regular grids. 
Therefore, similarly to :py:meth:`~xarray.DataArray.sel`, only 1D coordinates along a dimension can be used as the original coordinate to be interpolated. Multi-dimensional Interpolation ------------------------------- Like :py:meth:`~xarray.DataArray.sel`, :py:meth:`~xarray.DataArray.interp` accepts multiple coordinates. In this case, multidimensional interpolation is carried out. .. jupyter-execute:: # label lookup da.sel(time=2, space=0.1) .. jupyter-execute:: # interpolation da.interp(time=2.5, space=0.15) Array-like coordinates are also accepted: .. jupyter-execute:: # label lookup da.sel(time=[2, 3], space=[0.1, 0.2]) .. jupyter-execute:: # interpolation da.interp(time=[1.5, 2.5], space=[0.15, 0.25]) :py:meth:`~xarray.DataArray.interp_like` method is a useful shortcut. This method interpolates an xarray object onto the coordinates of another xarray object. For example, if we want to compute the difference between two :py:class:`~xarray.DataArray` s (``da`` and ``other``) staying on slightly different coordinates, .. jupyter-execute:: other = xr.DataArray( np.sin(0.4 * np.arange(9).reshape(3, 3)), [("time", [0.9, 1.9, 2.9]), ("space", [0.15, 0.25, 0.35])], ) it might be a good idea to first interpolate ``da`` so that it will stay on the same coordinates of ``other``, and then subtract it. :py:meth:`~xarray.DataArray.interp_like` can be used for such a case, .. jupyter-execute:: # interpolate da along other's coordinates interpolated = da.interp_like(other) interpolated It is now possible to safely compute the difference ``other - interpolated``. Interpolation methods --------------------- We use either :py:class:`scipy.interpolate.interp1d` or special interpolants from :py:class:`scipy.interpolate` for 1-dimensional interpolation (see :py:meth:`~xarray.Dataset.interp`). For multi-dimensional interpolation, an attempt is first made to decompose the interpolation in a series of 1-dimensional interpolations, in which case the relevant 1-dimensional interpolator is used. If a decomposition cannot be made (e.g. with advanced interpolation), :py:func:`scipy.interpolate.interpn` is used. The interpolation method can be specified by the optional ``method`` argument. .. jupyter-execute:: da = xr.DataArray( np.sin(np.linspace(0, 2 * np.pi, 10)), dims="x", coords={"x": np.linspace(0, 1, 10)}, ) da.plot.line("o", label="original") da.interp(x=np.linspace(0, 1, 100)).plot.line(label="linear (default)") da.interp(x=np.linspace(0, 1, 100), method="cubic").plot.line(label="cubic") plt.legend(); Additional keyword arguments can be passed to scipy's functions. .. jupyter-execute:: # fill 0 for the outside of the original coordinates. da.interp(x=np.linspace(-0.5, 1.5, 10), kwargs={"fill_value": 0.0}) .. jupyter-execute:: # 1-dimensional extrapolation da.interp(x=np.linspace(-0.5, 1.5, 10), kwargs={"fill_value": "extrapolate"}) .. jupyter-execute:: # multi-dimensional extrapolation da = xr.DataArray( np.sin(0.3 * np.arange(12).reshape(4, 3)), [("time", np.arange(4)), ("space", [0.1, 0.2, 0.3])], ) da.interp( time=4, space=np.linspace(-0.1, 0.5, 10), kwargs={"fill_value": "extrapolate"} ) Advanced Interpolation ---------------------- :py:meth:`~xarray.DataArray.interp` accepts :py:class:`~xarray.DataArray` as similar to :py:meth:`~xarray.DataArray.sel`, which enables us more advanced interpolation. Based on the dimension of the new coordinate passed to :py:meth:`~xarray.DataArray.interp`, the dimension of the result are determined. 
For example, if you want to interpolate a two dimensional array along a particular dimension, as illustrated below, you can pass two 1-dimensional :py:class:`~xarray.DataArray` s with a common dimension as new coordinate. .. image:: ../_static/advanced_selection_interpolation.svg :height: 200px :width: 400 px :alt: advanced indexing and interpolation :align: center For example: .. jupyter-execute:: da = xr.DataArray( np.sin(0.3 * np.arange(20).reshape(5, 4)), [("x", np.arange(5)), ("y", [0.1, 0.2, 0.3, 0.4])], ) # advanced indexing x = xr.DataArray([0, 2, 4], dims="z") y = xr.DataArray([0.1, 0.2, 0.3], dims="z") da.sel(x=x, y=y) .. jupyter-execute:: # advanced interpolation, without extrapolation x = xr.DataArray([0.5, 1.5, 2.5, 3.5], dims="z") y = xr.DataArray([0.15, 0.25, 0.35, 0.45], dims="z") da.interp(x=x, y=y) where values on the original coordinates ``(x, y) = ((0.5, 0.15), (1.5, 0.25), (2.5, 0.35), (3.5, 0.45))`` are obtained by the 2-dimensional interpolation and mapped along a new dimension ``z``. Since no keyword arguments are passed to the interpolation routine, no extrapolation is performed resulting in a ``nan`` value. If you want to add a coordinate to the new dimension ``z``, you can supply :py:class:`~xarray.DataArray` s with a coordinate. Extrapolation can be achieved by passing additional arguments to SciPy's ``interpnd`` function, .. jupyter-execute:: x = xr.DataArray([0.5, 1.5, 2.5, 3.5], dims="z", coords={"z": ["a", "b", "c", "d"]}) y = xr.DataArray( [0.15, 0.25, 0.35, 0.45], dims="z", coords={"z": ["a", "b", "c", "d"]} ) da.interp(x=x, y=y, kwargs={"fill_value": None}) For the details of the advanced indexing, see :ref:`more advanced indexing `. Interpolating arrays with NaN ----------------------------- Our :py:meth:`~xarray.DataArray.interp` works with arrays with NaN the same way that `scipy.interpolate.interp1d `_ and `scipy.interpolate.interpn `_ do. ``linear`` and ``nearest`` methods return arrays including NaN, while other methods such as ``cubic`` or ``quadratic`` return all NaN arrays. .. jupyter-execute:: da = xr.DataArray([0, 2, np.nan, 3, 3.25], dims="x", coords={"x": range(5)}) da.interp(x=[0.5, 1.5, 2.5]) .. jupyter-execute:: da.interp(x=[0.5, 1.5, 2.5], method="cubic") To avoid this, you can drop NaN by :py:meth:`~xarray.DataArray.dropna`, and then make the interpolation .. jupyter-execute:: dropped = da.dropna("x") dropped .. jupyter-execute:: dropped.interp(x=[0.5, 1.5, 2.5], method="cubic") If NaNs are distributed randomly in your multidimensional array, dropping all the columns containing more than one NaNs by :py:meth:`~xarray.DataArray.dropna` may lose a significant amount of information. In such a case, you can fill NaN by :py:meth:`~xarray.DataArray.interpolate_na`, which is similar to :py:meth:`pandas.Series.interpolate`. .. jupyter-execute:: filled = da.interpolate_na(dim="x") filled This fills NaN by interpolating along the specified dimension. After filling NaNs, you can interpolate: .. jupyter-execute:: filled.interp(x=[0.5, 1.5, 2.5], method="cubic") For the details of :py:meth:`~xarray.DataArray.interpolate_na`, see :ref:`Missing values `. Example ------- Let's see how :py:meth:`~xarray.DataArray.interp` works on real data. .. 
jupyter-execute:: # Raw data ds = xr.tutorial.open_dataset("air_temperature").isel(time=0) fig, axes = plt.subplots(ncols=2, figsize=(10, 4)) ds.air.plot(ax=axes[0]) axes[0].set_title("Raw data") # Interpolated data new_lon = np.linspace(ds.lon[0].item(), ds.lon[-1].item(), ds.sizes["lon"] * 4) new_lat = np.linspace(ds.lat[0].item(), ds.lat[-1].item(), ds.sizes["lat"] * 4) dsi = ds.interp(lat=new_lat, lon=new_lon) dsi.air.plot(ax=axes[1]) axes[1].set_title("Interpolated data"); Our advanced interpolation can be used to remap the data to the new coordinate. Consider the new coordinates x and z on the two dimensional plane. The remapping can be done as follows .. jupyter-execute:: # new coordinate x = np.linspace(240, 300, 100) z = np.linspace(20, 70, 100) # relation between new and original coordinates lat = xr.DataArray(z, dims=["z"], coords={"z": z}) lon = xr.DataArray( (x[:, np.newaxis] - 270) / np.cos(z * np.pi / 180) + 270, dims=["x", "z"], coords={"x": x, "z": z}, ) fig, axes = plt.subplots(ncols=2, figsize=(10, 4)) ds.air.plot(ax=axes[0]) # draw the new coordinate on the original coordinates. for idx in [0, 33, 66, 99]: axes[0].plot(lon.isel(x=idx), lat, "--k") for idx in [0, 33, 66, 99]: axes[0].plot(*xr.broadcast(lon.isel(z=idx), lat.isel(z=idx)), "--k") axes[0].set_title("Raw data") dsi = ds.interp(lon=lon, lat=lat) dsi.air.plot(ax=axes[1]) axes[1].set_title("Remapped data"); python-xarray-2026.01.0/doc/user-guide/dask.rst0000664000175000017500000005542415136607163021347 0ustar alastairalastair.. currentmodule:: xarray .. _dask: Parallel Computing with Dask ============================ .. jupyter-execute:: # Note that it's not necessary to import dask to use xarray with dask. import numpy as np import pandas as pd import xarray as xr import bottleneck .. jupyter-execute:: :hide-code: import os import tempfile tempdir = tempfile.TemporaryDirectory() np.random.seed(123456) # limit the amount of information printed to screen xr.set_options(display_expand_data=False) np.set_printoptions(precision=3, linewidth=100, threshold=10, edgeitems=2) ds = xr.Dataset( { "temperature": ( ("time", "latitude", "longitude"), np.random.randn(30, 180, 180), ), "time": pd.date_range("2015-01-01", periods=30), "longitude": np.arange(180), "latitude": np.arange(89.5, -90.5, -1), } ) ds.to_netcdf(os.path.join(tempdir.name, "example-data.nc")) Xarray integrates with `Dask `__, a general purpose library for parallel computing, to handle larger-than-memory computations. If you’ve been using Xarray to read in large datasets or split up data across a number of files, you may already be using Dask: .. code-block:: python ds = xr.open_zarr("/path/to/data.zarr") timeseries = ds["temp"].mean(dim=["x", "y"]).compute() # Compute result Using Dask with Xarray feels similar to working with NumPy arrays, but on much larger datasets. The Dask integration is transparent, so you usually don’t need to manage the parallelism directly; Xarray and Dask handle these aspects behind the scenes. This makes it easy to write code that scales from small, in-memory datasets on a single machine to large datasets that are distributed across a cluster, with minimal code changes. Examples -------- If you're new to using Xarray with Dask, we recommend the `Xarray + Dask Tutorial `_. 
Here are some examples for using Xarray with Dask at scale: - `Zonal averaging with the NOAA National Water Model `_ - `CMIP6 Precipitation Frequency Analysis `_ - `Using Dask + Cloud Optimized GeoTIFFs `_ Find more examples at the `Project Pythia cookbook gallery `_. Using Dask with Xarray ---------------------- .. image:: ../_static/dask-array.svg :width: 50 % :align: right :alt: A Dask array Dask divides arrays into smaller parts called chunks. These chunks are small, manageable pieces of the larger dataset, that Dask is able to process in parallel (see the `Dask Array docs on chunks `_). Commonly chunks are set when reading data, but you can also set the chunksize manually at any point in your workflow using :py:meth:`Dataset.chunk` and :py:meth:`DataArray.chunk`. See :ref:`dask.chunks` for more. Xarray operations on Dask-backed arrays are lazy. This means computations are not executed immediately, but are instead queued up as tasks in a Dask graph. When a result is requested (e.g., for plotting, writing to disk, or explicitly computing), Dask executes the task graph. The computations are carried out in parallel, with each chunk being processed independently. This parallel execution is key to handling large datasets efficiently. Nearly all Xarray methods have been extended to work automatically with Dask Arrays. This includes things like indexing, concatenating, rechunking, grouped operations, etc. Common operations are covered in more detail in each of the sections below. .. _dask.io: Reading and writing data ~~~~~~~~~~~~~~~~~~~~~~~~ When reading data, Dask divides your dataset into smaller chunks. You can specify the size of chunks with the ``chunks`` argument. Specifying ``chunks="auto"`` will set the dask chunk sizes to be a multiple of the on-disk chunk sizes. This can be a good idea, but usually the appropriate dask chunk size will depend on your workflow. .. tab:: Zarr The `Zarr `_ format is ideal for working with large datasets. Each chunk is stored in a separate file, allowing parallel reading and writing with Dask. You can also use Zarr to read/write directly from cloud storage buckets (see the `Dask documentation on connecting to remote data `__) When you open a Zarr dataset with :py:func:`~xarray.open_zarr`, it is loaded as a Dask array by default (if Dask is installed):: ds = xr.open_zarr("path/to/directory.zarr") See :ref:`io.zarr` for more details. .. tab:: NetCDF Open a single netCDF file with :py:func:`~xarray.open_dataset` and supplying a ``chunks`` argument:: ds = xr.open_dataset("example-data.nc", chunks={"time": 10}) Or open multiple files in parallel with py:func:`~xarray.open_mfdataset`:: xr.open_mfdataset('my/files/*.nc', parallel=True) .. tip:: When reading in many netCDF files with py:func:`~xarray.open_mfdataset`, using ``engine="h5netcdf"`` can be faster than the default which uses the netCDF4 package. Save larger-than-memory netCDF files:: ds.to_netcdf("my-big-file.nc") Or set ``compute=False`` to return a dask.delayed object that can be computed later:: delayed_write = ds.to_netcdf("my-big-file.nc", compute=False) delayed_write.compute() .. note:: When using Dask’s distributed scheduler to write NETCDF4 files, it may be necessary to set the environment variable ``HDF5_USE_FILE_LOCKING=FALSE`` to avoid competing locks within the HDF5 SWMR file locking scheme. Note that writing netCDF files with Dask’s distributed scheduler is only supported for the netcdf4 backend. See :ref:`io.netcdf` for more details. .. 
tab:: HDF5 Open HDF5 files with :py:func:`~xarray.open_dataset`:: xr.open_dataset("/path/to/my/file.h5", chunks='auto') See :ref:`io.hdf5` for more details. .. tab:: GeoTIFF Open large geoTIFF files with rioxarray:: xds = rioxarray.open_rasterio("my-satellite-image.tif", chunks='auto') See :ref:`io.rasterio` for more details. Loading Dask Arrays ~~~~~~~~~~~~~~~~~~~ There are a few common cases where you may want to convert lazy Dask arrays into eager, in-memory Xarray data structures: - You want to inspect smaller intermediate results when working interactively or debugging - You've reduced the dataset (by filtering or with a groupby, for example) and now have something much smaller that fits in memory - You need to compute intermediate results since Dask is unable (or struggles) to perform a certain computation. The canonical example of this is normalizing a dataset, e.g., ``ds - ds.mean()``, when ``ds`` is larger than memory. Typically, you should either save ``ds`` to disk or compute ``ds.mean()`` eagerly. To do this, you can use :py:meth:`Dataset.compute` or :py:meth:`DataArray.compute`: .. jupyter-execute:: ds.compute() .. note:: Using :py:meth:`Dataset.compute` is preferred to :py:meth:`Dataset.load`, which changes the results in-place. You can also access :py:attr:`DataArray.values`, which will always be a NumPy array: .. jupyter-input:: ds.temperature.values .. jupyter-output:: array([[[ 4.691e-01, -2.829e-01, ..., -5.577e-01, 3.814e-01], [ 1.337e+00, -1.531e+00, ..., 8.726e-01, -1.538e+00], ... # truncated for brevity NumPy ufuncs like :py:func:`numpy.sin` transparently work on all xarray objects, including those that store lazy Dask arrays: .. jupyter-execute:: np.sin(ds) To access Dask arrays directly, use the :py:attr:`DataArray.data` attribute which exposes the DataArray's underlying array type. If you're using a Dask cluster, you can also use :py:meth:`Dataset.persist` for quickly accessing intermediate outputs. This is most helpful after expensive operations like rechunking or setting an index. It's a way of telling the cluster that it should start executing the computations that you have defined so far, and that it should try to keep those results in memory. You will get back a new Dask array that is semantically equivalent to your old array, but now points to running data. .. code-block:: python ds = ds.persist() .. tip:: Remember to save the dataset returned by persist! This is a common mistake. .. _dask.chunks: Chunking and performance ~~~~~~~~~~~~~~~~~~~~~~~~ The way a dataset is chunked can be critical to performance when working with large datasets. You'll want chunk sizes large enough to reduce the number of chunks that Dask has to think about (to reduce overhead from the task graph) but also small enough so that many of them can fit in memory at once. .. tip:: A good rule of thumb is to create arrays with a minimum chunk size of at least one million elements (e.g., a 1000x1000 matrix). With large arrays (10+ GB), you may need larger chunks. See `Choosing good chunk sizes in Dask `_. It can be helpful to choose chunk sizes based on your downstream analyses and to chunk as early as possible. Datasets with smaller chunks along the time axis, for example, can make time domain problems easier to parallelize since Dask can perform the same operation on each time chunk. If you're working with a large dataset with chunks that make downstream analyses challenging, you may need to rechunk your data. This is an expensive operation though, so is only recommended when needed. 
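For example, a minimal sketch (assuming a hypothetical dataset ``ds`` that is currently chunked along ``time``) of rechunking into larger spatial blocks might look like:

.. code-block:: python

    # One chunk spanning the full time axis, with 90x90 spatial blocks;
    # the sizes here are illustrative only.
    ds_rechunked = ds.chunk({"time": -1, "latitude": 90, "longitude": 90})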
You can chunk or rechunk a dataset by: - Specifying the ``chunks`` kwarg when reading in your dataset. If you know you'll want to do some spatial subsetting, for example, you could use ``chunks={'latitude': 10, 'longitude': 10}`` to specify small chunks across space. This can avoid loading subsets of data that span multiple chunks, thus reducing the number of file reads. Note that this will only work, though, for chunks that are similar to how the data is chunked on disk. Otherwise, it will be very slow and require a lot of network bandwidth. - Many array file formats are chunked on disk. You can specify ``chunks={}`` to have a single dask chunk map to a single on-disk chunk, and ``chunks="auto"`` to have a single dask chunk be an automatically chosen multiple of the on-disk chunks. - Using :py:meth:`Dataset.chunk` after you've already read in your dataset. For time domain problems, for example, you can use ``ds.chunk(time=TimeResampler())`` to rechunk according to a specified unit of time. ``ds.chunk(time=TimeResampler("MS"))``, for example, will set the chunks so that a month of data is contained in one chunk. For large-scale rechunking tasks (e.g., converting a simulation dataset stored with chunking only along time to a dataset with chunking only across space), consider writing another copy of your data on disk and/or using dedicated tools such as `Rechunker `_. .. _dask.automatic-parallelization: Parallelize custom functions with ``apply_ufunc`` and ``map_blocks`` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Almost all of Xarray's built-in operations work on Dask arrays. If you want to use a function that isn't wrapped by Xarray, and have it applied in parallel on each block of your xarray object, you have three options: 1. Use :py:func:`~xarray.apply_ufunc` to apply functions that consume and return NumPy arrays. 2. Use :py:func:`~xarray.map_blocks`, :py:meth:`Dataset.map_blocks` or :py:meth:`DataArray.map_blocks` to apply functions that consume and return xarray objects. 3. Extract Dask Arrays from xarray objects with :py:attr:`DataArray.data` and use Dask directly. .. tip:: See the extensive Xarray tutorial on `apply_ufunc `_. ``apply_ufunc`` ############### :py:func:`~xarray.apply_ufunc` automates `embarrassingly parallel `__ "map" type operations where a function written for processing NumPy arrays should be repeatedly applied to Xarray objects containing Dask Arrays. It works similarly to :py:func:`dask.array.map_blocks` and :py:func:`dask.array.blockwise`, but without requiring an intermediate layer of abstraction. See the `Dask documentation `__ for more details. For the best performance when using Dask's multi-threaded scheduler, wrap a function that already releases the global interpreter lock, which fortunately already includes most NumPy and Scipy functions. Here we show an example using NumPy operations and a fast function from `bottleneck `__, which we use to calculate `Spearman's rank-correlation coefficient `__: .. 
code-block:: python

    import bottleneck  # fast, compiled rank computation

    def covariance_gufunc(x, y):
        return (
            (x - x.mean(axis=-1, keepdims=True)) * (y - y.mean(axis=-1, keepdims=True))
        ).mean(axis=-1)

    def pearson_correlation_gufunc(x, y):
        return covariance_gufunc(x, y) / (x.std(axis=-1) * y.std(axis=-1))

    def spearman_correlation_gufunc(x, y):
        x_ranks = bottleneck.rankdata(x, axis=-1)
        y_ranks = bottleneck.rankdata(y, axis=-1)
        return pearson_correlation_gufunc(x_ranks, y_ranks)

    def spearman_correlation(x, y, dim):
        return xr.apply_ufunc(
            spearman_correlation_gufunc,
            x,
            y,
            input_core_dims=[[dim], [dim]],
            dask="parallelized",
            output_dtypes=[float],
        )

The only aspect of this example that differs from standard usage of ``apply_ufunc()`` is that we needed to supply the ``output_dtypes`` argument. (Read up on :ref:`compute.wrapping-custom` for an explanation of the "core dimensions" listed in ``input_core_dims``.)

Our new ``spearman_correlation()`` function achieves near-linear speedup when run on large arrays across the four cores of my laptop. It would also work as a streaming operation, when run on arrays loaded from disk:

.. jupyter-input::

    rs = np.random.default_rng(0)
    array1 = xr.DataArray(rs.standard_normal((1000, 100000)), dims=["place", "time"])  # 800MB
    array2 = array1 + 0.5 * rs.standard_normal((1000, 100000))

    # using one core, on NumPy arrays
    %time _ = spearman_correlation(array1, array2, 'time')
    # CPU times: user 21.6 s, sys: 2.84 s, total: 24.5 s
    # Wall time: 24.9 s

    chunked1 = array1.chunk({"place": 10})
    chunked2 = array2.chunk({"place": 10})

    # using all my laptop's cores, with Dask
    r = spearman_correlation(chunked1, chunked2, "time")
    %time _ = r.compute()
    # CPU times: user 30.9 s, sys: 1.74 s, total: 32.6 s
    # Wall time: 4.59 s

One limitation of ``apply_ufunc()`` is that it cannot be applied to arrays with multiple chunks along a core dimension:

.. jupyter-input::

    spearman_correlation(chunked1, chunked2, "place")

.. jupyter-output::

    ValueError: dimension 'place' on 0th function argument to apply_ufunc with dask='parallelized' consists of multiple chunks, but is also a core dimension. To fix, rechunk into a single Dask array chunk along this dimension, i.e., ``.rechunk({'place': -1})``, but beware that this may significantly increase memory usage.

This reflects the nature of core dimensions, in contrast to broadcast (non-core) dimensions that allow operations to be split into arbitrary chunks for application.

.. tip::

    When possible, it's recommended to use pre-existing ``dask.array`` functions, either with existing xarray methods or :py:func:`~xarray.apply_ufunc()` with ``dask='allowed'``. Dask can often have a more efficient implementation that makes use of the specialized structure of a problem, unlike the generic speedups offered by ``dask='parallelized'``.
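To make the tip above concrete, here is a minimal sketch of the ``dask='allowed'`` route (not part of the original example; the helper name is made up and the tutorial dataset is used purely for illustration), wrapping :py:func:`dask.array.nanmean`, which already understands chunked arrays:

.. code-block:: python

    import dask.array
    import xarray as xr

    def dask_aware_mean(da, dim):
        # dask.array.nanmean natively handles chunked inputs, so apply_ufunc can
        # hand it the underlying dask array directly rather than mapping a NumPy
        # function over every block.
        return xr.apply_ufunc(
            dask.array.nanmean,
            da,
            input_core_dims=[[dim]],
            kwargs={"axis": -1},  # core dimensions are moved to the last axis
            dask="allowed",
        )

    ds = xr.tutorial.open_dataset("air_temperature", chunks={"time": 1000})
    result = dask_aware_mean(ds.air, "time")  # still lazy
    result.compute()

In practice ``ds.air.mean("time")`` already does exactly this; the sketch only spells out the ``dask='allowed'`` mechanics for a dask-aware function you supply yourself.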
``map_blocks``
##############

Functions that consume and return Xarray objects can be easily applied in parallel using :py:func:`map_blocks`. Your function will receive an Xarray Dataset or DataArray subset to one chunk along each chunked dimension.

.. jupyter-execute::

    ds.temperature

This DataArray has 3 chunks, each with length 10 along the time dimension. At compute time, a function applied with :py:func:`map_blocks` will receive a DataArray corresponding to a single block of shape 10x180x180 (time x latitude x longitude) with values loaded. The following snippet illustrates how to check the shape of the object received by the applied function.

.. jupyter-execute::

    def func(da):
        print(da.sizes)
        return da.time

    mapped = xr.map_blocks(func, ds.temperature)
    mapped

Notice that the :py:meth:`map_blocks` call printed ``Frozen({'time': 0, 'latitude': 0, 'longitude': 0})`` to screen. ``func`` received 0-sized blocks! :py:meth:`map_blocks` needs to know what the final result looks like in terms of dimensions, shapes, etc. It does so by running the provided function on 0-shaped inputs (*automated inference*). This works in many cases, but not all. If automatic inference does not work for your function, provide the ``template`` kwarg (see :ref:`below <template-note>`).

In this case, automatic inference has worked, so let's check that the result is as expected.

.. jupyter-execute::

    mapped.load(scheduler="single-threaded")
    mapped.identical(ds.time)

Note that we use ``.load(scheduler="single-threaded")`` to execute the computation. This executes the Dask graph in serial using a for loop, but allows for printing to screen and other debugging techniques. We can easily see that our function is receiving blocks of shape 10x180x180 and that the returned result is identical to ``ds.time`` as expected.

Here is a common example where automated inference will not work.

.. jupyter-execute::
    :raises:

    def func(da):
        print(da.sizes)
        return da.isel(time=[1])

    mapped = xr.map_blocks(func, ds.temperature)

``func`` cannot be run on 0-shaped inputs because it is not possible to extract element 1 along a dimension of size 0. In this case we need to tell :py:func:`map_blocks` what the returned result looks like using the ``template`` kwarg. ``template`` must be an xarray Dataset or DataArray (depending on what the function returns) with dimensions, shapes, chunk sizes, attributes, coordinate variables *and* data variables that look exactly like the expected result. The variables should be dask-backed and hence not incur much memory cost.

.. _template-note:

.. note::

    When ``template`` is provided, ``attrs`` from ``template`` are copied over to the result. Any ``attrs`` set in ``func`` will be ignored.

.. jupyter-execute::

    template = ds.temperature.isel(time=[1, 11, 21])
    mapped = xr.map_blocks(func, ds.temperature, template=template)

Notice that the 0-shaped sizes were not printed to screen. Since ``template`` has been provided, :py:func:`map_blocks` does not need to infer it by running ``func`` on 0-shaped inputs.

.. jupyter-execute::

    mapped.identical(template)

:py:func:`map_blocks` also allows passing ``args`` and ``kwargs`` down to the user function ``func``. ``func`` will be executed as ``func(block_xarray, *args, **kwargs)`` so ``args`` must be a list and ``kwargs`` must be a dictionary.

.. jupyter-execute::

    def func(obj, a, b=0):
        return obj + a + b

    mapped = ds.map_blocks(func, args=[10], kwargs={"b": 10})
    expected = ds + 10 + 10
    mapped.identical(expected)

.. jupyter-execute::
    :hide-code:

    ds.close()  # Closes "example-data.nc".
    tempdir.cleanup()

.. tip::

    As :py:func:`map_blocks` loads each block into memory, pass as little data as possible to the user function: for example, drop unneeded variables before calling ``func`` with :py:func:`map_blocks`.

Deploying Dask
--------------

By default, Dask uses the multi-threaded scheduler, which distributes work across multiple cores on a single machine and allows for processing some datasets that do not fit into memory. However, working on a single machine has two limitations:

- You are limited by the size of your hard drive
- Downloading data can be slow and expensive

Instead, it can be faster and cheaper to run your computations close to where your data is stored, distributed across many machines on a Dask cluster. Often, this means deploying Dask on HPC clusters or on the cloud. See the `Dask deployment documentation `__ for more details.
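If you just want to experiment with distributed execution on your own machine, a quick way to start is a local cluster. The following is a minimal sketch rather than part of the upstream guide; it assumes ``dask.distributed`` is installed and uses the tutorial dataset purely for illustration:

.. code-block:: python

    import xarray as xr
    from dask.distributed import Client

    client = Client()  # starts a LocalCluster using the cores of this machine
    print(client.dashboard_link)  # useful for the dashboard tip below

    ds = xr.tutorial.open_dataset("air_temperature", chunks={"time": 1000})
    result = ds.air.groupby("time.season").mean().compute()  # runs on the local cluster

Once a :py:class:`distributed.Client` is active, subsequent ``.compute()``, ``.persist()`` and ``.load()`` calls are executed on that cluster instead of the default multi-threaded scheduler.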
Best Practices
--------------

Dask is pretty easy to use but there are some gotchas, many of which are under active development. Here are some tips we have found through experience. We also recommend checking out the `Dask best practices `_.

1. Do your spatial and temporal indexing (e.g. ``.sel()`` or ``.isel()``) early, especially before calling ``resample()`` or ``groupby()``. Grouping and resampling triggers some computation on all the blocks, which in theory should commute with indexing, but this optimization hasn't been implemented in Dask yet. (See `Dask issue #746 `_.)

2. More generally, ``groupby()`` is a costly operation and will perform a lot better if the ``flox`` package is installed. See the `flox documentation `_ for more. By default Xarray will use ``flox`` if installed.

3. Save intermediate results to disk as netCDF files (using ``to_netcdf()``) and then load them again with ``open_dataset()`` for further computations. For example, if subtracting the temporal mean from a dataset, save the temporal mean to disk before subtracting. Again, in theory, Dask should be able to do the computation in a streaming fashion, but in practice this is a fail case for the Dask scheduler, because it tries to keep every chunk of an array that it computes in memory. (See `Dask issue #874 `_.)

4. Use the `Dask dashboard `_ to identify performance bottlenecks.

Here's an example of a simplified workflow putting some of these tips together:

.. code-block:: python

    ds = xr.open_zarr(
        # Since we're doing a spatial reduction, increase chunk size in x, y
        "my-data.zarr",
        chunks={"x": 100, "y": 100},
    )

    time_subset = ds.sea_temperature.sel(
        time=slice("2020-01-01", "2020-12-31")  # Filter early
    )

    # faster resampling when flox is installed
    daily = time_subset.resample(time="D").mean()

    daily.load()  # Pull smaller results into memory after reducing the dataset
python-xarray-2026.01.0/doc/api.rst0000664000175000017500000000127015136607163017113 0ustar alastairalastair.. currentmodule:: xarray .. _api: ############# API reference ############# This page provides an auto-generated summary of xarray's API. For more details and examples, refer to the relevant chapters in the main part of the documentation. See also: :ref:`public-api` and :ref:`api-stability`. .. toctree:: :maxdepth: 1 api/top-level api/dataset api/dataarray api/datatree api/coordinates api/indexes api/ufuncs api/io api/encoding api/plotting api/groupby api/rolling api/coarsen api/rolling-exp api/weighted api/resample api/accessors api/tutorial api/testing api/backends api/exceptions api/advanced api/deprecated python-xarray-2026.01.0/doc/Makefile0000664000175000017500000002043015136607163017247 0ustar alastairalastair# Makefile for Sphinx documentation # # You can set these variables from the command line. SPHINXOPTS = SPHINXBUILD = sphinx-build SPHINXATUOBUILD = sphinx-autobuild PAPER = BUILDDIR = _build # Internal variables. PAPEROPT_a4 = -D latex_paper_size=a4 PAPEROPT_letter = -D latex_paper_size=letter ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
# the i18n builder cannot share the environment and doctrees with the others I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . .PHONY: help help: @echo "Please use \`make ' where is one of" @echo " html to make standalone HTML files" @echo " rtdhtml Build html using same settings used on ReadtheDocs" @echo " livehtml Make standalone HTML files and rebuild the documentation when a change is detected. Also includes a livereload enabled web server" @echo " dirhtml to make HTML files named index.html in directories" @echo " singlehtml to make a single large HTML file" @echo " pickle to make pickle files" @echo " json to make JSON files" @echo " htmlhelp to make HTML files and an HTML help project" @echo " qthelp to make HTML files and a qthelp project" @echo " applehelp to make an Apple Help Book" @echo " devhelp to make HTML files and a Devhelp project" @echo " epub to make an epub" @echo " epub3 to make an epub3" @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" @echo " latexpdf to make LaTeX files and run them through pdflatex" @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" @echo " text to make text files" @echo " man to make manual pages" @echo " texinfo to make Texinfo files" @echo " info to make Texinfo files and run them through makeinfo" @echo " gettext to make PO message catalogs" @echo " changes to make an overview of all changed/added/deprecated items" @echo " xml to make Docutils-native XML files" @echo " pseudoxml to make pseudoxml-XML files for display purposes" @echo " linkcheck to check all external links for integrity" @echo " doctest to run all doctests embedded in the documentation (if enabled)" @echo " coverage to run coverage check of the documentation (if enabled)" @echo " dummy to check syntax errors of document sources" .PHONY: clean clean: rm -rf $(BUILDDIR)/* rm -rf generated/* rm -rf auto_gallery/ .PHONY: html html: $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." .PHONY: rtdhtml rtdhtml: $(SPHINXBUILD) -T -j auto -E -W --keep-going -b html -d $(BUILDDIR)/doctrees -D language=en . $(BUILDDIR)/html @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." .PHONY: livehtml livehtml: # @echo "$(SPHINXATUOBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html" $(SPHINXATUOBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html .PHONY: dirhtml dirhtml: $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." .PHONY: singlehtml singlehtml: $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml @echo @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." .PHONY: html-noplot html-noplot: $(SPHINXBUILD) -D plot_gallery=0 -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." .PHONY: pickle pickle: $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle @echo @echo "Build finished; now you can process the pickle files." .PHONY: json json: $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json @echo @echo "Build finished; now you can process the JSON files." .PHONY: htmlhelp htmlhelp: $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp @echo @echo "Build finished; now you can run HTML Help Workshop with the" \ ".hhp project file in $(BUILDDIR)/htmlhelp." 
.PHONY: qthelp qthelp: $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp @echo @echo "Build finished; now you can run "qcollectiongenerator" with the" \ ".qhcp project file in $(BUILDDIR)/qthelp, like this:" @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/xarray.qhcp" @echo "To view the help file:" @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/xarray.qhc" .PHONY: applehelp applehelp: $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp @echo @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." @echo "N.B. You won't be able to view it unless you put it in" \ "~/Library/Documentation/Help or install it in your application" \ "bundle." .PHONY: devhelp devhelp: $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp @echo @echo "Build finished." @echo "To view the help file:" @echo "# mkdir -p $$HOME/.local/share/devhelp/xarray" @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/xarray" @echo "# devhelp" .PHONY: epub epub: $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub @echo @echo "Build finished. The epub file is in $(BUILDDIR)/epub." .PHONY: epub3 epub3: $(SPHINXBUILD) -b epub3 $(ALLSPHINXOPTS) $(BUILDDIR)/epub3 @echo @echo "Build finished. The epub3 file is in $(BUILDDIR)/epub3." .PHONY: latex latex: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." @echo "Run \`make' in that directory to run these through (pdf)latex" \ "(use \`make latexpdf' here to do that automatically)." .PHONY: latexpdf latexpdf: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo "Running LaTeX files through pdflatex..." $(MAKE) -C $(BUILDDIR)/latex all-pdf @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." .PHONY: latexpdfja latexpdfja: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo "Running LaTeX files through platex and dvipdfmx..." $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." .PHONY: text text: $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text @echo @echo "Build finished. The text files are in $(BUILDDIR)/text." .PHONY: man man: $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man @echo @echo "Build finished. The manual pages are in $(BUILDDIR)/man." .PHONY: texinfo texinfo: $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo @echo @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." @echo "Run \`make' in that directory to run these through makeinfo" \ "(use \`make info' here to do that automatically)." .PHONY: info info: $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo @echo "Running Texinfo files through makeinfo..." make -C $(BUILDDIR)/texinfo info @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." .PHONY: gettext gettext: $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale @echo @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." .PHONY: changes changes: $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes @echo @echo "The overview file is in $(BUILDDIR)/changes." .PHONY: linkcheck linkcheck: $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck @echo @echo "Link check complete; look for any errors in the above output " \ "or in $(BUILDDIR)/linkcheck/output.txt." 
.PHONY: doctest doctest: $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest @echo "Testing of doctests in the sources finished, look at the " \ "results in $(BUILDDIR)/doctest/output.txt." .PHONY: coverage coverage: $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage @echo "Testing of coverage in the sources finished, look at the " \ "results in $(BUILDDIR)/coverage/python.txt." .PHONY: xml xml: $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml @echo @echo "Build finished. The XML files are in $(BUILDDIR)/xml." .PHONY: pseudoxml pseudoxml: $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml @echo @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." .PHONY: dummy dummy: $(SPHINXBUILD) -b dummy $(ALLSPHINXOPTS) $(BUILDDIR)/dummy @echo @echo "Build finished. Dummy builder generates no files." python-xarray-2026.01.0/doc/examples/0000775000175000017500000000000015136607163017426 5ustar alastairalastairpython-xarray-2026.01.0/doc/examples/visualization_gallery.ipynb0000664000175000017500000001374415136607163025122 0ustar alastairalastair{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Visualization Gallery\n", "\n", "This notebook shows common visualization issues encountered in xarray." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import cartopy.crs as ccrs\n", "import matplotlib.pyplot as plt\n", "import xarray as xr\n", "\n", "%matplotlib inline" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Load example dataset:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ds = xr.tutorial.load_dataset(\"air_temperature\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Multiple plots and map projections\n", "\n", "Control the map projection parameters on multiple axes\n", "\n", "This example illustrates how to plot multiple maps and control their extent\n", "and aspect ratio.\n", "\n", "For more details see [this discussion](https://github.com/pydata/xarray/issues/1397#issuecomment-299190567) on github." 
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "air = ds.air.isel(time=[0, 724]) - 273.15\n", "\n", "# This is the map projection we want to plot *onto*\n", "map_proj = ccrs.LambertConformal(central_longitude=-95, central_latitude=45)\n", "\n", "p = air.plot(\n", " transform=ccrs.PlateCarree(), # the data's projection\n", " col=\"time\",\n", " col_wrap=1, # multiplot settings\n", " aspect=ds.dims[\"lon\"] / ds.dims[\"lat\"], # for a sensible figsize\n", " subplot_kws={\"projection\": map_proj},\n", ") # the plot's projection\n", "\n", "# We have to set the map's options on all axes\n", "for ax in p.axes.flat:\n", " ax.coastlines()\n", " ax.set_extent([-160, -30, 5, 75])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Centered colormaps\n", "\n", "Xarray's automatic colormaps choice" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "air = ds.air.isel(time=0)\n", "\n", "f, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(8, 6))\n", "\n", "# The first plot (in kelvins) chooses \"viridis\" and uses the data's min/max\n", "air.plot(ax=ax1, cbar_kwargs={\"label\": \"K\"})\n", "ax1.set_title(\"Kelvins: default\")\n", "ax2.set_xlabel(\"\")\n", "\n", "# The second plot (in celsius) now chooses \"BuRd\" and centers min/max around 0\n", "airc = air - 273.15\n", "airc.plot(ax=ax2, cbar_kwargs={\"label\": \"°C\"})\n", "ax2.set_title(\"Celsius: default\")\n", "ax2.set_xlabel(\"\")\n", "ax2.set_ylabel(\"\")\n", "\n", "# The center doesn't have to be 0\n", "air.plot(ax=ax3, center=273.15, cbar_kwargs={\"label\": \"K\"})\n", "ax3.set_title(\"Kelvins: center=273.15\")\n", "\n", "# Or it can be ignored\n", "airc.plot(ax=ax4, center=False, cbar_kwargs={\"label\": \"°C\"})\n", "ax4.set_title(\"Celsius: center=False\")\n", "ax4.set_ylabel(\"\")\n", "\n", "# Make it nice\n", "plt.tight_layout()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Control the plot's colorbar\n", "\n", "Use ``cbar_kwargs`` keyword to specify the number of ticks.\n", "The ``spacing`` kwarg can be used to draw proportional ticks." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "air2d = ds.air.isel(time=500)\n", "\n", "# Prepare the figure\n", "f, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(14, 4))\n", "\n", "# Irregular levels to illustrate the use of a proportional colorbar\n", "levels = [245, 250, 255, 260, 265, 270, 275, 280, 285, 290, 310, 340]\n", "\n", "# Plot data\n", "air2d.plot(ax=ax1, levels=levels)\n", "air2d.plot(ax=ax2, levels=levels, cbar_kwargs={\"ticks\": levels})\n", "air2d.plot(\n", " ax=ax3, levels=levels, cbar_kwargs={\"ticks\": levels, \"spacing\": \"proportional\"}\n", ")\n", "\n", "# Show plots\n", "plt.tight_layout()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Multiple lines from a 2d DataArray\n", "\n", "Use ``xarray.plot.line`` on a 2d DataArray to plot selections as\n", "multiple lines.\n", "\n", "See ``plotting.multiplelines`` for more details." 
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "air = ds.air - 273.15 # to celsius\n", "\n", "# Prepare the figure\n", "f, (ax1, ax2) = plt.subplots(1, 2, figsize=(8, 4), sharey=True)\n", "\n", "# Selected latitude indices\n", "isel_lats = [10, 15, 20]\n", "\n", "# Temperature vs longitude plot - illustrates the \"hue\" kwarg\n", "air.isel(time=0, lat=isel_lats).plot.line(ax=ax1, hue=\"lat\")\n", "ax1.set_ylabel(\"°C\")\n", "\n", "# Temperature vs time plot - illustrates the \"x\" and \"add_legend\" kwargs\n", "air.isel(lon=30, lat=isel_lats).plot.line(ax=ax2, x=\"time\", add_legend=False)\n", "ax2.set_ylabel(\"\")\n", "\n", "# Show\n", "plt.tight_layout()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.7" }, "widgets": { "application/vnd.jupyter.widget-state+json": { "state": {}, "version_major": 2, "version_minor": 0 } } }, "nbformat": 4, "nbformat_minor": 4 } python-xarray-2026.01.0/doc/examples/blank_template.ipynb0000664000175000017500000000212315136607163023451 0ustar alastairalastair{ "cells": [ { "cell_type": "markdown", "id": "d8f54f6a", "metadata": {}, "source": [ "# Blank template\n", "\n", "Use this notebook from Binder to test an issue or reproduce a bug report" ] }, { "cell_type": "code", "execution_count": null, "id": "41b90ede", "metadata": {}, "outputs": [], "source": [ "import xarray as xr\n", "import numpy as np\n", "import pandas as pd\n", "\n", "ds = xr.tutorial.load_dataset(\"air_temperature\")\n", "da = ds[\"air\"]" ] }, { "cell_type": "code", "execution_count": null, "id": "effd9aeb", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.10" } }, "nbformat": 4, "nbformat_minor": 5 } python-xarray-2026.01.0/doc/examples/weather-data.ipynb0000664000175000017500000002124315136607163023041 0ustar alastairalastair{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Toy weather data\n", "\n", "Here is an example of how to easily manipulate a toy weather dataset using\n", "xarray and other recommended Python libraries:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import seaborn as sns\n", "\n", "import xarray as xr\n", "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-01-27T15:43:36.127628Z", "start_time": "2020-01-27T15:43:36.081733Z" } }, "outputs": [], "source": [ "np.random.seed(123)\n", "\n", "xr.set_options(display_style=\"html\")\n", "\n", "times = pd.date_range(\"2000-01-01\", \"2001-12-31\", name=\"time\")\n", "annual_cycle = np.sin(2 * np.pi * (times.dayofyear.values / 365.25 - 0.28))\n", "\n", "base = 10 + 15 * annual_cycle.reshape(-1, 1)\n", "tmin_values = base + 3 * np.random.randn(annual_cycle.size, 3)\n", "tmax_values = base + 10 + 3 * np.random.randn(annual_cycle.size, 3)\n", "\n", "ds = xr.Dataset(\n", " {\n", " 
\"tmin\": ((\"time\", \"location\"), tmin_values),\n", " \"tmax\": ((\"time\", \"location\"), tmax_values),\n", " },\n", " {\"time\": times, \"location\": [\"IA\", \"IN\", \"IL\"]},\n", ")\n", "\n", "ds" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Examine a dataset with pandas and seaborn" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Convert to a pandas DataFrame" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-01-27T15:47:14.160297Z", "start_time": "2020-01-27T15:47:14.126738Z" } }, "outputs": [], "source": [ "df = ds.to_dataframe()\n", "df.head()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-01-27T15:47:32.682065Z", "start_time": "2020-01-27T15:47:32.652629Z" } }, "outputs": [], "source": [ "df.describe()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Visualize using pandas" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-01-27T15:47:34.617042Z", "start_time": "2020-01-27T15:47:34.282605Z" } }, "outputs": [], "source": [ "ds.mean(dim=\"location\").to_dataframe().plot()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Visualize using seaborn" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-01-27T15:47:37.643175Z", "start_time": "2020-01-27T15:47:37.202479Z" } }, "outputs": [], "source": [ "sns.pairplot(df.reset_index(), vars=ds.data_vars)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Probability of freeze by calendar month" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-01-27T15:48:11.241224Z", "start_time": "2020-01-27T15:48:11.211156Z" } }, "outputs": [], "source": [ "freeze = (ds[\"tmin\"] <= 0).groupby(\"time.month\").mean(\"time\")\n", "freeze" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-01-27T15:48:13.131247Z", "start_time": "2020-01-27T15:48:12.924985Z" } }, "outputs": [], "source": [ "freeze.to_pandas().plot()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Monthly averaging" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-01-27T15:48:08.498259Z", "start_time": "2020-01-27T15:48:08.210890Z" } }, "outputs": [], "source": [ "monthly_avg = ds.resample(time=\"1MS\").mean()\n", "monthly_avg.sel(location=\"IA\").to_dataframe().plot(style=\"s-\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Note that ``MS`` here refers to Month-Start; ``M`` labels Month-End (the last day of the month)." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Calculate monthly anomalies" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "In climatology, \"anomalies\" refer to the difference between observations and\n", "typical weather for a particular season. Unlike observations, anomalies should\n", "not show any seasonal cycle." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-01-27T15:49:34.855086Z", "start_time": "2020-01-27T15:49:34.406439Z" } }, "outputs": [], "source": [ "climatology = ds.groupby(\"time.month\").mean(\"time\")\n", "anomalies = ds.groupby(\"time.month\") - climatology\n", "anomalies.mean(\"location\").to_dataframe()[[\"tmin\", \"tmax\"]].plot()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Calculate standardized monthly anomalies" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "You can create standardized anomalies where the difference between the\n", "observations and the climatological monthly mean is\n", "divided by the climatological standard deviation." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-01-27T15:50:09.144586Z", "start_time": "2020-01-27T15:50:08.734682Z" } }, "outputs": [], "source": [ "climatology_mean = ds.groupby(\"time.month\").mean(\"time\")\n", "climatology_std = ds.groupby(\"time.month\").std(\"time\")\n", "stand_anomalies = xr.apply_ufunc(\n", " lambda x, m, s: (x - m) / s,\n", " ds.groupby(\"time.month\"),\n", " climatology_mean,\n", " climatology_std,\n", ")\n", "\n", "stand_anomalies.mean(\"location\").to_dataframe()[[\"tmin\", \"tmax\"]].plot()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Fill missing values with climatology" ] }, { "cell_type": "markdown", "metadata": { "ExecuteTime": { "end_time": "2020-01-27T15:50:46.192491Z", "start_time": "2020-01-27T15:50:46.174554Z" } }, "source": [ "The ``fillna`` method on grouped objects lets you easily fill missing values by group:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-01-27T15:51:40.279299Z", "start_time": "2020-01-27T15:51:40.220342Z" } }, "outputs": [], "source": [ "# throw away the first half of every month\n", "some_missing = ds.tmin.sel(time=ds[\"time.day\"] > 15).reindex_like(ds)\n", "filled = some_missing.groupby(\"time.month\").fillna(climatology.tmin)\n", "both = xr.Dataset({\"some_missing\": some_missing, \"filled\": filled})\n", "both" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-01-27T15:52:11.815769Z", "start_time": "2020-01-27T15:52:11.770825Z" } }, "outputs": [], "source": [ "df = both.sel(time=\"2000\").mean(\"location\").reset_coords(drop=True).to_dataframe()\n", "df.head()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-01-27T15:52:14.867866Z", "start_time": "2020-01-27T15:52:14.449684Z" } }, "outputs": [], "source": [ "df[[\"filled\", \"some_missing\"]].plot()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": true, "toc_position": {}, "toc_section_display": true, "toc_window_display": false } }, "nbformat": 4, "nbformat_minor": 2 } python-xarray-2026.01.0/doc/examples/ERA5-GRIB-example.ipynb0000664000175000017500000000544315136607163023345 0ustar alastairalastair{ "cells": [ { "cell_type": "markdown", 
"metadata": {}, "source": [ "# GRIB Data Example " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "GRIB format is commonly used to disseminate atmospheric model data. With xarray and the cfgrib engine, GRIB data can easily be analyzed and visualized." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import xarray as xr\n", "import matplotlib.pyplot as plt\n", "%matplotlib inline" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "To read GRIB data, you can use `xarray.load_dataset`. The only extra code you need is to specify the engine as `cfgrib`." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ds = xr.tutorial.load_dataset(\"era5-2mt-2019-03-uk.grib\", engine=\"cfgrib\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Let's create a simple plot of 2-m air temperature in degrees Celsius:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ds = ds - 273.15\n", "ds.t2m[0].plot(cmap=plt.cm.coolwarm)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "With CartoPy, we can create a more detailed plot, using built-in shapefiles to help provide geographic context:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import cartopy.crs as ccrs\n", "import cartopy\n", "\n", "fig = plt.figure(figsize=(10, 10))\n", "ax = plt.axes(projection=ccrs.Robinson())\n", "ax.coastlines(resolution=\"10m\")\n", "plot = ds.t2m[0].plot(\n", " cmap=plt.cm.coolwarm, transform=ccrs.PlateCarree(), cbar_kwargs={\"shrink\": 0.6}\n", ")\n", "plt.title(\"ERA5 - 2m temperature British Isles March 2019\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Finally, we can also pull out a time series for a given location easily:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ds.t2m.sel(longitude=0, latitude=51.5).plot()\n", "plt.title(\"ERA5 - London 2m temperature March 2019\")" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 4 } python-xarray-2026.01.0/doc/examples/multidimensional-coords.ipynb0000664000175000017500000001474215136607163025345 0ustar alastairalastair{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Working with Multidimensional Coordinates\n", "\n", "Author: [Ryan Abernathey](https://github.com/rabernat)\n", "\n", "Many datasets have _physical coordinates_ which differ from their _logical coordinates_. Xarray provides several ways to plot and analyze such datasets." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-28T20:49:56.068395Z", "start_time": "2018-11-28T20:49:56.035349Z" } }, "outputs": [], "source": [ "%matplotlib inline\n", "import numpy as np\n", "import pandas as pd\n", "import xarray as xr\n", "import cartopy.crs as ccrs\n", "from matplotlib import pyplot as plt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "As an example, consider this dataset from the [xarray-data](https://github.com/pydata/xarray-data) repository." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-28T20:50:13.629720Z", "start_time": "2018-11-28T20:50:13.484542Z" } }, "outputs": [], "source": [ "ds = xr.tutorial.open_dataset(\"rasm\").load()\n", "ds" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "In this example, the _logical coordinates_ are `x` and `y`, while the _physical coordinates_ are `xc` and `yc`, which represent the longitudes and latitudes of the data." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-28T20:50:15.836061Z", "start_time": "2018-11-28T20:50:15.768376Z" } }, "outputs": [], "source": [ "print(ds.xc.attrs)\n", "print(ds.yc.attrs)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Plotting ##\n", "\n", "Let's examine these coordinate variables by plotting them." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-28T20:50:17.928556Z", "start_time": "2018-11-28T20:50:17.031211Z" } }, "outputs": [], "source": [ "fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(14, 4))\n", "ds.xc.plot(ax=ax1)\n", "ds.yc.plot(ax=ax2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Note that the variables `xc` (longitude) and `yc` (latitude) are two-dimensional scalar fields.\n", "\n", "If we try to plot the data variable `Tair`, by default we get the logical coordinates." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-28T20:50:20.567749Z", "start_time": "2018-11-28T20:50:19.999393Z" } }, "outputs": [], "source": [ "ds.Tair[0].plot()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "In order to visualize the data on a conventional latitude-longitude grid, we can take advantage of xarray's ability to apply [cartopy](https://cartopy.readthedocs.io/stable/) map projections." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-28T20:50:31.131708Z", "start_time": "2018-11-28T20:50:30.444697Z" } }, "outputs": [], "source": [ "plt.figure(figsize=(14, 6))\n", "ax = plt.axes(projection=ccrs.PlateCarree())\n", "ax.set_global()\n", "ds.Tair[0].plot.pcolormesh(\n", " ax=ax, transform=ccrs.PlateCarree(), x=\"xc\", y=\"yc\", add_colorbar=False\n", ")\n", "ax.coastlines()\n", "ax.set_ylim([0, 90]);" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Multidimensional Groupby ##\n", "\n", "The above example allowed us to visualize the data on a regular latitude-longitude grid. But what if we want to do a calculation that involves grouping over one of these physical coordinates (rather than the logical coordinates), for example, calculating the mean temperature at each latitude. This can be achieved using xarray's `groupby` function, which accepts multidimensional variables. By default, `groupby` will use every unique value in the variable, which is probably not what we want. Instead, we can use the `groupby_bins` function to specify the output coordinates of the group. 
" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-28T20:50:43.670463Z", "start_time": "2018-11-28T20:50:43.245501Z" } }, "outputs": [], "source": [ "# define two-degree wide latitude bins\n", "lat_bins = np.arange(0, 91, 2)\n", "# define a label for each bin corresponding to the central latitude\n", "lat_center = np.arange(1, 90, 2)\n", "# group according to those bins and take the mean\n", "Tair_lat_mean = ds.Tair.groupby_bins(\"yc\", lat_bins, labels=lat_center).mean(\n", " dim=xr.ALL_DIMS\n", ")\n", "# plot the result\n", "Tair_lat_mean.plot()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The resulting coordinate for the `groupby_bins` operation got the `_bins` suffix appended: `yc_bins`. This help us distinguish it from the original multidimensional variable `yc`.\n", "\n", "**Note**: This group-by-latitude approach does not take into account the finite-size geometry of grid cells. It simply bins each value according to the coordinates at the cell center. Xarray has no understanding of grid cells and their geometry. More precise geographic regridding for xarray data is available via the [xesmf](https://xesmf.readthedocs.io) package." ] } ], "metadata": { "anaconda-cloud": {}, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.8" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": true, "toc_position": {}, "toc_section_display": true, "toc_window_display": true } }, "nbformat": 4, "nbformat_minor": 2 } python-xarray-2026.01.0/doc/examples/apply_ufunc_vectorize_1d.ipynb0000664000175000017500000006710615136607163025506 0ustar alastairalastair{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Applying unvectorized functions with `apply_ufunc`" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This example will illustrate how to conveniently apply an unvectorized function `func` to xarray objects using `apply_ufunc`. `func` expects 1D numpy arrays and returns a 1D numpy array. 
Our goal is to conveniently apply this function along a dimension of xarray objects that may or may not wrap dask arrays with a signature.\n", "\n", "We will illustrate this using `np.interp`: \n", "\n", " Signature: np.interp(x, xp, fp, left=None, right=None, period=None)\n", " Docstring:\n", " One-dimensional linear interpolation.\n", "\n", " Returns the one-dimensional piecewise linear interpolant to a function\n", " with given discrete data points (`xp`, `fp`), evaluated at `x`.\n", "\n", "and write an `xr_interp` function with signature\n", "\n", " xr_interp(xarray_object, dimension_name, new_coordinate_to_interpolate_to)\n", "\n", "### Load data\n", "\n", "First let's load an example dataset" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-01-15T14:45:51.659160Z", "start_time": "2020-01-15T14:45:50.528742Z" } }, "outputs": [], "source": [ "import xarray as xr\n", "import numpy as np\n", "\n", "xr.set_options(display_style=\"html\") # fancy HTML repr\n", "\n", "air = (\n", " xr.tutorial.load_dataset(\"air_temperature\")\n", " .air.sortby(\"lat\") # np.interp needs coordinate in ascending order\n", " .isel(time=slice(4), lon=slice(3))\n", ") # choose a small subset for convenience\n", "air" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The function we will apply is `np.interp` which expects 1D numpy arrays. This functionality is already implemented in xarray so we use that capability to make sure we are not making mistakes." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-01-15T14:45:55.431708Z", "start_time": "2020-01-15T14:45:55.104701Z" } }, "outputs": [], "source": [ "newlat = np.linspace(15, 75, 100)\n", "air.interp(lat=newlat)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Let's define a function that works with one vector of data along `lat` at a time." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-01-15T14:45:57.889496Z", "start_time": "2020-01-15T14:45:57.792269Z" } }, "outputs": [], "source": [ "def interp1d_np(data, x, xi):\n", " return np.interp(xi, x, data)\n", "\n", "\n", "interped = interp1d_np(air.isel(time=0, lon=0), air.lat, newlat)\n", "expected = air.interp(lat=newlat)\n", "\n", "# no errors are raised if values are equal to within floating point precision\n", "np.testing.assert_allclose(expected.isel(time=0, lon=0).values, interped)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### No errors are raised so our interpolation is working.\n", "\n", "This function consumes and returns numpy arrays, which means we need to do a lot of work to convert the result back to an xarray object with meaningful metadata. This is where `apply_ufunc` is very useful.\n", "\n", "### `apply_ufunc`\n", "\n", " Apply a vectorized function for unlabeled arrays on xarray objects.\n", "\n", " The function will be mapped over the data variable(s) of the input arguments using \n", " xarray’s standard rules for labeled computation, including alignment, broadcasting, \n", " looping over GroupBy/Dataset variables, and merging of coordinates.\n", " \n", "`apply_ufunc` has many capabilities but for simplicity this example will focus on the common task of vectorizing 1D functions over nD xarray objects. We will iteratively build up the right set of arguments to `apply_ufunc` and read through many error messages in doing so." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-01-15T14:45:59.768626Z", "start_time": "2020-01-15T14:45:59.543808Z" } }, "outputs": [], "source": [ "xr.apply_ufunc(\n", " interp1d_np, # first the function\n", " air.isel(time=0, lon=0), # now arguments in the order expected by 'interp1_np'\n", " air.lat,\n", " newlat,\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "`apply_ufunc` needs to know a lot of information about what our function does so that it can reconstruct the outputs. In this case, the size of dimension lat has changed and we need to explicitly specify that this will happen. xarray helpfully tells us that we need to specify the kwarg `exclude_dims`." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### `exclude_dims`" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "\n", "```\n", "exclude_dims : set, optional\n", " Core dimensions on the inputs to exclude from alignment and\n", " broadcasting entirely. Any input coordinates along these dimensions\n", " will be dropped. Each excluded dimension must also appear in\n", " ``input_core_dims`` for at least one argument. Only dimensions listed\n", " here are allowed to change size between input and output objects.\n", "```" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-01-15T14:46:02.187012Z", "start_time": "2020-01-15T14:46:02.105563Z" } }, "outputs": [], "source": [ "xr.apply_ufunc(\n", " interp1d_np, # first the function\n", " air.isel(time=0, lon=0), # now arguments in the order expected by 'interp1_np'\n", " air.lat,\n", " newlat,\n", " exclude_dims=set((\"lat\",)), # dimensions allowed to change size. Must be set!\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Core dimensions\n", "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Core dimensions are central to using `apply_ufunc`. In our case, our function expects to receive a 1D vector along `lat` — this is the dimension that is \"core\" to the function's functionality. Multiple core dimensions are possible. `apply_ufunc` needs to know which dimensions of each variable are core dimensions.\n", "\n", " input_core_dims : Sequence[Sequence], optional\n", " List of the same length as ``args`` giving the list of core dimensions\n", " on each input argument that should not be broadcast. By default, we\n", " assume there are no core dimensions on any input arguments.\n", "\n", " For example, ``input_core_dims=[[], ['time']]`` indicates that all\n", " dimensions on the first argument and all dimensions other than 'time'\n", " on the second argument should be broadcast.\n", "\n", " Core dimensions are automatically moved to the last axes of input\n", " variables before applying ``func``, which facilitates using NumPy style\n", " generalized ufuncs [2]_.\n", " \n", " output_core_dims : List[tuple], optional\n", " List of the same length as the number of output arguments from\n", " ``func``, giving the list of core dimensions on each output that were\n", " not broadcast on the inputs. 
By default, we assume that ``func``\n", " outputs exactly one array, with axes corresponding to each broadcast\n", " dimension.\n", "\n", " Core dimensions are assumed to appear as the last dimensions of each\n", " output in the provided order.\n", " \n", "Next we specify `\"lat\"` as `input_core_dims` on both `air` and `air.lat`" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-01-15T14:46:05.031672Z", "start_time": "2020-01-15T14:46:04.947588Z" } }, "outputs": [], "source": [ "xr.apply_ufunc(\n", " interp1d_np, # first the function\n", " air.isel(time=0, lon=0), # now arguments in the order expected by 'interp1_np'\n", " air.lat,\n", " newlat,\n", " input_core_dims=[[\"lat\"], [\"lat\"], []],\n", " exclude_dims=set((\"lat\",)), # dimensions allowed to change size. Must be set!\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "xarray is telling us that it expected to receive back a numpy array with 0 dimensions but instead received an array with 1 dimension corresponding to `newlat`. We can fix this by specifying `output_core_dims`" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-01-15T14:46:09.325218Z", "start_time": "2020-01-15T14:46:09.303020Z" } }, "outputs": [], "source": [ "xr.apply_ufunc(\n", " interp1d_np, # first the function\n", " air.isel(time=0, lon=0), # now arguments in the order expected by 'interp1_np'\n", " air.lat,\n", " newlat,\n", " input_core_dims=[[\"lat\"], [\"lat\"], []], # list with one entry per arg\n", " output_core_dims=[[\"lat\"]],\n", " exclude_dims=set((\"lat\",)), # dimensions allowed to change size. Must be set!\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Finally we get some output! Let's check that this is right\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-01-15T14:46:11.295440Z", "start_time": "2020-01-15T14:46:11.226553Z" } }, "outputs": [], "source": [ "interped = xr.apply_ufunc(\n", " interp1d_np, # first the function\n", " air.isel(time=0, lon=0), # now arguments in the order expected by 'interp1_np'\n", " air.lat,\n", " newlat,\n", " input_core_dims=[[\"lat\"], [\"lat\"], []], # list with one entry per arg\n", " output_core_dims=[[\"lat\"]],\n", " exclude_dims=set((\"lat\",)), # dimensions allowed to change size. Must be set!\n", ")\n", "interped[\"lat\"] = newlat # need to add this manually\n", "xr.testing.assert_allclose(expected.isel(time=0, lon=0), interped)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "No errors are raised so it is right!" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Vectorization with `np.vectorize`" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now our function currently only works on one vector of data which is not so useful given our 3D dataset.\n", "Let's try passing the whole dataset. We add a `print` statement so we can see what our function receives." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-01-15T14:46:13.808646Z", "start_time": "2020-01-15T14:46:13.680098Z" } }, "outputs": [], "source": [ "def interp1d_np(data, x, xi):\n", " print(f\"data: {data.shape} | x: {x.shape} | xi: {xi.shape}\")\n", " return np.interp(xi, x, data)\n", "\n", "\n", "interped = xr.apply_ufunc(\n", " interp1d_np, # first the function\n", " air.isel(\n", " lon=slice(3), time=slice(4)\n", " ), # now arguments in the order expected by 'interp1_np'\n", " air.lat,\n", " newlat,\n", " input_core_dims=[[\"lat\"], [\"lat\"], []], # list with one entry per arg\n", " output_core_dims=[[\"lat\"]],\n", " exclude_dims=set((\"lat\",)), # dimensions allowed to change size. Must be set!\n", ")\n", "interped[\"lat\"] = newlat # need to add this manually\n", "xr.testing.assert_allclose(expected.isel(time=0, lon=0), interped)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "That's a hard-to-interpret error but our `print` call helpfully printed the shapes of the input data: \n", "\n", " data: (10, 53, 25) | x: (25,) | xi: (100,)\n", "\n", "We see that `air` has been passed as a 3D numpy array which is not what `np.interp` expects. Instead we want loop over all combinations of `lon` and `time`; and apply our function to each corresponding vector of data along `lat`.\n", "`apply_ufunc` makes this easy by specifying `vectorize=True`:\n", "\n", " vectorize : bool, optional\n", " If True, then assume ``func`` only takes arrays defined over core\n", " dimensions as input and vectorize it automatically with\n", " :py:func:`numpy.vectorize`. This option exists for convenience, but is\n", " almost always slower than supplying a pre-vectorized function.\n", " Using this option requires NumPy version 1.12 or newer.\n", " \n", "Also see the documentation for `np.vectorize`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.vectorize.html. Most importantly\n", "\n", " The vectorize function is provided primarily for convenience, not for performance. \n", " The implementation is essentially a for loop." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-01-15T14:46:26.633233Z", "start_time": "2020-01-15T14:46:26.515209Z" } }, "outputs": [], "source": [ "def interp1d_np(data, x, xi):\n", " print(f\"data: {data.shape} | x: {x.shape} | xi: {xi.shape}\")\n", " return np.interp(xi, x, data)\n", "\n", "\n", "interped = xr.apply_ufunc(\n", " interp1d_np, # first the function\n", " air, # now arguments in the order expected by 'interp1_np'\n", " air.lat, # as above\n", " newlat, # as above\n", " input_core_dims=[[\"lat\"], [\"lat\"], []], # list with one entry per arg\n", " output_core_dims=[[\"lat\"]], # returned data has one dimension\n", " exclude_dims=set((\"lat\",)), # dimensions allowed to change size. Must be set!\n", " vectorize=True, # loop over non-core dims\n", ")\n", "interped[\"lat\"] = newlat # need to add this manually\n", "xr.testing.assert_allclose(expected, interped)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This unfortunately is another cryptic error from numpy. \n", "\n", "Notice that `newlat` is not an xarray object. Let's add a dimension name `new_lat` and modify the call. Note this cannot be `lat` because xarray expects dimensions to be the same size (or broadcastable) among all inputs. `output_core_dims` needs to be modified appropriately. We'll manually rename `new_lat` back to `lat` for easy checking." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-01-15T14:46:30.026663Z", "start_time": "2020-01-15T14:46:29.893267Z" } }, "outputs": [], "source": [ "def interp1d_np(data, x, xi):\n", " print(f\"data: {data.shape} | x: {x.shape} | xi: {xi.shape}\")\n", " return np.interp(xi, x, data)\n", "\n", "\n", "interped = xr.apply_ufunc(\n", " interp1d_np, # first the function\n", " air, # now arguments in the order expected by 'interp1_np'\n", " air.lat, # as above\n", " newlat, # as above\n", " input_core_dims=[[\"lat\"], [\"lat\"], [\"new_lat\"]], # list with one entry per arg\n", " output_core_dims=[[\"new_lat\"]], # returned data has one dimension\n", " exclude_dims=set((\"lat\",)), # dimensions allowed to change size. Must be a set!\n", " vectorize=True, # loop over non-core dims\n", ")\n", "interped = interped.rename({\"new_lat\": \"lat\"})\n", "interped[\"lat\"] = newlat # need to add this manually\n", "xr.testing.assert_allclose(\n", " expected.transpose(*interped.dims), interped # order of dims is different\n", ")\n", "interped" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Notice that the printed input shapes are all 1D and correspond to one vector along the `lat` dimension.\n", "\n", "The result is now an xarray object with coordinate values copied over from `data`. This is why `apply_ufunc` is so convenient; it takes care of a lot of boilerplate necessary to apply functions that consume and produce numpy arrays to xarray objects.\n", "\n", "One final point: `lat` is now the *last* dimension in `interped`. This is a \"property\" of core dimensions: they are moved to the end before being sent to `interp1d_np` as was noted in the docstring for `input_core_dims`\n", "\n", " Core dimensions are automatically moved to the last axes of input\n", " variables before applying ``func``, which facilitates using NumPy style\n", " generalized ufuncs [2]_." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Parallelization with dask\n", "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "So far our function can only handle numpy arrays. A real benefit of `apply_ufunc` is the ability to easily parallelize over dask chunks _when needed_. \n", "\n", "We want to apply this function in a vectorized fashion over each chunk of the dask array. This is possible using dask's `blockwise`, `map_blocks`, or `apply_gufunc`. Xarray's `apply_ufunc` wraps dask's `apply_gufunc` and asking it to map the function over chunks using `apply_gufunc` is as simple as specifying `dask=\"parallelized\"`. With this level of flexibility we need to provide dask with some extra information: \n", " 1. `output_dtypes`: dtypes of all returned objects, and \n", " 2. `output_sizes`: lengths of any new dimensions. \n", " \n", "Here we need to specify `output_dtypes` since `apply_ufunc` can infer the size of the new dimension `new_lat` from the argument corresponding to the third element in `input_core_dims`. Here I choose the chunk sizes to illustrate that `np.vectorize` is still applied so that our function receives 1D vectors even though the blocks are 3D." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-01-15T14:48:42.469341Z", "start_time": "2020-01-15T14:48:42.344209Z" } }, "outputs": [], "source": [ "def interp1d_np(data, x, xi):\n", " print(f\"data: {data.shape} | x: {x.shape} | xi: {xi.shape}\")\n", " return np.interp(xi, x, data)\n", "\n", "\n", "interped = xr.apply_ufunc(\n", " interp1d_np, # first the function\n", " air.chunk(\n", " {\"time\": 2, \"lon\": 2}\n", " ), # now arguments in the order expected by 'interp1_np'\n", " air.lat, # as above\n", " newlat, # as above\n", " input_core_dims=[[\"lat\"], [\"lat\"], [\"new_lat\"]], # list with one entry per arg\n", " output_core_dims=[[\"new_lat\"]], # returned data has one dimension\n", " exclude_dims=set((\"lat\",)), # dimensions allowed to change size. Must be a set!\n", " vectorize=True, # loop over non-core dims\n", " dask=\"parallelized\",\n", " output_dtypes=[air.dtype], # one per output\n", ").rename({\"new_lat\": \"lat\"})\n", "interped[\"lat\"] = newlat # need to add this manually\n", "xr.testing.assert_allclose(expected.transpose(*interped.dims), interped)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Yay! our function is receiving 1D vectors, so we've successfully parallelized applying a 1D function over a block. If you have a distributed dashboard up, you should see computes happening as equality is checked.\n", "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### High performance vectorization: gufuncs, numba & guvectorize\n", "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "`np.vectorize` is a very convenient function but is unfortunately slow. It is only marginally faster than writing a for loop in Python and looping. A common way to get around this is to write a base interpolation function that can handle nD arrays in a compiled language like Fortran and then pass that to `apply_ufunc`.\n", "\n", "Another option is to use the numba package which provides a very convenient `guvectorize` decorator: https://numba.pydata.org/numba-doc/latest/user/vectorize.html#the-guvectorize-decorator\n", "\n", "Any decorated function gets compiled and will loop over any non-core dimension in parallel when necessary. We need to specify some extra information:\n", "\n", " 1. Our function cannot return a variable any more. Instead it must receive a variable (the last argument) whose contents the function will modify. So we change from `def interp1d_np(data, x, xi)` to `def interp1d_np_gufunc(data, x, xi, out)`. Our computed results must be assigned to `out`. All values of `out` must be assigned explicitly.\n", " \n", " 2. `guvectorize` needs to know the dtypes of the input and output. This is specified in string form as the first argument. Each element of the tuple corresponds to each argument of the function. In this case, we specify `float64` for all inputs and outputs: `\"(float64[:], float64[:], float64[:], float64[:])\"` corresponding to `data, x, xi, out`\n", " \n", " 3. Now we need to tell numba the size of the dimensions the function takes as inputs and returns as output i.e. core dimensions. This is done in symbolic form i.e. `data` and `x` are vectors of the same length, say `n`; `xi` and the output `out` have a different length, say `m`. 
So the second argument is (again as a string)\n", " `\"(n), (n), (m) -> (m).\"` corresponding again to `data, x, xi, out`\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-01-15T14:48:45.267633Z", "start_time": "2020-01-15T14:48:44.943939Z" } }, "outputs": [], "source": [ "from numba import float64, guvectorize\n", "\n", "\n", "@guvectorize(\"(float64[:], float64[:], float64[:], float64[:])\", \"(n), (n), (m) -> (m)\")\n", "def interp1d_np_gufunc(data, x, xi, out):\n", " # numba doesn't really like this.\n", " # seem to support fstrings so do it the old way\n", " print(\n", " \"data: \" + str(data.shape) + \" | x:\" + str(x.shape) + \" | xi: \" + str(xi.shape)\n", " )\n", " out[:] = np.interp(xi, x, data)\n", " # gufuncs don't return data\n", " # instead you assign to a the last arg\n", " # return np.interp(xi, x, data)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The warnings are about object-mode compilation relating to the `print` statement. This means we don't get much speed up: https://numba.pydata.org/numba-doc/latest/user/performance-tips.html#no-python-mode-vs-object-mode. We'll keep the `print` statement temporarily to make sure that `guvectorize` acts like we want it to." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-01-15T14:48:54.755405Z", "start_time": "2020-01-15T14:48:54.634724Z" } }, "outputs": [], "source": [ "interped = xr.apply_ufunc(\n", " interp1d_np_gufunc, # first the function\n", " air.chunk(\n", " {\"time\": 2, \"lon\": 2}\n", " ), # now arguments in the order expected by 'interp1_np'\n", " air.lat, # as above\n", " newlat, # as above\n", " input_core_dims=[[\"lat\"], [\"lat\"], [\"new_lat\"]], # list with one entry per arg\n", " output_core_dims=[[\"new_lat\"]], # returned data has one dimension\n", " exclude_dims=set((\"lat\",)), # dimensions allowed to change size. Must be a set!\n", " # vectorize=True, # not needed since numba takes care of vectorizing\n", " dask=\"parallelized\",\n", " output_dtypes=[air.dtype], # one per output\n", ").rename({\"new_lat\": \"lat\"})\n", "interped[\"lat\"] = newlat # need to add this manually\n", "xr.testing.assert_allclose(expected.transpose(*interped.dims), interped)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Yay! Our function is receiving 1D vectors and is working automatically with dask arrays. Finally let's comment out the print line and wrap everything up in a nice reusable function" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-01-15T14:49:28.667528Z", "start_time": "2020-01-15T14:49:28.103914Z" } }, "outputs": [], "source": [ "from numba import float64, guvectorize\n", "\n", "\n", "@guvectorize(\n", " \"(float64[:], float64[:], float64[:], float64[:])\",\n", " \"(n), (n), (m) -> (m)\",\n", " nopython=True,\n", ")\n", "def interp1d_np_gufunc(data, x, xi, out):\n", " out[:] = np.interp(xi, x, data)\n", "\n", "\n", "def xr_interp(data, dim, newdim):\n", " interped = xr.apply_ufunc(\n", " interp1d_np_gufunc, # first the function\n", " data, # now arguments in the order expected by 'interp1_np'\n", " data[dim], # as above\n", " newdim, # as above\n", " input_core_dims=[[dim], [dim], [\"__newdim__\"]], # list with one entry per arg\n", " output_core_dims=[[\"__newdim__\"]], # returned data has one dimension\n", " exclude_dims=set((dim,)), # dimensions allowed to change size. 
Must be a set!\n", " # vectorize=True, # not needed since numba takes care of vectorizing\n", " dask=\"parallelized\",\n", " output_dtypes=[\n", " data.dtype\n", " ], # one per output; could also be float or np.dtype(\"float64\")\n", " ).rename({\"__newdim__\": dim})\n", " interped[dim] = newdim # need to add this manually\n", "\n", " return interped\n", "\n", "\n", "xr.testing.assert_allclose(\n", " expected.transpose(*interped.dims),\n", " xr_interp(air.chunk({\"time\": 2, \"lon\": 2}), \"lat\", newlat),\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This technique is generalizable to any 1D function." ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.10" }, "nbsphinx": { "allow_errors": true }, "org": null, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": false, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": {}, "toc_section_display": true, "toc_window_display": true } }, "nbformat": 4, "nbformat_minor": 4 } python-xarray-2026.01.0/doc/examples/_code/0000775000175000017500000000000015136607163020477 5ustar alastairalastairpython-xarray-2026.01.0/doc/examples/_code/accessor_example.py0000664000175000017500000000127715136607163024375 0ustar alastairalastairimport xarray as xr @xr.register_dataset_accessor("geo") class GeoAccessor: def __init__(self, xarray_obj): self._obj = xarray_obj self._center = None @property def center(self): """Return the geographic center point of this dataset.""" if self._center is None: # we can use a cache on our accessor objects, because accessors # themselves are cached on instances that access them. lon = self._obj.latitude lat = self._obj.longitude self._center = (float(lon.mean()), float(lat.mean())) return self._center def plot(self): """Plot data on a map.""" return "plotting!" python-xarray-2026.01.0/doc/examples/ROMS_ocean_model.ipynb0000664000175000017500000001615315136607163023604 0ustar alastairalastair{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# ROMS Ocean Model Example" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The Regional Ocean Modeling System ([ROMS](https://www.myroms.org/)) is an open source hydrodynamic model that is used for simulating currents and water properties in coastal and estuarine regions. ROMS is one of a few standard ocean models, and it has an active user community.\n", "\n", "ROMS uses a regular C-Grid in the horizontal, similar to other structured grid ocean and atmospheric models, and a stretched vertical coordinate (see [the ROMS documentation](https://www.myroms.org/wiki/Vertical_S-coordinate) for more details). Both of these require special treatment when using `xarray` to analyze ROMS ocean model output. This example notebook shows how to create a lazily evaluated vertical coordinate, and make some basic plots. The `xgcm` package is required to do analysis that is aware of the horizontal C-Grid." 
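A quick usage sketch for the `geo` accessor defined in `accessor_example.py` above (hypothetical coordinate values; assumes the module has been imported so the accessor is registered, and that the dataset provides `latitude` and `longitude` coordinates):

    import numpy as np
    import xarray as xr
    import accessor_example  # noqa: F401  (hypothetical import path; registers the "geo" accessor)

    ds = xr.Dataset(
        coords={
            "longitude": np.linspace(-120, -60, 10),
            "latitude": np.linspace(25, 55, 5),
        }
    )
    # as written above, the cached tuple is (latitude.mean(), longitude.mean())
    print(ds.geo.center)  # computed on first access, then cached on the accessor instance
    print(ds.geo.plot())  # "plotting!"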
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import cartopy.crs as ccrs\n", "import cartopy.feature as cfeature\n", "import matplotlib.pyplot as plt\n", "\n", "%matplotlib inline\n", "\n", "import xarray as xr" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Load a sample ROMS file. This is a subset of a full model available at \n", "\n", " http://barataria.tamu.edu/thredds/catalog.html?dataset=txla_hindcast_agg\n", " \n", "The subsetting was done using the following command on one of the output files:\n", "\n", " #open dataset\n", " ds = xr.open_dataset('/d2/shared/TXLA_ROMS/output_20yr_obc/2001/ocean_his_0015.nc')\n", " \n", " # Turn on chunking to activate dask and parallelize read/write.\n", " ds = ds.chunk({'ocean_time': 1})\n", " \n", " # Pick out some of the variables that will be included as coordinates\n", " ds = ds.set_coords(['Cs_r', 'Cs_w', 'hc', 'h', 'Vtransform'])\n", " \n", " # Select a subset of variables. Salt will be visualized, zeta is used to \n", " # calculate the vertical coordinate\n", " variables = ['salt', 'zeta']\n", " ds[variables].isel(ocean_time=slice(47, None, 7*24), \n", " xi_rho=slice(300, None)).to_netcdf('ROMS_example.nc', mode='w')\n", "\n", "So, the `ROMS_example.nc` file contains a subset of the grid, one 3D variable, and two time steps." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Load in ROMS dataset as an xarray object" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# load in the file\n", "ds = xr.tutorial.open_dataset(\"ROMS_example.nc\", chunks={\"ocean_time\": 1})\n", "\n", "# This is a way to turn on chunking and lazy evaluation. Opening with mfdataset, or\n", "# setting the chunking in the open_dataset would also achieve this.\n", "ds" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Add a lazilly calculated vertical coordinates\n", "\n", "Write equations to calculate the vertical coordinate. These will be only evaluated when data is requested. Information about the ROMS vertical coordinate can be found [here](https://www.myroms.org/wiki/Vertical_S-coordinate).\n", "\n", "In short, for `Vtransform==2` as used in this example, \n", "\n", "$Z_0 = (h_c \\, S + h \\,C) / (h_c + h)$\n", "\n", "$z = Z_0 (\\zeta + h) + \\zeta$\n", "\n", "where the variables are defined as in the link above." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "if ds.Vtransform == 1:\n", " Zo_rho = ds.hc * (ds.s_rho - ds.Cs_r) + ds.Cs_r * ds.h\n", " z_rho = Zo_rho + ds.zeta * (1 + Zo_rho / ds.h)\n", "elif ds.Vtransform == 2:\n", " Zo_rho = (ds.hc * ds.s_rho + ds.Cs_r * ds.h) / (ds.hc + ds.h)\n", " z_rho = ds.zeta + (ds.zeta + ds.h) * Zo_rho\n", "\n", "ds.coords[\"z_rho\"] = z_rho.transpose() # needing transpose seems to be an xarray bug\n", "ds.salt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### A naive vertical slice\n", "\n", "Creating a slice using the s-coordinate as the vertical dimension is typically not very informative." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": false }, "outputs": [], "source": [ "ds.salt.isel(xi_rho=50, ocean_time=0).plot()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We can feed coordinate information to the plot method to give a more informative cross-section that uses the depths. 
Note that we did not need to slice the depth or longitude information separately, this was done automatically as the variable was sliced." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "section = ds.salt.isel(xi_rho=50, eta_rho=slice(0, 167), ocean_time=0)\n", "section.plot(x=\"lon_rho\", y=\"z_rho\", figsize=(15, 6), clim=(25, 35))\n", "plt.ylim([-100, 1]);" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### A plan view\n", "\n", "Now make a naive plan view, without any projection information, just using lon/lat as x/y. This looks OK, but will appear compressed because lon and lat do not have an aspect constrained by the projection." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ds.salt.isel(s_rho=-1, ocean_time=0).plot(x=\"lon_rho\", y=\"lat_rho\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "And let's use a projection to make it nicer, and add a coast." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "proj = ccrs.LambertConformal(central_longitude=-92, central_latitude=29)\n", "fig = plt.figure(figsize=(15, 5))\n", "ax = plt.axes(projection=proj)\n", "ds.salt.isel(s_rho=-1, ocean_time=0).plot(\n", " x=\"lon_rho\", y=\"lat_rho\", transform=ccrs.PlateCarree()\n", ")\n", "\n", "coast_10m = cfeature.NaturalEarthFeature(\n", " \"physical\", \"land\", \"10m\", edgecolor=\"k\", facecolor=\"0.8\"\n", ")\n", "ax.add_feature(coast_10m)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.7" } }, "nbformat": 4, "nbformat_minor": 2 } python-xarray-2026.01.0/doc/examples/area_weighted_temperature.ipynb0000664000175000017500000001405315136607163025701 0ustar alastairalastair{ "cells": [ { "cell_type": "markdown", "metadata": { "toc": true }, "source": [ "
Table of Contents
    \n", "" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Compare weighted and unweighted mean temperature\n", "\n", "\n", "Author: [Mathias Hauser](https://github.com/mathause/)\n", "\n", "\n", "We use the `air_temperature` example dataset to calculate the area-weighted temperature over its domain. This dataset has a regular latitude/ longitude grid, thus the grid cell area decreases towards the pole. For this grid we can use the cosine of the latitude as proxy for the grid cell area.\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-03-17T14:43:57.222351Z", "start_time": "2020-03-17T14:43:56.147541Z" } }, "outputs": [], "source": [ "%matplotlib inline\n", "\n", "import cartopy.crs as ccrs\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "\n", "import xarray as xr" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Data\n", "\n", "Load the data, convert to celsius, and resample to daily values" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-03-17T14:43:57.831734Z", "start_time": "2020-03-17T14:43:57.651845Z" } }, "outputs": [], "source": [ "ds = xr.tutorial.load_dataset(\"air_temperature\")\n", "\n", "# to celsius\n", "air = ds.air - 273.15\n", "\n", "# resample from 6-hourly to daily values\n", "air = air.resample(time=\"D\").mean()\n", "\n", "air" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Plot the first timestep:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-03-17T14:43:59.887120Z", "start_time": "2020-03-17T14:43:59.582894Z" } }, "outputs": [], "source": [ "projection = ccrs.LambertConformal(central_longitude=-95, central_latitude=45)\n", "\n", "f, ax = plt.subplots(subplot_kw=dict(projection=projection))\n", "\n", "air.isel(time=0).plot(transform=ccrs.PlateCarree(), cbar_kwargs=dict(shrink=0.7))\n", "ax.coastlines()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Creating weights\n", "\n", "For a rectangular grid the cosine of the latitude is proportional to the grid cell area." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-03-17T14:44:18.777092Z", "start_time": "2020-03-17T14:44:18.736587Z" } }, "outputs": [], "source": [ "weights = np.cos(np.deg2rad(air.lat))\n", "weights.name = \"weights\"\n", "weights" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Weighted mean" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-03-17T14:44:52.607120Z", "start_time": "2020-03-17T14:44:52.564674Z" } }, "outputs": [], "source": [ "air_weighted = air.weighted(weights)\n", "air_weighted" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-03-17T14:44:54.334279Z", "start_time": "2020-03-17T14:44:54.280022Z" } }, "outputs": [], "source": [ "weighted_mean = air_weighted.mean((\"lon\", \"lat\"))\n", "weighted_mean" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Plot: comparison with unweighted mean\n", "\n", "Note how the weighted mean temperature is higher than the unweighted." 
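To make explicit what `.weighted(weights).mean((\"lon\", \"lat\"))` computes, here is a by-hand sketch of the same reduction, reusing `air`, `weights` and `weighted_mean` from above (assumes no missing values, which holds for this tutorial dataset; `.weighted` additionally handles NaNs for you):

    # weighted mean = sum(w * x) / sum(w), with the weights broadcast against the data
    broadcasted = weights.broadcast_like(air)
    manual = (air * broadcasted).sum(("lon", "lat")) / broadcasted.sum(("lon", "lat"))
    xr.testing.assert_allclose(weighted_mean, manual)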
] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-03-17T14:45:08.877307Z", "start_time": "2020-03-17T14:45:08.673383Z" } }, "outputs": [], "source": [ "weighted_mean.plot(label=\"weighted\")\n", "air.mean((\"lon\", \"lat\")).plot(label=\"unweighted\")\n", "\n", "plt.legend()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.6" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": true, "toc_position": {}, "toc_section_display": true, "toc_window_display": true } }, "nbformat": 4, "nbformat_minor": 4 } python-xarray-2026.01.0/doc/examples/monthly-means.ipynb0000664000175000017500000001652615136607163023276 0ustar alastairalastair{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "Calculating Seasonal Averages from Time Series of Monthly Means \n", "=====\n", "\n", "Author: [Joe Hamman](https://github.com/jhamman/)\n", "\n", "The data used for this example can be found in the [xarray-data](https://github.com/pydata/xarray-data) repository. You may need to change the path to `rasm.nc` below.\n", "\n", "Suppose we have a netCDF or `xarray.Dataset` of monthly mean data and we want to calculate the seasonal average. To do this properly, we need to calculate the weighted average considering that each month has a different number of days." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-28T20:51:35.958210Z", "start_time": "2018-11-28T20:51:35.936966Z" } }, "outputs": [], "source": [ "%matplotlib inline\n", "import numpy as np\n", "import pandas as pd\n", "import xarray as xr\n", "import matplotlib.pyplot as plt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Open the `Dataset`" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-28T20:51:36.072316Z", "start_time": "2018-11-28T20:51:36.016594Z" } }, "outputs": [], "source": [ "ds = xr.tutorial.open_dataset(\"rasm\").load()\n", "ds" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Now for the heavy lifting:\n", "We first have to come up with the weights,\n", "- calculate the month length for each monthly data record\n", "- calculate weights using `groupby('time.season')`\n", "\n", "Finally, we just need to multiply our weights by the `Dataset` and sum along the time dimension. Creating a `DataArray` for the month length is as easy as using the `days_in_month` accessor on the time coordinate. The calendar type, in this case `'noleap'`, is automatically considered in this operation." 
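As a small aside, the calendar-awareness of `days_in_month` is easy to see on a synthetic time axis (illustrative sketch, independent of the `rasm` dataset used in this example; assumes `cftime` is installed and a recent xarray providing `xr.date_range`):

    import numpy as np
    import xarray as xr

    times = xr.date_range("2000-01-01", periods=3, freq="MS", calendar="noleap")
    da = xr.DataArray(np.arange(3), coords={"time": times}, dims="time")
    # 2000 is a leap year in the standard calendar, but the noleap calendar
    # always gives February 28 days:
    print(da.time.dt.days_in_month.values)  # [31 28 31]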
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "month_length = ds.time.dt.days_in_month\n", "month_length" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-28T20:51:36.132413Z", "start_time": "2018-11-28T20:51:36.073708Z" } }, "outputs": [], "source": [ "# Calculate the weights by grouping by 'time.season'.\n", "weights = (\n", " month_length.groupby(\"time.season\") / month_length.groupby(\"time.season\").sum()\n", ")\n", "\n", "# Test that the sum of the weights for each season is 1.0\n", "np.testing.assert_allclose(weights.groupby(\"time.season\").sum().values, np.ones(4))\n", "\n", "# Calculate the weighted average\n", "ds_weighted = (ds * weights).groupby(\"time.season\").sum(dim=\"time\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-28T20:51:36.152913Z", "start_time": "2018-11-28T20:51:36.133997Z" } }, "outputs": [], "source": [ "ds_weighted" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-28T20:51:36.190765Z", "start_time": "2018-11-28T20:51:36.154416Z" } }, "outputs": [], "source": [ "# only used for comparisons\n", "ds_unweighted = ds.groupby(\"time.season\").mean(\"time\")\n", "ds_diff = ds_weighted - ds_unweighted" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-28T20:51:40.264871Z", "start_time": "2018-11-28T20:51:36.192467Z" } }, "outputs": [], "source": [ "# Quick plot to show the results\n", "notnull = pd.notnull(ds_unweighted[\"Tair\"][0])\n", "\n", "fig, axes = plt.subplots(nrows=4, ncols=3, figsize=(14, 12))\n", "for i, season in enumerate((\"DJF\", \"MAM\", \"JJA\", \"SON\")):\n", " ds_weighted[\"Tair\"].sel(season=season).where(notnull).plot.pcolormesh(\n", " ax=axes[i, 0],\n", " vmin=-30,\n", " vmax=30,\n", " cmap=\"Spectral_r\",\n", " add_colorbar=True,\n", " extend=\"both\",\n", " )\n", "\n", " ds_unweighted[\"Tair\"].sel(season=season).where(notnull).plot.pcolormesh(\n", " ax=axes[i, 1],\n", " vmin=-30,\n", " vmax=30,\n", " cmap=\"Spectral_r\",\n", " add_colorbar=True,\n", " extend=\"both\",\n", " )\n", "\n", " ds_diff[\"Tair\"].sel(season=season).where(notnull).plot.pcolormesh(\n", " ax=axes[i, 2],\n", " vmin=-0.1,\n", " vmax=0.1,\n", " cmap=\"RdBu_r\",\n", " add_colorbar=True,\n", " extend=\"both\",\n", " )\n", "\n", " axes[i, 0].set_ylabel(season)\n", " axes[i, 1].set_ylabel(\"\")\n", " axes[i, 2].set_ylabel(\"\")\n", "\n", "for ax in axes.flat:\n", " ax.axes.get_xaxis().set_ticklabels([])\n", " ax.axes.get_yaxis().set_ticklabels([])\n", " ax.axes.axis(\"tight\")\n", " ax.set_xlabel(\"\")\n", "\n", "axes[0, 0].set_title(\"Weighted by DPM\")\n", "axes[0, 1].set_title(\"Equal Weighting\")\n", "axes[0, 2].set_title(\"Difference\")\n", "\n", "plt.tight_layout()\n", "\n", "fig.suptitle(\"Seasonal Surface Air Temperature\", fontsize=16, y=1.02)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-28T20:51:40.284898Z", "start_time": "2018-11-28T20:51:40.266406Z" } }, "outputs": [], "source": [ "# Wrap it into a simple function\n", "def season_mean(ds, calendar=\"standard\"):\n", " # Make a DataArray with the number of days in each month, size = len(time)\n", " month_length = ds.time.dt.days_in_month\n", "\n", " # Calculate the weights by grouping by 'time.season'\n", " weights = (\n", " month_length.groupby(\"time.season\") / 
month_length.groupby(\"time.season\").sum()\n", " )\n", "\n", " # Test that the sum of the weights for each season is 1.0\n", " np.testing.assert_allclose(weights.groupby(\"time.season\").sum().values, np.ones(4))\n", "\n", " # Calculate the weighted average\n", " return (ds * weights).groupby(\"time.season\").sum(dim=\"time\")" ] } ], "metadata": { "anaconda-cloud": {}, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": true, "toc_position": {}, "toc_section_display": true, "toc_window_display": true } }, "nbformat": 4, "nbformat_minor": 4 } python-xarray-2026.01.0/doc/roadmap.rst0000664000175000017500000002703315136607163017772 0ustar alastairalastair.. _roadmap: Development roadmap =================== Authors: Xarray developers Date: September 7, 2021 Xarray is an open source Python library for labeled multidimensional arrays and datasets. Our philosophy -------------- Why has xarray been successful? In our opinion: - Xarray does a great job of solving **specific use-cases** for multidimensional data analysis: - The dominant use-case for xarray is for analysis of gridded dataset in the geosciences, e.g., as part of the `Pangeo `__ project. - Xarray is also used more broadly in the physical sciences, where we've found the needs for analyzing multidimensional datasets are remarkably consistent (e.g., see `SunPy `__ and `PlasmaPy `__). - Finally, xarray is used in a variety of other domains, including finance, `probabilistic programming `__ and genomics. - Xarray is also a **domain agnostic** solution: - We focus on providing a flexible set of functionality related labeled multidimensional arrays, rather than solving particular problems. - This facilitates collaboration between users with different needs, and helps us attract a broad community of contributors. - Importantly, this retains flexibility, for use cases that don't fit particularly well into existing frameworks. - Xarray **integrates well** with other libraries in the scientific Python stack. - We leverage first-class external libraries for core features of xarray (e.g., NumPy for ndarrays, pandas for indexing, dask for parallel computing) - We expose our internal abstractions to users (e.g., ``apply_ufunc()``), which facilitates extending xarray in various ways. Together, these features have made xarray a first-class choice for labeled multidimensional arrays in Python. We want to double-down on xarray's strengths by making it an even more flexible and powerful tool for multidimensional data analysis. We want to continue to engage xarray's core geoscience users, and to also reach out to new domains to learn from other successful data models like those of `yt `__ or the `OLAP cube `__. Specific needs -------------- The user community has voiced a number specific needs related to how xarray interfaces with domain specific problems. Xarray may not solve all of these issues directly, but these areas provide opportunities for xarray to provide better, more extensible, interfaces. Some examples of these common needs are: - Non-regular grids (e.g., staggered and unstructured meshes). 
- Physical units. - Lazily computed arrays (e.g., for coordinate systems). - New file-formats. Technical vision ---------------- We think the right approach to extending xarray's user community and the usefulness of the project is to focus on improving key interfaces that can be used externally to meet domain-specific needs. We can generalize the community's needs into three main categories: - More flexible grids/indexing. - More flexible arrays/computing. - More flexible storage backends. - More flexible data structures. Each of these are detailed further in the subsections below. Flexible indexes ~~~~~~~~~~~~~~~~ .. note:: Work on flexible grids and indexes is currently underway. See `GH Project #1 `__ for more detail. Xarray currently keeps track of indexes associated with coordinates by storing them in the form of a ``pandas.Index`` in special ``xarray.IndexVariable`` objects. The limitations of this model became clear with the addition of ``pandas.MultiIndex`` support in xarray 0.9, where a single index corresponds to multiple xarray variables. MultiIndex support is highly useful, but xarray now has numerous special cases to check for MultiIndex levels. A cleaner model would be to elevate ``indexes`` to an explicit part of xarray's data model, e.g., as attributes on the ``Dataset`` and ``DataArray`` classes. Indexes would need to be propagated along with coordinates in xarray operations, but will no longer would need to have a one-to-one correspondence with coordinate variables. Instead, an index should be able to refer to multiple (possibly multidimensional) coordinates that define it. See :issue:`1603` for full details. Specific tasks: - Add an ``indexes`` attribute to ``xarray.Dataset`` and ``xarray.Dataset``, as dictionaries that map from coordinate names to xarray index objects. - Use the new index interface to write wrappers for ``pandas.Index``, ``pandas.MultiIndex`` and ``scipy.spatial.KDTree``. - Expose the interface externally to allow third-party libraries to implement custom indexing routines, e.g., for geospatial look-ups on the surface of the Earth. In addition to the new features it directly enables, this clean up will allow xarray to more easily implement some long-awaited features that build upon indexing, such as groupby operations with multiple variables. Flexible arrays ~~~~~~~~~~~~~~~ .. note:: Work on flexible arrays is currently underway. See `GH Project #2 `__ for more detail. Xarray currently supports wrapping multidimensional arrays defined by NumPy, dask and to a limited-extent pandas. It would be nice to have interfaces that allow xarray to wrap alternative N-D array implementations, e.g.: - Arrays holding physical units. - Lazily computed arrays. - Other ndarray objects, e.g., sparse, xnd, xtensor. Our strategy has been to pursue upstream improvements in NumPy (see `NEP-22 `__) for supporting a complete duck-typing interface using with NumPy's higher level array API. Improvements in NumPy's support for custom data types would also be highly useful for xarray users. By pursuing these improvements in NumPy we hope to extend the benefits to the full scientific Python community, and avoid tight coupling between xarray and specific third-party libraries (e.g., for implementing units). This will allow xarray to maintain its domain agnostic strengths. We expect that we may eventually add some minimal interfaces in xarray for features that we delegate to external array libraries (e.g., for getting units and changing units). 
If we do add these features, we expect them to be thin wrappers, with core functionality implemented by third-party libraries. Flexible storage ~~~~~~~~~~~~~~~~ The xarray backends module has grown in size and complexity. Much of this growth has been "organic" and mostly to support incremental additions to the supported backends. This has left us with a fragile internal API that is difficult for even experienced xarray developers to use. Moreover, the lack of a public facing API for building xarray backends means that users can not easily build backend interface for xarray in third-party libraries. The idea of refactoring the backends API and exposing it to users was originally proposed in :issue:`1970`. The idea would be to develop a well tested and generic backend base class and associated utilities for external use. Specific tasks for this development would include: - Exposing an abstract backend for writing new storage systems. - Exposing utilities for features like automatic closing of files, LRU-caching and explicit/lazy indexing. - Possibly moving some infrequently used backends to third-party packages. Flexible data structures ~~~~~~~~~~~~~~~~~~~~~~~~ Xarray provides two primary data structures, the ``xarray.DataArray`` and the ``xarray.Dataset``. This section describes two possible data model extensions. Tree-like data structure ++++++++++++++++++++++++ .. note:: After some time, the community DataTree project has now been updated and merged into xarray exposing :py:class:`xarray.DataTree`. This is just released and a bit experimental, but please try it out and let us know what you think. Take a look at our :ref:`quick-overview-datatrees` quickstart. Xarray’s highest-level object was previously an ``xarray.Dataset``, whose data model echoes that of a single netCDF group. However real-world datasets are often better represented by a collection of related Datasets. Particular common examples include: - Multi-resolution datasets, - Collections of time series datasets with differing lengths, - Heterogeneous datasets comprising multiple different types of related observational or simulation data, - Bayesian workflows involving various statistical distributions over multiple variables, - Whole netCDF files containing multiple groups. - Comparison of output from many similar models (such as in the IPCC's Coupled Model Intercomparison Projects) A new tree-like data structure, ``xarray.DataTree``, which is essentially a structured hierarchical collection of Datasets, represents these cases and instead maps to multiple netCDF groups (see :issue:`4118`). Currently there are several libraries which have wrapped xarray in order to build domain-specific data structures (e.g. `xarray-multiscale `__.), but the general ``xarray.DataTree`` object obviates the need for these and consolidates effort in a single domain-agnostic tool, much as xarray has already achieved. Labeled array without coordinates +++++++++++++++++++++++++++++++++ There is a need for a lightweight array structure with named dimensions for convenient indexing and broadcasting. Xarray includes such a structure internally (``xarray.Variable``). We want to factor out xarray's “Variable” object into a standalone package with minimal dependencies for integration with libraries that don't want to inherit xarray's dependency on pandas (e.g. scikit-learn). The new “Variable” class will follow established array protocols and the new data-apis standard. It will be capable of wrapping multiple array-like objects (e.g. 
NumPy, Dask, Sparse, Pint, CuPy, Pytorch). While “DataArray” fits some of these requirements, it offers a more complex data model than is desired for many applications and depends on pandas. Engaging more users ------------------- Like many open-source projects, the documentation of xarray has grown together with the library's features. While we think that the xarray documentation is comprehensive already, we acknowledge that the adoption of xarray might be slowed down because of the substantial time investment required to learn its working principles. In particular, non-computer scientists or users less familiar with the pydata ecosystem might find it difficult to learn xarray and realize how xarray can help them in their daily work. In order to lower this adoption barrier, we propose to: - Develop entry-level tutorials for users with different backgrounds. For example, we would like to develop tutorials for users with or without previous knowledge of pandas, NumPy, netCDF, etc. These tutorials may be built as part of xarray's documentation or included in a separate repository to enable interactive use (e.g. mybinder.org). - Document typical user workflows in a dedicated website, following the example of `dask-stories `__. - Write a basic glossary that defines terms that might not be familiar to all (e.g. "lazy", "labeled", "serialization", "indexing", "backend"). Administrative -------------- NumFOCUS ~~~~~~~~ On July 16, 2018, Joe and Stephan submitted xarray's fiscal sponsorship application to NumFOCUS. python-xarray-2026.01.0/doc/README.rst0000664000175000017500000000027515136607163017303 0ustar alastairalastair:orphan: xarray ------ You can find information about building the docs at our `Contributing page `_. python-xarray-2026.01.0/doc/api/0000775000175000017500000000000015136607163016361 5ustar alastairalastairpython-xarray-2026.01.0/doc/api/rolling.rst0000664000175000017500000000166115136607163020565 0ustar alastairalastair.. currentmodule:: xarray Rolling objects =============== .. currentmodule:: xarray.computation.rolling Dataset ------- .. autosummary:: :toctree: ../generated/ DatasetRolling DatasetRolling.construct DatasetRolling.reduce DatasetRolling.argmax DatasetRolling.argmin DatasetRolling.count DatasetRolling.max DatasetRolling.mean DatasetRolling.median DatasetRolling.min DatasetRolling.prod DatasetRolling.std DatasetRolling.sum DatasetRolling.var DataArray --------- .. autosummary:: :toctree: ../generated/ DataArrayRolling DataArrayRolling.__iter__ DataArrayRolling.construct DataArrayRolling.reduce DataArrayRolling.argmax DataArrayRolling.argmin DataArrayRolling.count DataArrayRolling.max DataArrayRolling.mean DataArrayRolling.median DataArrayRolling.min DataArrayRolling.prod DataArrayRolling.std DataArrayRolling.sum DataArrayRolling.var python-xarray-2026.01.0/doc/api/resample.rst0000664000175000017500000000343115136607163020724 0ustar alastairalastair.. currentmodule:: xarray Resample objects ================ .. currentmodule:: xarray.core.resample Dataset ------- .. 
autosummary:: :toctree: ../generated/ DatasetResample DatasetResample.asfreq DatasetResample.backfill DatasetResample.interpolate DatasetResample.nearest DatasetResample.pad DatasetResample.all DatasetResample.any DatasetResample.apply DatasetResample.assign DatasetResample.assign_coords DatasetResample.bfill DatasetResample.count DatasetResample.ffill DatasetResample.fillna DatasetResample.first DatasetResample.last DatasetResample.map DatasetResample.max DatasetResample.mean DatasetResample.median DatasetResample.min DatasetResample.prod DatasetResample.quantile DatasetResample.reduce DatasetResample.std DatasetResample.sum DatasetResample.var DatasetResample.where DatasetResample.dims DatasetResample.groups DataArray --------- .. autosummary:: :toctree: ../generated/ DataArrayResample DataArrayResample.asfreq DataArrayResample.backfill DataArrayResample.interpolate DataArrayResample.nearest DataArrayResample.pad DataArrayResample.all DataArrayResample.any DataArrayResample.apply DataArrayResample.assign_coords DataArrayResample.bfill DataArrayResample.count DataArrayResample.ffill DataArrayResample.fillna DataArrayResample.first DataArrayResample.last DataArrayResample.map DataArrayResample.max DataArrayResample.mean DataArrayResample.median DataArrayResample.min DataArrayResample.prod DataArrayResample.quantile DataArrayResample.reduce DataArrayResample.std DataArrayResample.sum DataArrayResample.var DataArrayResample.where DataArrayResample.dims DataArrayResample.groups python-xarray-2026.01.0/doc/api/advanced.rst0000664000175000017500000000076015136607163020663 0ustar alastairalastair.. currentmodule:: xarray Advanced API ============ The methods and properties here are advanced API and not recommended for use unless you know what you are doing. .. autosummary:: :toctree: ../generated/ Dataset.variables DataArray.variable DataTree.variables Variable IndexVariable as_variable Context register_dataset_accessor register_dataarray_accessor register_datatree_accessor Dataset.set_close .. .. .. Missing: .. ``DataTree.set_close`` python-xarray-2026.01.0/doc/api/weighted.rst0000664000175000017500000000123715136607163020716 0ustar alastairalastair.. currentmodule:: xarray Weighted objects ================ .. currentmodule:: xarray.computation.weighted Dataset ------- .. autosummary:: :toctree: ../generated/ DatasetWeighted DatasetWeighted.mean DatasetWeighted.quantile DatasetWeighted.sum DatasetWeighted.std DatasetWeighted.var DatasetWeighted.sum_of_weights DatasetWeighted.sum_of_squares DataArray --------- .. autosummary:: :toctree: ../generated/ DataArrayWeighted DataArrayWeighted.mean DataArrayWeighted.quantile DataArrayWeighted.sum DataArrayWeighted.std DataArrayWeighted.var DataArrayWeighted.sum_of_weights DataArrayWeighted.sum_of_squares python-xarray-2026.01.0/doc/api/plotting.rst0000664000175000017500000000220515136607163020752 0ustar alastairalastair.. currentmodule:: xarray Plotting ======== Dataset ------- .. autosummary:: :toctree: ../generated/ :template: autosummary/accessor_method.rst Dataset.plot.scatter Dataset.plot.quiver Dataset.plot.streamplot DataArray --------- .. autosummary:: :toctree: ../generated/ :template: autosummary/accessor_callable.rst DataArray.plot .. autosummary:: :toctree: ../generated/ :template: autosummary/accessor_method.rst DataArray.plot.contourf DataArray.plot.contour DataArray.plot.hist DataArray.plot.imshow DataArray.plot.line DataArray.plot.pcolormesh DataArray.plot.step DataArray.plot.scatter DataArray.plot.surface Faceting -------- .. 
autosummary:: :toctree: ../generated/ plot.FacetGrid plot.FacetGrid.add_colorbar plot.FacetGrid.add_legend plot.FacetGrid.add_quiverkey plot.FacetGrid.map plot.FacetGrid.map_dataarray plot.FacetGrid.map_dataarray_line plot.FacetGrid.map_dataset plot.FacetGrid.map_plot1d plot.FacetGrid.set_axis_labels plot.FacetGrid.set_ticks plot.FacetGrid.set_titles plot.FacetGrid.set_xlabels plot.FacetGrid.set_ylabels python-xarray-2026.01.0/doc/api/backends.rst0000664000175000017500000000203515136607163020665 0ustar alastairalastair.. currentmodule:: xarray Backends ======== .. autosummary:: :toctree: ../generated/ backends.BackendArray backends.BackendEntrypoint backends.list_engines backends.refresh_engines These backends provide a low-level interface for lazily loading data from external file-formats or protocols, and can be manually invoked to create arguments for the ``load_store`` and ``dump_to_store`` Dataset methods: .. autosummary:: :toctree: ../generated/ backends.NetCDF4DataStore backends.H5NetCDFStore backends.PydapDataStore backends.ScipyDataStore backends.ZarrStore backends.FileManager backends.CachingFileManager backends.DummyFileManager These BackendEntrypoints provide a basic interface to the most commonly used filetypes in the xarray universe. .. autosummary:: :toctree: ../generated/ backends.NetCDF4BackendEntrypoint backends.H5netcdfBackendEntrypoint backends.PydapBackendEntrypoint backends.ScipyBackendEntrypoint backends.StoreBackendEntrypoint backends.ZarrBackendEntrypoint python-xarray-2026.01.0/doc/api/coarsen.rst0000664000175000017500000000161015136607163020543 0ustar alastairalastair.. currentmodule:: xarray Coarsen objects =============== .. currentmodule:: xarray.computation.rolling Dataset ------- .. autosummary:: :toctree: ../generated/ DatasetCoarsen DatasetCoarsen.all DatasetCoarsen.any DatasetCoarsen.construct DatasetCoarsen.count DatasetCoarsen.max DatasetCoarsen.mean DatasetCoarsen.median DatasetCoarsen.min DatasetCoarsen.prod DatasetCoarsen.reduce DatasetCoarsen.std DatasetCoarsen.sum DatasetCoarsen.var DataArray --------- .. autosummary:: :toctree: ../generated/ DataArrayCoarsen DataArrayCoarsen.all DataArrayCoarsen.any DataArrayCoarsen.construct DataArrayCoarsen.count DataArrayCoarsen.max DataArrayCoarsen.mean DataArrayCoarsen.median DataArrayCoarsen.min DataArrayCoarsen.prod DataArrayCoarsen.reduce DataArrayCoarsen.std DataArrayCoarsen.sum DataArrayCoarsen.var python-xarray-2026.01.0/doc/api/testing.rst0000664000175000017500000000171515136607163020574 0ustar alastairalastair.. currentmodule:: xarray Testing ======= .. autosummary:: :toctree: ../generated/ testing.assert_equal testing.assert_identical testing.assert_allclose testing.assert_chunks_equal Test that two ``DataTree`` objects are similar. .. autosummary:: :toctree: ../generated/ testing.assert_isomorphic testing.assert_equal testing.assert_identical Hypothesis Testing Strategies ============================= .. currentmodule:: xarray See the :ref:`documentation page on testing ` for a guide on how to use these strategies. .. warning:: These strategies should be considered highly experimental, and liable to change at any time. .. autosummary:: :toctree: ../generated/ testing.strategies.supported_dtypes testing.strategies.names testing.strategies.dimension_names testing.strategies.dimension_sizes testing.strategies.attrs testing.strategies.variables testing.strategies.unique_subset_of python-xarray-2026.01.0/doc/api/datatree.rst0000664000175000017500000001323015136607163020703 0ustar alastairalastair.. 
currentmodule:: xarray DataTree ======== Creating a DataTree ------------------- Methods of creating a ``DataTree``. .. autosummary:: :toctree: ../generated/ DataTree DataTree.from_dict Tree Attributes --------------- Attributes relating to the recursive tree-like structure of a ``DataTree``. .. autosummary:: :toctree: ../generated/ DataTree.parent DataTree.children DataTree.name DataTree.path DataTree.root DataTree.is_root DataTree.is_leaf DataTree.leaves DataTree.level DataTree.depth DataTree.width DataTree.subtree DataTree.subtree_with_keys DataTree.descendants DataTree.siblings DataTree.lineage DataTree.parents DataTree.ancestors DataTree.groups DataTree.xindexes Data Contents ------------- Interface to the data objects (optionally) stored inside a single ``DataTree`` node. This interface echoes that of ``xarray.Dataset``. .. autosummary:: :toctree: ../generated/ DataTree.dims DataTree.sizes DataTree.data_vars DataTree.ds DataTree.coords DataTree.attrs DataTree.encoding DataTree.indexes DataTree.nbytes DataTree.dataset DataTree.to_dataset DataTree.has_data DataTree.has_attrs DataTree.is_empty DataTree.is_hollow DataTree.chunksizes Dictionary Interface -------------------- ``DataTree`` objects also have a dict-like interface mapping keys to either ``xarray.DataArray``\s or to child ``DataTree`` nodes. .. autosummary:: :toctree: ../generated/ DataTree.__getitem__ DataTree.__setitem__ DataTree.__delitem__ DataTree.update DataTree.get DataTree.items DataTree.keys DataTree.values Tree Manipulation ----------------- For manipulating, traversing, navigating, or mapping over the tree structure. .. autosummary:: :toctree: ../generated/ DataTree.orphan DataTree.same_tree DataTree.relative_to DataTree.iter_lineage DataTree.find_common_ancestor DataTree.map_over_datasets DataTree.pipe DataTree.match DataTree.filter DataTree.filter_like Pathlib-like Interface ---------------------- ``DataTree`` objects deliberately echo some of the API of :py:class:`pathlib.PurePath`. .. autosummary:: :toctree: ../generated/ DataTree.name DataTree.parent DataTree.parents DataTree.relative_to .. Missing: .. .. .. ``DataTree.glob`` .. ``DataTree.joinpath`` .. ``DataTree.with_name`` .. ``DataTree.walk`` .. ``DataTree.rename`` .. ``DataTree.replace`` DataTree Contents ----------------- Manipulate the contents of all nodes in a ``DataTree`` simultaneously. .. autosummary:: :toctree: ../generated/ DataTree.copy .. DataTree.assign_coords .. DataTree.merge .. DataTree.rename .. DataTree.rename_vars .. DataTree.rename_dims .. DataTree.swap_dims .. DataTree.expand_dims .. DataTree.drop_vars .. DataTree.drop_dims .. DataTree.set_coords .. DataTree.reset_coords DataTree Node Contents ---------------------- Manipulate the contents of a single ``DataTree`` node. .. autosummary:: :toctree: ../generated/ DataTree.assign DataTree.drop_nodes DataTree Operations ------------------- Apply operations over multiple ``DataTree`` objects. .. autosummary:: :toctree: ../generated/ map_over_datasets group_subtrees Comparisons ----------- Compare one ``DataTree`` object to another. .. autosummary:: :toctree: ../generated/ DataTree.isomorphic DataTree.equals DataTree.identical Indexing -------- Index into all nodes in the subtree simultaneously. .. autosummary:: :toctree: ../generated/ DataTree.isel DataTree.sel .. DataTree.drop_sel .. DataTree.drop_isel .. DataTree.head .. DataTree.tail .. DataTree.thin .. DataTree.squeeze .. DataTree.interp .. DataTree.interp_like .. DataTree.reindex .. DataTree.reindex_like .. DataTree.set_index .. 
DataTree.reset_index .. DataTree.reorder_levels .. DataTree.query .. .. .. Missing: .. ``DataTree.loc`` .. Missing Value Handling .. ---------------------- .. .. autosummary:: .. :toctree: ../generated/ .. DataTree.isnull .. DataTree.notnull .. DataTree.combine_first .. DataTree.dropna .. DataTree.fillna .. DataTree.ffill .. DataTree.bfill .. DataTree.interpolate_na .. DataTree.where .. DataTree.isin .. Computation .. ----------- .. Apply a computation to the data in all nodes in the subtree simultaneously. .. .. autosummary:: .. :toctree: ../generated/ .. DataTree.map .. DataTree.reduce .. DataTree.diff .. DataTree.quantile .. DataTree.differentiate .. DataTree.integrate .. DataTree.map_blocks .. DataTree.polyfit .. DataTree.curvefit Aggregation ----------- Aggregate data in all nodes in the subtree simultaneously. .. autosummary:: :toctree: ../generated/ DataTree.all DataTree.any DataTree.max DataTree.min DataTree.mean DataTree.median DataTree.prod DataTree.sum DataTree.std DataTree.var DataTree.cumsum DataTree.cumprod ndarray methods --------------- Methods copied from :py:class:`numpy.ndarray` objects, here applying to the data in all nodes in the subtree. .. autosummary:: :toctree: ../generated/ DataTree.argsort DataTree.conj DataTree.conjugate DataTree.round .. DataTree.astype .. DataTree.clip .. DataTree.rank .. Reshaping and reorganising .. -------------------------- .. Reshape or reorganise the data in all nodes in the subtree. .. .. autosummary:: .. :toctree: ../generated/ .. DataTree.transpose .. DataTree.stack .. DataTree.unstack .. DataTree.shift .. DataTree.roll .. DataTree.pad .. DataTree.sortby .. DataTree.broadcast_like python-xarray-2026.01.0/doc/api/rolling-exp.rst0000664000175000017500000000034715136607163021357 0ustar alastairalastair.. currentmodule:: xarray Exponential rolling objects =========================== .. currentmodule:: xarray.computation.rolling_exp .. autosummary:: :toctree: ../generated/ RollingExp RollingExp.mean RollingExp.sum python-xarray-2026.01.0/doc/api/tutorial.rst0000664000175000017500000000030115136607163020750 0ustar alastairalastair.. currentmodule:: xarray Tutorial ======== .. autosummary:: :toctree: ../generated/ tutorial.open_dataset tutorial.load_dataset tutorial.open_datatree tutorial.load_datatree python-xarray-2026.01.0/doc/api/io.rst0000664000175000017500000000300015136607163017513 0ustar alastairalastair.. currentmodule:: xarray IO / Conversion =============== Dataset methods --------------- .. autosummary:: :toctree: ../generated/ load_dataset open_dataset open_mfdataset open_zarr save_mfdataset Dataset.as_numpy Dataset.from_dataframe Dataset.from_dict Dataset.to_dataarray Dataset.to_dataframe Dataset.to_dask_dataframe Dataset.to_dict Dataset.to_netcdf Dataset.to_pandas Dataset.to_zarr Dataset.chunk Dataset.close Dataset.compute Dataset.filter_by_attrs Dataset.info Dataset.load Dataset.persist Dataset.unify_chunks DataArray methods ----------------- .. autosummary:: :toctree: ../generated/ load_dataarray open_dataarray DataArray.as_numpy DataArray.from_dict DataArray.from_iris DataArray.from_series DataArray.to_dask_dataframe DataArray.to_dataframe DataArray.to_dataset DataArray.to_dict DataArray.to_index DataArray.to_iris DataArray.to_masked_array DataArray.to_netcdf DataArray.to_numpy DataArray.to_pandas DataArray.to_series DataArray.to_zarr DataArray.chunk DataArray.close DataArray.compute DataArray.persist DataArray.load DataArray.unify_chunks DataTree methods ---------------- .. 
autosummary:: :toctree: ../generated/ load_datatree open_datatree open_groups DataTree.to_dict DataTree.to_netcdf DataTree.to_zarr DataTree.chunk DataTree.load DataTree.compute DataTree.persist .. .. .. Missing: .. ``open_mfdatatree`` python-xarray-2026.01.0/doc/api/deprecated.rst0000664000175000017500000000063115136607163021213 0ustar alastairalastair.. currentmodule:: xarray Deprecated / Pending Deprecation ================================ .. autosummary:: :toctree: ../generated/ Dataset.drop DataArray.drop Dataset.apply core.groupby.DataArrayGroupBy.apply core.groupby.DatasetGroupBy.apply .. autosummary:: :toctree: ../generated/ :template: autosummary/accessor_attribute.rst DataArray.dt.weekofyear DataArray.dt.week python-xarray-2026.01.0/doc/api/ufuncs.rst0000664000175000017500000000424515136607163020423 0ustar alastairalastair.. currentmodule:: xarray Universal functions =================== These functions are equivalent to their NumPy versions, but for xarray objects backed by non-NumPy array types (e.g. ``cupy``, ``sparse``, or ``jax``), they will ensure that the computation is dispatched to the appropriate backend. You can find them in the ``xarray.ufuncs`` module: .. autosummary:: :toctree: ../generated/ ufuncs.abs ufuncs.absolute ufuncs.acos ufuncs.acosh ufuncs.arccos ufuncs.arccosh ufuncs.arcsin ufuncs.arcsinh ufuncs.arctan ufuncs.arctanh ufuncs.asin ufuncs.asinh ufuncs.atan ufuncs.atanh ufuncs.bitwise_count ufuncs.bitwise_invert ufuncs.bitwise_not ufuncs.cbrt ufuncs.ceil ufuncs.conj ufuncs.conjugate ufuncs.cos ufuncs.cosh ufuncs.deg2rad ufuncs.degrees ufuncs.exp ufuncs.exp2 ufuncs.expm1 ufuncs.fabs ufuncs.floor ufuncs.invert ufuncs.isfinite ufuncs.isinf ufuncs.isnan ufuncs.isnat ufuncs.log ufuncs.log10 ufuncs.log1p ufuncs.log2 ufuncs.logical_not ufuncs.negative ufuncs.positive ufuncs.rad2deg ufuncs.radians ufuncs.reciprocal ufuncs.rint ufuncs.sign ufuncs.signbit ufuncs.sin ufuncs.sinh ufuncs.spacing ufuncs.sqrt ufuncs.square ufuncs.tan ufuncs.tanh ufuncs.trunc ufuncs.add ufuncs.arctan2 ufuncs.atan2 ufuncs.bitwise_and ufuncs.bitwise_left_shift ufuncs.bitwise_or ufuncs.bitwise_right_shift ufuncs.bitwise_xor ufuncs.copysign ufuncs.divide ufuncs.equal ufuncs.float_power ufuncs.floor_divide ufuncs.fmax ufuncs.fmin ufuncs.fmod ufuncs.gcd ufuncs.greater ufuncs.greater_equal ufuncs.heaviside ufuncs.hypot ufuncs.lcm ufuncs.ldexp ufuncs.left_shift ufuncs.less ufuncs.less_equal ufuncs.logaddexp ufuncs.logaddexp2 ufuncs.logical_and ufuncs.logical_or ufuncs.logical_xor ufuncs.maximum ufuncs.minimum ufuncs.mod ufuncs.multiply ufuncs.nextafter ufuncs.not_equal ufuncs.pow ufuncs.power ufuncs.remainder ufuncs.right_shift ufuncs.subtract ufuncs.true_divide ufuncs.angle ufuncs.isreal ufuncs.iscomplex python-xarray-2026.01.0/doc/api/top-level.rst0000664000175000017500000000164115136607163021024 0ustar alastairalastair.. currentmodule:: xarray Top-level functions =================== Computation ----------- .. note:: For worked examples and advanced usage of ``apply_ufunc``, see the :doc:`User Guide on Computation
    `, and the `apply_ufunc tutorial `_. .. autosummary:: :toctree: ../generated/ apply_ufunc cov corr cross dot map_blocks polyval unify_chunks where Combining Data -------------- .. autosummary:: :toctree: ../generated/ align broadcast concat merge combine_by_coords combine_nested Creation -------- .. autosummary:: :toctree: ../generated/ DataArray Dataset DataTree full_like zeros_like ones_like Miscellaneous ------------- .. autosummary:: :toctree: ../generated/ decode_cf infer_freq show_versions set_options get_options python-xarray-2026.01.0/doc/api/dataarray.rst0000664000175000017500000001461415136607163021071 0ustar alastairalastair.. currentmodule:: xarray DataArray ========= .. autosummary:: :toctree: ../generated/ DataArray Attributes ---------- .. autosummary:: :toctree: ../generated/ DataArray.values DataArray.data DataArray.coords DataArray.dims DataArray.sizes DataArray.name DataArray.attrs DataArray.encoding DataArray.indexes DataArray.xindexes DataArray.chunksizes ndarray attributes ------------------ .. autosummary:: :toctree: ../generated/ DataArray.ndim DataArray.nbytes DataArray.shape DataArray.size DataArray.dtype DataArray.chunks DataArray contents ------------------ .. autosummary:: :toctree: ../generated/ DataArray.assign_coords DataArray.assign_attrs DataArray.pipe DataArray.rename DataArray.swap_dims DataArray.expand_dims DataArray.drop_vars DataArray.drop_indexes DataArray.drop_duplicates DataArray.drop_encoding DataArray.drop_attrs DataArray.reset_coords DataArray.copy DataArray.convert_calendar DataArray.interp_calendar DataArray.get_index DataArray.astype DataArray.item Indexing -------- .. autosummary:: :toctree: ../generated/ DataArray.__getitem__ DataArray.__setitem__ DataArray.loc DataArray.isel DataArray.sel DataArray.drop_sel DataArray.drop_isel DataArray.head DataArray.tail DataArray.thin DataArray.squeeze DataArray.interp DataArray.interp_like DataArray.reindex DataArray.reindex_like DataArray.set_index DataArray.reset_index DataArray.set_xindex DataArray.reorder_levels DataArray.query Missing value handling ---------------------- .. autosummary:: :toctree: ../generated/ DataArray.isnull DataArray.notnull DataArray.combine_first DataArray.count DataArray.dropna DataArray.fillna DataArray.ffill DataArray.bfill DataArray.interpolate_na DataArray.where DataArray.isin Comparisons ----------- .. autosummary:: :toctree: ../generated/ DataArray.equals DataArray.identical DataArray.broadcast_equals Computation ----------- .. autosummary:: :toctree: ../generated/ DataArray.reduce DataArray.groupby DataArray.groupby_bins DataArray.rolling DataArray.rolling_exp DataArray.cumulative DataArray.weighted DataArray.coarsen DataArray.resample DataArray.get_axis_num DataArray.diff DataArray.dot DataArray.quantile DataArray.differentiate DataArray.integrate DataArray.polyfit DataArray.map_blocks DataArray.curvefit Aggregation ----------- .. autosummary:: :toctree: ../generated/ DataArray.all DataArray.any DataArray.argmax DataArray.argmin DataArray.count DataArray.idxmax DataArray.idxmin DataArray.max DataArray.min DataArray.mean DataArray.median DataArray.prod DataArray.sum DataArray.std DataArray.var DataArray.cumsum DataArray.cumprod ndarray methods --------------- .. autosummary:: :toctree: ../generated/ DataArray.argsort DataArray.clip DataArray.conj DataArray.conjugate DataArray.imag DataArray.searchsorted DataArray.round DataArray.real DataArray.T DataArray.rank String manipulation ------------------- .. 
autosummary:: :toctree: ../generated/ :template: autosummary/accessor.rst DataArray.str .. autosummary:: :toctree: ../generated/ :template: autosummary/accessor_method.rst DataArray.str.capitalize DataArray.str.casefold DataArray.str.cat DataArray.str.center DataArray.str.contains DataArray.str.count DataArray.str.decode DataArray.str.encode DataArray.str.endswith DataArray.str.extract DataArray.str.extractall DataArray.str.find DataArray.str.findall DataArray.str.format DataArray.str.get DataArray.str.get_dummies DataArray.str.index DataArray.str.isalnum DataArray.str.isalpha DataArray.str.isdecimal DataArray.str.isdigit DataArray.str.islower DataArray.str.isnumeric DataArray.str.isspace DataArray.str.istitle DataArray.str.isupper DataArray.str.join DataArray.str.len DataArray.str.ljust DataArray.str.lower DataArray.str.lstrip DataArray.str.match DataArray.str.normalize DataArray.str.pad DataArray.str.partition DataArray.str.repeat DataArray.str.replace DataArray.str.rfind DataArray.str.rindex DataArray.str.rjust DataArray.str.rpartition DataArray.str.rsplit DataArray.str.rstrip DataArray.str.slice DataArray.str.slice_replace DataArray.str.split DataArray.str.startswith DataArray.str.strip DataArray.str.swapcase DataArray.str.title DataArray.str.translate DataArray.str.upper DataArray.str.wrap DataArray.str.zfill Datetimelike properties ----------------------- **Datetime properties**: .. autosummary:: :toctree: ../generated/ :template: autosummary/accessor_attribute.rst DataArray.dt.year DataArray.dt.month DataArray.dt.day DataArray.dt.hour DataArray.dt.minute DataArray.dt.second DataArray.dt.microsecond DataArray.dt.nanosecond DataArray.dt.dayofweek DataArray.dt.weekday DataArray.dt.dayofyear DataArray.dt.quarter DataArray.dt.days_in_month DataArray.dt.daysinmonth DataArray.dt.days_in_year DataArray.dt.season DataArray.dt.time DataArray.dt.date DataArray.dt.decimal_year DataArray.dt.calendar DataArray.dt.is_month_start DataArray.dt.is_month_end DataArray.dt.is_quarter_end DataArray.dt.is_year_start DataArray.dt.is_leap_year **Datetime methods**: .. autosummary:: :toctree: ../generated/ :template: autosummary/accessor_method.rst DataArray.dt.floor DataArray.dt.ceil DataArray.dt.isocalendar DataArray.dt.round DataArray.dt.strftime **Timedelta properties**: .. autosummary:: :toctree: ../generated/ :template: autosummary/accessor_attribute.rst DataArray.dt.days DataArray.dt.seconds DataArray.dt.microseconds DataArray.dt.nanoseconds DataArray.dt.total_seconds **Timedelta methods**: .. autosummary:: :toctree: ../generated/ :template: autosummary/accessor_method.rst DataArray.dt.floor DataArray.dt.ceil DataArray.dt.round Reshaping and reorganizing -------------------------- .. autosummary:: :toctree: ../generated/ DataArray.transpose DataArray.stack DataArray.unstack DataArray.to_unstacked_dataset DataArray.shift DataArray.roll DataArray.pad DataArray.sortby DataArray.broadcast_like python-xarray-2026.01.0/doc/api/exceptions.rst0000664000175000017500000000054415136607163021277 0ustar alastairalastair.. currentmodule:: xarray Exceptions ========== .. autosummary:: :toctree: ../generated/ AlignmentError CoordinateValidationError MergeError SerializationWarning DataTree -------- Exceptions raised when manipulating trees. .. autosummary:: :toctree: ../generated/ TreeIsomorphismError InvalidTreeError NotFoundInTreeError python-xarray-2026.01.0/doc/api/coordinates.rst0000664000175000017500000000303015136607163021421 0ustar alastairalastair.. 
currentmodule:: xarray Coordinates =========== Creating coordinates -------------------- .. autosummary:: :toctree: ../generated/ Coordinates Coordinates.from_xindex Coordinates.from_pandas_multiindex Attributes ---------- .. autosummary:: :toctree: ../generated/ Coordinates.dims Coordinates.sizes Coordinates.dtypes Coordinates.variables Coordinates.indexes Coordinates.xindexes Dictionary Interface -------------------- Coordinates implement the mapping interface with keys given by variable names and values given by ``DataArray`` objects. .. autosummary:: :toctree: ../generated/ Coordinates.__getitem__ Coordinates.__setitem__ Coordinates.__delitem__ Coordinates.__or__ Coordinates.update Coordinates.get Coordinates.items Coordinates.keys Coordinates.values Coordinates contents -------------------- .. autosummary:: :toctree: ../generated/ Coordinates.to_dataset Coordinates.to_index Coordinates.assign Coordinates.drop_dims Coordinates.drop_vars Coordinates.merge Coordinates.copy Coordinates.rename_vars Coordinates.rename_dims Comparisons ----------- .. autosummary:: :toctree: ../generated/ Coordinates.equals Coordinates.identical Proxies ------- .. currentmodule:: xarray.core.coordinates Coordinates that are accessed from the ``coords`` property of Dataset, DataArray and DataTree objects, respectively. .. autosummary:: :toctree: ../generated/ DatasetCoordinates DataArrayCoordinates DataTreeCoordinates python-xarray-2026.01.0/doc/api/encoding.rst0000664000175000017500000000033715136607163020704 0ustar alastairalastair.. currentmodule:: xarray Encoding/Decoding ================= .. autosummary:: :toctree: ../generated/ decode_cf Coder objects ------------- .. autosummary:: :toctree: ../generated/ coders.CFDatetimeCoder python-xarray-2026.01.0/doc/api/groupby.rst0000664000175000017500000000357415136607163020613 0ustar alastairalastair.. currentmodule:: xarray GroupBy objects =============== .. currentmodule:: xarray.core.groupby Dataset ------- .. autosummary:: :toctree: ../generated/ DatasetGroupBy DatasetGroupBy.map DatasetGroupBy.reduce DatasetGroupBy.assign DatasetGroupBy.assign_coords DatasetGroupBy.first DatasetGroupBy.last DatasetGroupBy.fillna DatasetGroupBy.quantile DatasetGroupBy.where DatasetGroupBy.all DatasetGroupBy.any DatasetGroupBy.count DatasetGroupBy.cumsum DatasetGroupBy.cumprod DatasetGroupBy.max DatasetGroupBy.mean DatasetGroupBy.median DatasetGroupBy.min DatasetGroupBy.prod DatasetGroupBy.std DatasetGroupBy.sum DatasetGroupBy.var DatasetGroupBy.dims DatasetGroupBy.groups DatasetGroupBy.shuffle_to_chunks DataArray --------- .. autosummary:: :toctree: ../generated/ DataArrayGroupBy DataArrayGroupBy.map DataArrayGroupBy.reduce DataArrayGroupBy.assign_coords DataArrayGroupBy.first DataArrayGroupBy.last DataArrayGroupBy.fillna DataArrayGroupBy.quantile DataArrayGroupBy.where DataArrayGroupBy.all DataArrayGroupBy.any DataArrayGroupBy.count DataArrayGroupBy.cumsum DataArrayGroupBy.cumprod DataArrayGroupBy.max DataArrayGroupBy.mean DataArrayGroupBy.median DataArrayGroupBy.min DataArrayGroupBy.prod DataArrayGroupBy.std DataArrayGroupBy.sum DataArrayGroupBy.var DataArrayGroupBy.dims DataArrayGroupBy.groups DataArrayGroupBy.shuffle_to_chunks Grouper Objects --------------- .. currentmodule:: xarray .. autosummary:: :toctree: ../generated/ groupers.BinGrouper groupers.SeasonGrouper groupers.UniqueGrouper Resampler Objects ----------------- .. 
autosummary:: :toctree: ../generated/ groupers.SeasonResampler groupers.SeasonResampler.compute_chunks groupers.TimeResampler groupers.TimeResampler.compute_chunks python-xarray-2026.01.0/doc/api/indexes.rst0000664000175000017500000000274115136607163020556 0ustar alastairalastair.. currentmodule:: xarray Indexes ======= .. seealso:: See the Xarray gallery on custom indexes for more examples. Creating indexes ---------------- .. autosummary:: :toctree: ../generated/ cftime_range date_range date_range_like indexes.RangeIndex.arange indexes.RangeIndex.linspace Built-in Indexes ---------------- Default, pandas-backed indexes built-in to Xarray: .. autosummary:: :toctree: ../generated/ indexes.PandasIndex indexes.PandasMultiIndex More complex indexes built-in to Xarray: .. autosummary:: :toctree: ../generated/ CFTimeIndex indexes.RangeIndex indexes.NDPointIndex indexes.CoordinateTransformIndex Building custom indexes ----------------------- These classes are building blocks for more complex Indexes: .. autosummary:: :toctree: ../generated/ indexes.CoordinateTransform indexes.CoordinateTransformIndex indexes.NDPointIndex indexes.TreeAdapter The Index base class for building custom indexes: .. autosummary:: :toctree: ../generated/ Index Index.from_variables Index.concat Index.stack Index.unstack Index.create_variables Index.should_add_coord_to_array Index.to_pandas_index Index.isel Index.sel Index.join Index.reindex_like Index.equals Index.roll Index.rename Index.copy The following are useful when building custom Indexes: .. autosummary:: :toctree: ../generated/ IndexSelResult python-xarray-2026.01.0/doc/api/dataset.rst0000664000175000017500000000661115136607163020544 0ustar alastairalastair.. currentmodule:: xarray Dataset ======= .. autosummary:: :toctree: ../generated/ Dataset Attributes ---------- .. autosummary:: :toctree: ../generated/ Dataset.dims Dataset.sizes Dataset.dtypes Dataset.data_vars Dataset.coords Dataset.attrs Dataset.encoding Dataset.indexes Dataset.xindexes Dataset.chunks Dataset.chunksizes Dataset.nbytes Dictionary interface -------------------- Datasets implement the mapping interface with keys given by variable names and values given by ``DataArray`` objects. .. autosummary:: :toctree: ../generated/ Dataset.__getitem__ Dataset.__setitem__ Dataset.__delitem__ Dataset.update Dataset.get Dataset.items Dataset.keys Dataset.values Dataset contents ---------------- .. autosummary:: :toctree: ../generated/ Dataset.copy Dataset.assign Dataset.assign_coords Dataset.assign_attrs Dataset.pipe Dataset.merge Dataset.rename Dataset.rename_vars Dataset.rename_dims Dataset.swap_dims Dataset.expand_dims Dataset.drop_vars Dataset.drop_indexes Dataset.drop_duplicates Dataset.drop_dims Dataset.drop_encoding Dataset.drop_attrs Dataset.set_coords Dataset.reset_coords Dataset.convert_calendar Dataset.interp_calendar Dataset.get_index Comparisons ----------- .. autosummary:: :toctree: ../generated/ Dataset.equals Dataset.identical Dataset.broadcast_equals Indexing -------- .. autosummary:: :toctree: ../generated/ Dataset.loc Dataset.isel Dataset.sel Dataset.drop_sel Dataset.drop_isel Dataset.head Dataset.tail Dataset.thin Dataset.squeeze Dataset.interp Dataset.interp_like Dataset.reindex Dataset.reindex_like Dataset.set_index Dataset.reset_index Dataset.set_xindex Dataset.reorder_levels Dataset.query Missing value handling ---------------------- ..
autosummary:: :toctree: ../generated/ Dataset.isnull Dataset.notnull Dataset.combine_first Dataset.count Dataset.dropna Dataset.fillna Dataset.ffill Dataset.bfill Dataset.interpolate_na Dataset.where Dataset.isin Computation ----------- .. autosummary:: :toctree: ../generated/ Dataset.map Dataset.reduce Dataset.groupby Dataset.groupby_bins Dataset.rolling Dataset.rolling_exp Dataset.cumulative Dataset.weighted Dataset.coarsen Dataset.resample Dataset.diff Dataset.quantile Dataset.differentiate Dataset.integrate Dataset.map_blocks Dataset.polyfit Dataset.curvefit Dataset.eval Aggregation ----------- .. autosummary:: :toctree: ../generated/ Dataset.all Dataset.any Dataset.argmax Dataset.argmin Dataset.count Dataset.idxmax Dataset.idxmin Dataset.max Dataset.min Dataset.mean Dataset.median Dataset.prod Dataset.sum Dataset.std Dataset.var Dataset.cumsum Dataset.cumprod ndarray methods --------------- .. autosummary:: :toctree: ../generated/ Dataset.argsort Dataset.astype Dataset.clip Dataset.conj Dataset.conjugate Dataset.imag Dataset.round Dataset.real Dataset.rank Reshaping and reorganizing -------------------------- .. autosummary:: :toctree: ../generated/ Dataset.transpose Dataset.stack Dataset.unstack Dataset.to_stacked_array Dataset.shift Dataset.roll Dataset.pad Dataset.sortby Dataset.broadcast_like python-xarray-2026.01.0/doc/api/accessors.rst0000664000175000017500000000033515136607163021101 0ustar alastairalastair.. currentmodule:: xarray Accessors ========= .. currentmodule:: xarray.core .. autosummary:: :toctree: ../generated/ accessor_dt.DatetimeAccessor accessor_dt.TimedeltaAccessor accessor_str.StringAccessor python-xarray-2026.01.0/pixi.toml0000664000175000017500000002256115136607163016717 0ustar alastairalastair[workspace] preview = ["pixi-build"] channels = ["conda-forge", "nodefaults"] platforms = ["win-64", "linux-64", "osx-arm64"] [package] name = "xarray" version = "dynamic" # dynamic versioning needs better support in pixi https://github.com/prefix-dev/pixi/issues/2923#issuecomment-2598460666 . Putting `version = "dynamic"` here for now until pixi recommends something else. [package.build] backend = { name = "pixi-build-python", version = ">=0.4.4" } [package.host-dependencies] setuptools = "*" setuptools_scm = "*" [package.run-dependencies] python = "*" numpy = "*" pandas = "*" packaging = "24.1.*" git = "*" # needed for dynamic versioning [dependencies] xarray = { path = "." 
} [target.linux-64.dependencies] pydap-server = "*" [feature.py311.dependencies] python = "3.11.*" [feature.py312.dependencies] python = "3.12.*" [feature.py313.dependencies] python = "3.13.*" [feature.backends.dependencies] # files h5netcdf = "*" h5py = "*" hdf5 = "*" netcdf4 = "*" zarr = "*" rasterio = "*" # opendap pydap = "*" lxml = "*" # Optional dep of pydap # s3 boto3 = "*" fsspec = "*" aiobotocore = "*" [feature.numba.dependencies] numba = "*" numbagg = "*" [feature.dask.dependencies] dask = "*" distributed = "*" [feature.accel.dependencies] flox = "*" bottleneck = "*" numexpr = "*" pyarrow = "*" opt_einsum = "*" [feature.viz.dependencies] cartopy = "*" matplotlib-base = "*" nc-time-axis = "*" seaborn = "*" [feature.extras.dependencies] # array sparse = "*" pint = "*" array-api-strict = "*" # algorithms scipy = "*" toolz = "*" # tutorial pooch = "*" # calendar cftime = "*" # other iris = "*" [feature.extras.pypi-dependencies] # array jax = "*" # no way to get cpu-only jaxlib from conda if gpu is present [feature.minimal.dependencies] # minimal versions python = "3.11.*" numpy = "1.26.*" pandas = "2.2.*" [feature.minimum-scipy.dependencies] scipy = "1.13.*" [feature.min-versions.dependencies] array-api-strict = "2.4.*" # dependency for testing the array api compat boto3 = "1.34.*" bottleneck = "1.4.*" cartopy = "0.23.*" cftime = "1.6.*" dask-core = "2024.6.*" distributed = "2024.6.*" flox = "0.9.*" h5netcdf = "1.4.*" # h5py and hdf5 tend to cause conflicts # for e.g. hdf5 1.12 conflicts with h5py=3.1 # prioritize bumping other packages instead h5py = "3.11.*" hdf5 = "1.14.*" iris = "3.9.*" lxml = "5.1.*" # Optional dep of pydap matplotlib-base = "3.8.*" nc-time-axis = "1.4.*" # netcdf follows a 1.major.minor[.patch] convention # (see https://github.com/Unidata/netcdf4-python/issues/1090) netcdf4 = "1.6.*" numba = "0.60.*" numbagg = "0.8.*" packaging = "24.1.*" pint = "0.24.*" pydap = "3.5.*" rasterio = "1.3.*" seaborn = "0.13.*" sparse = "0.15.*" toolz = "0.12.*" zarr = "2.18.*" # TODO: Remove `target.unix` restriction once pandas nightly has win-64 wheels again. # Without this, `pixi lock` fails because it can't solve the nightly feature for win-64, # which breaks RTD builds (RTD has no lock file cache, unlike GitHub Actions CI). [feature.nightly.target.unix.dependencies] python = "*" [feature.nightly.pypi-options.dependency-overrides] numpy = { version = "*", index = "https://pypi.anaconda.org/scientific-python-nightly-wheels/simple" } scipy = { version = "*", index = "https://pypi.anaconda.org/scientific-python-nightly-wheels/simple" } matplotlib = { version = "*", index = "https://pypi.anaconda.org/scientific-python-nightly-wheels/simple" } pandas = { version = "*", index = "https://pypi.anaconda.org/scientific-python-nightly-wheels/simple" } pyarrow = { version = "*", index = "https://pypi.anaconda.org/scientific-python-nightly-wheels/simple" } dask = { git = "https://github.com/dask/dask" } distributed = { git = "https://github.com/dask/distributed" } zarr = { git = "https://github.com/zarr-developers/zarr-python" } numcodecs = { git = "https://github.com/zarr-developers/numcodecs" } cftime = { git = "https://github.com/Unidata/cftime" } # packaging = { git = "https://github.com/pypa/packaging"} #? 
Pixi warns if this is enabled pint = { git = "https://github.com/hgrecco/pint" } bottleneck = { git = "https://github.com/pydata/bottleneck" } fsspec = { git = "https://github.com/intake/filesystem_spec" } nc-time-axis = { git = "https://github.com/SciTools/nc-time-axis" } flox = { git = "https://github.com/xarray-contrib/flox" } h5netcdf = { git = "https://github.com/h5netcdf/h5netcdf" } opt_einsum = { git = "https://github.com/dgasmith/opt_einsum" } # sparse = { git = "https://github.com/pydata/sparse"} [feature.nightly.target.unix.pypi-dependencies] xarray = { path = ".", editable = true } numpy = "*" pandas = "*" matplotlib = "*" scipy = "*" pyarrow = "*" dask = "*" distributed = "*" zarr = "*" numcodecs = "*" cftime = "*" packaging = "*" pint = "*" bottleneck = "*" fsspec = "*" nc-time-axis = "*" flox = "*" # h5netcdf = "*" # h5py = "*" opt_einsum = "*" netcdf4 = "*" scitools-iris = "*" pydap = "*" cartopy = "*" seaborn = "*" [feature.test.dependencies] pytest = "*" pytest-asyncio = "*" pytest-cov = "*" pytest-env = "*" pytest-mypy-plugins = "*" pytest-reportlog = "*" pytest-timeout = "*" pytest-xdist = "*" pytz = "*" hypothesis = "*" coveralls = "*" [feature.test.tasks] test = { cmd = "pytest" } [feature.doc.dependencies] kerchunk = "*" ipykernel = "*" ipywidgets = "*" # silence nbsphinx warning ipython = "*" jupyter_client = "*" jupyter_sphinx = "*" nbsphinx = "*" ncdata = "*" pydata-sphinx-theme = "*" pyproj = "*" rich = "*" # for Zarr tree() setuptools = "*" sphinx-autosummary-accessors = "*" sphinx-copybutton = "*" sphinx-design = "*" sphinx-inline-tabs = "*" sphinx-llm = ">=0.2.1" sphinx = ">=6,<8" sphinxcontrib-mermaid = "*" sphinxcontrib-srclinks = "*" sphinx-remove-toctrees = "*" sphinxext-opengraph = "*" sphinxext-rediraffe = "*" [feature.doc.pypi-dependencies] cfgrib = "*" # pypi dep because of https://github.com/prefix-dev/pixi/issues/3032#issuecomment-3302638043 [feature.doc.tasks] doc = { cmd = "make html", cwd = "doc" } doc-clean = { cmd = "make clean && make html", cwd = "doc" } [feature.typing.dependencies] mypy = "==1.18.1" pyright = "*" hypothesis = "*" lxml = "*" pandas-stubs = "<=2.3.3.251219" types-colorama = "*" types-docutils = "*" types-psutil = "*" types-Pygments = "*" types-python-dateutil = "*" types-pytz = "*" types-PyYAML = "*" types-requests = "*" types-setuptools = "*" types-openpyxl = "*" typing_extensions = "*" pip = "*" [feature.typing.pypi-dependencies] types-defusedxml = "*" types-pexpect = "*" [feature.typing.tasks] mypy = "mypy --install-types --non-interactive --cobertura-xml-report mypy_report" [feature.pre-commit.dependencies] pre-commit = "*" [feature.pre-commit.tasks] pre-commit = { cmd = "pre-commit" } [feature.release.dependencies] gitpython = "*" cytoolz = "*" [feature.release.tasks] release-contributors = "python ci/release_contributors.py" [feature.dev.dependencies] ipython = ">=9.8.0,<10" black = ">=25.1.0,<26" [feature.dev.pypi-dependencies] pytest-accept = ">=0.2.2, <0.3" [feature.policy.pypi-dependencies] xarray-minimum-dependency-policy = "*" [feature.policy.dependencies] python = "3.13.*" [feature.policy.tasks.check-policy] cmd = "minimum-versions validate --policy ci/policy.yaml --manifest-path pixi.toml {{ env }}" args = ["env"] [feature.policy.tasks] policy-bare-minimum = [ { task = "check-policy", args = [ "pixi:test-py311-bare-minimum", ] }, ] policy-bare-min-and-scipy = [ { task = "check-policy", args = [ "pixi:test-py311-bare-min-and-scipy", ] }, ] policy-min-versions = [ { task = "check-policy", args = [ 
"pixi:test-py311-min-versions", ] }, ] policy = [ { task = "check-policy", args = [ """\ pixi:test-py311-bare-minimum \ pixi:test-py311-bare-min-and-scipy \ pixi:test-py311-min-versions \ """, ] }, ] [environments] # Testing # test-just-xarray = { features = ["test"] } # https://github.com/pydata/xarray/pull/10888/files#r2511336147 test-py313-no-numba = { features = [ "py313", "test", "backends", "accel", "dask", "viz", "extras", ] } test-py313-no-dask = { features = [ "py312", "test", "backends", "accel", "numba", "viz", "extras", ] } test-py313 = { features = [ "py313", "test", "backends", "accel", "numba", "dask", "viz", "extras", ] } test-nightly = { features = [ "py313", "nightly", "test", # "typing", ], no-default-feature = true } test-py311 = { features = [ "py311", "test", "backends", "accel", "numba", "dask", "viz", "extras", ] } test-py311-with-typing = { features = [ "py311", "test", "backends", "accel", "numba", "dask", "viz", "extras", "typing", ] } test-py313-with-typing = { features = [ "py313", "test", "backends", "accel", "numba", "dask", "viz", "extras", "typing", ] } test-py311-bare-minimum = { features = ["test", "minimal"] } test-py311-bare-min-and-scipy = { features = [ "test", "minimal", "minimum-scipy", ] } test-py311-min-versions = { features = [ "test", "minimal", "minimum-scipy", "min-versions", ] } # Extra typing = { features = ["typing"] } doc = { features = [ "doc", "backends", "test", "accel", "viz", "extras", ] } pre-commit = { features = ["pre-commit"], no-default-feature = true } release = { features = ["release"], no-default-feature = true } default = { features = [ "py313", "test", "backends", "accel", "numba", "dask", "viz", "extras", "dev", ] } policy = { features = ["policy"], no-default-feature = true } python-xarray-2026.01.0/.codecov.yml0000664000175000017500000000116415136607163017270 0ustar alastairalastaircodecov: require_ci_to_pass: true coverage: status: project: default: # Require 1% coverage, i.e., always succeed target: 1% flags: - unittests paths: - "!xarray/tests/" unittests: target: 90% flags: - unittests paths: - "!xarray/tests/" mypy: target: 20% flags: - mypy patch: false changes: false comment: false flags: unittests: paths: - "xarray" - "!xarray/tests" carryforward: false mypy: paths: - "xarray" carryforward: false python-xarray-2026.01.0/.gitignore0000664000175000017500000000230415136607163017032 0ustar alastairalastair*.py[cod] __pycache__ .env .venv # example caches from Hypothesis .hypothesis/ # temp files from docs build doc/*.nc doc/auto_gallery doc/rasm.zarr # C extensions *.so # Packages *.egg *.egg-info .eggs dist build eggs parts bin var sdist develop-eggs .installed.cfg lib lib64 # Installer logs pip-log.txt # Unit test / coverage reports .coverage .coverage.* .tox nosetests.xml .cache .prettier_cache .dmypy.json .mypy_cache .ropeproject/ .tags* .testmon* .tmontmp/ .pytest_cache dask-worker-space/ # asv environments asv_bench/.asv asv_bench/pkgs # Translations *.mo # Mr Developer .mr.developer.cfg .project .pydevproject # IDEs .idea *.swp .DS_Store .vscode/ # xarray specific doc/_build doc/generated/ doc/api/generated/ xarray/tests/data/*.grib.*.idx # Claude Code .claude/ # Sync tools Icon* .ipynb_checkpoints doc/team-panel.txt doc/external-examples-gallery.txt doc/notebooks-examples-gallery.txt doc/videos-gallery.txt doc/*.zarr doc/*.nc doc/*.h5 # Until we support this properly, excluding from gitignore. (adding it to # gitignore to make it _easier_ to work with `uv`, not as an indication that I # think we shouldn't...) 
uv.lock mypy_report/ xarray-docs/ # pixi environments .pixi pixi.lock python-xarray-2026.01.0/licenses/0000775000175000017500000000000015136607163016650 5ustar alastairalastairpython-xarray-2026.01.0/licenses/PANDAS_LICENSE0000664000175000017500000000321615136607163020705 0ustar alastairalastairpandas license ============== Copyright (c) 2011-2012, Lambda Foundry, Inc. and PyData Development Team All rights reserved. Copyright (c) 2008-2011 AQR Capital Management, LLC All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the copyright holder nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. python-xarray-2026.01.0/licenses/DASK_LICENSE0000664000175000017500000000271515136607163020464 0ustar alastairalastairCopyright (c) 2014-2018, Anaconda, Inc. and contributors All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. Neither the name of Anaconda nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
python-xarray-2026.01.0/licenses/NUMPY_LICENSE0000664000175000017500000000300715136607163020645 0ustar alastairalastairCopyright (c) 2005-2011, NumPy Developers. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the NumPy Developers nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. python-xarray-2026.01.0/licenses/ICOMOON_LICENSE0000664000175000017500000004434015136607163021045 0ustar alastairalastairAttribution 4.0 International ======================================================================= Creative Commons Corporation ("Creative Commons") is not a law firm and does not provide legal services or legal advice. Distribution of Creative Commons public licenses does not create a lawyer-client or other relationship. Creative Commons makes its licenses and related information available on an "as-is" basis. Creative Commons gives no warranties regarding its licenses, any material licensed under their terms and conditions, or any related information. Creative Commons disclaims all liability for damages resulting from their use to the fullest extent possible. Using Creative Commons Public Licenses Creative Commons public licenses provide a standard set of terms and conditions that creators and other rights holders may use to share original works of authorship and other material subject to copyright and certain other rights specified in the public license below. The following considerations are for informational purposes only, are not exhaustive, and do not form part of our licenses. Considerations for licensors: Our public licenses are intended for use by those authorized to give the public permission to use material in ways otherwise restricted by copyright and certain other rights. Our licenses are irrevocable. Licensors should read and understand the terms and conditions of the license they choose before applying it. Licensors should also secure all rights necessary before applying our licenses so that the public can reuse the material as expected. Licensors should clearly mark any material not subject to the license. This includes other CC- licensed material, or material used under an exception or limitation to copyright. 
More considerations for licensors: wiki.creativecommons.org/Considerations_for_licensors Considerations for the public: By using one of our public licenses, a licensor grants the public permission to use the licensed material under specified terms and conditions. If the licensor's permission is not necessary for any reason--for example, because of any applicable exception or limitation to copyright--then that use is not regulated by the license. Our licenses grant only permissions under copyright and certain other rights that a licensor has authority to grant. Use of the licensed material may still be restricted for other reasons, including because others have copyright or other rights in the material. A licensor may make special requests, such as asking that all changes be marked or described. Although not required by our licenses, you are encouraged to respect those requests where reasonable. More considerations for the public: wiki.creativecommons.org/Considerations_for_licensees ======================================================================= Creative Commons Attribution 4.0 International Public License By exercising the Licensed Rights (defined below), You accept and agree to be bound by the terms and conditions of this Creative Commons Attribution 4.0 International Public License ("Public License"). To the extent this Public License may be interpreted as a contract, You are granted the Licensed Rights in consideration of Your acceptance of these terms and conditions, and the Licensor grants You such rights in consideration of benefits the Licensor receives from making the Licensed Material available under these terms and conditions. Section 1 -- Definitions. a. Adapted Material means material subject to Copyright and Similar Rights that is derived from or based upon the Licensed Material and in which the Licensed Material is translated, altered, arranged, transformed, or otherwise modified in a manner requiring permission under the Copyright and Similar Rights held by the Licensor. For purposes of this Public License, where the Licensed Material is a musical work, performance, or sound recording, Adapted Material is always produced where the Licensed Material is synched in timed relation with a moving image. b. Adapter's License means the license You apply to Your Copyright and Similar Rights in Your contributions to Adapted Material in accordance with the terms and conditions of this Public License. c. Copyright and Similar Rights means copyright and/or similar rights closely related to copyright including, without limitation, performance, broadcast, sound recording, and Sui Generis Database Rights, without regard to how the rights are labeled or categorized. For purposes of this Public License, the rights specified in Section 2(b)(1)-(2) are not Copyright and Similar Rights. d. Effective Technological Measures means those measures that, in the absence of proper authority, may not be circumvented under laws fulfilling obligations under Article 11 of the WIPO Copyright Treaty adopted on December 20, 1996, and/or similar international agreements. e. Exceptions and Limitations means fair use, fair dealing, and/or any other exception or limitation to Copyright and Similar Rights that applies to Your use of the Licensed Material. f. Licensed Material means the artistic or literary work, database, or other material to which the Licensor applied this Public License. g. 
Licensed Rights means the rights granted to You subject to the terms and conditions of this Public License, which are limited to all Copyright and Similar Rights that apply to Your use of the Licensed Material and that the Licensor has authority to license. h. Licensor means the individual(s) or entity(ies) granting rights under this Public License. i. Share means to provide material to the public by any means or process that requires permission under the Licensed Rights, such as reproduction, public display, public performance, distribution, dissemination, communication, or importation, and to make material available to the public including in ways that members of the public may access the material from a place and at a time individually chosen by them. j. Sui Generis Database Rights means rights other than copyright resulting from Directive 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the legal protection of databases, as amended and/or succeeded, as well as other essentially equivalent rights anywhere in the world. k. You means the individual or entity exercising the Licensed Rights under this Public License. Your has a corresponding meaning. Section 2 -- Scope. a. License grant. 1. Subject to the terms and conditions of this Public License, the Licensor hereby grants You a worldwide, royalty-free, non-sublicensable, non-exclusive, irrevocable license to exercise the Licensed Rights in the Licensed Material to: a. reproduce and Share the Licensed Material, in whole or in part; and b. produce, reproduce, and Share Adapted Material. 2. Exceptions and Limitations. For the avoidance of doubt, where Exceptions and Limitations apply to Your use, this Public License does not apply, and You do not need to comply with its terms and conditions. 3. Term. The term of this Public License is specified in Section 6(a). 4. Media and formats; technical modifications allowed. The Licensor authorizes You to exercise the Licensed Rights in all media and formats whether now known or hereafter created, and to make technical modifications necessary to do so. The Licensor waives and/or agrees not to assert any right or authority to forbid You from making technical modifications necessary to exercise the Licensed Rights, including technical modifications necessary to circumvent Effective Technological Measures. For purposes of this Public License, simply making modifications authorized by this Section 2(a) (4) never produces Adapted Material. 5. Downstream recipients. a. Offer from the Licensor -- Licensed Material. Every recipient of the Licensed Material automatically receives an offer from the Licensor to exercise the Licensed Rights under the terms and conditions of this Public License. b. No downstream restrictions. You may not offer or impose any additional or different terms or conditions on, or apply any Effective Technological Measures to, the Licensed Material if doing so restricts exercise of the Licensed Rights by any recipient of the Licensed Material. 6. No endorsement. Nothing in this Public License constitutes or may be construed as permission to assert or imply that You are, or that Your use of the Licensed Material is, connected with, or sponsored, endorsed, or granted official status by, the Licensor or others designated to receive attribution as provided in Section 3(a)(1)(A)(i). b. Other rights. 1. 
Moral rights, such as the right of integrity, are not licensed under this Public License, nor are publicity, privacy, and/or other similar personality rights; however, to the extent possible, the Licensor waives and/or agrees not to assert any such rights held by the Licensor to the limited extent necessary to allow You to exercise the Licensed Rights, but not otherwise. 2. Patent and trademark rights are not licensed under this Public License. 3. To the extent possible, the Licensor waives any right to collect royalties from You for the exercise of the Licensed Rights, whether directly or through a collecting society under any voluntary or waivable statutory or compulsory licensing scheme. In all other cases the Licensor expressly reserves any right to collect such royalties. Section 3 -- License Conditions. Your exercise of the Licensed Rights is expressly made subject to the following conditions. a. Attribution. 1. If You Share the Licensed Material (including in modified form), You must: a. retain the following if it is supplied by the Licensor with the Licensed Material: i. identification of the creator(s) of the Licensed Material and any others designated to receive attribution, in any reasonable manner requested by the Licensor (including by pseudonym if designated); ii. a copyright notice; iii. a notice that refers to this Public License; iv. a notice that refers to the disclaimer of warranties; v. a URI or hyperlink to the Licensed Material to the extent reasonably practicable; b. indicate if You modified the Licensed Material and retain an indication of any previous modifications; and c. indicate the Licensed Material is licensed under this Public License, and include the text of, or the URI or hyperlink to, this Public License. 2. You may satisfy the conditions in Section 3(a)(1) in any reasonable manner based on the medium, means, and context in which You Share the Licensed Material. For example, it may be reasonable to satisfy the conditions by providing a URI or hyperlink to a resource that includes the required information. 3. If requested by the Licensor, You must remove any of the information required by Section 3(a)(1)(A) to the extent reasonably practicable. 4. If You Share Adapted Material You produce, the Adapter's License You apply must not prevent recipients of the Adapted Material from complying with this Public License. Section 4 -- Sui Generis Database Rights. Where the Licensed Rights include Sui Generis Database Rights that apply to Your use of the Licensed Material: a. for the avoidance of doubt, Section 2(a)(1) grants You the right to extract, reuse, reproduce, and Share all or a substantial portion of the contents of the database; b. if You include all or a substantial portion of the database contents in a database in which You have Sui Generis Database Rights, then the database in which You have Sui Generis Database Rights (but not its individual contents) is Adapted Material; and c. You must comply with the conditions in Section 3(a) if You Share all or a substantial portion of the contents of the database. For the avoidance of doubt, this Section 4 supplements and does not replace Your obligations under this Public License where the Licensed Rights include other Copyright and Similar Rights. Section 5 -- Disclaimer of Warranties and Limitation of Liability. a. 
UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. c. The disclaimer of warranties and limitation of liability provided above shall be interpreted in a manner that, to the extent possible, most closely approximates an absolute disclaimer and waiver of all liability. Section 6 -- Term and Termination. a. This Public License applies for the term of the Copyright and Similar Rights licensed here. However, if You fail to comply with this Public License, then Your rights under this Public License terminate automatically. b. Where Your right to use the Licensed Material has terminated under Section 6(a), it reinstates: 1. automatically as of the date the violation is cured, provided it is cured within 30 days of Your discovery of the violation; or 2. upon express reinstatement by the Licensor. For the avoidance of doubt, this Section 6(b) does not affect any right the Licensor may have to seek remedies for Your violations of this Public License. c. For the avoidance of doubt, the Licensor may also offer the Licensed Material under separate terms or conditions or stop distributing the Licensed Material at any time; however, doing so will not terminate this Public License. d. Sections 1, 5, 6, 7, and 8 survive termination of this Public License. Section 7 -- Other Terms and Conditions. a. The Licensor shall not be bound by any additional or different terms or conditions communicated by You unless expressly agreed. b. Any arrangements, understandings, or agreements regarding the Licensed Material not stated herein are separate from and independent of the terms and conditions of this Public License. Section 8 -- Interpretation. a. For the avoidance of doubt, this Public License does not, and shall not be interpreted to, reduce, limit, restrict, or impose conditions on any use of the Licensed Material that could lawfully be made without permission under this Public License. b. To the extent possible, if any provision of this Public License is deemed unenforceable, it shall be automatically reformed to the minimum extent necessary to make it enforceable. If the provision cannot be reformed, it shall be severed from this Public License without affecting the enforceability of the remaining terms and conditions. c. No term or condition of this Public License will be waived and no failure to comply consented to unless expressly agreed to by the Licensor. d. 
Nothing in this Public License constitutes or may be interpreted as a limitation upon, or waiver of, any privileges and immunities that apply to the Licensor or You, including from the legal processes of any jurisdiction or authority. ======================================================================= Creative Commons is not a party to its public licenses. Notwithstanding, Creative Commons may elect to apply one of its public licenses to material it publishes and in those instances will be considered the “Licensor.” The text of the Creative Commons public licenses is dedicated to the public domain under the CC0 Public Domain Dedication. Except for the limited purpose of indicating that material is shared under a Creative Commons public license or as otherwise permitted by the Creative Commons policies published at creativecommons.org/policies, Creative Commons does not authorize the use of the trademark "Creative Commons" or any other trademark or logo of Creative Commons without its prior written consent including, without limitation, in connection with any unauthorized modifications to any of its public licenses or any other arrangements, understandings, or agreements concerning use of licensed material. For the avoidance of doubt, this paragraph does not form part of the public licenses. Creative Commons may be contacted at creativecommons.org. python-xarray-2026.01.0/licenses/PYTHON_LICENSE0000664000175000017500000003073115136607163020762 0ustar alastairalastairA. HISTORY OF THE SOFTWARE ========================== Python was created in the early 1990s by Guido van Rossum at Stichting Mathematisch Centrum (CWI, see http://www.cwi.nl) in the Netherlands as a successor of a language called ABC. Guido remains Python's principal author, although it includes many contributions from others. In 1995, Guido continued his work on Python at the Corporation for National Research Initiatives (CNRI, see http://www.cnri.reston.va.us) in Reston, Virginia where he released several versions of the software. In May 2000, Guido and the Python core development team moved to BeOpen.com to form the BeOpen PythonLabs team. In October of the same year, the PythonLabs team moved to Digital Creations (now Zope Corporation, see http://www.zope.com). In 2001, the Python Software Foundation (PSF, see http://www.python.org/psf/) was formed, a non-profit organization created specifically to own Python-related Intellectual Property. Zope Corporation is a sponsoring member of the PSF. All Python releases are Open Source (see http://www.opensource.org for the Open Source Definition). Historically, most, but not all, Python releases have also been GPL-compatible; the table below summarizes the various releases. Release Derived Year Owner GPL- from compatible? (1) 0.9.0 thru 1.2 1991-1995 CWI yes 1.3 thru 1.5.2 1.2 1995-1999 CNRI yes 1.6 1.5.2 2000 CNRI no 2.0 1.6 2000 BeOpen.com no 1.6.1 1.6 2001 CNRI yes (2) 2.1 2.0+1.6.1 2001 PSF no 2.0.1 2.0+1.6.1 2001 PSF yes 2.1.1 2.1+2.0.1 2001 PSF yes 2.1.2 2.1.1 2002 PSF yes 2.1.3 2.1.2 2002 PSF yes 2.2 and above 2.1.1 2001-now PSF yes Footnotes: (1) GPL-compatible doesn't mean that we're distributing Python under the GPL. All Python licenses, unlike the GPL, let you distribute a modified version without making your changes open source. The GPL-compatible licenses make it possible to combine Python with other software that is released under the GPL; the others don't. 
(2) According to Richard Stallman, 1.6.1 is not GPL-compatible, because its license has a choice of law clause. According to CNRI, however, Stallman's lawyer has told CNRI's lawyer that 1.6.1 is "not incompatible" with the GPL. Thanks to the many outside volunteers who have worked under Guido's direction to make these releases possible. B. TERMS AND CONDITIONS FOR ACCESSING OR OTHERWISE USING PYTHON =============================================================== PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 -------------------------------------------- 1. This LICENSE AGREEMENT is between the Python Software Foundation ("PSF"), and the Individual or Organization ("Licensee") accessing and otherwise using this software ("Python") in source or binary form and its associated documentation. 2. Subject to the terms and conditions of this License Agreement, PSF hereby grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, analyze, test, perform and/or display publicly, prepare derivative works, distribute, and otherwise use Python alone or in any derivative version, provided, however, that PSF's License Agreement and PSF's notice of copyright, i.e., "Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015 Python Software Foundation; All Rights Reserved" are retained in Python alone or in any derivative version prepared by Licensee. 3. In the event Licensee prepares a derivative work that is based on or incorporates Python or any part thereof, and wants to make the derivative work available to others as provided herein, then Licensee hereby agrees to include in any such work a brief summary of the changes made to Python. 4. PSF is making Python available to Licensee on an "AS IS" basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. 5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON, OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. 6. This License Agreement will automatically terminate upon a material breach of its terms and conditions. 7. Nothing in this License Agreement shall be deemed to create any relationship of agency, partnership, or joint venture between PSF and Licensee. This License Agreement does not grant permission to use PSF trademarks or trade name in a trademark sense to endorse or promote products or services of Licensee, or any third party. 8. By copying, installing or otherwise using Python, Licensee agrees to be bound by the terms and conditions of this License Agreement. BEOPEN.COM LICENSE AGREEMENT FOR PYTHON 2.0 ------------------------------------------- BEOPEN PYTHON OPEN SOURCE LICENSE AGREEMENT VERSION 1 1. This LICENSE AGREEMENT is between BeOpen.com ("BeOpen"), having an office at 160 Saratoga Avenue, Santa Clara, CA 95051, and the Individual or Organization ("Licensee") accessing and otherwise using this software in source or binary form and its associated documentation ("the Software"). 2. 
Subject to the terms and conditions of this BeOpen Python License Agreement, BeOpen hereby grants Licensee a non-exclusive, royalty-free, world-wide license to reproduce, analyze, test, perform and/or display publicly, prepare derivative works, distribute, and otherwise use the Software alone or in any derivative version, provided, however, that the BeOpen Python License is retained in the Software, alone or in any derivative version prepared by Licensee. 3. BeOpen is making the Software available to Licensee on an "AS IS" basis. BEOPEN MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, BEOPEN MAKES NO AND DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE SOFTWARE WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. 4. BEOPEN SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF THE SOFTWARE FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THE SOFTWARE, OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. 5. This License Agreement will automatically terminate upon a material breach of its terms and conditions. 6. This License Agreement shall be governed by and interpreted in all respects by the law of the State of California, excluding conflict of law provisions. Nothing in this License Agreement shall be deemed to create any relationship of agency, partnership, or joint venture between BeOpen and Licensee. This License Agreement does not grant permission to use BeOpen trademarks or trade names in a trademark sense to endorse or promote products or services of Licensee, or any third party. As an exception, the "BeOpen Python" logos available at http://www.pythonlabs.com/logos.html may be used according to the permissions granted on that web page. 7. By copying, installing or otherwise using the software, Licensee agrees to be bound by the terms and conditions of this License Agreement. CNRI LICENSE AGREEMENT FOR PYTHON 1.6.1 --------------------------------------- 1. This LICENSE AGREEMENT is between the Corporation for National Research Initiatives, having an office at 1895 Preston White Drive, Reston, VA 20191 ("CNRI"), and the Individual or Organization ("Licensee") accessing and otherwise using Python 1.6.1 software in source or binary form and its associated documentation. 2. Subject to the terms and conditions of this License Agreement, CNRI hereby grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, analyze, test, perform and/or display publicly, prepare derivative works, distribute, and otherwise use Python 1.6.1 alone or in any derivative version, provided, however, that CNRI's License Agreement and CNRI's notice of copyright, i.e., "Copyright (c) 1995-2001 Corporation for National Research Initiatives; All Rights Reserved" are retained in Python 1.6.1 alone or in any derivative version prepared by Licensee. Alternately, in lieu of CNRI's License Agreement, Licensee may substitute the following text (omitting the quotes): "Python 1.6.1 is made available subject to the terms and conditions in CNRI's License Agreement. This Agreement together with Python 1.6.1 may be located on the Internet using the following unique, persistent identifier (known as a handle): 1895.22/1013. This Agreement may also be obtained from a proxy server on the Internet using the following URL: http://hdl.handle.net/1895.22/1013". 3. 
In the event Licensee prepares a derivative work that is based on or incorporates Python 1.6.1 or any part thereof, and wants to make the derivative work available to others as provided herein, then Licensee hereby agrees to include in any such work a brief summary of the changes made to Python 1.6.1. 4. CNRI is making Python 1.6.1 available to Licensee on an "AS IS" basis. CNRI MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, CNRI MAKES NO AND DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 1.6.1 WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. 5. CNRI SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON 1.6.1 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 1.6.1, OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. 6. This License Agreement will automatically terminate upon a material breach of its terms and conditions. 7. This License Agreement shall be governed by the federal intellectual property law of the United States, including without limitation the federal copyright law, and, to the extent such U.S. federal law does not apply, by the law of the Commonwealth of Virginia, excluding Virginia's conflict of law provisions. Notwithstanding the foregoing, with regard to derivative works based on Python 1.6.1 that incorporate non-separable material that was previously distributed under the GNU General Public License (GPL), the law of the Commonwealth of Virginia shall govern this License Agreement only as to issues arising under or with respect to Paragraphs 4, 5, and 7 of this License Agreement. Nothing in this License Agreement shall be deemed to create any relationship of agency, partnership, or joint venture between CNRI and Licensee. This License Agreement does not grant permission to use CNRI trademarks or trade name in a trademark sense to endorse or promote products or services of Licensee, or any third party. 8. By clicking on the "ACCEPT" button where indicated, or by copying, installing or otherwise using Python 1.6.1, Licensee agrees to be bound by the terms and conditions of this License Agreement. ACCEPT CWI LICENSE AGREEMENT FOR PYTHON 0.9.0 THROUGH 1.2 -------------------------------------------------- Copyright (c) 1991 - 1995, Stichting Mathematisch Centrum Amsterdam, The Netherlands. All rights reserved. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that copyright notice and this permission notice appear in supporting documentation, and that the name of Stichting Mathematisch Centrum or CWI not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
python-xarray-2026.01.0/licenses/SEABORN_LICENSE0000664000175000017500000000273215136607163021032 0ustar alastairalastairCopyright (c) 2012-2013, Michael L. Waskom All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the {organization} nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. python-xarray-2026.01.0/licenses/SCIKIT_LEARN_LICENSE0000664000175000017500000000277315136607163021655 0ustar alastairalastairBSD 3-Clause License Copyright (c) 2007-2021 The scikit-learn developers. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE python-xarray-2026.01.0/licenses/ANYTREE_LICENSE0000664000175000017500000002613515136607163021053 0ustar alastairalastair Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. 
"License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. 
This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "{}" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright {yyyy} {name of copyright owner} Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
python-xarray-2026.01.0/CORE_TEAM_GUIDE.md0000664000175000017500000004740515136607163017652 0ustar alastairalastair> **_Note:_** This Core Team Member Guide was adapted from the [napari project's Core Developer Guide](https://napari.org/stable/developers/core_dev_guide.html) and the [Pandas maintainers guide](https://pandas.pydata.org/docs/development/maintaining.html). # Core Team Member Guide Welcome, new core team member! We appreciate the quality of your work, and enjoy working with you! Thank you for your numerous contributions to the project so far. By accepting the invitation to become a core team member you are **not required to commit to doing any more work** - xarray is a volunteer project, and we value the contributions you have made already. You can see a list of all the current core team members on our [@pydata/xarray](https://github.com/orgs/pydata/teams/xarray) GitHub team. Once accepted, you should now be on that list too. This document offers guidelines for your new role. ## Tasks Xarray values a wide range of contributions, only some of which involve writing code. As such, we do not currently make a distinction between a "core team member", "core developer", "maintainer", or "triage team member" as some projects do (e.g. [pandas](https://pandas.pydata.org/docs/development/maintaining.html)). That said, if you prefer to refer to your role as one of the other titles above then that is fine by us! Xarray is mostly a volunteer project, so these tasks shouldn’t be read as “expectations”. **There are no strict expectations**, other than to adhere to our [Code of Conduct](https://github.com/pydata/xarray/tree/main/CODE_OF_CONDUCT.md). Rather, the tasks that follow are general descriptions of what it might mean to be a core team member: - Facilitate a welcoming environment for those who file issues, make pull requests, and open discussion topics, - Triage newly filed issues, - Review newly opened pull requests, - Respond to updates on existing issues and pull requests, - Drive discussion and decisions on stalled issues and pull requests, - Provide experience / wisdom on API design questions to ensure consistency and maintainability, - Project organization (run developer meetings, coordinate with sponsors), - Project evangelism (advertise xarray to new users), - Community contact (represent xarray in user communities such as [Pangeo](https://pangeo.io/)), - Key project contact (represent xarray's perspective within key related projects like NumPy, Zarr or Dask), - Project fundraising (help write and administrate grants that will support xarray), - Improve documentation or tutorials (especially on [`tutorial.xarray.dev`](https://tutorial.xarray.dev/)), - Presenting or running tutorials (such as those we have given at the SciPy conference), - Help maintain the [`xarray.dev`](https://xarray.dev/) landing page and website, the [code for which is here](https://github.com/xarray-contrib/xarray.dev), - Write blog posts on the [xarray blog](https://xarray.dev/blog), - Help maintain xarray's various Continuous Integration Workflows, - Help maintain a regular release schedule (we aim for one or more releases per month), - Attend the bi-weekly community meeting ([issue](https://github.com/pydata/xarray/issues/4001)), - Contribute to the xarray codebase. (Matt Rocklin's post on [the role of a maintainer](https://matthewrocklin.com/blog/2019/05/18/maintainer) may be interesting background reading, but should not be taken to strictly apply to the Xarray project.) 
Obviously you are not expected to contribute in all (or even more than one) of these ways! They are listed so as to indicate the many types of work that go into maintaining xarray. It is natural that your available time and enthusiasm for the project will wax and wane - this is fine and expected! It is also common for core team members to have a "niche" - a particular part of the codebase they have specific expertise with, or certain types of task above which they primarily perform. If, however, you feel it is unlikely that you will be able to actively contribute in the foreseeable future (or especially if you won't be available to answer questions about pieces of code that you wrote previously) then you may want to consider letting us know you would rather be listed as an "Emeritus Core Team Member", as this would help us in evaluating the overall health of the project. ## Issue triage One of the main ways you might spend your contribution time is by responding to or triaging new issues. Here’s a typical workflow for triaging a newly opened issue or discussion: 1. **Thank the reporter for opening an issue.** The issue tracker is many people’s first interaction with the xarray project itself, beyond just using the library. It may also be their first open-source contribution of any kind. As such, we want it to be a welcoming, pleasant experience. 2. **Is the necessary information provided?** Ideally reporters would fill out the issue template, but many don’t. If crucial information (like the version of xarray they used) is missing, feel free to ask for that and label the issue with “needs info”. The report should follow the [guidelines for xarray discussions](https://github.com/pydata/xarray/discussions/5404). You may want to link to that if they didn’t follow the template. Make sure that the title accurately reflects the issue. Edit it yourself if it’s not clear. Remember also that issues can be converted to discussions and vice versa if appropriate. 3. **Is this a duplicate issue?** We have many open issues. If a new issue is clearly a duplicate, label the new issue as “duplicate”, and close the issue with a link to the original issue. Make sure to still thank the reporter, and encourage them to chime in on the original issue, and perhaps try to fix it. If the new issue provides relevant information, such as a better or slightly different example, add it to the original issue as a comment or an edit to the original post. 4. **Is the issue minimal and reproducible?** For bug reports, we ask that the reporter provide a minimal reproducible example. See [minimal-bug-reports](https://matthewrocklin.com/blog/work/2018/02/28/minimal-bug-reports) for a good explanation. If the example is not reproducible, or if it’s clearly not minimal, feel free to ask the reporter if they can provide an example or simplify the provided one. Do acknowledge that writing minimal reproducible examples is hard work. If the reporter is struggling, you can try to write one yourself and we’ll edit the original post to include it. If a nice reproducible example has been provided, thank the reporter for that. If a reproducible example can’t be provided, add the “needs mcve” label. If a reproducible example is provided, but you see a simplification, edit the original post with your simpler reproducible example. 5. **Is this a clearly defined feature request?** Generally, xarray prefers to discuss and design new features in issues, before a pull request is made.
Encourage the submitter to include a proposed API for the new feature. Having them write a full docstring is a good way to pin down specifics. We may need a discussion from several xarray maintainers before deciding whether the proposal is in scope for xarray. 6. **Is this a usage question?** We prefer that usage questions are asked on StackOverflow with the [`python-xarray` tag](https://stackoverflow.com/questions/tagged/python-xarray) or as a [GitHub discussion topic](https://github.com/pydata/xarray/discussions). If it’s easy to answer, feel free to link to the relevant documentation section, let them know that in the future this kind of question should be on StackOverflow, and close the issue. 7. **What labels and milestones should I add?** Apply the relevant labels. This is a bit of an art, and comes with experience. Look at similar issues to get a feel for how things are labeled. Labels used for labelling issues that relate to particular features or parts of the codebase normally have the form `topic-`. If the issue is clearly defined and the fix seems relatively straightforward, label the issue as `contrib-good-first-issue`. You can also remove the `needs triage` label that is automatically applied to all newly-opened issues. 8. **Where should the poster look to fix the issue?** If you can, it is very helpful to point to the approximate location in the codebase where a contributor might begin to fix the issue. This helps ease the way in for new contributors to the repository. ## Code review and contributions As a core team member, you are a representative of the project, and trusted to make decisions that will serve the long term interests of all users. You also gain the responsibility of shepherding other contributors through the review process; here are some guidelines for how to do that. ### All contributors are treated the same You should now have gained the ability to merge or approve other contributors' pull requests. Merging contributions is a shared power: only merge contributions you yourself have carefully reviewed, and that are clear improvements for the project. When in doubt, and especially for more complex changes, wait until at least one other core team member has approved. (See [Reviewing](#reviewing) and especially [Merge Only Changes You Understand](#merge-only-changes-you-understand) below.) It should also be considered best practice to leave a reasonable (24hr) time window after approval before merge to ensure that other core team members have a reasonable chance to weigh in. Adding the `plan-to-merge` label notifies developers of the imminent merge. We are also an international community, with contributors from many different time zones, some of whom will only contribute during their working hours, others who might only be able to contribute during nights and weekends. It is important to be respectful of other peoples schedules and working habits, even if it slows the project down slightly - we are in this for the long run. In the same vein you also shouldn't feel pressured to be constantly available or online, and users or contributors who are overly demanding and unreasonable to the point of harassment will be directed to our [Code of Conduct](https://github.com/pydata/xarray/tree/main/CODE_OF_CONDUCT.md). We value sustainable development practices over mad rushes. 
When merging, we automatically use GitHub's [Squash and Merge](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/incorporating-changes-from-a-pull-request/merging-a-pull-request#merging-a-pull-request) to ensure a clean git history. You should also continue to make your own pull requests as before and in accordance with the [general contributing guide](https://docs.xarray.dev/en/stable/contributing.html). These pull requests still require the approval of another core team member before they can be merged. ### How to conduct a good review _Always_ be kind to contributors. Contributors are often doing volunteer work, for which we are tremendously grateful. Provide constructive criticism on ideas and implementations, and remind yourself of how it felt when your own work was being evaluated as a novice. `xarray` strongly values mentorship in code review. New users often need more handholding, having little to no git experience. Repeat yourself liberally, and, if you don’t recognize a contributor, point them to our development guide, or other GitHub workflow tutorials around the web. Do not assume that they know how GitHub works (many don't realize that adding a commit automatically updates a pull request, for example). Gentle, polite, kind encouragement can make the difference between a new core team member and an abandoned pull request. When reviewing, focus on the following: 1. **Usability and generality:** `xarray` is a user-facing package that strives to be accessible to both novice and advanced users, and new features should ultimately be accessible to everyone using the package. `xarray` targets the scientific user community broadly, and core features should be domain-agnostic and general purpose. Custom functionality is meant to be provided through our various types of interoperability. 2. **Performance and benchmarks:** As `xarray` targets scientific applications that often involve large multidimensional datasets, high performance is a key value of `xarray`. While every new feature won't scale equally to all sizes of data, keeping in mind performance and our [benchmarks](https://github.com/pydata/xarray/tree/main/asv_bench) during a review may be important, and you may need to ask for benchmarks to be run and reported or new benchmarks to be added. You can run the CI benchmarking suite on any PR by tagging it with the `run-benchmark` label. 3. **APIs and stability:** Users writing code and downstream developers will make extensive use of our APIs. The foundation of a healthy ecosystem will be a fully capable and stable set of APIs, so as `xarray` matures it will be very important to ensure our APIs are stable. Spending the extra time to consider names of public-facing variables and methods, alongside function signatures, could save us considerable trouble in the future. We do our best to provide [deprecation cycles](https://docs.xarray.dev/en/stable/contributing.html#backwards-compatibility) when making backwards-incompatible changes. 4. **Documentation and tutorials:** All new methods should have appropriate doc strings following [PEP257](https://peps.python.org/pep-0257/) and the [NumPy documentation guide](https://numpy.org/devdocs/dev/howto-docs.html#documentation-style). For any major new features, accompanying changes should be made to our [tutorials](https://tutorial.xarray.dev). These should not only illustrate the new feature, but also explain it. 5. **Implementations and algorithms:** You should understand the code being modified or added before approving it.
(See [Merge Only Changes You Understand](#merge-only-changes-you-understand) below.) Implementations should do what they claim and be simple, readable, and efficient in that order. 6. **Tests:** All contributions _must_ be tested, and each added line of code should be covered by at least one test. Good tests not only execute the code, but explore corner cases. It can be tempting not to review tests, but please do so. Other changes may be _nitpicky_: spelling mistakes, formatting, etc. Do not insist contributors make these changes, but instead you should offer to make these changes by [pushing to their branch](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/committing-changes-to-a-pull-request-branch-created-from-a-fork), or using GitHub’s [suggestion](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/reviewing-changes-in-pull-requests/commenting-on-a-pull-request) [feature](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/reviewing-changes-in-pull-requests/incorporating-feedback-in-your-pull-request), and be prepared to make them yourself if needed. Using the suggestion feature is preferred because it gives the contributor a choice in whether to accept the changes. Unless you know that a contributor is experienced with git, don’t ask for a rebase when merge conflicts arise. Instead, rebase the branch yourself, force-push to their branch, and advise the contributor to force-pull. If the contributor is no longer active, you may take over their branch by submitting a new pull request and closing the original, including a reference to the original pull request. In doing so, ensure you communicate that you are not throwing the contributor's work away! If appropriate it is a good idea to acknowledge other contributions to the pull request using the `Co-authored-by` [syntax](https://docs.github.com/en/pull-requests/committing-changes-to-your-project/creating-and-editing-commits/creating-a-commit-with-multiple-authors) in the commit message. ### Merge only changes you understand _Long-term maintainability_ is an important concern. Code doesn't merely have to _work_, but should be _understood_ by multiple core developers. Changes will have to be made in the future, and the original contributor may have moved on. Therefore, _do not merge a code change unless you understand it_. Ask for help freely: we can consult community members, or even external developers, for added insight where needed, and see this as a great learning opportunity. While we collectively "own" any patches (and bugs!) that become part of the code base, you are vouching for changes you merge. Please take that responsibility seriously. Feel free to ping other active maintainers with any questions you may have. ## Further resources As a core member, you should be familiar with community and developer resources such as: - Our [contributor guide](https://docs.xarray.dev/en/stable/contributing.html). - Our [code of conduct](https://github.com/pydata/xarray/tree/main/CODE_OF_CONDUCT.md). - Our [philosophy and development roadmap](https://docs.xarray.dev/en/stable/roadmap.html). - [PEP8](https://peps.python.org/pep-0008/) for Python style. - [PEP257](https://peps.python.org/pep-0257/) and the [NumPy documentation guide](https://numpy.org/devdocs/dev/howto-docs.html#documentation-style) for docstring conventions. - [`pre-commit`](https://pre-commit.com) hooks for autoformatting. 
- [`ruff`](https://github.com/astral-sh/ruff) autoformatting and linting. - [python-xarray](https://stackoverflow.com/questions/tagged/python-xarray) on Stack Overflow. - [@xarray_dev](https://x.com/xarray_dev) on X. - [xarray-dev](https://discord.gg/bsSGdwBn) discord community (normally only used for remote synchronous chat during sprints). You are not required to monitor any of the social resources. Where possible we prefer to point people towards asynchronous forms of communication like github issues instead of realtime chat options as they are far easier for a global community to consume and refer back to. We hold a [bi-weekly developers meeting](https://docs.xarray.dev/en/stable/developers-meeting.html) via video call. This is a great place to bring up any questions you have, raise visibility of an issue and/or gather more perspectives. Attendance is absolutely optional, and we keep the meeting to 30 minutes in respect of your valuable time. This meeting is public, so we occasionally have non-core team members join us. We also have a private mailing list for core team members `xarray-core-team@googlegroups.com` which is sparingly used for discussions that are required to be private, such as nominating new core members and discussing financial issues. ## Inviting new core members Any core member may nominate other contributors to join the core team. While there is no hard-and-fast rule about who can be nominated, ideally, they should have: been part of the project for at least two months, contributed significant changes of their own, contributed to the discussion and review of others' work, and collaborated in a way befitting our community values. **We strongly encourage nominating anyone who has made significant non-code contributions to the Xarray community in any way**. After nomination voting will happen on a private mailing list. While it is expected that most votes will be unanimous, a two-thirds majority of the cast votes is enough. Core team members can choose to become emeritus core team members and suspend their approval and voting rights until they become active again. ## Contribute to this guide (!) This guide reflects the experience of the current core team members. We may well have missed things that, by now, have become second nature—things that you, as a new team member, will spot more easily. Please ask the other core team members if you have any questions, and submit a pull request with insights gained. ## Conclusion We are excited to have you on board! We look forward to your contributions to the code base and the community. Thank you in advance! python-xarray-2026.01.0/design_notes/0000775000175000017500000000000015136607163017524 5ustar alastairalastairpython-xarray-2026.01.0/design_notes/flexible_indexes_notes.md0000664000175000017500000006672115136607163024603 0ustar alastairalastair# Proposal: Xarray flexible indexes refactoring Current status: https://github.com/pydata/xarray/projects/1 ## 1. Data Model Indexes are used in Xarray to extract data from Xarray objects using coordinate labels instead of using integer array indices. Although the indexes used in an Xarray object can be accessed (or built on-the-fly) via public methods like `to_index()` or properties like `indexes`, those are mainly used internally. The goal of this project is to make those indexes 1st-class citizens of Xarray's data model. 
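For context, here is a minimal sketch of how indexes are accessed and used for label-based lookups today; this uses only existing, public xarray API (the values and printed comments are illustrative):

```python
import xarray as xr

da = xr.DataArray([10.0, 20.0, 30.0], dims="x", coords={"x": [1, 2, 3]})

# a pandas.Index is currently built automatically for the 1-D "x" coordinate
print(da.indexes["x"])     # -> a pandas.Index with values [1, 2, 3], named "x"
print(da.x.to_index())     # -> the same underlying pandas.Index

# label-based selection goes through that index rather than integer positions
print(da.sel(x=2).item())  # -> 20.0
```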
As such, indexes should clearly be separated from Xarray coordinates with the following relationships: - Index -> Coordinate: one-to-many - Coordinate -> Index: one-to-zero-or-one An index may be built from one or more coordinates. However, each coordinate must relate to one index at most. Additionally, a coordinate may not be tied to any index. The order in which multiple coordinates relate to an index may matter. For example, Scikit-Learn's [`BallTree`](https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.BallTree.html#sklearn.neighbors.BallTree) index with the Haversine metric requires providing latitude and longitude values in that specific order. As another example, the order in which levels are defined in a `pandas.MultiIndex` may affect its lexsort depth (see [MultiIndex sorting](https://pandas.pydata.org/pandas-docs/stable/user_guide/advanced.html#sorting-a-multiindex)). Xarray's current data model has the same index-coordinate relationships as stated above, although this assumes that multi-index "virtual" coordinates are counted as coordinates (we can consider them as such, with some constraints). More importantly, this refactoring would turn the current one-to-one relationship between a dimension and an index into a many-to-many relationship, which would overcome some current limitations. For example, we might want to select data along a dimension which has several coordinates: ```python >>> da array([...]) Coordinates: * drainage_area (river_profile) float64 ... * chi (river_profile) float64 ... ``` In this example, `chi` is a transformation of the `drainage_area` variable that is often used in geomorphology. We'd like to select data along the river profile using either `da.sel(drainage_area=...)` or `da.sel(chi=...)` but that's not currently possible. We could rename the `river_profile` dimension to one of the coordinates, then use `sel` with that coordinate, then call `swap_dims` if we want to use `sel` with the other coordinate, but that's not ideal. We could also build a `pandas.MultiIndex` from `drainage_area` and `chi`, but that's not optimal (there's no hierarchical relationship between these two coordinates). Let's take another example: ```python >>> da array([[...], [...]]) Coordinates: * lon (x, y) float64 ... * lat (x, y) float64 ... * x (x) float64 ... * y (y) float64 ... ``` This refactoring would allow creating a geographic index for `lat` and `lon` and two simple indexes for `x` and `y` such that we could select data with either `da.sel(lon=..., lat=...)` or `da.sel(x=..., y=...)`. Refactoring the dimension -> index one-to-one relationship into many-to-many would also introduce some issues that we'll need to address, e.g., ambiguous cases like `da.sel(chi=..., drainage_area=...)` where multiple indexes may potentially return inconsistent positional indexers along a dimension. ## 2. Proposed API changes ### 2.1 Index wrapper classes Every index that is used to select data from Xarray objects should inherit from a base class, e.g., `XarrayIndex`, that provides some common API. `XarrayIndex` subclasses would generally consist of thin wrappers around existing index classes such as `pandas.Index`, `pandas.MultiIndex`, `scipy.spatial.KDTree`, etc. There is a variety of features that an xarray index wrapper may or may not support: - 1-dimensional vs. 2-dimensional vs.
n-dimensional coordinate (e.g., `pandas.Index` only supports 1-dimensional coordinates while a geographic index could be built from n-dimensional coordinates) - built from a single vs multiple coordinate(s) (e.g., `pandas.Index` is built from one coordinate, `pandas.MultiIndex` may be built from an arbitrary number of coordinates and a geographic index would typically require two latitude/longitude coordinates) - in-memory vs. out-of-core (dask) index data/coordinates (vs. other array backends) - range-based vs. point-wise selection - exact vs. inexact lookups Whether or not a `XarrayIndex` subclass supports each of the features listed above should be either declared explicitly via a common API or left to the implementation. An `XarrayIndex` subclass may encapsulate more than one underlying object used to perform the actual indexing. Such "meta" index would typically support a range of features among those mentioned above and would automatically select the optimal index object for a given indexing operation. An `XarrayIndex` subclass must/should/may implement the following properties/methods: - a `from_coords` class method that creates a new index wrapper instance from one or more Dataset/DataArray coordinates (+ some options) - a `query` method that takes label-based indexers as argument (+ some options) and that returns the corresponding position-based indexers - an `indexes` property to access the underlying index object(s) wrapped by the `XarrayIndex` subclass - a `data` property to access index's data and map it to coordinate data (see [Section 4](#4-indexvariable)) - a `__getitem__()` implementation to propagate the index through DataArray/Dataset indexing operations - `equals()`, `union()` and `intersection()` methods for data alignment (see [Section 2.6](#26-using-indexes-for-data-alignment)) - Xarray coordinate getters (see [Section 2.2.4](#224-implicit-coordinates)) - a method that may return a new index and that will be called when one of the corresponding coordinates is dropped from the Dataset/DataArray (multi-coordinate indexes) - `encode()`/`decode()` methods that would allow storage-agnostic serialization and fast-path reconstruction of the underlying index object(s) (see [Section 2.8](#28-index-encoding)) - one or more "non-standard" methods or properties that could be leveraged in Xarray 3rd-party extensions like Dataset/DataArray accessors (see [Section 2.7](#27-using-indexes-for-other-purposes)) The `XarrayIndex` API has still to be defined in detail. Xarray should provide a minimal set of built-in index wrappers (this could be reduced to the indexes currently supported in Xarray, i.e., `pandas.Index` and `pandas.MultiIndex`). Other index wrappers may be implemented in 3rd-party libraries (recommended). The `XarrayIndex` base class should be part of Xarray's public API. #### 2.1.1 Index discoverability For better discoverability of Xarray-compatible indexes, Xarray could provide some mechanism to register new index wrappers, e.g., something like [xoak's `IndexRegistry`](https://xoak.readthedocs.io/en/latest/_api_generated/xoak.IndexRegistry.html#xoak.IndexRegistry) or [numcodec's registry](https://numcodecs.readthedocs.io/en/stable/registry.html). Additionally (or alternatively), new index wrappers may be registered via entry points as is already the case for storage backends and maybe other backends (plotting) in the future. Registering new indexes either via a custom registry or via entry points should be optional. 
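To make the wrapper API listed above more concrete, here is a rough, hypothetical sketch of a thin wrapper around a single-coordinate `pandas.Index`. The `XarrayIndex` base class does not exist yet; `from_coords`, `query`, and `equals` follow the names proposed above, and everything else (class name, argument shapes) is purely illustrative:

```python
import pandas as pd


class PandasIndexWrapper:  # would subclass the proposed XarrayIndex base class
    """Hypothetical thin wrapper around a single 1-D coordinate's pandas.Index."""

    def __init__(self, index: pd.Index, coord_name):
        self._index = index
        self._coord_name = coord_name

    @classmethod
    def from_coords(cls, coords, **options):
        # assume a mapping holding exactly one 1-dimensional coordinate
        ((name, values),) = coords.items()
        return cls(pd.Index(values), name)

    def query(self, labels, **options):
        # map a label-based indexer to a positional (integer) indexer
        label = labels[self._coord_name]
        return {self._coord_name: self._index.get_loc(label)}

    def equals(self, other):
        # would be used for data alignment (see Section 2.6)
        return self._index.equals(other._index)
```

A registry or entry-point mechanism as discussed above would then only need to map a short name to such a class.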
Xarray should also allow providing `XarrayIndex` subclasses in its API (Dataset/DataArray constructors, `set_index()`, etc.). ### 2.2 Explicit vs. implicit index creation #### 2.2.1 Dataset/DataArray's `indexes` constructor argument The new `indexes` argument of Dataset/DataArray constructors may be used to specify which kind of index to bind to which coordinate(s). It would consist of a mapping where, for each item, the key is one coordinate name (or a sequence of coordinate names) that must be given in `coords` and the value is the type of the index to build from this (these) coordinate(s): ```python >>> da = xr.DataArray( ... data=[[275.2, 273.5], [270.8, 278.6]], ... dims=("x", "y"), ... coords={ ... "lat": (("x", "y"), [[45.6, 46.5], [50.2, 51.6]]), ... "lon": (("x", "y"), [[5.7, 10.5], [6.2, 12.8]]), ... }, ... indexes={("lat", "lon"): SpatialIndex}, ... ) array([[275.2, 273.5], [270.8, 278.6]]) Coordinates: * lat (x, y) float64 45.6 46.5 50.2 51.6 * lon (x, y) float64 5.7 10.5 6.2 12.8 ``` More formally, `indexes` would accept `Mapping[CoordinateNames, IndexSpec]` where: - `CoordinateNames = Union[CoordinateName, Tuple[CoordinateName, ...]]` and `CoordinateName = Hashable` - `IndexSpec = Union[Type[XarrayIndex], Tuple[Type[XarrayIndex], Dict[str, Any]], XarrayIndex]`, so that index instances or index classes + build options could also be passed. Currently index objects like `pandas.MultiIndex` can be passed directly to `coords`, which in this specific case results in the implicit creation of virtual coordinates. With the new `indexes` argument this behavior may become even more confusing than it currently is. For the sake of clarity, it would be appropriate to eventually drop support for this specific behavior and treat any mapping value given in `coords` as an array that can be wrapped into an Xarray variable, i.e., in the case of a multi-index: ```python >>> xr.DataArray([1.0, 2.0], dims="x", coords={"x": midx}) array([1., 2.]) Coordinates: x (x) object ('a', 0) ('b', 1) ``` A possible, more explicit solution to reuse a `pandas.MultiIndex` in a DataArray/Dataset with levels exposed as coordinates is proposed in [Section 2.2.4](#224-implicit-coordinates). #### 2.2.2 Dataset/DataArray's `set_index` method New indexes may also be built from existing sets of coordinates or variables in a Dataset/DataArray using the `.set_index()` method. The [current signature](https://docs.xarray.dev/en/stable/generated/xarray.DataArray.set_index.html#xarray.DataArray.set_index) of `.set_index()` is tailored to `pandas.MultiIndex` and tied to the concept of a dimension-index. It is therefore hardly reusable as-is in the context of flexible indexes proposed here. The new signature may look like one of these: - A. `.set_index(coords: CoordinateNames, index: Union[XarrayIndex, Type[XarrayIndex]], **index_kwargs)`: one index is set at a time, index construction options may be passed as keyword arguments - B. `.set_index(indexes: Mapping[CoordinateNames, Union[Type[XarrayIndex], Tuple[Type[XarrayIndex], Dict[str, Any]]]])`: multiple indexes may be set at a time from a mapping of coordinate or variable name(s) as keys and `XarrayIndex` subclasses (maybe with a dict of build options) as values. If variable names are given as keys, they will be promoted to coordinates. Option A looks simple and elegant but significantly departs from the current signature.
Option B is more consistent with the Dataset/DataArray constructor signature proposed in the previous section and would be easier to adopt in parallel with the current signature that we could still support through some deprecation cycle. The `append` parameter of the current `.set_index()` is specific to `pandas.MultiIndex`. With option B we could still support it, although we might want to either drop it or move it to the index construction options in the future. #### 2.2.3 Implicit default indexes In general explicit index creation should be preferred over implicit index creation. However, in a majority of cases basic `pandas.Index` objects could be built and used as indexes for 1-dimensional coordinates. For convenience, Xarray should automatically build such indexes for the coordinates where no index has been explicitly assigned in the Dataset/DataArray constructor or when indexes have been reset / dropped. For which coordinates? - A. only 1D coordinates with a name matching their dimension name - B. all 1D coordinates When to create it? - A. each time a new Dataset/DataArray is created - B. only when we need it (i.e., when calling `.sel()` or `indexes`) Option A for both questions is what Xarray currently does, and may be the best choice considering that indexes could possibly be invalidated by coordinate mutation. Besides `pandas.Index`, other indexes currently supported in Xarray like `CFTimeIndex` could be built depending on the coordinate data type. #### 2.2.4 Implicit coordinates As with indexes, explicit coordinate creation should be preferred over implicit coordinate creation. However, there may be some situations where we would like to keep creating coordinates implicitly for backwards compatibility. For example, it is currently possible to pass a `pandas.MultiIndex` object as a coordinate to the Dataset/DataArray constructor: ```python >>> midx = pd.MultiIndex.from_arrays([["a", "b"], [0, 1]], names=["lvl1", "lvl2"]) >>> da = xr.DataArray([1.0, 2.0], dims="x", coords={"x": midx}) >>> da array([1., 2.]) Coordinates: * x (x) MultiIndex - lvl1 (x) object 'a' 'b' - lvl2 (x) int64 0 1 ``` In that case, virtual coordinates are created for each level of the multi-index. After the index refactoring, these coordinates would become real coordinates bound to the multi-index. In the example above a coordinate is also created for the `x` dimension: ```python >>> da.x array([('a', 0), ('b', 1)], dtype=object) Coordinates: * x (x) MultiIndex - lvl1 (x) object 'a' 'b' - lvl2 (x) int64 0 1 ``` With the new proposed data model, this wouldn't be a requirement anymore: there is no concept of a dimension-index. However, some users might still rely on the `x` coordinate so we could still (temporarily) support it for backwards compatibility. Besides `pandas.MultiIndex`, there may be other situations where we would like to reuse an existing index in a new Dataset/DataArray (e.g., when the index is very expensive to build), and which might require implicit creation of one or more coordinates. The example given here is quite confusing, though: this is not an easily predictable behavior. We could entirely avoid the implicit creation of coordinates, e.g., using a helper function that generates coordinate + index dictionaries that we could then pass directly to the DataArray/Dataset constructor: ```python >>> coords_dict, index_dict = create_coords_from_index( ... midx, dims="x", include_dim_coord=True ...
) >>> coords_dict {'x': array([('a', 0), ('b', 1)], dtype=object), 'lvl1': array(['a', 'b'], dtype=object), 'lvl2': array([0, 1])} >>> index_dict {('lvl1', 'lvl2'): midx} >>> xr.DataArray([1.0, 2.0], dims="x", coords=coords_dict, indexes=index_dict) array([1., 2.]) Coordinates: x (x) object ('a', 0) ('b', 1) * lvl1 (x) object 'a' 'b' * lvl2 (x) int64 0 1 ``` ### 2.2.5 Immutable indexes Some underlying indexes might be mutable (e.g., a tree-based index structure that allows dynamic addition of data points) while other indexes like `pandas.Index` aren't. To keep things simple, it is probably better to continue considering all indexes in Xarray as immutable (as well as their corresponding coordinates, see [Section 2.4.1](#241-mutable-coordinates)). ### 2.3 Index access #### 2.3.1 Dataset/DataArray's `indexes` property The `indexes` property would allow easy access to all the indexes used in a Dataset/DataArray. It would return a `Dict[CoordinateName, XarrayIndex]` for easy index lookup from coordinate name. #### 2.3.2 Additional Dataset/DataArray properties or methods In some cases the format returned by the `indexes` property would not be the best (e.g, it may return duplicate index instances as values). For convenience, we could add one more property / method to get the indexes in the desired format if needed. ### 2.4 Propagate indexes through operations #### 2.4.1 Mutable coordinates Dataset/DataArray coordinates may be replaced (`__setitem__`) or dropped (`__delitem__`) in-place, which may invalidate some of the indexes. A drastic though probably reasonable solution in this case would be to simply drop all indexes bound to those replaced/dropped coordinates. For the case where a 1D basic coordinate that corresponds to a dimension is added/replaced, we could automatically generate a new index (see [Section 2.2.4](#224-implicit-indexes)). We must also ensure that coordinates having a bound index are immutable, e.g., still wrap them into `IndexVariable` objects (even though the `IndexVariable` class might change substantially after this refactoring). #### 2.4.2 New Dataset/DataArray with updated coordinates Xarray provides a variety of Dataset/DataArray operations affecting the coordinates and where simply dropping the index(es) is not desirable. For example: - multi-coordinate indexes could be reduced to single coordinate indexes - like in `.reset_index()` or `.sel()` applied on a subset of the levels of a `pandas.MultiIndex` and that internally call `MultiIndex.droplevel` and `MultiIndex.get_loc_level`, respectively - indexes may be indexed themselves - like `pandas.Index` implements `__getitem__()` - when indexing their corresponding coordinate(s), e.g., via `.sel()` or `.isel()`, those indexes should be indexed too - this might not be supported by all Xarray indexes, though - some indexes that can't be indexed could still be automatically (re)built in the new Dataset/DataArray - like for example building a new `KDTree` index from the selection of a subset of an initial collection of data points - this is not always desirable, though, as indexes may be expensive to build - a more reasonable option would be to explicitly re-build the index, e.g., using `.set_index()` - Dataset/DataArray operations involving alignment (see [Section 2.6](#26-using-indexes-for-data-alignment)) ### 2.5 Using indexes for data selection One main use of indexes is label-based data selection using the DataArray/Dataset `.sel()` method. 
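For reference, a small self-contained example of what such label-based selection looks like today with the default `pandas.Index`-backed indexes (options like `method` are `pandas.Index`-specific, as discussed below):

```python
import numpy as np
import xarray as xr

da = xr.DataArray(
    np.arange(4.0), dims="x", coords={"x": [10.0, 20.0, 30.0, 40.0]}
)

da.sel(x=20.0)                    # exact label lookup via the "x" index
da.sel(x=25.0, method="nearest")  # inexact lookup (pandas.Index-specific option)
da.sel(x=slice(15.0, 35.0))       # label-based range selection
```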
This refactoring would introduce a number of API changes that could go through some deprecation cycles: - the keys of the mapping given to `indexers` (or the names of `indexer_kwargs`) would not correspond to only dimension names but could be the name of any coordinate that has an index - for a `pandas.MultiIndex`, if no dimension-coordinate is created by default (see [Section 2.2.4](#224-implicit-coordinates)), providing dict-like objects as indexers should be deprecated - there should be the possibility to provide additional options to the indexes that support specific selection features (e.g., Scikit-learn's `BallTree`'s `dualtree` query option to boost performance). - the best API is not trivial here, since `.sel()` may accept indexers passed to several indexes (which should still be supported for convenience and compatibility), and indexes may have similar options with different semantics - we could introduce a new parameter like `index_options: Dict[XarrayIndex, Dict[str, Any]]` to pass options grouped by index - the `method` and `tolerance` parameters are specific to `pandas.Index` and would not be supported by all indexes: it is probably best to eventually pass those arguments as `index_options` - the list of valid indexer types might be extended in order to support new ways of indexing data, e.g., unordered selection of all points within a given range - alternatively, we could reuse existing indexer types with different semantics depending on the index, e.g., using `slice(min, max, None)` for unordered range selection With the new data model proposed here, an ambiguous situation may occur when indexers are given for several coordinates that share the same dimension but not the same index, e.g., from the example in [Section 1](#1-data-model): ```python da.sel(x=..., y=..., lat=..., lon=...) ``` The easiest solution for this situation would be to raise an error. Alternatively, we could introduce a new parameter to specify how to combine the resulting integer indexers (i.e., union vs intersection), although this could already be achieved by chaining `.sel()` calls or combining `.sel()` with `.merge()` (it may or may not be straightforward). ### 2.6 Using indexes for data alignment Another main use of indexes is data alignment in various operations. Some considerations regarding alignment and flexible indexes: - support for alignment should probably be optional for an `XarrayIndex` subclass. - like `pandas.Index`, the index wrapper classes that support it should implement `.equals()`, `.union()` and/or `.intersection()` - support might be partial if that makes sense (outer, inner, left, right, exact...). - index equality might involve more than just the labels: for example a spatial index might be used to check if the coordinate system (CRS) is identical for two sets of coordinates - some indexes might implement inexact alignment, like in [#4489](https://github.com/pydata/xarray/pull/4489) or a `KDTree` index that selects nearest-neighbors within a given tolerance - alignment may be "multi-dimensional", i.e., the `KDTree` example above vs.
dimensions aligned independently of each other - we need to decide what to do when one dimension has more than one index that supports alignment - we should probably raise unless the user explicitly specify which index to use for the alignment - we need to decide what to do when one dimension has one or more index(es) but none support alignment - either we raise or we fail back (silently) to alignment based on dimension size - for inexact alignment, the tolerance threshold might be given when building the index and/or when performing the alignment - are there cases where we want a specific index to perform alignment and another index to perform selection? - it would be tricky to support that unless we allow multiple indexes per coordinate - alternatively, underlying indexes could be picked internally in a "meta" index for one operation or another, although the risk is to eventually have to deal with an explosion of index wrapper classes with different meta indexes for each combination that we'd like to use. ### 2.7 Using indexes for other purposes Xarray also provides a number of Dataset/DataArray methods where indexes are used in various ways, e.g., - `resample` (`CFTimeIndex` and a `DatetimeIntervalIndex`) - `DatetimeAccessor` & `TimedeltaAccessor` properties (`CFTimeIndex` and a `DatetimeIntervalIndex`) - `interp` & `interpolate_na`, - with `IntervalIndex`, these become regridding operations. Should we support hooks for these operations? - `differentiate`, `integrate`, `polyfit` - raise an error if not a "simple" 1D index? - `pad` - `coarsen` has to make choices about output index labels. - `sortby` - `stack`/`unstack` - plotting - `plot.pcolormesh` "infers" interval breaks along axes, which are really inferred `bounds` for the appropriate indexes. - `plot.step` again uses `bounds`. In fact, we may even want `step` to be the default 1D plotting function if the axis has `bounds` attached. It would be reasonable to first restrict those methods to the indexes that are currently available in Xarray, and maybe extend the `XarrayIndex` API later upon request when the opportunity arises. Conversely, nothing should prevent implementing "non-standard" API in 3rd-party `XarrayIndex` subclasses that could be used in DataArray/Dataset extensions (accessors). For example, we might want to reuse a `KDTree` index to compute k-nearest neighbors (returning a DataArray/Dataset with a new dimension) and/or the distances to the nearest neighbors (returning a DataArray/Dataset with a new data variable). ### 2.8 Index encoding Indexes don't need to be directly serializable since we could (re)build them from their corresponding coordinate(s). However, it would be useful if some indexes could be encoded/decoded to/from a set of arrays that would allow optimized reconstruction and/or storage, e.g., - `pandas.MultiIndex` -> `index.levels` and `index.codes` - Scikit-learn's `KDTree` and `BallTree` that use an array-based representation of an immutable tree structure ## 3. 
Index representation in DataArray/Dataset's `repr` Since indexes would become first-class citizens of Xarray's data model, they deserve their own section in Dataset/DataArray `repr` that could look like: ``` array([[5.4, 7.8], [6.2, 4.7]]) Coordinates: * lon (x, y) float64 10.2 15.2 12.6 17.6 * lat (x, y) float64 40.2 45.6 42.2 47.6 * x (x) float64 200.0 400.0 * y (y) float64 800.0 1e+03 Indexes: lat, lon x y ``` To keep the `repr` compact, we could: - consolidate entries that map to the same index object, and have a short inline repr for `XarrayIndex` objects - collapse the index section by default in the HTML `repr` - maybe omit all trivial indexes for 1D coordinates that match the dimension name ## 4. `IndexVariable` `IndexVariable` is currently used to wrap a `pandas.Index` as a variable, which would not be relevant after this refactoring since it is aimed at decoupling indexes and variables. We'll probably need to move elsewhere some of the features implemented in `IndexVariable` to: - ensure that all coordinates with an index are immutable (see [Section 2.4.1](#241-mutable-coordinates)) - do not set values directly, do not (re)chunk (even though it may be already chunked), do not load, do not convert to sparse/dense, etc. - directly reuse the index's data when that's possible - in the case of a `pandas.Index`, it makes little sense to have duplicate data (e.g., as a NumPy array) for its corresponding coordinate - convert a variable into a `pandas.Index` using `.to_index()` (for backwards compatibility). Other `IndexVariable` API like `level_names` and `get_level_variable()` would not be useful anymore: it is specific to how we currently deal with `pandas.MultiIndex` and virtual "level" coordinates in Xarray. ## 5. Chunked coordinates and/or indexers We could take the opportunity of this refactoring to better leverage chunked coordinates (and/or chunked indexers for data selection). There are two ways to enable it: A. support for chunked coordinates is left to the index B. support for chunked coordinates is index agnostic and is implemented in Xarray As an example for B, [xoak](https://github.com/ESM-VFC/xoak) supports building an index for each chunk, which is coupled with a two-step data selection process (cross-index queries + brute force "reduction" look-up). There is an example [here](https://xoak.readthedocs.io/en/latest/examples/dask_support.html). It may be tedious to generalize this to other kinds of operations, though. Xoak's Dask support is rather experimental, not super stable (it's quite hard to control index replication and data transfer between Dask workers with the default settings), and depends on whether indexes are thread-safe and/or serializable. Option A may be more reasonable for now. ## 6. Coordinate duck arrays Another opportunity offered by this refactoring is support for duck arrays as index coordinates. Decoupling coordinates and indexes would _de-facto_ enable it. However, support for duck arrays in index-based operations such as data selection or alignment would probably require some protocol extension, e.g., ```python class MyDuckArray: ... def _sel_(self, indexer): """Prepare the label-based indexer to conform to this coordinate array.""" ... return new_indexer ... ``` For example, a `pint` array would implement `_sel_` to perform indexer unit conversion or raise, warn, or just pass the indexer through if it has no units. 
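As a concrete illustration of the protocol sketched above, here is a minimal, hypothetical sketch of a units-aware duck array implementing `_sel_`. The class name, the toy unit registry, and the `(value, units)` indexer format are all assumptions made for this example; a real implementation would delegate unit handling to `pint` rather than reinventing it.

```python
import numpy as np

# Toy unit registry; a real implementation would delegate to pint.
_TO_METRES = {"m": 1.0, "km": 1000.0}


class UnitCoordArray:
    """Hypothetical units-aware duck array implementing the ``_sel_`` hook sketched above."""

    def __init__(self, values, units):
        self.values = np.asarray(values)
        self.units = units

    def _sel_(self, indexer):
        # Convert a (value, units) label to this coordinate's units before selection.
        if isinstance(indexer, tuple) and len(indexer) == 2:
            value, units = indexer
            return value * _TO_METRES[units] / _TO_METRES[self.units]
        # No units attached: pass the indexer through (could warn or raise instead).
        return indexer


coord = UnitCoordArray([0.0, 500.0, 1000.0, 1500.0], units="m")
print(coord._sel_((1, "km")))  # -> 1000.0, ready for label-based lookup in metres
```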
python-xarray-2026.01.0/design_notes/named_array_design_doc.md0000664000175000017500000006676115136607163024526 0ustar alastairalastair# named-array Design Document ## Abstract Despite the wealth of scientific libraries in the Python ecosystem, there is a gap for a lightweight, efficient array structure with named dimensions that can provide convenient broadcasting and indexing. Existing solutions like Xarray's Variable, [Pytorch Named Tensor](https://github.com/pytorch/pytorch/issues/60832), [Levanter](https://crfm.stanford.edu/2023/06/16/levanter-1_0-release.html), and [Larray](https://larray.readthedocs.io/en/stable/tutorial/getting_started.html) have their own strengths and weaknesses. Xarray's Variable is an efficient data structure, but it depends on the relatively heavy-weight library Pandas, which limits its use in other projects. Pytorch Named Tensor offers named dimensions, but it lacks support for many operations, making it less user-friendly. Levanter is a powerful tool with a named tensor module (Haliax) that makes deep learning code easier to read, understand, and write, but it is not as lightweight or generic as desired. Larry offers labeled N-dimensional arrays, but it may not provide the level of seamless interoperability with other scientific Python libraries that some users need. named-array aims to solve these issues by exposing the core functionality of Xarray's Variable class as a standalone package. ## Motivation and Scope The Python ecosystem boasts a wealth of scientific libraries that enable efficient computations on large, multi-dimensional arrays. Libraries like PyTorch, Xarray, and NumPy have revolutionized scientific computing by offering robust data structures for array manipulations. Despite this wealth of tools, a gap exists in the Python landscape for a lightweight, efficient array structure with named dimensions that can provide convenient broadcasting and indexing. Xarray internally maintains a data structure that meets this need, referred to as [`xarray.Variable`](https://docs.xarray.dev/en/latest/generated/xarray.Variable.html) . However, Xarray's dependency on Pandas, a relatively heavy-weight library, restricts other projects from leveraging this efficient data structure (, , ). We propose the creation of a standalone Python package, "named-array". This package is envisioned to be a version of the `xarray.Variable` data structure, cleanly separated from the heavier dependencies of Xarray. named-array will provide a lightweight, user-friendly array-like data structure with named dimensions, facilitating convenient indexing and broadcasting. The package will use existing scientific Python community standards such as established array protocols and the new [Python array API standard](https://data-apis.org/array-api/latest), allowing users to wrap multiple duck-array objects, including, but not limited to, NumPy, Dask, Sparse, Pint, CuPy, and Pytorch. The development of named-array is projected to meet a key community need and expected to broaden Xarray's user base. By making the core `xarray.Variable` more accessible, we anticipate an increase in contributors and a reduction in the developer burden on current Xarray maintainers. ### Goals 1. **Simple and minimal**: named-array will expose Xarray's [Variable class](https://docs.xarray.dev/en/stable/internals/variable-objects.html) as a standalone object (`NamedArray`) with named axes (dimensions) and arbitrary metadata (attributes) but without coordinate labels. 
This will make it a lightweight, efficient array data structure that allows convenient broadcasting and indexing. 2. **Interoperability**: named-array will follow established scientific Python community standards and in doing so, will allow it to wrap multiple duck-array objects, including but not limited to, NumPy, Dask, Sparse, Pint, CuPy, and Pytorch. 3. **Community Engagement**: By making the core `xarray.Variable` more accessible, we open the door to increased adoption of this fundamental data structure. As such, we hope to see an increase in contributors and reduction in the developer burden on current Xarray maintainers. ### Non-Goals 1. **Extensive Data Analysis**: named-array will not provide extensive data analysis features like statistical functions, data cleaning, or visualization. Its primary focus is on providing a data structure that allows users to use dimension names for descriptive array manipulations. 2. **Support for I/O**: named-array will not bundle file reading functions. Instead users will be expected to handle I/O and then wrap those arrays with the new named-array data structure. ## Backward Compatibility The creation of named-array is intended to separate the `xarray.Variable` from Xarray into a standalone package. This allows it to be used independently, without the need for Xarray's dependencies, like Pandas. This separation has implications for backward compatibility. Since the new named-array is envisioned to contain the core features of Xarray's variable, existing code using Variable from Xarray should be able to switch to named-array with minimal changes. However, there are several potential issues related to backward compatibility: - **API Changes**: as the Variable is decoupled from Xarray and moved into named-array, some changes to the API may be necessary. These changes might include differences in function signature, etc. These changes could break existing code that relies on the current API and associated utility functions (e.g. `as_variable()`). The `xarray.Variable` object will subclass `NamedArray`, and provide the existing interface for compatibility. ## Detailed Description named-array aims to provide a lightweight, efficient array structure with named dimensions, or axes, that enables convenient broadcasting and indexing. The primary component of named-array is a standalone version of the xarray.Variable data structure, which was previously a part of the Xarray library. The xarray.Variable data structure in named-array will maintain the core features of its counterpart in Xarray, including: - **Named Axes (Dimensions)**: Each axis of the array can be given a name, providing a descriptive and intuitive way to reference the dimensions of the array. - **Arbitrary Metadata (Attributes)**: named-array will support the attachment of arbitrary metadata to arrays as a dict, providing a mechanism to store additional information about the data that the array represents. - **Convenient Broadcasting and Indexing**: With named dimensions, broadcasting and indexing operations become more intuitive and less error-prone. The named-array package is designed to be interoperable with other scientific Python libraries. It will follow established scientific Python community standards and use standard array protocols, as well as the new data-apis standard. This allows named-array to wrap multiple duck-array objects, including, but not limited to, NumPy, Dask, Sparse, Pint, CuPy, and Pytorch. 
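To make the proposed data structure more concrete, here is a minimal usage sketch. The import path, constructor signature, and property names are assumptions based on the description above (mirroring today's `xarray.Variable`), not a finalized API.

```python
import numpy as np

# Hypothetical import path for the proposed standalone class.
from xarray.namedarray.core import NamedArray

# Wrap a bare NumPy array with named dimensions and arbitrary metadata,
# but no coordinate labels.
arr = NamedArray(
    dims=("time", "space"),
    data=np.arange(12.0).reshape(3, 4),
    attrs={"units": "m s-1"},
)

print(arr.dims)   # ('time', 'space')
print(arr.shape)  # (3, 4)
print(arr.attrs)  # {'units': 'm s-1'}
```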
## Implementation - **Decoupling**: making `variable.py` agnostic to Xarray internals by decoupling it from the rest of the library. This will make the code more modular and easier to maintain. However, this will also make the code more complex, as we will need to define a clear interface for how the functionality in `variable.py` interacts with the rest of the library, particularly the ExplicitlyIndexed subclasses used to enable lazy indexing of data on disk. - **Move Xarray's internal lazy indexing classes to follow standard Array Protocols**: moving the lazy indexing classes like `ExplicitlyIndexed` to use standard array protocols will be a key step in decoupling. It will also potentially improve interoperability with other libraries that use these protocols, and prepare these classes [for eventual movement out](https://github.com/pydata/xarray/issues/5081) of the Xarray code base. However, this will also require significant changes to the code, and we will need to ensure that all existing functionality is preserved. - Use [https://data-apis.org/array-api-compat/](https://data-apis.org/array-api-compat/) to handle compatibility issues? - **Leave lazy indexing classes in Xarray for now** - **Preserve support for Dask collection protocols**: named-array will preserve existing support for the dask collections protocol, namely the `__dask_***__` methods - **Preserve support for ChunkManagerEntrypoint?** Opening variables backed by dask vs cubed arrays is currently [handled within Variable.chunk](https://github.com/pydata/xarray/blob/92c8b33eb464b09d6f8277265b16cae039ab57ee/xarray/core/variable.py#L1272C15-L1272C15). If we are preserving dask support it would be nice to preserve general chunked array type support, but this currently requires an entrypoint. ### Plan 1. Create a new base class for `xarray.Variable` in its own module, e.g. `xarray.core.base_variable` 2. Remove all imports of internal Xarray classes and utils from `base_variable.py`. `base_variable.Variable` should not depend on anything in xarray.core - Will require moving the lazy indexing classes (subclasses of ExplicitlyIndexed) to be standards-compliant containers. - an array-api compliant container that provides `__array_namespace__` - Support `.oindex` and `.vindex` for explicit indexing - Potentially implement this by introducing a new compliant wrapper object? - Delete the `NON_NUMPY_SUPPORTED_ARRAY_TYPES` variable which special-cases ExplicitlyIndexed and `pd.Index`. - `ExplicitlyIndexed` class and subclasses should provide `.oindex` and `.vindex` for indexing by `Variable.__getitem__`: `oindex` and `vindex` were proposed in [NEP21](https://numpy.org/neps/nep-0021-advanced-indexing.html), but have not been implemented yet - Delete the ExplicitIndexer objects (`BasicIndexer`, `VectorizedIndexer`, `OuterIndexer`) - Remove explicit support for `pd.Index`. When provided with a `pd.Index` object, Variable will coerce to an array using `np.array(pd.Index)`. For Xarray's purposes, Xarray can use `as_variable` to explicitly wrap these in PandasIndexingAdapter and pass them to `Variable.__init__`. 3. Define a minimal variable interface that the rest of Xarray can use: 1. `dims`: tuple of dimension names 2. `data`: numpy/dask/duck arrays 3. `attrs`: dictionary of attributes 4. Implement basic functions & methods for manipulating these objects. 
These methods will be a cleaned-up subset (for now) of functionality on xarray.Variable, with adaptations inspired by the [Python array API](https://data-apis.org/array-api/2022.12/API_specification/index.html). 5. Existing Variable structures 1. Keep Variable object which subclasses the new structure that adds the `.encoding` attribute and potentially other methods needed for easy refactoring. 2. IndexVariable will remain in xarray.core.variable and subclass the new named-array data structure pending future deletion. 6. Docstrings and user-facing APIs will need to be updated to reflect the changed methods on Variable objects. Further implementation details are in Appendix: [Implementation Details](#appendix-implementation-details). ## Plan for decoupling lazy indexing functionality from NamedArray Today's implementation of Xarray's lazy indexing functionality uses three families of private objects: `*Indexer`, `*IndexingAdapter`, `*Array`. These objects are needed for three reasons: 1. We need to translate from Xarray (NamedArray) indexing rules to bare array indexing rules. - `*Indexer` objects track the type of indexing - basic, orthogonal, vectorized 2. Not all arrays support the same indexing rules, so we need `*Indexing` adapters 1. Indexing Adapters today implement `__getitem__` and use the type of the `*Indexer` object to do the appropriate conversions. 3. We also want to support lazy indexing of on-disk arrays. 1. These again support different types of indexing, so we have `explicit_indexing_adapter` that understands `*Indexer` objects. ### Goals 1. We would like to keep the lazy indexing array objects, and backend array objects within Xarray. Thus NamedArray cannot treat these objects specially. 2. A key source of confusion (and coupling) is that lazy indexing arrays and indexing adapters both handle Indexer objects, and both subclass `ExplicitlyIndexedNDArrayMixin`. These are, however, conceptually different. ### Proposal 1. The `NumpyIndexingAdapter`, `DaskIndexingAdapter`, and `ArrayApiIndexingAdapter` classes will need to migrate to the Named Array project since we will want to support indexing of numpy, dask, and array-API arrays appropriately. 2. The `as_indexable` function, which wraps an array with the appropriate adapter, will also migrate over to named array. 3. Lazy indexing arrays will implement `__getitem__` for basic indexing, `.oindex` for orthogonal indexing, and `.vindex` for vectorized indexing. 4. IndexingAdapter classes will similarly implement `__getitem__`, `oindex`, and `vindex`. 5. `NamedArray.__getitem__` (and `__setitem__`) will still use `*Indexer` objects internally (e.g., in `NamedArray._broadcast_indexes`), but use `.oindex`, `.vindex` on the underlying indexing adapters. 6. We will move the `*Indexer` and `*IndexingAdapter` classes to Named Array. These will be considered private in the long-term. 7. `as_indexable` will no longer special case `ExplicitlyIndexed` objects (we can special case a new `IndexingAdapter` mixin class that will be private to NamedArray). To handle Xarray's lazy indexing arrays, we will introduce a new `ExplicitIndexingAdapter` which will wrap any array with either `.oindex` or `.vindex` implemented. 1. This will be the last case in the if-chain; that is, we will try to wrap with all other `IndexingAdapter` objects before using `ExplicitIndexingAdapter` as a fallback. This adapter will be used for the lazy indexing arrays, and backend arrays. 2. 
As with other indexing adapters (point 4 above), this `ExplicitIndexingAdapter` will only implement `__getitem__` and will understand `*Indexer` objects. 8. For backwards compatibility with external backends, we will have to gracefully deprecate `indexing.explicit_indexing_adapter`, which translates from Xarray's indexing rules to the indexing supported by the backend. 1. We could split `explicit_indexing_adapter` into 3: - `basic_indexing_adapter`, `outer_indexing_adapter` and `vectorized_indexing_adapter` for public use. 2. Implement fallback `.oindex`, `.vindex` properties on `BackendArray` base class. These will simply rewrap the `key` tuple with the appropriate `*Indexer` object, and pass it on to `__getitem__` or `__setitem__`. These methods will also raise a DeprecationWarning so that external backends will know to migrate to `.oindex` and `.vindex` over the next year. The most uncertain piece here is maintaining backward compatibility with external backends. We should first migrate a single internal backend, and test out the proposed approach. ## Project Timeline and Milestones We have identified the following milestones for the completion of this project: 1. **Write and publish a design document**: this document will explain the purpose of named-array, the intended audience, and the features it will provide. It will also describe the architecture of named-array and how it will be implemented. This will ensure early community awareness and engagement in the project to promote subsequent uptake. 2. **Refactor `variable.py` to `base_variable.py`** and remove internal Xarray imports. 3. **Break out the package and create continuous integration infrastructure**: this will entail breaking out the named-array project into a Python package and creating a continuous integration (CI) system. This will help to modularize the code and make it easier to manage. Building a CI system will help ensure that codebase changes do not break existing functionality. 4. Incrementally add new functions & methods to the new package, ported from xarray. This will start to make named-array useful on its own. 5. Refactor the existing Xarray codebase to rely on the newly created package (named-array): this will help to demonstrate the usefulness of the new package, and also provide an example for others who may want to use it. 6. Expand tests, add documentation, and write a blog post: expanding the test suite will help to ensure that the code is reliable and that changes do not introduce bugs. Adding documentation will make it easier for others to understand and use the project. 7. Finally, we will write a series of blog posts on [xarray.dev](https://xarray.dev/) to promote the project and attract more contributors. - Toward the end of the process, write a few blog posts that demonstrate the use of the newly available data structure - pick the same example applications used by other implementations/applications (e.g. Pytorch, sklearn, and Levanter) to show how it can work. ## Related Work 1. [GitHub - deepmind/graphcast](https://github.com/deepmind/graphcast) 2. [Getting Started — LArray 0.34 documentation](https://larray.readthedocs.io/en/stable/tutorial/getting_started.html) 3. [Levanter — Legible, Scalable, Reproducible Foundation Models with JAX](https://crfm.stanford.edu/2023/06/16/levanter-1_0-release.html) 4. [google/xarray-tensorstore](https://github.com/google/xarray-tensorstore) 5. 
[State of Torch Named Tensors · Issue #60832 · pytorch/pytorch · GitHub](https://github.com/pytorch/pytorch/issues/60832) - Incomplete support: Many primitive operations result in errors, making it difficult to use NamedTensors in Practice. Users often have to resort to removing the names from tensors to avoid these errors. - Lack of active development: the development of the NamedTensor feature in PyTorch is not currently active due a lack of bandwidth for resolving ambiguities in the design. - Usability issues: the current form of NamedTensor is not user-friendly and sometimes raises errors, making it difficult for users to incorporate NamedTensors into their workflows. 6. [Scikit-learn Enhancement Proposals (SLEPs) 8, 12, 14](https://github.com/scikit-learn/enhancement_proposals/pull/18) - Some of the key points and limitations discussed in these proposals are: - Inconsistency in feature name handling: Scikit-learn currently lacks a consistent and comprehensive way to handle and propagate feature names through its pipelines and estimators ([SLEP 8](https://github.com/scikit-learn/enhancement_proposals/pull/18),[SLEP 12](https://scikit-learn-enhancement-proposals.readthedocs.io/en/latest/slep012/proposal.html)). - Memory intensive for large feature sets: storing and propagating feature names can be memory intensive, particularly in cases where the entire "dictionary" becomes the features, such as in NLP use cases ([SLEP 8](https://github.com/scikit-learn/enhancement_proposals/pull/18),[GitHub issue #35](https://github.com/scikit-learn/enhancement_proposals/issues/35)) - Sparse matrices: sparse data structures present a challenge for feature name propagation. For instance, the sparse data structure functionality in Pandas 1.0 only supports converting directly to the coordinate format (COO), which can be an issue with transformers such as the OneHotEncoder.transform that has been optimized to construct a CSR matrix ([SLEP 14](https://scikit-learn-enhancement-proposals.readthedocs.io/en/latest/slep014/proposal.html)) - New Data structures: the introduction of new data structures, such as "InputArray" or "DataArray" could lead to more burden for third-party estimator maintainers and increase the learning curve for users. Xarray's "DataArray" is mentioned as a potential alternative, but the proposal mentions that the conversion from a Pandas dataframe to a Dataset is not lossless ([SLEP 12](https://scikit-learn-enhancement-proposals.readthedocs.io/en/latest/slep012/proposal.html),[SLEP 14](https://scikit-learn-enhancement-proposals.readthedocs.io/en/latest/slep014/proposal.html),[GitHub issue #35](https://github.com/scikit-learn/enhancement_proposals/issues/35)). - Dependency on other libraries: solutions that involve using Xarray and/or Pandas to handle feature names come with the challenge of managing dependencies. While a soft dependency approach is suggested, this means users would be able to have/enable the feature only if they have the dependency installed. Xarra-lite's integration with other scientific Python libraries could potentially help with this issue ([GitHub issue #35](https://github.com/scikit-learn/enhancement_proposals/issues/35)). 
## References and Previous Discussion - [[Proposal] Expose Variable without Pandas dependency · Issue #3981 · pydata/xarray · GitHub](https://github.com/pydata/xarray/issues/3981) - [https://github.com/pydata/xarray/issues/3981#issuecomment-985051449](https://github.com/pydata/xarray/issues/3981#issuecomment-985051449) - [Lazy indexing arrays as a stand-alone package · Issue #5081 · pydata/xarray · GitHub](https://github.com/pydata/xarray/issues/5081) ### Appendix: Engagement with the Community We plan to publicize this document on : - [x] `Xarray dev call` - [ ] `Scientific Python discourse` - [ ] `Xarray GitHub` - [ ] `Twitter (X)` - [ ] `Respond to NamedTensor and Scikit-Learn issues?` - [ ] `Pangeo Discourse` - [ ] `Numpy, SciPy email lists?` - [ ] `Xarray blog` Additionally, We plan on writing a series of blog posts to effectively showcase the implementation and potential of the newly available functionality. To illustrate this, we will use the same example applications as other established libraries (such as Pytorch, sklearn), providing practical demonstrations of how these new data structures can be leveraged. ### Appendix: API Surface Questions: 1. Document Xarray indexing rules 2. Document use of .oindex and .vindex protocols 3. Do we use `.mean` and `.nanmean` or `.mean(skipna=...)`? - Default behavior in named-array should mirror NumPy / the array API standard, not pandas. - nanmean is not (yet) in the [array API](https://github.com/pydata/xarray/pull/7424#issuecomment-1373979208). There are a handful of other key functions (e.g., median) that are are also missing. I think that should be OK, as long as what we support is a strict superset of the array API. 4. What methods need to be exposed on Variable? - `Variable.concat` classmethod: create two functions, one as the equivalent of `np.stack` and other for `np.concat` - `.rolling_window` and `.coarsen_reshape` ? - `named-array.apply_ufunc`: used in astype, clip, quantile, isnull, notnull` #### methods to be preserved from xarray.Variable ```python # Sorting Variable.argsort Variable.searchsorted # NaN handling Variable.fillna Variable.isnull Variable.notnull # Lazy data handling Variable.chunk # Could instead have accessor interface and recommend users use `Variable.dask.chunk` and `Variable.cubed.chunk`? Variable.to_numpy() Variable.as_numpy() # Xarray-specific Variable.get_axis_num Variable.isel Variable.to_dict # Reductions Variable.reduce Variable.all Variable.any Variable.argmax Variable.argmin Variable.count Variable.max Variable.mean Variable.median Variable.min Variable.prod Variable.quantile Variable.std Variable.sum Variable.var # Accumulate Variable.cumprod Variable.cumsum # numpy-like Methods Variable.astype Variable.copy Variable.clip Variable.round Variable.item Variable.where # Reordering/Reshaping Variable.squeeze Variable.pad Variable.roll Variable.shift ``` #### methods to be renamed from xarray.Variable ```python # Xarray-specific Variable.concat # create two functions, one as the equivalent of `np.stack` and other for `np.concat` # Given how niche these are, these would be better as functions than methods. # We could also keep these in Xarray, at least for now. If we don't think people will use functionality outside of Xarray it probably is not worth the trouble of porting it (including documentation, etc). Variable.coarsen # This should probably be called something like coarsen_reduce. 
Variable.coarsen_reshape Variable.rolling_window Variable.set_dims # split this into broadcast_to and expand_dims # Reordering/Reshaping Variable.stack # To avoid confusion with np.stack, let's call this stack_dims. Variable.transpose # Could consider calling this permute_dims, like the [array API standard](https://data-apis.org/array-api/2022.12/API_specification/manipulation_functions.html#objects-in-api) Variable.unstack # Likewise, maybe call this unstack_dims? ``` #### methods to be removed from xarray.Variable ```python # Testing Variable.broadcast_equals Variable.equals Variable.identical Variable.no_conflicts # Lazy data handling Variable.compute # We can probably omit this method for now, too, given that dask.compute() uses a protocol. The other concern is that different array libraries have different notions of "compute" and this one is rather Dask specific, including conversion from Dask to NumPy arrays. For example, in JAX every operation executes eagerly, but in a non-blocking fashion, and you need to call jax.block_until_ready() to ensure computation is finished. Variable.load # Could remove? compute vs load is a common source of confusion. # Xarray-specific Variable.to_index Variable.to_index_variable Variable.to_variable Variable.to_base_variable Variable.to_coord Variable.rank # Uses bottleneck. Delete? Could use https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.rankdata.html instead # numpy-like Methods Variable.conjugate # .conj is enough Variable.__array_wrap__ # This is a very old NumPy protocol for duck arrays. We don't need it now that we have `__array_ufunc__` and `__array_function__` # Encoding Variable.reset_encoding ``` #### Attributes to be preserved from xarray.Variable ```python # Properties Variable.attrs Variable.chunks Variable.data Variable.dims Variable.dtype Variable.nbytes Variable.ndim Variable.shape Variable.size Variable.sizes Variable.T Variable.real Variable.imag Variable.conj ``` #### Attributes to be renamed from xarray.Variable ```python ``` #### Attributes to be removed from xarray.Variable ```python Variable.values # Probably also remove -- this is a legacy from before Xarray supported dask arrays. ".data" is enough. # Encoding Variable.encoding ``` ### Appendix: Implementation Details - Merge in VariableArithmetic's parent classes: AbstractArray, NdimSizeLenMixin with the new data structure.. ```python class VariableArithmetic( ImplementsArrayReduce, IncludeReduceMethods, IncludeCumMethods, IncludeNumpySameMethods, SupportsArithmetic, VariableOpsMixin, ): __slots__ = () # prioritize our operations over those of numpy.ndarray (priority=0) __array_priority__ = 50 ``` - Move over `_typed_ops.VariableOpsMixin` - Build a list of utility functions used elsewhere : Which of these should become public API? - `broadcast_variables`: `dataset.py`, `dataarray.py`,`missing.py` - This could be just called "broadcast" in named-array. - `Variable._getitem_with_mask` : `alignment.py` - keep this method/function as private and inside Xarray. - The Variable constructor will need to be rewritten to no longer accept tuples, encodings, etc. These details should be handled at the Xarray data structure level. - What happens to `duck_array_ops?` - What about Variable.chunk and "chunk managers"? - Could this functionality be left in Xarray proper for now? Alternative array types like JAX also have some notion of "chunks" for parallel arrays, but the details differ in a number of ways from the Dask/Cubed. 
- Perhaps variable.chunk/load methods should become functions defined in xarray that convert Variable objects. This is easy so long as xarray can reach in and replace .data - Utility functions like `as_variable` should be moved out of `base_variable.py` so they can convert BaseVariable objects to/from DataArray or Dataset containing explicitly indexed arrays. python-xarray-2026.01.0/design_notes/grouper_objects.md0000664000175000017500000003020015136607163023235 0ustar alastairalastair# Grouper Objects **Author**: Deepak Cherian **Created**: Nov 21, 2023 ## Abstract I propose the addition of Grouper objects to Xarray's public API so that ```python Dataset.groupby(x=BinGrouper(bins=np.arange(10, 2))) ``` is identical to today's syntax: ```python Dataset.groupby_bins("x", bins=np.arange(10, 2)) ``` ## Motivation and scope Xarray's GroupBy API implements the split-apply-combine pattern (Wickham, 2011)[^1], which applies to a very large number of problems: histogramming, compositing, climatological averaging, resampling to a different time frequency, etc. The pattern abstracts the following pseudocode: ```python results = [] for element in unique_labels: subset = ds.sel(x=(ds.x == element)) # split # subset = ds.where(ds.x == element, drop=True) # alternative result = subset.mean() # apply results.append(result) xr.concat(results) # combine ``` to ```python ds.groupby("x").mean() # splits, applies, and combines ``` Efficient vectorized implementations of this pattern are implemented in numpy's [`ufunc.at`](https://numpy.org/doc/stable/reference/generated/numpy.ufunc.at.html), [`ufunc.reduceat`](https://numpy.org/doc/stable/reference/generated/numpy.ufunc.reduceat.html), [`numbagg.grouped`](https://github.com/numbagg/numbagg/blob/main/numbagg/grouped.py), [`numpy_groupies`](https://github.com/ml31415/numpy-groupies), and probably more. These vectorized implementations _all_ require, as input, an array of integer codes or labels that identify unique elements in the array being grouped over (`'x'` in the example above). ```python import numpy as np # array to reduce a = np.array([1, 1, 1, 1, 2]) # initial value for result out = np.zeros((3,), dtype=int) # integer codes labels = np.array([0, 0, 1, 2, 1]) # groupby-reduction np.add.at(out, labels, a) out # array([2, 3, 1]) ``` One can 'factorize' or construct such an array of integer codes using `pandas.factorize` or `numpy.unique(..., return_inverse=True)` for categorical arrays; `pandas.cut`, `pandas.qcut`, or `np.digitize` for discretizing continuous variables. In practice, since `GroupBy` objects exist, much of complexity in applying the groupby paradigm stems from appropriately factorizing or generating labels for the operation. Consider these two examples: 1. [Bins that vary in a dimension](https://flox.readthedocs.io/en/latest/user-stories/nD-bins.html) 2. [Overlapping groups](https://flox.readthedocs.io/en/latest/user-stories/overlaps.html) 3. [Rolling resampling](https://github.com/pydata/xarray/discussions/8361) Anecdotally, less experienced users commonly resort to the for-loopy implementation illustrated by the pseudocode above when the analysis at hand is not easily expressed using the API presented by Xarray's GroupBy object. Xarray's GroupBy API today abstracts away the split, apply, and combine stages but not the "factorize" stage. Grouper objects will close the gap. ## Usage and impact Grouper objects 1. Will abstract useful factorization algorithms, and 2. 
Present a natural way to extend GroupBy to grouping by multiple variables: `ds.groupby(x=BinGrouper(...), t=Resampler(freq="M", ...)).mean()`. In addition, Grouper objects provide a nice interface to add often-requested grouping functionality 1. A new `SpaceResampler` would allow specifying resampling spatial dimensions. ([issue](https://github.com/pydata/xarray/issues/4008)) 2. `RollingTimeResampler` would allow rolling-like functionality that understands timestamps ([issue](https://github.com/pydata/xarray/issues/3216)) 3. A `QuantileBinGrouper` to abstract away `pd.cut` ([issue](https://github.com/pydata/xarray/discussions/7110)) 4. A `SeasonGrouper` and `SeasonResampler` would abstract away common annoyances with such calculations today 1. Support seasons that span a year-end. 2. Only include seasons with complete data coverage. 3. Allow grouping over seasons of unequal length 4. See [this xcdat discussion](https://github.com/xCDAT/xcdat/issues/416) for a `SeasonGrouper` like functionality: 5. Return results with seasons in a sensible order 5. Weighted grouping ([issue](https://github.com/pydata/xarray/issues/3937)) 1. Once `IntervalIndex` like objects are supported, `Resampler` groupers can account for interval lengths when resampling. ## Backward Compatibility Xarray's existing grouping functionality will be exposed using two new Groupers: 1. `UniqueGrouper` which uses `pandas.factorize` 2. `BinGrouper` which uses `pandas.cut` 3. `TimeResampler` which mimics pandas' `.resample` Grouping by single variables will be unaffected so that `ds.groupby('x')` will be identical to `ds.groupby(x=UniqueGrouper())`. Similarly, `ds.groupby_bins('x', bins=np.arange(10, 2))` will be unchanged and identical to `ds.groupby(x=BinGrouper(bins=np.arange(10, 2)))`. ## Detailed description All Grouper objects will subclass from a Grouper object ```python import abc class Grouper(abc.ABC): @abc.abstractmethod def factorize(self, by: DataArray): raise NotImplementedError class CustomGrouper(Grouper): def factorize(self, by: DataArray): ... return codes, group_indices, unique_coord, full_index def weights(self, by: DataArray) -> DataArray: ... return weights ``` ### The `factorize` method Today, the `factorize` method takes as input the group variable and returns 4 variables (I propose to clean this up below): 1. `codes`: An array of same shape as the `group` with int dtype. NaNs in `group` are coded by `-1` and ignored later. 2. `group_indices` is a list of index location of `group` elements that belong to a single group. 3. `unique_coord` is (usually) a `pandas.Index` object of all unique `group` members present in `group`. 4. `full_index` is a `pandas.Index` of all `group` members. This is different from `unique_coord` for binning and resampling, where not all groups in the output may be represented in the input `group`. For grouping by a categorical variable e.g. `['a', 'b', 'a', 'c']`, `full_index` and `unique_coord` are identical. There is some redundancy here since `unique_coord` is always equal to or a subset of `full_index`. We can clean this up (see Implementation below). ### The `weights` method (?) The proposed `weights` method is optional and unimplemented today. Groupers with `weights` will allow composing `weighted` and `groupby` ([issue](https://github.com/pydata/xarray/issues/3937)). 
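To make this concrete before the formal property stated below, here is a minimal sketch of how boolean weights could be derived from the integer codes produced by `factorize`; the snippet is illustrative only and is not part of the proposed API.

```python
import numpy as np
import pandas as pd

# Derive a boolean "summarization matrix" of weights from integer group codes.
group = np.array(["a", "b", "c", "a", "a"])
codes, uniques = pd.factorize(group)  # codes == [0, 1, 2, 0, 0]
weights = (codes == np.arange(len(uniques))[:, None]).astype(int)
print(weights)
# [[1 0 0 1 1]
#  [0 1 0 0 0]
#  [0 0 1 0 0]]
```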
The `weights` method should return an appropriate array of weights such that the following property is satisfied ```python gb_sum = ds.groupby(by).sum() weights = CustomGrouper.weights(by) weighted_sum = xr.dot(ds, weights) assert_identical(gb_sum, weighted_sum) ``` For example, the boolean weights for `group=np.array(['a', 'b', 'c', 'a', 'a'])` should be ``` [[1, 0, 0, 1, 1], [0, 1, 0, 0, 0], [0, 0, 1, 0, 0]] ``` This is the boolean "summarization matrix" referred to in the classic Iverson (1980, Section 4.3)[^2] and "nub sieve" in [various APLs](https://aplwiki.com/wiki/Nub_Sieve). > [!NOTE] > We can always construct `weights` automatically using `group_indices` from `factorize`, so this is not a required method. For a rolling resampling, windowed weights are possible ``` [[0.5, 1, 0.5, 0, 0], [0, 0.25, 1, 1, 0], [0, 0, 0, 1, 1]] ``` ### The `preferred_chunks` method (?) Rechunking support is another optional extension point. In `flox` I experimented some with automatically rechunking to make a groupby more parallel-friendly ([example 1](https://flox.readthedocs.io/en/latest/generated/flox.rechunk_for_blockwise.html), [example 2](https://flox.readthedocs.io/en/latest/generated/flox.rechunk_for_cohorts.html)). A great example is for resampling-style groupby reductions, for which `codes` might look like ``` 0001|11122|3333 ``` where `|` represents chunk boundaries. A simple rechunking to ``` 000|111122|3333 ``` would make this resampling reduction an embarrassingly parallel blockwise problem. Similarly consider monthly-mean climatologies for which the month numbers might be ``` 1 2 3 4 5 | 6 7 8 9 10 | 11 12 1 2 3 | 4 5 6 7 8 | 9 10 11 12 | ``` A slight rechunking to ``` 1 2 3 4 | 5 6 7 8 | 9 10 11 12 | 1 2 3 4 | 5 6 7 8 | 9 10 11 12 | ``` allows us to reduce `1, 2, 3, 4` separately from `5,6,7,8` and `9, 10, 11, 12` while still being parallel friendly (see the [flox documentation](https://flox.readthedocs.io/en/latest/implementation.html#method-cohorts) for more). We could attempt to detect these patterns, or we could just have the Grouper take as input `chunks` and return a tuple of "nice" chunk sizes to rechunk to. ```python def preferred_chunks(self, chunks: ChunksTuple) -> ChunksTuple: pass ``` For monthly means, since the period of repetition of labels is 12, the Grouper might choose possible chunk sizes of `((2,),(3,),(4,),(6,))`. For resampling, the Grouper could choose to resample to a multiple or an even fraction of the resampling frequency. ## Related work Pandas has [Grouper objects](https://pandas.pydata.org/docs/reference/api/pandas.Grouper.html#pandas-grouper) that represent the GroupBy instruction. However, these objects do not appear to be extension points, unlike the Grouper objects proposed here. Instead, Pandas' `ExtensionArray` has a [`factorize`](https://pandas.pydata.org/docs/reference/api/pandas.api.extensions.ExtensionArray.factorize.html) method. Composing rolling with time resampling is a common workload: 1. Polars has [`group_by_dynamic`](https://pola-rs.github.io/polars/py-polars/html/reference/dataframe/api/polars.DataFrame.group_by_dynamic.html) which appears to be like the proposed `RollingResampler`. 2. scikit-downscale provides [`PaddedDOYGrouper`](https://github.com/pangeo-data/scikit-downscale/blob/e16944a32b44f774980fa953ea18e29a628c71b8/skdownscale/pointwise_models/groupers.py#L19) ## Implementation Proposal 1. Get rid of `squeeze` [issue](https://github.com/pydata/xarray/issues/2157): [PR](https://github.com/pydata/xarray/pull/8506) 2. 
Merge existing two class implementation to a single Grouper class 1. This design was implemented in [this PR](https://github.com/pydata/xarray/pull/7206) to account for some annoying data dependencies. 2. See [PR](https://github.com/pydata/xarray/pull/8509) 3. Clean up what's returned by `factorize` methods. 1. A solution here might be to have `group_indices: Mapping[int, Sequence[int]]` be a mapping from group index in `full_index` to a sequence of integers. 2. Return a `namedtuple` or `dataclass` from existing Grouper factorize methods to facilitate API changes in the future. 4. Figure out what to pass to `factorize` 1. Xarray eagerly reshapes nD variables to 1D. This is an implementation detail we need not expose. 2. When grouping by an unindexed variable Xarray passes a `_DummyGroup` object. This seems like something we don't want in the public interface. We could special case "internal" Groupers to preserve the optimizations in `UniqueGrouper`. 5. Grouper objects will exposed under the `xr.groupers` Namespace. At first these will include `UniqueGrouper`, `BinGrouper`, and `TimeResampler`. ## Alternatives One major design choice made here was to adopt the syntax `ds.groupby(x=BinGrouper(...))` instead of `ds.groupby(BinGrouper('x', ...))`. This allows reuse of Grouper objects, example ```python grouper = BinGrouper(...) ds.groupby(x=grouper, y=grouper) ``` but requires that all variables being grouped by (`x` and `y` above) are present in Dataset `ds`. This does not seem like a bad requirement. Importantly `Grouper` instances will be copied internally so that they can safely cache state that might be shared between `factorize` and `weights`. Today, it is possible to `ds.groupby(DataArray, ...)`. This syntax will still be supported. ## Discussion This proposal builds on these discussions: 1. https://github.com/xarray-contrib/flox/issues/191#issuecomment-1328898836 2. https://github.com/pydata/xarray/issues/6610 ## Copyright This document has been placed in the public domain. ## References and footnotes [^1]: Wickham, H. (2011). The split-apply-combine strategy for data analysis. https://vita.had.co.nz/papers/plyr.html [^2]: Iverson, K.E. (1980). Notation as a tool of thought. Commun. ACM 23, 8 (Aug. 1980), 444–465. https://doi.org/10.1145/358896.358899 python-xarray-2026.01.0/.git_archival.txt0000664000175000017500000000021115136607163020310 0ustar alastairalastairnode: b96039b5c86e79ca49dd96c975b81e8130f2cbae node-date: 2026-01-28T10:47:15-07:00 describe-name: v2026.01.0 ref-names: tag: v2026.01.0 python-xarray-2026.01.0/HOW_TO_RELEASE.md0000664000175000017500000001134115136607163017564 0ustar alastairalastair# How to issue an xarray release in 16 easy steps Time required: about an hour. These instructions assume that `upstream` refers to the main repository: ```sh $ git remote -v {...} upstream https://github.com/pydata/xarray (fetch) upstream https://github.com/pydata/xarray (push) ``` 1. Ensure your main branch is synced to upstream: ```sh git switch main git pull upstream main ``` 2. Add a list of contributors. First fetch all previous release tags so we can see the version number of the last release was: ```sh git fetch upstream --tags ``` Then run ```sh pixi run release-contributors ``` and copy the output. 3. Write a release summary: ~50 words describing the high level features. This will be used in the release emails, tweets, GitHub release notes, etc. 4. Look over whats-new.rst and the docs. Make sure "What's New" is complete (check the date!) 
and add the release summary at the top. Things to watch out for: - Important new features should be highlighted towards the top. - Function/method references should include links to the API docs. - Sometimes notes get added in the wrong section of whats-new, typically due to a bad merge. Check for these before a release by using git diff, e.g., `git diff v{YYYY.MM.X-1} whats-new.rst` where {YYYY.MM.X-1} is the previous release. 5. Open a PR with the release summary and whatsnew changes; in particular the release headline should get feedback from the team on what's important to include. Apply the `Release` label to the PR to trigger a test build action. 6. After merging, again ensure your main branch is synced to upstream: ```sh git switch main git pull upstream main ``` 7. If you have any doubts, run the full test suite one final time! ```sh pytest ``` 8. Check that the [ReadTheDocs build](https://readthedocs.org/projects/xray/) is passing on the `latest` build version (which is built from the `main` branch). 9. Issue the release on GitHub. Click on "Draft a new release" at . Type in the version number (with a "v") and paste the release summary in the notes. 10. This should automatically trigger an upload of the new build to PyPI via GitHub Actions. Check this has run [here](https://github.com/pydata/xarray/actions/workflows/pypi-release.yaml), and that the version number you expect is displayed [on PyPI](https://pypi.org/project/xarray/) 11. Add a section for the next release {YYYY.MM.X+1} to doc/whats-new.rst (we avoid doing this earlier so that it doesn't show up in the RTD build): ```rst .. _whats-new.YYYY.MM.X+1: vYYYY.MM.X+1 (unreleased) ----------------------- New Features ~~~~~~~~~~~~ Breaking Changes ~~~~~~~~~~~~~~~~ Deprecations ~~~~~~~~~~~~ Bug Fixes ~~~~~~~~~ Documentation ~~~~~~~~~~~~~ Internal Changes ~~~~~~~~~~~~~~~~ ``` 12. Make a PR with these changes and merge it: ```sh git checkout -b empty-whatsnew-YYYY.MM.X+1 git commit -am "empty whatsnew" git push ``` (Note that repo branch restrictions prevent pushing to `main`, so you have to just-self-merge this.) 13. Consider updating the version available on pyodide: - Open the PyPI page for [Xarray downloads](https://pypi.org/project/xarray/#files) - Edit [`packages/xarray/meta.yaml`](https://github.com/pyodide/pyodide-recipes/blob/main/packages/xarray/meta.yaml) to update the - version number - link to the wheel (under "Built Distribution" on the PyPI page) - SHA256 hash (Click "Show Hashes" next to the link to the wheel) - Open a pull request to pyodide-recipes 14. Issue the release announcement to mailing lists & Twitter (X). For bug fix releases, I usually only email xarray@googlegroups.com. For major/feature releases, I will email a broader list (no more than once every 3-6 months): - pydata@googlegroups.com - xarray@googlegroups.com - numpy-discussion@scipy.org - scipy-user@scipy.org - pyaos@lists.johnny-lin.com Google search will turn up examples of prior release announcements (look for "ANN xarray"). Some of these groups require you to be subscribed in order to email them. ## Note on version numbering As of 2022.03.0, we utilize the [CALVER](https://calver.org/) version system. Specifically, we have adopted the pattern `YYYY.MM.X`, where `YYYY` is a 4-digit year (e.g. `2022`), `0M` is a 2-digit zero-padded month (e.g. `01` for January), and `X` is the release number (starting at zero at the start of each month and incremented once for each additional release). 
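For illustration only (this helper is not part of the release tooling), a small sketch of how the next version number follows from this scheme:

```python
import datetime


def next_calver(last_release: str, today: datetime.date) -> str:
    """Compute the next YYYY.0M.X version under the scheme described above."""
    year, month, patch = last_release.split(".")
    current_year, current_month = f"{today.year:04d}", f"{today.month:02d}"
    if (year, month) == (current_year, current_month):
        # Another release in the same month: bump the release number.
        return f"{year}.{month}.{int(patch) + 1}"
    # First release of a new month: reset the release number to zero.
    return f"{current_year}.{current_month}.0"


print(next_calver("2026.01.0", datetime.date(2026, 1, 30)))  # -> 2026.01.1
print(next_calver("2026.01.1", datetime.date(2026, 2, 3)))   # -> 2026.02.0
```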
python-xarray-2026.01.0/ci/0000775000175000017500000000000015136607163015436 5ustar alastairalastairpython-xarray-2026.01.0/ci/requirements/0000775000175000017500000000000015136607163020161 5ustar alastairalastairpython-xarray-2026.01.0/ci/requirements/environment-benchmark.yml0000664000175000017500000000066015136607163025202 0ustar alastairalastairname: xarray-benchmark channels: - conda-forge - nodefaults dependencies: - bottleneck - cftime - dask-core - distributed - flox - netcdf4 - numba - numbagg - numexpr - py-rattler - numpy>=2.2,<2.3 # https://github.com/numba/numba/issues/10105 - opt_einsum - packaging - pandas - pyarrow # pandas raises a deprecation warning without this, breaking doctests - sparse - scipy - toolz - zarr python-xarray-2026.01.0/ci/requirements/environment.yml0000664000175000017500000000226015136607163023250 0ustar alastairalastairname: xarray-tests channels: - conda-forge - nodefaults dependencies: - aiobotocore - array-api-strict - boto3 - bottleneck - cartopy - cftime - dask-core - distributed - flox - fsspec - h5netcdf - h5py - hdf5 - hypothesis - iris - lxml # Optional dep of pydap - matplotlib-base - mypy==1.18.1 - nc-time-axis - netcdf4 - numba - numbagg - numexpr - numpy>=2.2 - opt_einsum - packaging - pandas - pandas-stubs<=2.2.3.241126 # https://github.com/pydata/xarray/issues/10110 # - pint>=0.22 - pip - pooch - pre-commit - pyarrow # pandas raises a deprecation warning without this, breaking doctests - pydap - pytest - pytest-asyncio - pytest-cov - pytest-env - pytest-mypy-plugins - pytest-timeout - pytest-xdist - rasterio - scipy - seaborn - sparse - toolz - types-colorama - types-docutils - types-psutil - types-Pygments - types-python-dateutil - types-pytz - types-PyYAML - types-requests - types-setuptools - types-openpyxl - typing_extensions - zarr - pip: - jax # no way to get cpu-only jaxlib from conda if gpu is present - types-defusedxml - types-pexpect python-xarray-2026.01.0/ci/release_contributors.py0000664000175000017500000000330415136607163022245 0ustar alastairalastairimport re import textwrap import git from tlz.itertoolz import last, unique co_author_re = re.compile(r"Co-authored-by: (?P[^<]+?) 
<(?P.+)>") ignored = [ {"name": "dependabot[bot]"}, {"name": "pre-commit-ci[bot]"}, { "name": "Claude", "email": [ "noreply@anthropic.com", "claude@anthropic.com", "no-reply@anthropic.com", ], }, ] def is_ignored(name, email): # linear search, for now for ignore in ignored: if ignore["name"] != name: continue ignored_email = ignore.get("email") if ignored_email is None or email in ignored_email: return True return False def main(): repo = git.Repo(".") most_recent_release = last(list(repo.tags)) # extract information from commits contributors = {} for commit in repo.iter_commits(f"{most_recent_release.name}.."): matches = co_author_re.findall(commit.message) if matches: contributors.update({email: name for name, email in matches}) contributors[commit.author.email] = commit.author.name # deduplicate and ignore # TODO: extract ignores from .github/release.yml unique_contributors = unique( name for email, name in contributors.items() if not is_ignored(name, email) ) sorted_ = sorted(unique_contributors) if len(sorted_) > 1: names = f"{', '.join(sorted_[:-1])} and {sorted_[-1]}" else: names = "".join(sorted_) statement = textwrap.dedent( f"""\ Thanks to the {len(sorted_)} contributors to this release: {names} """.rstrip() ) print(statement) if __name__ == "__main__": main() python-xarray-2026.01.0/ci/policy.yaml0000664000175000017500000000113115136607163017615 0ustar alastairalastairchannels: - conda-forge platforms: - noarch - linux-64 policy: # all packages in months packages: python: 30 numpy: 18 default: 12 # overrides for the policy overrides: {} # these packages are completely ignored exclude: - coveralls - pip - pytest - pytest-asyncio - pytest-cov - pytest-env - pytest-mypy-plugins - pytest-timeout - pytest-xdist - pytest-hypothesis - hypothesis - pytz - pytest-reportlog # these packages don't fail the CI, but will be printed in the report ignored_violations: - array-api-strict python-xarray-2026.01.0/LICENSE0000664000175000017500000002403415136607163016053 0ustar alastairalastairApache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. 
"Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: You must give any other recipients of the Work or Derivative Works a copy of this License; and You must cause any modified files to carry prominent notices stating that You changed the files; and You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright 2014-2024 xarray Developers Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
python-xarray-2026.01.0/.pre-commit-config.yaml0000664000175000017500000000514515136607163021331 0ustar alastairalastair# https://pre-commit.com/
ci:
  autoupdate_schedule: monthly
  autoupdate_commit_msg: "Update pre-commit hooks"
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v6.0.0
    hooks:
      - id: trailing-whitespace
      - id: end-of-file-fixer
      - id: check-yaml
      - id: debug-statements
      - id: mixed-line-ending
  - repo: https://github.com/pre-commit/pygrep-hooks
    rev: v1.10.0
    hooks:
      # - id: python-check-blanket-noqa  # checked by ruff
      # - id: python-check-blanket-type-ignore  # checked by ruff
      # - id: python-check-mock-methods  # checked by ruff
      - id: python-no-log-warn
      # - id: python-use-type-annotations  # too many false positives
      - id: rst-backticks
      - id: rst-directive-colons
      - id: rst-inline-touching-normal
      - id: text-unicode-replacement-char
  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.14.10
    hooks:
      - id: ruff-check
        args: ["--fix", "--show-fixes"]
      - id: ruff-format
  - repo: https://github.com/keewis/blackdoc
    rev: v0.4.6
    hooks:
      - id: blackdoc
        exclude: "generate_aggregations.py"
        # make sure this is the most recent version of black
        additional_dependencies: ["black==25.11.0"]
  - repo: https://github.com/rbubley/mirrors-prettier
    rev: v3.7.4
    hooks:
      - id: prettier
        args: ["--cache-location=.prettier_cache/cache"]
  - repo: https://github.com/pre-commit/mirrors-mypy
    rev: v1.19.1
    hooks:
      - id: mypy
        # Copied from setup.cfg
        exclude: "properties|asv_bench"
        # This is slow and so we take it out of the fast-path; requires passing
        # `--hook-stage manual` to pre-commit
        stages: [manual]
        additional_dependencies: [
            # Type stubs
            types-python-dateutil,
            types-setuptools,
            types-PyYAML,
            types-pytz,
            typing-extensions>=4.1.0,
            numpy,
          ]
  - repo: https://github.com/citation-file-format/cff-converter-python
    rev: 5295f87c0e261da61a7b919fc754e3a77edd98a7
    hooks:
      - id: validate-cff
  - repo: https://github.com/ComPWA/taplo-pre-commit
    rev: v0.9.3
    hooks:
      - id: taplo-format
        args: ["--option", "array_auto_collapse=false"]
      - id: taplo-lint
        args: ["--no-schema"]
  - repo: https://github.com/abravalheri/validate-pyproject
    rev: v0.24.1
    hooks:
      - id: validate-pyproject
        additional_dependencies: ["validate-pyproject-schema-store[all]"]
  - repo: https://github.com/adhtruong/mirrors-typos
    rev: v1.41.0
    hooks:
      - id: typos
python-xarray-2026.01.0/.git-blame-ignore-revs0000664000175000017500000000016315136607163021143 0ustar alastairalastair# black PR 3142
d089df385e737f71067309ff7abae15994d581ec
# isort PR 1924
0e73e240107caee3ffd1a1149f0150c390d43251
python-xarray-2026.01.0/CITATION.cff0000664000175000017500000000717715136607163016745 0ustar alastairalastaircff-version: 1.2.0
message: "If you use this software, please cite it as below."
authors:
  - family-names: "Hoyer"
    given-names: "Stephan"
    orcid: "https://orcid.org/0000-0002-5207-0380"
  - family-names: "Roos"
    given-names: "Maximilian"
  - family-names: "Hamman"
    given-names: "Joseph"
    orcid: "https://orcid.org/0000-0001-7479-8439"
  - family-names: "Magin"
    given-names: "Justus"
    orcid: "https://orcid.org/0000-0002-4254-8002"
  - family-names: "Cherian"
    given-names: "Deepak"
    orcid: "https://orcid.org/0000-0002-6861-8734"
  - family-names: "Fitzgerald"
    given-names: "Clark"
    orcid: "https://orcid.org/0000-0003-3446-6389"
  - family-names: "Hauser"
    given-names: "Mathias"
    orcid: "https://orcid.org/0000-0002-0057-4878"
  - family-names: "Fujii"
    given-names: "Keisuke"
    orcid: "https://orcid.org/0000-0003-0390-9984"
  - family-names: "Maussion"
    given-names: "Fabien"
    orcid: "https://orcid.org/0000-0002-3211-506X"
  - family-names: "Imperiale"
    given-names: "Guido"
  - family-names: "Clark"
    given-names: "Spencer"
    orcid: "https://orcid.org/0000-0001-5595-7895"
  - family-names: "Kleeman"
    given-names: "Alex"
  - family-names: "Nicholas"
    given-names: "Thomas"
    orcid: "https://orcid.org/0000-0002-2176-0530"
  - family-names: "Kluyver"
    given-names: "Thomas"
    orcid: "https://orcid.org/0000-0003-4020-6364"
  - family-names: "Westling"
    given-names: "Jimmy"
  - family-names: "Munroe"
    given-names: "James"
    orcid: "https://orcid.org/0000-0001-9098-6309"
  - family-names: "Amici"
    given-names: "Alessandro"
    orcid: "https://orcid.org/0000-0002-1778-4505"
  - family-names: "Barghini"
    given-names: "Aureliana"
  - family-names: "Banihirwe"
    given-names: "Anderson"
    orcid: "https://orcid.org/0000-0001-6583-571X"
  - family-names: "Bell"
    given-names: "Ray"
    orcid: "https://orcid.org/0000-0003-2623-0587"
  - family-names: "Hatfield-Dodds"
    given-names: "Zac"
    orcid: "https://orcid.org/0000-0002-8646-8362"
  - family-names: "Abernathey"
    given-names: "Ryan"
    orcid: "https://orcid.org/0000-0001-5999-4917"
  - family-names: "Bovy"
    given-names: "Benoît"
  - family-names: "Omotani"
    given-names: "John"
    orcid: "https://orcid.org/0000-0002-3156-8227"
  - family-names: "Mühlbauer"
    given-names: "Kai"
    orcid: "https://orcid.org/0000-0001-6599-1034"
  - family-names: "Roszko"
    given-names: "Maximilian K."
    orcid: "https://orcid.org/0000-0001-9424-2526"
  - family-names: "Wolfram"
    given-names: "Phillip J."
    orcid: "https://orcid.org/0000-0001-5971-4241"
  - family-names: "Henderson"
    given-names: "Scott"
    orcid: "https://orcid.org/0000-0003-0624-4965"
  - family-names: "Awowale"
    given-names: "Eniola Olufunke"
  - family-names: "Scheick"
    given-names: "Jessica"
    orcid: "https://orcid.org/0000-0002-3421-4459"
  - family-names: "Savoie"
    given-names: "Matthew"
    orcid: "https://orcid.org/0000-0002-8881-2550"
  - family-names: "Littlejohns"
    given-names: "Owen"
title: "xarray"
abstract: "N-D labeled arrays and datasets in Python."
license: Apache-2.0
doi: 10.5281/zenodo.598201
url: "https://xarray.dev/"
repository-code: "https://github.com/pydata/xarray"
preferred-citation:
  type: article
  authors:
    - family-names: "Hoyer"
      given-names: "Stephan"
      orcid: "https://orcid.org/0000-0002-5207-0380"
    - family-names: "Hamman"
      given-names: "Joseph"
      orcid: "https://orcid.org/0000-0001-7479-8439"
  doi: "10.5334/jors.148"
  journal: "Journal of Open Research Software"
  month: 4
  title: "xarray: N-D labeled Arrays and Datasets in Python"
  volume: 5
  issue: 1
  year: 2017
python-xarray-2026.01.0/.readthedocs.yaml0000664000175000017500000000117215136607163020273 0ustar alastairalastairversion: 2
sphinx:
  configuration: doc/conf.py
  fail_on_warning: true
build:
  os: ubuntu-lts-latest
  tools:
    # just so RTD stops complaining
    python: "latest"
  jobs:
    create_environment:
      - asdf plugin add pixi
      - asdf install pixi latest
      - asdf global pixi latest
    post_checkout:
      - (git --no-pager log --pretty="tformat:%s" -1 | grep -vqF "[skip-rtd]") || exit 183
      - git fetch --unshallow || true
    pre_install:
      - git update-index --assume-unchanged doc/conf.py
    install:
      - pixi install -e doc
    build:
      html:
        - pixi run doc BUILDDIR=$READTHEDOCS_OUTPUT
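
The CITATION.cff file above is machine-readable Citation File Format (CFF 1.2.0) metadata. As a minimal sketch of how that metadata can be consumed, assuming PyYAML is available, the hypothetical helper below (not part of the xarray sources) loads the file and formats its preferred-citation block into a one-line reference:

# Illustrative sketch only; format_preferred_citation is a hypothetical helper.
import yaml

def format_preferred_citation(path="CITATION.cff"):
    # Parse the CFF metadata; CFF stores names under hyphenated keys,
    # e.g. "family-names" / "given-names".
    with open(path, encoding="utf-8") as f:
        cff = yaml.safe_load(f)
    pref = cff["preferred-citation"]
    authors = " & ".join(
        f"{a['family-names']}, {a['given-names'][0]}." for a in pref["authors"]
    )
    return (
        f"{authors} ({pref['year']}). {pref['title']}. "
        f"{pref['journal']}, {pref['volume']}({pref['issue']}). doi:{pref['doi']}"
    )

if __name__ == "__main__":
    print(format_preferred_citation())

Run against the file above, this would print "Hoyer, S. & Hamman, J. (2017). xarray: N-D labeled Arrays and Datasets in Python. Journal of Open Research Software, 5(1). doi:10.5334/jors.148".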