Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 26 additions & 5 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,8 @@ def _infer_coords_and_dims(
"data"
)

original_dims = dims # Keep reference for error messages

if isinstance(dims, str):
dims = (dims,)
elif dims is None:
Expand All @@ -161,12 +163,27 @@ def _infer_coords_and_dims(
coord, name=dim, auto_convert=False
).to_index_variable()
dims[n] = coord.name
dims_tuple = tuple(dims)
elif isinstance(dims, Iterable):
dims = tuple(dims)
else:
# Single non-string, non-iterable hashable (int, UUID, etc.)
dims = (dims,)
dims_tuple = dims
if len(dims_tuple) != len(shape):
raise ValueError(
"different number of dimensions on data "
f"and dims: {len(shape)} vs {len(dims_tuple)}"
)
# Provide helpful error message for tuple ambiguity case
if isinstance(original_dims, tuple) and len(dims_tuple) > 1 and len(shape) == 1:
raise ValueError(
f"You passed dims={original_dims} for 1-dimensional data. "
f"This is ambiguous: did you mean {len(dims_tuple)} separate dimensions, "
f"or a single dimension with tuple name {original_dims}? "
f"For a single tuple-named dimension, use dims=[{original_dims}]. "
f"For multiple dimensions, use {len(dims_tuple)}-dimensional data."
)
else:
raise ValueError(
"different number of dimensions on data "
f"and dims: {len(shape)} vs {len(dims_tuple)}"
)
for d in dims_tuple:
if not hashable(d):
raise TypeError(f"Dimension {d} is not hashable")
Expand Down Expand Up @@ -321,6 +338,10 @@ class DataArray(
to the number of dimensions. If this argument is omitted,
dimension names are taken from ``coords`` (if possible) and
otherwise default to ``['dim_0', ... 'dim_n']``.

Note: Tuples are treated as sequences, so ('a', 'b') means two
dimensions named 'a' and 'b'. To use a tuple as a single dimension
name, wrap it in a list: [('a', 'b')].
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
name, wrap it in a list: [('a', 'b')].
name, wrap it in another tuple: (('a', 'b'),) or list: [('a', 'b')].

Should recommend what xarray tries to use internally first, tuple[Hashable, ...].

name : str or None, optional
Name of this array.
attrs : dict_like or None, optional
Expand Down
4 changes: 2 additions & 2 deletions xarray/core/indexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1707,8 +1707,8 @@ def __init__(
)

self._index_type = index_type
self._indexes = dict(**indexes)
self._variables = dict(**variables)
self._indexes = dict(indexes)
self._variables = dict(variables)

self._dims: Mapping[Hashable, int] | None = None
self.__coord_name_id: dict[Any, int] | None = None
Expand Down
13 changes: 9 additions & 4 deletions xarray/core/variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
is_duck_dask_array,
maybe_coerce_to_str,
)
from xarray.namedarray._typing import _DimsLike
from xarray.namedarray.core import NamedArray, _raise_if_any_duplicate_dimensions
from xarray.namedarray.parallelcompat import get_chunked_array_type
from xarray.namedarray.pycompat import (
Expand Down Expand Up @@ -369,7 +370,7 @@ class Variable(NamedArray, AbstractArray, VariableArithmetic):

def __init__(
self,
dims,
dims: _DimsLike,
data: T_DuckArray | ArrayLike,
attrs=None,
encoding=None,
Expand All @@ -378,10 +379,14 @@ def __init__(
"""
Parameters
----------
dims : str or sequence of str
Name(s) of the the data dimension(s). Must be either a string (only
for 1D data) or a sequence of strings with length equal to the
dims : Hashable or sequence of Hashable
Name(s) of the data dimension(s). Must be either a Hashable
(only for 1D data) or a sequence of Hashables with length equal to the
number of dimensions.

Note: Tuples are treated as sequences, so ('a', 'b') means two
dimensions named 'a' and 'b'. To use a tuple as a single dimension
name, wrap it in a list: [('a', 'b')].
Copy link
Contributor

@Illviljan Illviljan Aug 13, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
name, wrap it in a list: [('a', 'b')].
name, wrap it in a tuple: (('a', 'b'),) or list: [('a', 'b')].

data : array_like
Data array which supports numpy-like data access.
attrs : dict_like or None, optional
Expand Down
2 changes: 1 addition & 1 deletion xarray/namedarray/_typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def dtype(self) -> _DType_co: ...
_Dim = Hashable
_Dims = tuple[_Dim, ...]

_DimsLike = Union[str, Iterable[_Dim]]
_DimsLike = Union[_Dim, Iterable[_Dim]]

# https://data-apis.org/array-api/latest/API_specification/indexing.html
# TODO: np.array_api was bugged and didn't allow (None,), but should!
Expand Down
29 changes: 24 additions & 5 deletions xarray/namedarray/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -500,12 +500,31 @@ def dims(self, value: _DimsLike) -> None:
self._dims = self._parse_dimensions(value)

def _parse_dimensions(self, dims: _DimsLike) -> _Dims:
dims = (dims,) if isinstance(dims, str) else tuple(dims)
original_dims = dims # Keep reference to original input for error messages

if isinstance(dims, str):
dims = (dims,)
elif isinstance(dims, Iterable):
dims = tuple(dims)
else:
# Single non-string, non-iterable hashable (int, UUID, etc.)
dims = (dims,)
Comment on lines +505 to +511
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

_parse_dimensions is triggered every time a NamedArray is initialized. Adding more if-cases will slow things down for the people (and for NamedArray internally) who already pass the correctly typed dims: tuple[Hashable, ...].

Would be nice to invert the check to promote correct typing.


if len(dims) != self.ndim:
raise ValueError(
f"dimensions {dims} must have the same length as the "
f"number of data dimensions, ndim={self.ndim}"
)
# Provide a more helpful error message that explains the tuple ambiguity
if isinstance(original_dims, tuple) and len(dims) > 1 and self.ndim == 1:
raise ValueError(
f"You passed dims={original_dims} for 1-dimensional data. "
f"This is ambiguous: did you mean {len(dims)} separate dimensions, "
f"or a single dimension with tuple name {original_dims}? "
f"For a single tuple-named dimension, use dims=[{original_dims}]. "
f"For multiple dimensions, use {len(dims)}-dimensional data."
)
else:
raise ValueError(
f"dimensions {dims} must have the same length as the "
f"number of data dimensions, ndim={self.ndim}"
)
if len(set(dims)) < len(dims):
repeated_dims = {d for d in dims if dims.count(d) > 1}
warnings.warn(
Expand Down
34 changes: 29 additions & 5 deletions xarray/tests/test_hashable.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import uuid
from enum import Enum
from typing import TYPE_CHECKING, Union

Expand All @@ -10,7 +11,7 @@
if TYPE_CHECKING:
from xarray.core.types import TypeAlias

DimT: TypeAlias = Union[int, tuple, "DEnum", "CustomHashable"]
DimT: TypeAlias = Union[int, tuple, "DEnum", "CustomHashable", uuid.UUID]


class DEnum(Enum):
Expand All @@ -32,15 +33,38 @@ def __hash__(self) -> int:
pytest.param(("a", "b"), id="tuple"),
pytest.param(DEnum.dim, id="enum"),
pytest.param(CustomHashable(3), id="HashableObject"),
pytest.param(uuid.UUID("12345678-1234-5678-1234-567812345678"), id="uuid"),
],
)

parametrize_wrapped = pytest.mark.parametrize(
"wrapped",
[
pytest.param(True, id="wrapped"),
pytest.param(False, id="bare"),
],
)


@parametrize_dim
def test_hashable_dims(dim: DimT) -> None:
v = Variable([dim], [1, 2, 3])
da = DataArray([1, 2, 3], dims=[dim])
Dataset({"a": ([dim], [1, 2, 3])})
@parametrize_wrapped
def test_hashable_dims(dim: DimT, wrapped: bool) -> None:
# Pass dims either wrapped in a list or bare
dims_arg = [dim] if wrapped else dim

# Bare tuple case should error with helpful message for 1D data
if not wrapped and isinstance(dim, tuple):
with pytest.raises(ValueError, match="This is ambiguous"):
Variable(dims_arg, [1, 2, 3])
with pytest.raises(ValueError, match="This is ambiguous"):
DataArray([1, 2, 3], dims=dims_arg)
with pytest.raises(ValueError):
Dataset({"a": (dims_arg, [1, 2, 3])})
return # Don't run the other tests for this case

v = Variable(dims_arg, [1, 2, 3])
da = DataArray([1, 2, 3], dims=dims_arg)
Dataset({"a": (dims_arg, [1, 2, 3])})

# alternative constructors
DataArray(v)
Expand Down
Loading