diff --git a/altair/datasets/__init__.py b/altair/datasets/__init__.py
index 01dc35212..3c61eda0b 100644
--- a/altair/datasets/__init__.py
+++ b/altair/datasets/__init__.py
@@ -1,5 +1,5 @@
"""
-Load example datasets **remotely** from `vega-datasets`_.
+Load example datasets *remotely* from `vega-datasets`_.
Provides over **70+** datasets, used throughout our `Example Gallery`_.
@@ -85,24 +85,18 @@
"""
Get a remote dataset and load as tabular data.
-For full Tab completions, instead use:
+For full Tab completions, instead use::
from altair.datasets import Loader
load = Loader.from_backend("polars")
cars = load("cars")
movies = load("movies")
-Alternatively, specify ``backend`` during a call:
+Alternatively, specify ``backend`` during a call::
from altair.datasets import load
cars = load("cars", backend="polars")
movies = load("movies", backend="polars")
-
-Related
--------
-- https://github.com/vega/altair/pull/3631#issuecomment-2480832609
-- https://github.com/vega/altair/pull/3631#discussion_r1847111064
-- https://github.com/vega/altair/pull/3631#discussion_r1847176465
"""
@@ -124,17 +118,14 @@ def url(
.. note::
Only needed if ``name`` is available in multiple formats.
+ Returns
+ -------
+ ``str``
+
.. _Path.stem:
https://docs.python.org/3/library/pathlib.html#pathlib.PurePath.stem
.. _Path.suffix:
https://docs.python.org/3/library/pathlib.html#pathlib.PurePath.suffix
-
- Related
- -------
- - https://github.com/vega/altair/pull/3631#issuecomment-2484826592
- - https://github.com/vega/altair/pull/3631#issuecomment-2480832711
- - https://github.com/vega/altair/discussions/3150#discussioncomment-11280516
- - https://github.com/vega/altair/pull/3631#discussion_r1846662053
"""
from altair.datasets._exceptions import AltairDatasetsError
diff --git a/altair/datasets/_cache.py b/altair/datasets/_cache.py
index 08016d622..a415a8380 100644
--- a/altair/datasets/_cache.py
+++ b/altair/datasets/_cache.py
@@ -317,26 +317,27 @@ def path(self) -> Path:
"""
Returns path to datasets cache.
- Defaults to (`XDG_CACHE_HOME`_):
+ Defaults to (`XDG_CACHE_HOME`_)::
"$XDG_CACHE_HOME/altair/"
- But can be configured using the environment variable:
+ But can be configured using the environment variable::
"$ALTAIR_DATASETS_DIR"
- You can set this for the current session via:
+ You can set this for the current session via::
- >>> from pathlib import Path
- >>> from altair.datasets import load
- >>> load.cache.path = Path.home() / ".altair_cache"
+ from pathlib import Path
+ from altair.datasets import load
- >>> load.cache.path.relative_to(Path.home()).as_posix()
- '.altair_cache'
+ load.cache.path = Path.home() / ".altair_cache"
- You can *later* disable caching via:
+ load.cache.path.relative_to(Path.home()).as_posix()
+ ".altair_cache"
- >>> load.cache.path = None
+ You can *later* disable caching via::
+
+ load.cache.path = None
.. _XDG_CACHE_HOME:
https://specifications.freedesktop.org/basedir-spec/latest/#variables
diff --git a/altair/datasets/_loader.py b/altair/datasets/_loader.py
index 6c359edb2..8f13ab2de 100644
--- a/altair/datasets/_loader.py
+++ b/altair/datasets/_loader.py
@@ -29,14 +29,14 @@
class Loader(Generic[IntoDataFrameT, IntoFrameT]):
"""
- Load example datasets **remotely** from `vega-datasets`_, with caching.
+ Load example datasets *remotely* from `vega-datasets`_, with caching.
- A new ``Loader`` must be initialized by specifying a backend:
+ A new ``Loader`` must be initialized by specifying a backend::
from altair.datasets import Loader
load = Loader.from_backend("polars")
- >>> load # doctest: +SKIP
+ load
Loader[polars]
.. _vega-datasets:
@@ -81,42 +81,35 @@ def from_backend(cls, backend_name: _Backend = "polars", /) -> Loader[Any, Any]:
.. warning::
Most datasets use a `JSON format not supported`_ by ``pyarrow``
- .. _polars defaults:
- https://docs.pola.rs/api/python/stable/reference/io.html
- .. _pandas defaults:
- https://pandas.pydata.org/docs/reference/io.html
- .. _JSON format not supported:
- https://arrow.apache.org/docs/python/json.html#reading-json-files
-
Examples
--------
- Using ``polars``:
+ Using ``polars``::
from altair.datasets import Loader
load = Loader.from_backend("polars")
cars = load("cars")
- >>> type(cars) # doctest: +SKIP
+ type(cars)
polars.dataframe.frame.DataFrame
- Using ``pandas``:
+ Using ``pandas``::
load = Loader.from_backend("pandas")
cars = load("cars")
- >>> type(cars) # doctest: +SKIP
+ type(cars)
pandas.core.frame.DataFrame
- Using ``pandas``, backed by ``pyarrow`` dtypes:
+ Using ``pandas``, backed by ``pyarrow`` dtypes::
load = Loader.from_backend("pandas[pyarrow]")
cars = load("cars")
- >>> type(cars) # doctest: +SKIP
+ type(cars)
pandas.core.frame.DataFrame
- >>> cars.dtypes # doctest: +SKIP
+ cars.dtypes
Name string[pyarrow]
Miles_per_Gallon double[pyarrow]
Cylinders int64[pyarrow]
@@ -127,6 +120,13 @@ def from_backend(cls, backend_name: _Backend = "polars", /) -> Loader[Any, Any]:
Year timestamp[ns][pyarrow]
Origin string[pyarrow]
dtype: object
+
+ .. _polars defaults:
+ https://docs.pola.rs/api/python/stable/reference/io.html
+ .. _pandas defaults:
+ https://pandas.pydata.org/docs/reference/io.html
+ .. _JSON format not supported:
+ https://arrow.apache.org/docs/python/json.html#reading-json-files
"""
obj = Loader.__new__(Loader)
obj._reader = backend(backend_name)
@@ -154,24 +154,19 @@ def __call__(
**kwds
Arguments passed to the underlying read function.
- .. _Path.stem:
- https://docs.python.org/3/library/pathlib.html#pathlib.PurePath.stem
- .. _Path.suffix:
- https://docs.python.org/3/library/pathlib.html#pathlib.PurePath.suffix
-
Examples
--------
- Using ``polars``:
+ Using ``polars``::
from altair.datasets import Loader
load = Loader.from_backend("polars")
source = load("iowa-electricity")
- >>> source.columns # doctest: +SKIP
+ source.columns
['year', 'source', 'net_generation']
- >>> source # doctest: +SKIP
+ source
shape: (51, 3)
┌────────────┬──────────────┬────────────────┐
│ year ┆ source ┆ net_generation │
@@ -191,15 +186,15 @@ def __call__(
│ 2017-01-01 ┆ Renewables ┆ 21933 │
└────────────┴──────────────┴────────────────┘
- Using ``pandas``:
+ Using ``pandas``::
load = Loader.from_backend("pandas")
source = load("iowa-electricity")
- >>> source.columns # doctest: +SKIP
+ source.columns
Index(['year', 'source', 'net_generation'], dtype='object')
- >>> source # doctest: +SKIP
+ source
year source net_generation
0 2001-01-01 Fossil Fuels 35361
1 2002-01-01 Fossil Fuels 35991
@@ -215,15 +210,15 @@ def __call__(
[51 rows x 3 columns]
- Using ``pyarrow``:
+ Using ``pyarrow``::
load = Loader.from_backend("pyarrow")
source = load("iowa-electricity")
- >>> source.column_names # doctest: +SKIP
+ source.column_names
['year', 'source', 'net_generation']
- >>> source # doctest: +SKIP
+ source
pyarrow.Table
year: date32[day]
source: string
@@ -232,6 +227,11 @@ def __call__(
year: [[2001-01-01,2002-01-01,2003-01-01,2004-01-01,2005-01-01,...,2013-01-01,2014-01-01,2015-01-01,2016-01-01,2017-01-01]]
source: [["Fossil Fuels","Fossil Fuels","Fossil Fuels","Fossil Fuels","Fossil Fuels",...,"Renewables","Renewables","Renewables","Renewables","Renewables"]]
net_generation: [[35361,35991,36234,36205,36883,...,16476,17452,19091,21241,21933]]
+
+ .. _Path.stem:
+ https://docs.python.org/3/library/pathlib.html#pathlib.PurePath.stem
+ .. _Path.suffix:
+ https://docs.python.org/3/library/pathlib.html#pathlib.PurePath.suffix
"""
return self._reader.dataset(name, suffix, **kwds)
@@ -261,16 +261,16 @@ def url(
Examples
--------
- The returned url will always point to an accessible dataset:
+ The returned url will always point to an accessible dataset::
import altair as alt
from altair.datasets import Loader
load = Loader.from_backend("polars")
- >>> load.url("cars") # doctest: +SKIP
- 'https://cdn.jsdelivr.net/npm/vega-datasets@v2.11.0/data/cars.json'
+ load.url("cars")
+ "https://cdn.jsdelivr.net/npm/vega-datasets@v2.11.0/data/cars.json"
- We can pass the result directly to a chart:
+ We can pass the result directly to a chart::
url = load.url("cars")
alt.Chart(url).mark_point().encode(x="Horsepower:Q", y="Miles_per_Gallon:Q")
@@ -282,19 +282,19 @@ def cache(self) -> DatasetCache[IntoDataFrameT, IntoFrameT]:
"""
Caching of remote dataset requests.
- Configure cache path:
+ Configure cache path::
self.cache.path = "..."
- Download the latest datasets *ahead-of-time*:
+ Download the latest datasets *ahead-of-time*::
self.cache.download_all()
- Remove all downloaded datasets:
+ Remove all downloaded datasets::
self.cache.clear()
- Disable caching:
+ Disable caching::
self.cache.path = None
"""
diff --git a/doc/user_guide/api.rst b/doc/user_guide/api.rst
index 5793f0ae8..336c29d54 100644
--- a/doc/user_guide/api.rst
+++ b/doc/user_guide/api.rst
@@ -791,5 +791,21 @@ Typing
Optional
is_chart_type
+.. _api-datasets:
+
+Datasets
+--------
+.. currentmodule:: altair.datasets
+
+.. autosummary::
+ :toctree: generated/datasets/
+ :nosignatures:
+
+ Loader
+ load
+ url
+
.. _Generic:
https://typing.readthedocs.io/en/latest/spec/generics.html#generics
+.. _vega-datasets:
+ https://github.com/vega/vega-datasets
diff --git a/tools/generate_api_docs.py b/tools/generate_api_docs.py
index 55c68729e..babd3d3eb 100644
--- a/tools/generate_api_docs.py
+++ b/tools/generate_api_docs.py
@@ -110,8 +110,22 @@
{typing_objects}
+.. _api-datasets:
+
+Datasets
+--------
+.. currentmodule:: altair.datasets
+
+.. autosummary::
+ :toctree: generated/datasets/
+ :nosignatures:
+
+ {datasets_objects}
+
.. _Generic:
https://typing.readthedocs.io/en/latest/spec/generics.html#generics
+.. _vega-datasets:
+ https://github.com/vega/vega-datasets
"""
@@ -171,6 +185,10 @@ def theme() -> list[str]:
return sort_3
+def datasets() -> list[str]:
+    return alt.datasets.__all__  # rendered into {datasets_objects}
+
+
def lowlevel_wrappers() -> list[str]:
objects = sorted(iter_objects(alt.schema.core, restrict_to_subclass=alt.SchemaBase))
# The names of these two classes are also used for classes in alt.channels. Due to
@@ -194,6 +212,7 @@ def write_api_file() -> None:
api_classes=sep.join(api_classes()),
typing_objects=sep.join(type_hints()),
theme_objects=sep.join(theme()),
+ datasets_objects=sep.join(datasets()),
),
encoding="utf-8",
)