diff --git a/altair/datasets/__init__.py b/altair/datasets/__init__.py index 01dc35212..3c61eda0b 100644 --- a/altair/datasets/__init__.py +++ b/altair/datasets/__init__.py @@ -1,5 +1,5 @@ """ -Load example datasets **remotely** from `vega-datasets`_. +Load example datasets *remotely* from `vega-datasets`_. Provides over **70+** datasets, used throughout our `Example Gallery`_. @@ -85,24 +85,18 @@ """ Get a remote dataset and load as tabular data. -For full Tab completions, instead use: +For full Tab completions, instead use:: from altair.datasets import Loader load = Loader.from_backend("polars") cars = load("cars") movies = load("movies") -Alternatively, specify ``backend`` during a call: +Alternatively, specify ``backend`` during a call:: from altair.datasets import load cars = load("cars", backend="polars") movies = load("movies", backend="polars") - -Related -------- -- https://github.com/vega/altair/pull/3631#issuecomment-2480832609 -- https://github.com/vega/altair/pull/3631#discussion_r1847111064 -- https://github.com/vega/altair/pull/3631#discussion_r1847176465 """ @@ -124,17 +118,14 @@ def url( .. note:: Only needed if ``name`` is available in multiple formats. + Returns + ------- + ``str`` + .. _Path.stem: https://docs.python.org/3/library/pathlib.html#pathlib.PurePath.stem .. _Path.suffix: https://docs.python.org/3/library/pathlib.html#pathlib.PurePath.suffix - - Related - ------- - - https://github.com/vega/altair/pull/3631#issuecomment-2484826592 - - https://github.com/vega/altair/pull/3631#issuecomment-2480832711 - - https://github.com/vega/altair/discussions/3150#discussioncomment-11280516 - - https://github.com/vega/altair/pull/3631#discussion_r1846662053 """ from altair.datasets._exceptions import AltairDatasetsError diff --git a/altair/datasets/_cache.py b/altair/datasets/_cache.py index 08016d622..a415a8380 100644 --- a/altair/datasets/_cache.py +++ b/altair/datasets/_cache.py @@ -317,26 +317,27 @@ def path(self) -> Path: """ Returns path to datasets cache. - Defaults to (`XDG_CACHE_HOME`_): + Defaults to (`XDG_CACHE_HOME`_):: "$XDG_CACHE_HOME/altair/" - But can be configured using the environment variable: + But can be configured using the environment variable:: "$ALTAIR_DATASETS_DIR" - You can set this for the current session via: + You can set this for the current session via:: - >>> from pathlib import Path - >>> from altair.datasets import load - >>> load.cache.path = Path.home() / ".altair_cache" + from pathlib import Path + from altair.datasets import load - >>> load.cache.path.relative_to(Path.home()).as_posix() - '.altair_cache' + load.cache.path = Path.home() / ".altair_cache" - You can *later* disable caching via: + load.cache.path.relative_to(Path.home()).as_posix() + ".altair_cache" - >>> load.cache.path = None + You can *later* disable caching via:: + + load.cache.path = None .. _XDG_CACHE_HOME: https://specifications.freedesktop.org/basedir-spec/latest/#variables diff --git a/altair/datasets/_loader.py b/altair/datasets/_loader.py index 6c359edb2..8f13ab2de 100644 --- a/altair/datasets/_loader.py +++ b/altair/datasets/_loader.py @@ -29,14 +29,14 @@ class Loader(Generic[IntoDataFrameT, IntoFrameT]): """ - Load example datasets **remotely** from `vega-datasets`_, with caching. + Load example datasets *remotely* from `vega-datasets`_, with caching. - A new ``Loader`` must be initialized by specifying a backend: + A new ``Loader`` must be initialized by specifying a backend:: from altair.datasets import Loader load = Loader.from_backend("polars") - >>> load # doctest: +SKIP + load Loader[polars] .. _vega-datasets: @@ -81,42 +81,35 @@ def from_backend(cls, backend_name: _Backend = "polars", /) -> Loader[Any, Any]: .. warning:: Most datasets use a `JSON format not supported`_ by ``pyarrow`` - .. _polars defaults: - https://docs.pola.rs/api/python/stable/reference/io.html - .. _pandas defaults: - https://pandas.pydata.org/docs/reference/io.html - .. _JSON format not supported: - https://arrow.apache.org/docs/python/json.html#reading-json-files - Examples -------- - Using ``polars``: + Using ``polars``:: from altair.datasets import Loader load = Loader.from_backend("polars") cars = load("cars") - >>> type(cars) # doctest: +SKIP + type(cars) polars.dataframe.frame.DataFrame - Using ``pandas``: + Using ``pandas``:: load = Loader.from_backend("pandas") cars = load("cars") - >>> type(cars) # doctest: +SKIP + type(cars) pandas.core.frame.DataFrame - Using ``pandas``, backed by ``pyarrow`` dtypes: + Using ``pandas``, backed by ``pyarrow`` dtypes:: load = Loader.from_backend("pandas[pyarrow]") cars = load("cars") - >>> type(cars) # doctest: +SKIP + type(cars) pandas.core.frame.DataFrame - >>> cars.dtypes # doctest: +SKIP + cars.dtypes Name string[pyarrow] Miles_per_Gallon double[pyarrow] Cylinders int64[pyarrow] @@ -127,6 +120,13 @@ def from_backend(cls, backend_name: _Backend = "polars", /) -> Loader[Any, Any]: Year timestamp[ns][pyarrow] Origin string[pyarrow] dtype: object + + .. _polars defaults: + https://docs.pola.rs/api/python/stable/reference/io.html + .. _pandas defaults: + https://pandas.pydata.org/docs/reference/io.html + .. _JSON format not supported: + https://arrow.apache.org/docs/python/json.html#reading-json-files """ obj = Loader.__new__(Loader) obj._reader = backend(backend_name) @@ -154,24 +154,19 @@ def __call__( **kwds Arguments passed to the underlying read function. - .. _Path.stem: - https://docs.python.org/3/library/pathlib.html#pathlib.PurePath.stem - .. _Path.suffix: - https://docs.python.org/3/library/pathlib.html#pathlib.PurePath.suffix - Examples -------- - Using ``polars``: + Using ``polars``:: from altair.datasets import Loader load = Loader.from_backend("polars") source = load("iowa-electricity") - >>> source.columns # doctest: +SKIP + source.columns ['year', 'source', 'net_generation'] - >>> source # doctest: +SKIP + source shape: (51, 3) ┌────────────┬──────────────┬────────────────┐ │ year ┆ source ┆ net_generation │ @@ -191,15 +186,15 @@ def __call__( │ 2017-01-01 ┆ Renewables ┆ 21933 │ └────────────┴──────────────┴────────────────┘ - Using ``pandas``: + Using ``pandas``:: load = Loader.from_backend("pandas") source = load("iowa-electricity") - >>> source.columns # doctest: +SKIP + source.columns Index(['year', 'source', 'net_generation'], dtype='object') - >>> source # doctest: +SKIP + source year source net_generation 0 2001-01-01 Fossil Fuels 35361 1 2002-01-01 Fossil Fuels 35991 @@ -215,15 +210,15 @@ def __call__( [51 rows x 3 columns] - Using ``pyarrow``: + Using ``pyarrow``:: load = Loader.from_backend("pyarrow") source = load("iowa-electricity") - >>> source.column_names # doctest: +SKIP + source.column_names ['year', 'source', 'net_generation'] - >>> source # doctest: +SKIP + source pyarrow.Table year: date32[day] source: string @@ -232,6 +227,11 @@ def __call__( year: [[2001-01-01,2002-01-01,2003-01-01,2004-01-01,2005-01-01,...,2013-01-01,2014-01-01,2015-01-01,2016-01-01,2017-01-01]] source: [["Fossil Fuels","Fossil Fuels","Fossil Fuels","Fossil Fuels","Fossil Fuels",...,"Renewables","Renewables","Renewables","Renewables","Renewables"]] net_generation: [[35361,35991,36234,36205,36883,...,16476,17452,19091,21241,21933]] + + .. _Path.stem: + https://docs.python.org/3/library/pathlib.html#pathlib.PurePath.stem + .. _Path.suffix: + https://docs.python.org/3/library/pathlib.html#pathlib.PurePath.suffix """ return self._reader.dataset(name, suffix, **kwds) @@ -261,16 +261,16 @@ def url( Examples -------- - The returned url will always point to an accessible dataset: + The returned url will always point to an accessible dataset:: import altair as alt from altair.datasets import Loader load = Loader.from_backend("polars") - >>> load.url("cars") # doctest: +SKIP - 'https://cdn.jsdelivr.net/npm/vega-datasets@v2.11.0/data/cars.json' + load.url("cars") + "https://cdn.jsdelivr.net/npm/vega-datasets@v2.11.0/data/cars.json" - We can pass the result directly to a chart: + We can pass the result directly to a chart:: url = load.url("cars") alt.Chart(url).mark_point().encode(x="Horsepower:Q", y="Miles_per_Gallon:Q") @@ -282,19 +282,19 @@ def cache(self) -> DatasetCache[IntoDataFrameT, IntoFrameT]: """ Caching of remote dataset requests. - Configure cache path: + Configure cache path:: self.cache.path = "..." - Download the latest datasets *ahead-of-time*: + Download the latest datasets *ahead-of-time*:: self.cache.download_all() - Remove all downloaded datasets: + Remove all downloaded datasets:: self.cache.clear() - Disable caching: + Disable caching:: self.cache.path = None """ diff --git a/doc/user_guide/api.rst b/doc/user_guide/api.rst index 5793f0ae8..336c29d54 100644 --- a/doc/user_guide/api.rst +++ b/doc/user_guide/api.rst @@ -791,5 +791,21 @@ Typing Optional is_chart_type +.. _api-datasets: + +Datasets +-------- +.. currentmodule:: altair.datasets + +.. autosummary:: + :toctree: generated/datasets/ + :nosignatures: + + Loader + load + url + .. _Generic: https://typing.readthedocs.io/en/latest/spec/generics.html#generics +.. _vega-datasets: + https://github.com/vega/vega-datasets diff --git a/tools/generate_api_docs.py b/tools/generate_api_docs.py index 55c68729e..babd3d3eb 100644 --- a/tools/generate_api_docs.py +++ b/tools/generate_api_docs.py @@ -110,8 +110,22 @@ {typing_objects} +.. _api-datasets: + +Datasets +-------- +.. currentmodule:: altair.datasets + +.. autosummary:: + :toctree: generated/datasets/ + :nosignatures: + + {datasets_objects} + .. _Generic: https://typing.readthedocs.io/en/latest/spec/generics.html#generics +.. _vega-datasets: + https://github.com/vega/vega-datasets """ @@ -171,6 +185,10 @@ def theme() -> list[str]: return sort_3 +def datasets() -> list[str]: + return alt.datasets.__all__ + + def lowlevel_wrappers() -> list[str]: objects = sorted(iter_objects(alt.schema.core, restrict_to_subclass=alt.SchemaBase)) # The names of these two classes are also used for classes in alt.channels. Due to @@ -194,6 +212,7 @@ def write_api_file() -> None: api_classes=sep.join(api_classes()), typing_objects=sep.join(type_hints()), theme_objects=sep.join(theme()), + datasets_objects=sep.join(datasets()), ), encoding="utf-8", )