Skip to content

Forward args to _get_remote_config() and honour core/no_scm if present #10719

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions dvc/dependency/repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,10 @@ def _make_fs(
else:
config = Config.load_file(conf)

# Set up the config for the new DVCFileSystem so that it uses the remote repo but
# relies on the local cache instead of the remote's cache. This avoids re-streaming
# data, but it breaks the downstream call to `_get_remote_config()`, which therefore
# needs to ignore the cache parameters.
config["cache"] = self.repo.config["cache"]
config["cache"]["dir"] = self.repo.cache.local_cache_dir

Expand Down
22 changes: 19 additions & 3 deletions dvc/repo/open_repo.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import copy
import os
import tempfile
import threading
Expand Down Expand Up @@ -50,7 +51,7 @@ def open_repo(url, *args, **kwargs):
if os.path.exists(url):
url = os.path.abspath(url)
try:
config = _get_remote_config(url)
config = _get_remote_config(url, *args, **kwargs)
config.update(kwargs.get("config") or {})
kwargs["config"] = config
return Repo(url, *args, **kwargs)
Expand Down Expand Up @@ -97,9 +98,24 @@ def clean_repos():
_remove(path)


def _get_remote_config(url):
def _get_remote_config(url, *args, **kwargs):
try:
repo = Repo(url)
# Deepcopy to prevent modifying the original `kwargs['config']`
config = copy.deepcopy(kwargs.get("config"))

# Import operations will use this function to get the remote's cache. However,
# while the `url` sent will point to the external repo, the cache information
# in `kwargs["config"]["cache"]["dir"]` will point to the local repo,
# see `dvc/dependency/repo.py:RepoDependency._make_fs()`
#
# This breaks this function, since we'd be instructing `Repo()` to use the wrong
# cache to begin with. We need to remove the cache info from `kwargs["config"]`
# to read the actual remote repo data.
if config:
_ = config.pop("cache", None)

repo = Repo(url, config=config)

except NotDvcRepoError:
return {}

Expand Down
33 changes: 33 additions & 0 deletions tests/func/api/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from dvc import api
from dvc.exceptions import OutputNotFoundError, PathMissingError
from dvc.scm import CloneError, SCMError
from dvc.testing.api_tests import TestAPI # noqa: F401
from dvc.testing.tmp_dir import make_subrepo
from dvc.utils.fs import remove
Expand Down Expand Up @@ -65,6 +66,38 @@ def test_get_url_from_remote(tmp_dir, erepo_dir, cloud, local_cloud):
)


def test_get_url_ignore_scm(tmp_dir, dvc, cloud, scm):
    """Exercise `api.get_url` on a local repo path with and without git metadata.

    The test first resolves the URL while `.git` is present, then renames
    `.git` away and checks three things: the plain call raises `SCMError`,
    the call with `core.no_scm` enabled returns the same URL as before, and a
    `file://` repo address raises `CloneError` even with `core.no_scm` enabled.
    """
    tmp_dir.add_remote(config=cloud.config)
    tmp_dir.dvc_gen("foo", "foo", commit="add foo")

    local_repo = tmp_dir.as_posix()
    remote_blob = cloud / "files" / "md5" / "ac/bd18db4cc2f85cedef654fccc4a4d8"
    want = remote_blob.url

    # Baseline: with git metadata in place the URL resolves normally.
    assert api.get_url("foo", repo=local_repo) == want

    # Simulate a gitless environment (e.g. a deployed container) by moving
    # the `.git` directory out of the way.
    git_dir = tmp_dir / ".git"
    git_dir.rename(tmp_dir / "gitless_environment")

    # Without the no_scm override, the lookup is expected to fail on git.
    with pytest.raises(SCMError, match="is not a git repository"):
        api.get_url("foo", repo=local_repo)

    # With git ignored via config, the same URL must be returned.
    resolved = api.get_url(
        "foo", repo=local_repo, config={"core": {"no_scm": True}}
    )
    assert resolved == want

    # A `file://` address is expected to fail with a clone error in the
    # gitless environment, even when `core.no_scm` is set.
    file_url = f"file://{local_repo}"
    with pytest.raises(CloneError, match="SCM error"):
        api.get_url("foo", repo=file_url, config={"core": {"no_scm": True}})


def test_open_external(tmp_dir, erepo_dir, cloud):
erepo_dir.add_remote(config=cloud.config)

Expand Down
Loading