Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add more urls to github + don't use raw api #31

Merged
merged 4 commits into from
Feb 12, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion src/prefect_cloud/cli/root.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,10 @@ async def deploy(
...,
"--from",
"-f",
help=".py file containing the function to deploy.",
help=(
"URL to a .py file containing the function to deploy. Supported formats: \n\n"
"-- Github: [https://]github.com/owner/repo/(blob|tree)/ref/path/to/file"
),
),
dependencies: list[str] = typer.Option(
...,
Expand Down
56 changes: 38 additions & 18 deletions src/prefect_cloud/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,23 @@ class GitHubFileRef:

owner: str
repo: str
branch: str
ref: str # Can be either a branch name or commit SHA
filepath: str
ref_type: Literal["blob", "tree"]

@classmethod
def from_url(cls, url: str) -> "GitHubFileRef":
"""Parse a GitHub URL into its components.

Handles both blob and tree URLs:
Handles various GitHub URL formats:
- https://github.com/owner/repo/blob/branch/path/to/file.py
- https://github.com/owner/repo/tree/branch/path/to/file.py
- github.com/owner/repo/blob/a1b2c3d/path/to/file.py (commit SHA)
- github.com/owner/repo/blob/path/to/file.py (uses first path component as ref)

Also handles tree URLs:
- https://github.com/owner/repo/tree/branch/path/to/dir
- github.com/owner/repo/tree/a1b2c3d/path/to/dir (commit SHA)
- github.com/owner/repo/tree/path/to/dir (uses first path component as ref)
Comment on lines +27 to +35
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd love to surface this up to the --help of the deploy command


Args:
url: GitHub URL to parse
Expand All @@ -35,17 +41,22 @@ def from_url(cls, url: str) -> "GitHubFileRef":
GitHubFileRef containing parsed components

Raises:
ValueError: If URL is not a valid GitHub blob/tree URL
ValueError: If URL cannot be parsed into required components
"""
# Handle URLs without protocol but with github.com
if url.startswith("github.com"):
url = "https://" + url

parsed = urlparse(url)
if parsed.netloc != "github.com":
raise ValueError("Not a GitHub URL")
raise ValueError("Not a GitHub URL. Must include 'github.com' in the URL")

# Remove leading/trailing slashes and split path
parts = parsed.path.strip("/").split("/")
if len(parts) < 5: # owner/repo/[blob|tree]/branch/filepath
if len(parts) < 4: # Need at least owner/repo/[blob|tree]/ref
raise ValueError(
"Invalid GitHub URL. Expected format: "
"https://github.com/owner/repo/blob|tree/branch/path/to/file.py"
"https://github.com/owner/repo/blob|tree/ref/path/to/file.py"
)

owner, repo = parts[:2]
Expand All @@ -56,11 +67,18 @@ def from_url(cls, url: str) -> "GitHubFileRef":
f"Invalid reference type '{ref_type}'. Must be 'blob' or 'tree'"
)

branch = parts[3]
filepath = "/".join(parts[4:])
# Always use the first component after blob/tree as the ref
ref = parts[3]
filepath = "/".join(parts[4:]) if len(parts) > 4 else ""

if not filepath:
raise ValueError(
"Invalid GitHub URL. Expected format: "
"https://github.com/owner/repo/blob|tree/ref/path/to/file.py"
)

return cls(
owner=owner, repo=repo, branch=branch, filepath=filepath, ref_type=ref_type
owner=owner, repo=repo, ref=ref, filepath=filepath, ref_type=ref_type
)

@property
Expand All @@ -74,20 +92,20 @@ def directory(self) -> str:
return str(Path(self.filepath).parent)

@property
def raw_url(self) -> str:
"""Get the raw.githubusercontent.com URL for this file."""
return f"https://raw.githubusercontent.com/{self.owner}/{self.repo}/refs/heads/{self.branch}/{self.filepath}"
def api_url(self) -> str:
"""Get the GitHub API URL for this file."""
return f"https://api.github.com/repos/{self.owner}/{self.repo}/contents/{self.filepath}?ref={self.ref}"

def __str__(self) -> str:
return f"github.com/{self.owner}/{self.repo} @ {self.branch} - {self.filepath}"
return f"github.com/{self.owner}/{self.repo} @ {self.ref} - {self.filepath}"


def to_pull_step(
github_ref: GitHubFileRef, credentials_block: str | None = None
) -> dict[str, Any]:
pull_step_kwargs = {
"repository": github_ref.clone_url,
"branch": github_ref.branch,
"branch": github_ref.ref,
}
if credentials_block:
pull_step_kwargs["access_token"] = (
Expand All @@ -100,13 +118,15 @@ def to_pull_step(
async def get_github_raw_content(
github_ref: GitHubFileRef, credentials: str | None = None
) -> str:
"""Get raw content of a file from GitHub."""
headers: dict[str, str] = {}
"""Get content of a file from GitHub API."""
headers: dict[str, str] = {
"Accept": "application/vnd.github.v3.raw",
}
if credentials:
headers["Authorization"] = f"Bearer {credentials}"

async with AsyncClient() as client:
response = await client.get(github_ref.raw_url, headers=headers)
response = await client.get(github_ref.api_url, headers=headers)
if response.status_code == 404:
raise FileNotFound(f"File not found: {github_ref}")
response.raise_for_status()
Expand Down
214 changes: 214 additions & 0 deletions tests/test_github.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,214 @@
import re

import pytest
from httpx import Response

from prefect_cloud.github import FileNotFound, GitHubFileRef, get_github_raw_content


class TestGitHubFileRef:
    """Unit tests for ``GitHubFileRef`` URL parsing and its derived properties."""

    def test_from_url_blob(self):
        """A full https ``blob`` URL is split into owner, repo, ref and file path."""
        url = "https://github.com/PrefectHQ/prefect/blob/main/src/prefect/cli/root.py"
        ref = GitHubFileRef.from_url(url)

        assert ref.owner == "PrefectHQ"
        assert ref.repo == "prefect"
        assert ref.ref == "main"
        assert ref.filepath == "src/prefect/cli/root.py"
        assert ref.ref_type == "blob"

    def test_from_url_tree(self):
        """A full https ``tree`` URL is parsed the same way, with ref_type ``tree``."""
        url = "https://github.com/PrefectHQ/prefect/tree/main/src/prefect/cli"
        ref = GitHubFileRef.from_url(url)

        assert ref.owner == "PrefectHQ"
        assert ref.repo == "prefect"
        assert ref.ref == "main"
        assert ref.filepath == "src/prefect/cli"
        assert ref.ref_type == "tree"

    def test_from_url_invalid_github(self):
        """URLs on a host other than github.com are rejected."""
        with pytest.raises(ValueError, match="Not a GitHub URL"):
            GitHubFileRef.from_url("https://gitlab.com/owner/repo/blob/main/file.py")

    def test_from_url_invalid_format(self):
        """A bare repository URL (no blob/tree, ref or path) is rejected."""
        with pytest.raises(ValueError, match="Invalid GitHub URL"):
            GitHubFileRef.from_url("https://github.com/owner/repo")

    def test_from_url_invalid_ref_type(self):
        """The third path segment must be ``blob`` or ``tree``."""
        with pytest.raises(ValueError, match="Invalid reference type"):
            GitHubFileRef.from_url("https://github.com/owner/repo/invalid/main/file.py")

    def test_clone_url(self):
        """``clone_url`` is the https ``.git`` URL of the repository."""
        ref = GitHubFileRef(
            owner="PrefectHQ",
            repo="prefect",
            ref="main",
            filepath="README.md",
            ref_type="blob",
        )
        assert ref.clone_url == "https://github.com/PrefectHQ/prefect.git"

    def test_directory(self):
        """``directory`` is the parent directory of ``filepath``."""
        ref = GitHubFileRef(
            owner="PrefectHQ",
            repo="prefect",
            ref="main",
            filepath="src/prefect/cli/root.py",
            ref_type="blob",
        )
        assert ref.directory == "src/prefect/cli"

    def test_api_url(self):
        """``api_url`` points at the contents API with the ref as a query parameter."""
        ref = GitHubFileRef(
            owner="PrefectHQ",
            repo="prefect",
            ref="main",
            filepath="README.md",
            ref_type="blob",
        )
        assert (
            ref.api_url
            == "https://api.github.com/repos/PrefectHQ/prefect/contents/README.md?ref=main"
        )

    def test_str_representation(self):
        """``str()`` renders as ``github.com/<owner>/<repo> @ <ref> - <filepath>``."""
        ref = GitHubFileRef(
            owner="PrefectHQ",
            repo="prefect",
            ref="main",
            filepath="README.md",
            ref_type="blob",
        )
        assert str(ref) == "github.com/PrefectHQ/prefect @ main - README.md"

    def test_from_url_without_protocol(self):
        """A URL starting with ``github.com`` (no scheme) is accepted."""
        url = "github.com/PrefectHQ/prefect/blob/main/README.md"
        ref = GitHubFileRef.from_url(url)

        assert ref.owner == "PrefectHQ"
        assert ref.repo == "prefect"
        assert ref.ref == "main"
        assert ref.filepath == "README.md"
        assert ref.ref_type == "blob"

    def test_from_url_with_http(self):
        """A plain ``http://`` scheme is accepted as well as ``https://``."""
        url = "http://github.com/PrefectHQ/prefect/blob/main/README.md"
        ref = GitHubFileRef.from_url(url)

        assert ref.owner == "PrefectHQ"
        assert ref.repo == "prefect"
        assert ref.ref == "main"
        assert ref.filepath == "README.md"
        assert ref.ref_type == "blob"

    def test_from_url_requires_github_domain(self):
        """An ``owner/repo/...`` path without the github.com host is rejected."""
        with pytest.raises(ValueError, match="Must include 'github.com' in the URL"):
            GitHubFileRef.from_url("PrefectHQ/prefect/blob/main/README.md")

    def test_from_url_with_multiple_path_segments(self):
        """Everything after the ref is joined back into a nested file path."""
        url = "github.com/PrefectHQ/prefect/blob/main/src/prefect/cli/root.py"
        ref = GitHubFileRef.from_url(url)

        assert ref.owner == "PrefectHQ"
        assert ref.repo == "prefect"
        assert ref.ref == "main"
        assert ref.filepath == "src/prefect/cli/root.py"
        assert ref.ref_type == "blob"

    def test_from_url_with_commit_sha(self):
        """A commit SHA in the ref position is stored verbatim as the ref."""
        url = "github.com/PrefectHQ/prefect/blob/a1b2c3d4e5f6/src/prefect/cli/root.py"
        ref = GitHubFileRef.from_url(url)

        assert ref.owner == "PrefectHQ"
        assert ref.repo == "prefect"
        assert ref.ref == "a1b2c3d4e5f6"
        assert ref.filepath == "src/prefect/cli/root.py"
        assert ref.ref_type == "blob"

    def test_from_url_with_short_commit_sha(self):
        """An abbreviated commit SHA is also accepted as the ref."""
        url = "github.com/PrefectHQ/prefect/blob/a1b2c3d/README.md"
        ref = GitHubFileRef.from_url(url)

        assert ref.owner == "PrefectHQ"
        assert ref.repo == "prefect"
        assert ref.ref == "a1b2c3d"
        assert ref.filepath == "README.md"
        assert ref.ref_type == "blob"

    def test_from_url_without_ref(self):
        """A URL with a single segment after ``blob`` has no file path and is rejected."""
        url = "github.com/PrefectHQ/prefect/blob/README.md"
        expected_message = (
            "Invalid GitHub URL. Expected format: "
            "https://github.com/owner/repo/blob|tree/ref/path/to/file.py"
        )
        # The message contains regex metacharacters ("." and "|"). Unescaped,
        # the "|" splits the pattern into two alternatives, so the assertion
        # would pass on a partial match. Escape it to pin the whole message.
        with pytest.raises(ValueError, match=re.escape(expected_message)):
            GitHubFileRef.from_url(url)

    def test_from_url_with_tree_without_ref(self):
        """A scheme-less ``tree`` URL pointing at a directory is parsed.

        NOTE: despite the test name, this URL does carry a ref (``main``); the
        case exercised is a tree URL without a protocol prefix.
        """
        url = "github.com/PrefectHQ/prefect/tree/main/src/prefect"
        ref = GitHubFileRef.from_url(url)

        assert ref.owner == "PrefectHQ"
        assert ref.repo == "prefect"
        assert ref.ref == "main"
        assert ref.filepath == "src/prefect"
        assert ref.ref_type == "tree"


class TestGitHubContent:
    """Tests for ``get_github_raw_content`` against a mocked GitHub contents API."""

    # Media type the function is expected to request so the contents API
    # responds with the raw file body.
    _RAW_ACCEPT = "application/vnd.github.v3.raw"

    @staticmethod
    def _file_ref(filepath: str = "README.md") -> GitHubFileRef:
        """Build a blob reference into PrefectHQ/prefect@main for *filepath*."""
        return GitHubFileRef(
            owner="PrefectHQ",
            repo="prefect",
            ref="main",
            filepath=filepath,
            ref_type="blob",
        )

    @pytest.mark.asyncio
    async def test_get_github_raw_content(self, respx_mock):
        """Without credentials the body is returned and the raw media type is requested."""
        github_ref = self._file_ref()

        expected_content = "# Test Content"
        route = respx_mock.get(github_ref.api_url).mock(
            return_value=Response(status_code=200, text=expected_content)
        )

        content = await get_github_raw_content(github_ref)
        assert content == expected_content

        # The Accept header must be sent on unauthenticated requests too.
        assert route.calls[0].request.headers["Accept"] == self._RAW_ACCEPT

    @pytest.mark.asyncio
    async def test_get_github_raw_content_with_credentials(self, respx_mock):
        """With credentials a Bearer token is sent alongside the Accept header."""
        github_ref = self._file_ref()

        test_token = "test-token"
        expected_content = "# Test Content"

        route = respx_mock.get(github_ref.api_url).mock(
            return_value=Response(status_code=200, text=expected_content)
        )

        content = await get_github_raw_content(github_ref, credentials=test_token)
        assert content == expected_content

        # Verify both the authorization and the raw-content Accept headers were sent
        sent_headers = route.calls[0].request.headers
        assert sent_headers["Authorization"] == f"Bearer {test_token}"
        assert sent_headers["Accept"] == self._RAW_ACCEPT

    @pytest.mark.asyncio
    async def test_get_github_raw_content_file_not_found(self, respx_mock):
        """A 404 from the API is surfaced as ``FileNotFound``."""
        github_ref = self._file_ref("NONEXISTENT.md")

        respx_mock.get(github_ref.api_url).mock(return_value=Response(status_code=404))

        with pytest.raises(FileNotFound):
            await get_github_raw_content(github_ref)