Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Proof-of-concept for using simple-repository instead of the non-standards based JSON API #10

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,21 @@ This will start up a Flask app, and will print out a line such as::

You can then call pip with::

pip install --index-url http://127.0.0.1:5000/ astropy
pip install --index-url http://127.0.0.1:5000/simple/ astropy
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also caused by some inflexibility on simple-repository-server (at https://github.com/simple-repository/simple-repository-server/blob/main/simple_repository_server/routers/simple.py#L41).

There is no good reason for this limitation, but it would require some more thought than I have capacity for this evening 😴


and this will then install the requested packages and all dependencies,
ignoring any releases after the cutoff date specified above.

How it works
~~~~~~~~~~~~

`pypi-timemachine` builds upon the simple-repository stack, and uses the
standards-based PEP-503 repository interface. In order to filter by time, the
upstream repository (PyPI) must provide PEP-700 upload-time metadata, which it
does. The results are filtered by pypi-timemachine, and then served as HTML or
JSON via the standard PEP-503 interface.


Caveats/warnings
~~~~~~~~~~~~~~~~

Expand Down
3 changes: 2 additions & 1 deletion pypi_timemachine/__main__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from .core import main


if __name__ == '__main__':
from pypi_timemachine.core import main
main()
139 changes: 88 additions & 51 deletions pypi_timemachine/core.py
Original file line number Diff line number Diff line change
@@ -1,37 +1,97 @@
import socket
from contextlib import asynccontextmanager
import dataclasses
from datetime import datetime
import socket
import sys
import typing

import click
import requests
import fastapi
import httpx
from simple_repository.components.core import RepositoryContainer, SimpleRepository
from simple_repository.components.http import HttpRepository
from simple_repository_server.routers import simple
from simple_repository import model
import uvicorn


# ``typing.override`` only exists from Python 3.12 onwards; on older
# interpreters fall back to a no-op decorator so ``@override`` can still be
# used purely as a marker.
if sys.version_info >= (3, 12):
    from typing import override
else:
    def override(fn):
        """Return *fn* unchanged (stand-in for ``typing.override``)."""
        return fn

from tornado.ioloop import IOLoop
from tornado.web import RequestHandler, Application
from tornado.routing import PathMatches

MAIN_PYPI = 'https://pypi.org/simple/'
JSON_URL = 'https://pypi.org/pypi/{package}/json'

PACKAGE_HTML = """
<!DOCTYPE html>
<html>
<head>
<title>Links for {package}</title>
</head>
<body>
<h1>Links for {package}</h1>
{links}
</body>
</html>
"""


def parse_iso(dt):


def parse_iso(dt) -> datetime:
    """Parse an ISO-style date or timestamp string into a naive ``datetime``.

    Two formats are accepted: a bare date (``YYYY-MM-DD``), which yields
    midnight on that day, and a full timestamp (``YYYY-MM-DDTHH:MM:SS``).

    Raises:
        ValueError: if *dt* matches neither format.
    """
    try:
        return datetime.strptime(dt, '%Y-%m-%d')
    except ValueError:
        # Not a bare date: fall back to the full timestamp form. Only
        # ValueError (format mismatch) is caught, so unrelated failures such
        # as KeyboardInterrupt are no longer swallowed.
        return datetime.strptime(dt, '%Y-%m-%dT%H:%M:%S')


def create_app(repo: SimpleRepository) -> fastapi.FastAPI:
    """Create the FastAPI application that exposes *repo*.

    The router built from *repo* is attached inside the application's
    lifespan handler, because it needs an ``httpx.AsyncClient`` that is
    opened and closed together with that lifespan context.
    """
    @asynccontextmanager
    async def lifespan(app: fastapi.FastAPI) -> typing.AsyncIterator[None]:
        # The client is shared by the router for as long as the lifespan
        # context is active, and is closed when the context exits.
        async with httpx.AsyncClient() as client:
            router = simple.build_router(repo, client)
            app.include_router(router, prefix="")
            yield

    return fastapi.FastAPI(
        # Disables automatic OpenAPI documentation (Swagger & Redoc)
        openapi_url=None,
        lifespan=lifespan,
    )


class DateFilteredReleases(RepositoryContainer):
    """
    A component that hides distribution files from the source repository
    if they were uploaded after the configured cutoff date.

    This component can be used only if the source repository exposes the upload
    date according to PEP-700: https://peps.python.org/pep-0700/.

    Files for which no upload time is available are kept, since there is
    nothing to compare against the cutoff.

    """
    def __init__(
        self,
        source: SimpleRepository,
        cutoff_date: datetime,
    ) -> None:
        self._cutoff_date = cutoff_date
        super().__init__(source)

    @override
    async def get_project_page(
        self,
        project_name: str,
        *,
        request_context: model.RequestContext = model.RequestContext.DEFAULT,
    ) -> model.ProjectDetail:
        """Return the source's project page with too-recent files removed."""
        project_page = await super().get_project_page(
            project_name,
            request_context=request_context,
        )

        return self._exclude_recent_distributions(
            project_page=project_page,
            now=datetime.now(),
        )

    @staticmethod
    def _as_utc(moment: datetime) -> datetime:
        """Return *moment* as an aware datetime, treating naive values as UTC."""
        # Local import: the module only imports ``datetime`` itself.
        from datetime import timezone

        if moment.utcoffset() is None:
            return moment.replace(tzinfo=timezone.utc)
        return moment

    def _exclude_recent_distributions(
        self,
        project_page: model.ProjectDetail,
        now: datetime,
    ) -> model.ProjectDetail:
        """Return a copy of *project_page* without files newer than the cutoff.

        ``now`` is accepted for backwards compatibility but is not used: the
        comparison is made against the cutoff date given at construction.
        """
        # Python refuses to compare naive and aware datetimes (TypeError), so
        # both sides are normalised to aware UTC values before comparing.
        # NOTE(review): assumes naive values are meant as UTC, which is what
        # PEP 700 specifies for upload times - confirm for the CLI cutoff.
        cutoff = self._as_utc(self._cutoff_date)
        filtered_files = tuple(
            file for file in project_page.files
            if not file.upload_time
            or self._as_utc(file.upload_time) <= cutoff
        )
        return dataclasses.replace(project_page, files=filtered_files)


@click.command()
@click.argument('cutoff_date')
@click.option('--port', default=None)
Expand All @@ -40,43 +100,20 @@ def main(cutoff_date, port, quiet):

CUTOFF = parse_iso(cutoff_date)

INDEX = requests.get(MAIN_PYPI).content
repo = DateFilteredReleases(
HttpRepository(MAIN_PYPI),
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Forgot to say: There is no real reason not to parameterise this now... it is something that has been asked for to support private indexes. (e.g. https://www.reddit.com/r/Python/comments/1cte019/comment/l4bh02p/ #8)

cutoff_date=CUTOFF,
)

class MainIndexHandler(RequestHandler):

async def get(self):
return self.write(INDEX)

class PackageIndexHandler(RequestHandler):

async def get(self, package):

package_index = requests.get(JSON_URL.format(package=package)).json()
release_links = ""
for release in package_index['releases'].values():
for file in release:
release_date = parse_iso(file['upload_time'])
if release_date < CUTOFF:
if file['requires_python'] is None:
release_links += ' <a href="{url}#sha256={sha256}">{filename}</a><br/>\n'.format(url=file['url'], sha256=file['digests']['sha256'], filename=file['filename'])
else:
rp = file['requires_python'].replace('>', '&gt;')
release_links += ' <a href="{url}#sha256={sha256}" data-requires-python="{rp}">{filename}</a><br/>\n'.format(url=file['url'], sha256=file['digests']['sha256'], rp=rp, filename=file['filename'])

self.write(PACKAGE_HTML.format(package=package, links=release_links))
app = create_app(repo)

sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.bind(('localhost', 0))
if port is None:
port = sock.getsockname()[1]
sock.close()

app = Application([(r"/", MainIndexHandler),
(PathMatches(r"/(?P<package>\S+)\//?"), PackageIndexHandler)])

app.listen(port=port)

if not quiet:
print(f'Starting pypi-timemachine server at http://localhost:{port}')

IOLoop.instance().start()
uvicorn.run(app=app, port=int(port))
6 changes: 4 additions & 2 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,10 @@ packages = find:
setup_requires = setuptools_scm
install_requires =
click
requests
tornado
fastapi
httpx
simple-repository
simple-repository-server

[options.entry_points]
console_scripts =
Expand Down