Add execution start and end time metadata for code cells #29

Merged
merged 17 commits into from
Jan 27, 2025
Merged
Show file tree
Hide file tree
Changes from 15 commits
29 changes: 25 additions & 4 deletions conftest.py
@@ -1,12 +1,33 @@
 import pytest

-pytest_plugins = ("pytest_jupyter.jupyter_server",)
+pytest_plugins = [
+    "pytest_jupyter.jupyter_server",
+    "jupyter_server.pytest_plugin",
+    "jupyter_server_fileid.pytest_plugin",
+    "jupyter_server_ydoc.pytest_plugin"
+]


 @pytest.fixture
-def jp_server_config(jp_server_config):
+def jp_server_config(jp_root_dir, jp_server_config):
     return {
-        "ServerApp": {
-            "jpserver_extensions": {"jupyter_server_nbmodel": True, "jupyter_server_ydoc": False}
-        }
+        'ServerApp': {
+            'jpserver_extensions': {
+                'jupyter_server_ydoc': True,
+                'jupyter_server_fileid': True,
+                'jupyter_server_nbmodel': True,
+            },
+            'token': '',
+            'password': '',
+            'disable_check_xsrf': True
+        },
+        "SQLiteYStore": {"db_path": str(jp_root_dir.joinpath(".rtc_test.db"))},
+        "BaseFileIdManager": {
+            "root_dir": str(jp_root_dir),
+            "db_path": str(jp_root_dir.joinpath(".fid_test.db")),
+            "db_journal_mode": "OFF",
+        },
+        "YDocExtension": {
+            "document_save_delay": 1
+        }
     }
25 changes: 25 additions & 0 deletions jupyter_server_nbmodel/event_logger.py
@@ -0,0 +1,25 @@
+from jupyter_events import EventLogger
+import pathlib
+
+_JUPYTER_SERVER_EVENTS_URI = "https://events.jupyter.org/jupyter_server_nbmodel"
+_DEFAULT_EVENTS_SCHEMA_PATH = pathlib.Path(__file__).parent / "event_schemas"
+
+class _EventLogger:
+    _event_logger = None
+
+    @classmethod
+    def init_event_logger(cls) -> EventLogger:
+        """Initialize or return the existing Event Logger."""
+        if cls._event_logger is None:
+            cls._event_logger = EventLogger()
+            schema_ids = [
+                "https://events.jupyter.org/jupyter_server_nbmodel/cell_execution/v1",
+            ]
+            for schema_id in schema_ids:
+                rel_schema_path = schema_id.replace(_JUPYTER_SERVER_EVENTS_URI + "/", "") + ".yaml"
+                schema_path = _DEFAULT_EVENTS_SCHEMA_PATH / rel_schema_path
+                cls._event_logger.register_event_schema(schema_path)
+        return cls._event_logger
+
+
+event_logger = _EventLogger.init_event_logger()
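As a quick way to observe the new events, other server-side code could attach a listener to the logger exported above. This is a minimal sketch, not part of this PR, and it assumes jupyter_events' EventLogger.add_listener(schema_id=..., listener=...) API with an async listener receiving (logger, schema_id, data):

from jupyter_server_nbmodel.event_logger import event_logger

SCHEMA_ID = "https://events.jupyter.org/jupyter_server_nbmodel/cell_execution/v1"


async def on_cell_execution(logger, schema_id, data) -> None:
    # data follows cell_execution/v1: event_type, cell_id, document_id, timestamp, ...
    print(f"{data['event_type']}: cell {data['cell_id']} at {data['timestamp']}")


event_logger.add_listener(schema_id=SCHEMA_ID, listener=on_cell_execution)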
42 changes: 42 additions & 0 deletions jupyter_server_nbmodel/event_schemas/cell_execution/v1.yaml
@@ -0,0 +1,42 @@
+"$id": https://events.jupyter.org/jupyter_server_nbmodel/cell_execution/v1
+version: "1"
+title: Cell Execution activities
+personal-data: true
+description: |
+  Record events of a cell execution.
+type: object
+required:
+  - event_type
+  - cell_id
+  - document_id
+  - timestamp
+properties:
+  event_type:
+    enum:
+      - execution_start
+      - execution_end
+
+  cell_id:
+    type: string
+    description: |
+      Cell id.
+
+  document_id:
+    type: string
+    description: |
+      Document id.
+
+  success:
+    type: boolean
+    description: |
+      Whether the cell execution was successful or not.
+
+  kernel_error:
+    type: string
+    description: |
+      Error message from the kernel.
+
+  timestamp:
+    type: string
+    description: |
+      Timestamp of the event in ISO format.
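For illustration, an execution_end event that would validate against this schema could look like the following Python dict (all values are hypothetical, not taken from the PR):

example_event = {
    "event_type": "execution_end",
    "cell_id": "2a67e69e-7d19-4f06-9d3f-3f0f2a1b8c11",
    "document_id": "json:notebook:4f6d1c2e-example",
    "success": True,
    "kernel_error": "",
    "timestamp": "2025-01-27T10:15:32.654321",
}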
50 changes: 45 additions & 5 deletions jupyter_server_nbmodel/handlers.py
@@ -8,6 +8,7 @@
 from dataclasses import asdict, dataclass
 from functools import partial
 from http import HTTPStatus
+from datetime import datetime, timezone

 import jupyter_server
 import jupyter_server.services
@@ -18,8 +19,10 @@
 from jupyter_core.utils import ensure_async
 from jupyter_server.base.handlers import APIHandler
 from jupyter_server.extension.handler import ExtensionHandlerMixin
+from jupyter_events import EventLogger

 from .log import get_logger
+from .event_logger import event_logger

 if t.TYPE_CHECKING:
     import jupyter_client
@@ -123,7 +126,6 @@ async def _get_ycell(
         raise KeyError(
             msg,
         )
-
     return ycell


@@ -199,6 +201,13 @@ def _stdin_hook(kernel_id: str, request_id: str, pending_input: PendingInput, ms
         parent_header=header, input_request=InputRequest(**msg["content"])
     )

+def _get_error(outputs):
+    return "\n".join(
+        f"{output['ename']}: {output['evalue']}"
+        for output in outputs
+        if output.get("output_type") == "error"
+    )
+

 async def _execute_snippet(
     client: jupyter_client.asynchronous.client.AsyncKernelClient,
@@ -219,15 +228,30 @@ async def _execute_snippet(
         The execution status and outputs.
     """
     ycell = None
+    time_info = {}
     if metadata is not None:
         ycell = await _get_ycell(ydoc, metadata)
         if ycell is not None:
+            execution_start_time = datetime.now(timezone.utc).isoformat()[:-6]
             # Reset cell
             with ycell.doc.transaction():
                 del ycell["outputs"][:]
                 ycell["execution_count"] = None
                 ycell["execution_state"] = "running"
+
+                if metadata.get("record_timing", False):
+                    time_info = ycell["metadata"].get("execution", {})
+                    time_info["shell.execute_reply.started"] = execution_start_time
+                    ycell["metadata"]["execution"] = time_info
+            # Emit cell execution start event
+            event_logger.emit(
+                schema_id="https://events.jupyter.org/jupyter_server_nbmodel/cell_execution/v1",
+                data={
+                    "event_type": "execution_start",
+                    "cell_id": metadata["cell_id"],
+                    "document_id": metadata["document_id"],
+                    "timestamp": execution_start_time
+                }
+            )
     outputs = []

     # FIXME we don't check if the session is consistent (aka the kernel is linked to the document)
@@ -244,10 +268,28 @@ async def _execute_snippet(
     reply_content = reply["content"]

     if ycell is not None:
+        execution_end_time = datetime.now(timezone.utc).isoformat()[:-6]
         with ycell.doc.transaction():
             ycell["execution_count"] = reply_content.get("execution_count")
             ycell["execution_state"] = "idle"

+            if metadata and metadata.get("record_timing", False):
+                if reply_content["status"] == "ok":
+                    time_info["shell.execute_reply"] = execution_end_time
+                else:
+                    time_info["execution_failed"] = execution_end_time
+                ycell["metadata"]["execution"] = time_info
+        # Emit cell execution end event
+        event_logger.emit(
+            schema_id="https://events.jupyter.org/jupyter_server_nbmodel/cell_execution/v1",
+            data={
+                "event_type": "execution_end",
+                "cell_id": metadata["cell_id"],
+                "document_id": metadata["document_id"],
+                "success": reply_content["status"]=="ok",
+                "kernel_error": _get_error(outputs),
+                "timestamp": execution_end_time
+            }
+        )
     return {
         "status": reply_content["status"],
         "execution_count": reply_content.get("execution_count"),
@@ -524,9 +566,7 @@ async def post(self, kernel_id: str) -> None:
             msg = f"Unknown kernel with id: {kernel_id}"
             get_logger().error(msg)
             raise tornado.web.HTTPError(status_code=HTTPStatus.NOT_FOUND, reason=msg)
-
         uid = self._execution_stack.put(kernel_id, snippet, metadata)
-
         self.set_status(HTTPStatus.ACCEPTED)
         self.set_header("Location", f"/api/kernels/{kernel_id}/requests/{uid}")
         self.finish("{}")
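When record_timing is enabled, a successful run leaves the cell's notebook metadata with an execution block along these lines (timestamps are illustrative; on failure the handler writes execution_failed instead of shell.execute_reply):

# Illustrative contents of cell["metadata"]["execution"] after a successful run
{
    "shell.execute_reply.started": "2025-01-27T10:15:32.101112",
    "shell.execute_reply": "2025-01-27T10:15:32.654321",
}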
62 changes: 62 additions & 0 deletions jupyter_server_nbmodel/tests/test_handlers.py
@@ -2,6 +2,7 @@
 import datetime
 import json
 import re
+import nbformat

 import pytest
 from jupyter_client.kernelspec import NATIVE_KERNEL_NAME
@@ -149,6 +150,67 @@ async def test_post_erroneous_execute(jp_fetch, pending_kernel_is_ready, snippet
     await asyncio.sleep(1)


+@pytest.mark.timeout(TEST_TIMEOUT)
+async def test_execution_timing_metadata(jp_root_dir, jp_fetch, pending_kernel_is_ready, rtc_create_notebook, jp_serverapp):
+    snippet = "a = 1"
+    nb = nbformat.v4.new_notebook(
+        cells=[nbformat.v4.new_code_cell(source=snippet, execution_count=1)]
+    )
+    nb_content = nbformat.writes(nb, version=4)
+    path, _ = await rtc_create_notebook("test.ipynb", nb_content, store=True)
+    collaboration = jp_serverapp.web_app.settings["jupyter_server_ydoc"]
+    fim = jp_serverapp.web_app.settings["file_id_manager"]
+    document_id = f'json:notebook:{fim.get_id("test.ipynb")}'
+    cell_id = nb["cells"][0].get("id")
+
+    r = await jp_fetch(
+        "api", "kernels", method="POST", body=json.dumps({"name": NATIVE_KERNEL_NAME})
+    )
+    kernel = json.loads(r.body.decode())
+    await pending_kernel_is_ready(kernel["id"])
+
+    response = await wait_for_request(
+        jp_fetch,
+        "api",
+        "kernels",
+        kernel["id"],
+        "execute",
+        method="POST",
+        body=json.dumps({
+            "code": snippet,
+            "metadata": {
+                "cell_id": cell_id,
+                "document_id": document_id,
+                "record_timing": True
+            }
+        }),
+    )
+    assert response.code == 200
+
+    document = await collaboration.get_document(
+        path=path, content_type="notebook", file_format="json", copy=False
+    )
+    cell_data = document.get()["cells"][0]
+    assert 'execution' in cell_data['metadata'], "'execution' does not exist in 'metadata'"
+
+    # Assert that start and end time exist in 'execution'
+    execution = cell_data['metadata']['execution']
+    assert 'shell.execute_reply.started' in execution, "'shell.execute_reply.started' does not exist in 'execution'"
+    assert 'shell.execute_reply' in execution, "'shell.execute_reply' does not exist in 'execution'"
+
+    started_time = execution['shell.execute_reply.started']
+    reply_time = execution['shell.execute_reply']
+
+    started_dt = datetime.datetime.fromisoformat(started_time)
+    reply_dt = datetime.datetime.fromisoformat(reply_time)
+
+    # Assert that reply_time is greater than started_time
+    assert reply_dt > started_dt, "The reply time is not greater than the started time."
+    response2 = await jp_fetch("api", "kernels", kernel["id"], method="DELETE")
+    assert response2.code == 204
+    await asyncio.sleep(1)
+
+
 @pytest.mark.timeout(TEST_TIMEOUT)
 async def test_post_input_execute(jp_fetch, pending_kernel_is_ready):
     # Start the first kernel
13 changes: 12 additions & 1 deletion pyproject.toml
@@ -30,7 +30,14 @@ dynamic = ["version", "description", "authors", "urls", "keywords"]

 [project.optional-dependencies]
 lab = ["jupyterlab>=4.2.0", "jupyter-docprovider>=1.0.0b1", "jupyter-server-ydoc>=1.0.0b1"]
-test = ["pytest~=8.2", "pytest-cov", "pytest-jupyter[server]>=0.6", "pytest-timeout"]
+test = [
+    "pytest~=8.2",
+    "pytest-cov",
+    "pytest-jupyter[server]>=0.6",
+    "pytest-timeout",
+    "jupyter-server-ydoc[test]>=1.0.0b1",
+    "jupyter-server-fileid"
+]
 lint = ["mdformat>0.7", "mdformat-gfm>=0.3.5", "ruff>=0.4.0"]
 typing = ["mypy>=0.990"]

@@ -74,10 +81,14 @@ build_dir = "jupyter_server_nbmodel/labextension"
 [tool.pytest.ini_options]
 filterwarnings = [
     "error",
+    "ignore:Unclosed context <zmq.asyncio.Context:ResourceWarning",
+    "ignore:Unclosed socket <zmq.asyncio.Socket:ResourceWarning",
     "ignore:There is no current event loop:DeprecationWarning",
     "module:make_current is deprecated:DeprecationWarning",
     "module:clear_current is deprecated:DeprecationWarning",
     "module:Jupyter is migrating its paths to use standard platformdirs:DeprecationWarning",
+    # From anyio https://github.com/agronholm/anyio/pull/715
+    "ignore:Unclosed <MemoryObjectSendStream:ResourceWarning",
 ]

 [tool.mypy]
7 changes: 6 additions & 1 deletion src/executor.ts
@@ -108,12 +108,17 @@ export class NotebookCellServerExecutor implements INotebookCellExecutor {
     const code = cell.model.sharedModel.getSource();
     const cellId = cell.model.sharedModel.getId();
     const documentId = notebook.sharedModel.getState('document_id');
+    const { recordTiming } = notebookConfig;

     const init = {
       method: 'POST',
       body: JSON.stringify({
         code,
-        metadata: { cell_id: cellId, document_id: documentId }
+        metadata: {
+          cell_id: cellId,
+          document_id: documentId,
+          record_timing: recordTiming
+        }
       })
     };
     onCellExecutionScheduled({ cell });
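From the server's point of view, the frontend change above simply adds record_timing to the execute request body. Below is a hedged sketch of the same call made directly with the requests library against a locally running server; the base URL, token, kernel id, and the ids inside metadata are placeholders, not values from this PR:

import json

import requests

BASE_URL = "http://localhost:8888"
TOKEN = "<server token>"
KERNEL_ID = "<kernel id>"

body = {
    "code": "a = 1",
    "metadata": {
        "cell_id": "<cell id from the shared model>",
        "document_id": "<document id from the shared model>",
        "record_timing": True,
    },
}
resp = requests.post(
    f"{BASE_URL}/api/kernels/{KERNEL_ID}/execute",
    headers={"Authorization": f"token {TOKEN}"},
    data=json.dumps(body),
)
# The handler replies 202 Accepted with a Location header of the form
# /api/kernels/<kernel_id>/requests/<uid> that can be polled for the result.
print(resp.status_code, resp.headers.get("Location"))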