Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rfc: merge upload_examples/_multipart #1477

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion python/docs/create_api_rst.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Script for auto-generating api_reference.rst."""

Check notice on line 1 in python/docs/create_api_rst.py

View workflow job for this annotation

GitHub Actions / benchmark

Benchmark results

...........
WARNING: the benchmark result may be unstable
* the standard deviation (75.5 ms) is 11% of the mean (675 ms)

Try to rerun the benchmark with more runs, values and/or loops.
Run 'python -m pyperf system tune' command to reduce the system jitter.
Use pyperf stats, pyperf dump and pyperf hist to analyze results.
Use --quiet option to hide these warnings.

create_5_000_run_trees: Mean +- std dev: 675 ms +- 75 ms
...........
create_10_000_run_trees: Mean +- std dev: 1.32 sec +- 0.09 sec
...........
create_20_000_run_trees: Mean +- std dev: 2.64 sec +- 0.18 sec
...........
dumps_class_nested_py_branch_and_leaf_200x400: Mean +- std dev: 713 us +- 9 us
...........
dumps_class_nested_py_leaf_50x100: Mean +- std dev: 24.7 ms +- 0.2 ms
...........
dumps_class_nested_py_leaf_100x200: Mean +- std dev: 104 ms +- 3 ms
...........
dumps_dataclass_nested_50x100: Mean +- std dev: 24.9 ms +- 0.4 ms
...........
WARNING: the benchmark result may be unstable
* the standard deviation (15.6 ms) is 22% of the mean (69.6 ms)

Try to rerun the benchmark with more runs, values and/or loops.
Run 'python -m pyperf system tune' command to reduce the system jitter.
Use pyperf stats, pyperf dump and pyperf hist to analyze results.
Use --quiet option to hide these warnings.

dumps_pydantic_nested_50x100: Mean +- std dev: 69.6 ms +- 15.6 ms
...........
dumps_pydanticv1_nested_50x100: Mean +- std dev: 195 ms +- 3 ms

Check notice on line 1 in python/docs/create_api_rst.py

View workflow job for this annotation

GitHub Actions / benchmark

Comparison against main

+-----------------------------------------------+----------+------------------------+
| Benchmark                                     | main     | changes                |
+===============================================+==========+========================+
| dumps_pydanticv1_nested_50x100                | 220 ms   | 195 ms: 1.13x faster   |
+-----------------------------------------------+----------+------------------------+
| create_20_000_run_trees                       | 2.70 sec | 2.64 sec: 1.02x faster |
+-----------------------------------------------+----------+------------------------+
| create_5_000_run_trees                        | 684 ms   | 675 ms: 1.01x faster   |
+-----------------------------------------------+----------+------------------------+
| dumps_class_nested_py_leaf_50x100             | 25.0 ms  | 24.7 ms: 1.01x faster  |
+-----------------------------------------------+----------+------------------------+
| dumps_dataclass_nested_50x100                 | 25.2 ms  | 24.9 ms: 1.01x faster  |
+-----------------------------------------------+----------+------------------------+
| create_10_000_run_trees                       | 1.33 sec | 1.32 sec: 1.01x faster |
+-----------------------------------------------+----------+------------------------+
| dumps_class_nested_py_leaf_100x200            | 104 ms   | 104 ms: 1.00x faster   |
+-----------------------------------------------+----------+------------------------+
| dumps_class_nested_py_branch_and_leaf_200x400 | 709 us   | 713 us: 1.01x slower   |
+-----------------------------------------------+----------+------------------------+
| dumps_pydantic_nested_50x100                  | 67.0 ms  | 69.6 ms: 1.04x slower  |
+-----------------------------------------------+----------+------------------------+
| Geometric mean                                | (ref)    | 1.02x faster           |
+-----------------------------------------------+----------+------------------------+

from __future__ import annotations

Expand Down Expand Up @@ -111,7 +111,9 @@
else (
"enum"
if issubclass(type_, Enum)
else "Pydantic" if issubclass(type_, BaseModel) else "Regular"
else "Pydantic"
if issubclass(type_, BaseModel)
else "Regular"
)
)
classes_.append(
Expand Down
50 changes: 25 additions & 25 deletions python/langsmith/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -4709,15 +4709,10 @@ def update_example(
def update_examples(
self,
*,
example_ids: Sequence[ID_TYPE],
inputs: Optional[Sequence[Optional[Dict[str, Any]]]] = None,
outputs: Optional[Sequence[Optional[Mapping[str, Any]]]] = None,
metadata: Optional[Sequence[Optional[Dict]]] = None,
splits: Optional[Sequence[Optional[str | List[str]]]] = None,
dataset_ids: Optional[Sequence[Optional[ID_TYPE]]] = None,
attachments_operations: Optional[
Sequence[Optional[ls_schemas.AttachmentsOperations]]
] = None,
dataset_id: ID_TYPE | None = None,
updates: Optional[List[ls_schemas.ExampleUpdateWithAttachments]] = None,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should rename `ExampleUpdateWithAttachments` to `ExampleUpdate`, provided the new name does not collide with an existing class.

dangerously_allow_filesystem: bool = False,
**kwargs: Any,
) -> Dict[str, Any]:
"""Update multiple examples.

Expand All @@ -4741,24 +4736,29 @@ def update_examples(
Returns:
Dict[str, Any]: The response from the server (specifies the number of examples updated).
"""
if attachments_operations is not None:
if kwargs and any(dataset_id, updates, dangerously_allow_filesystem):
raise ValueError("...")
elif kwargs and not kwargs.get("example_ids"):
raise ValueError("...")
elif kwargs:
example_ids = kwargs.pop("example_ids")
else:
return self.upload_examples_multipart(
dataset_id=dataset_id,
uploads=uploads,
dangerously_allow_filesystem=dangerously_allow_filesystem,
)

if kwargs.get("attachments_operations") is not None:
if not (self.info.instance_flags or {}).get(
"dataset_examples_multipart_enabled", False
):
raise ValueError(
"Your LangSmith version does not allow using the attachment operations, please update to the latest version."
)
sequence_args = {
"inputs": inputs,
"outputs": outputs,
"metadata": metadata,
"splits": splits,
"dataset_ids": dataset_ids,
"attachments_operations": attachments_operations,
}
# Since inputs are required, we will check against them
examples_len = len(example_ids)
for arg_name, arg_value in sequence_args.items():
for arg_name, arg_value in kwargs.items():
if arg_value is not None and len(arg_value) != examples_len:
raise ValueError(
f"Length of {arg_name} ({len(arg_value)}) does not match"
Expand All @@ -4776,12 +4776,12 @@ def update_examples(
}
for id_, in_, out_, metadata_, split_, dataset_id_, attachments_operations_ in zip(
example_ids,
inputs or [None] * len(example_ids),
outputs or [None] * len(example_ids),
metadata or [None] * len(example_ids),
splits or [None] * len(example_ids),
dataset_ids or [None] * len(example_ids),
attachments_operations or [None] * len(example_ids),
kwargs.get("inputs", [None] * len(example_ids)),
kwargs.get("outputs", [None] * len(example_ids)),
kwargs.get("metadata", [None] * len(example_ids)),
kwargs.get("splits", [None] * len(example_ids)),
kwargs.get("dataset_ids", [None] * len(example_ids)),
kwargs.get("attachments_operations", [None] * len(example_ids)),
)
]
response = self.request_with_retries(
Expand Down
Loading