Skip to content

Commit

Permalink
update docling and test results
Browse files Browse the repository at this point in the history
Signed-off-by: Michele Dolfi <[email protected]>
  • Loading branch information
dolfim-ibm committed Feb 10, 2025
1 parent ad1412e commit cafe758
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 12 deletions.
4 changes: 2 additions & 2 deletions transforms/language/doc_chunk/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
-docling-core==2.3.0
-pydantic>=2.0.0,<2.10.0
+docling-core==2.18.0
+pydantic>=2.0.0
 llama-index-core>=0.11.22,<0.12.0
21 changes: 11 additions & 10 deletions transforms/language/doc_chunk/test-data/expected/metadata.json
Original file line number Diff line number Diff line change
@@ -5,8 +5,8 @@
 "job name": "doc_chunk",
 "job type": "pure python",
 "job id": "job_id",
-"start_time": "2024-10-30 18:38:40",
-"end_time": "2024-10-30 18:38:40",
+"start_time": "2025-02-10 15:20:06",
+"end_time": "2025-02-10 15:20:07",
 "status": "success"
 },
 "code": {
@@ -25,6 +25,7 @@
 "output_bbox_column_name": "bbox",
 "chunk_size_tokens": 128,
 "chunk_overlap_tokens": 30,
+"dl_min_chunk_len": null,
 "checkpointing": false,
 "max_files": -1,
 "random_samples": -1,
@@ -34,29 +35,29 @@
 "num_processors": 0
 },
 "execution_stats": {
-"cpus": 19.5,
+"cpus": 25.8,
 "gpus": 0,
-"memory": 27.48,
+"memory": 24.41,
 "object_store": 0,
 "execution time, min": 0.001
 },
 "job_output_stats": {
 "source_files": 1,
 "source_size": 12073,
 "result_files": 1,
-"result_size": 14363,
-"processing_time": 0.043,
+"result_size": 16705,
+"processing_time": 0.044,
 "nfiles": 1,
-"nrows": 39,
+"nrows": 29,
 "source_doc_count": 1,
-"result_doc_count": 39
+"result_doc_count": 29
 },
 "source": {
-"name": "/Users/dol/codes/data-prep-kit/transforms/language/doc_chunk/python/test-data/input",
+"name": "/Users/dol/codes/data-prep-kit/transforms/language/doc_chunk/test-data/input",
 "type": "path"
 },
 "target": {
-"name": "/Users/dol/codes/data-prep-kit/transforms/language/doc_chunk/python/output",
+"name": "/Users/dol/codes/data-prep-kit/transforms/language/doc_chunk/output",
 "type": "path"
 }
 }
Binary file modified transforms/language/doc_chunk/test-data/expected/test1.parquet
Binary file not shown.

0 comments on commit cafe758

Please sign in to comment.