Skip to content

Commit

Permalink
Merge pull request #968 from matouma/pending-version-change/1.0.0
Browse files Browse the repository at this point in the history
Starting new release cycle after cutoff 1.0.0
  • Loading branch information
touma-I authored Jan 24, 2025
2 parents 84b081a + 8e927bd commit 38039f6
Show file tree
Hide file tree
Showing 5 changed files with 22 additions and 24 deletions.
2 changes: 1 addition & 1 deletion .make.versions
Original file line number Diff line number Diff line change
Expand Up @@ -63,4 +63,4 @@ endif
#
# If you change the version numbers, be sure to run "make set-versions" to
# update version numbers across the transform (e.g., pyproject.toml).
TRANSFORMS_PKG_VERSION=1.0.0a0
TRANSFORMS_PKG_VERSION=1.0.1.dev0
14 changes: 14 additions & 0 deletions release-notes.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,19 @@
# Data Prep Kit Release notes

## Release 1.0.0 - 1/24/2025

## General

1. Refactored all language transforms and implemented simplified APIs for the refactored transforms
1. Added notebook examples for each of the transforms
1. Streamlined documentation and added tutorial for developers who want to build new transforms
1. Made other minor enhancements and bug fixes to transforms, workflow pipelines, and CI/CD makefiles

### Transforms

1. Added a new similarity transform (for detecting confidential content, copyright violations, and/or plagiarism in documents)


## Release 0.2.3 - 12/15/2024

## General
Expand Down
2 changes: 1 addition & 1 deletion transforms/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "data_prep_toolkit_transforms"
version = "1.0.0a6"
version = "1.0.1.dev0"
requires-python = ">=3.10,<3.13"
keywords = ["transforms", "data preprocessing", "data preparation", "llm", "generative", "ai", "fine-tuning", "llmapps" ]
description = "Data Preparation Toolkit Transforms using Ray"
Expand Down
22 changes: 3 additions & 19 deletions transforms/transforms-1.0-lang-ray.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
"outputs": [],
"source": [
"%%capture\n",
"!pip install 'data-prep-toolkit-transforms[ray, language]==1.0.0a4'\n",
"!pip install 'data-prep-toolkit-transforms[ray,language]'\n",
"import pyarrow.parquet as pq\n",
"import pandas as pd"
]
Expand Down Expand Up @@ -335,8 +335,8 @@
"outputs": [],
"source": [
"##### **** To explore the output from eDedup, run the code below\n",
"table = pq.read_table('files-ededup/arxiv_org_2408.09869v5.pdf_application.parquet')\n",
"table.to_pandas()"
"#table = pq.read_table('files-ededup/arxiv_org_2408.09869v5.pdf_application.parquet')\n",
"#table.to_pandas()"
]
},
{
Expand Down Expand Up @@ -595,22 +595,6 @@
"#import glob\n",
"#glob.glob(\"files-fdedup/*\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "36c243b7-5097-4a3c-bd4e-45c3b8273a90",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "52d85768-7a15-46bc-8c46-6782dba53d69",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
6 changes: 3 additions & 3 deletions transforms/transforms-1.0-lang.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
"outputs": [],
"source": [
"%%capture\n",
"!pip install 'data-prep-toolkit-transforms[language]==1.0.0a1'\n",
"!pip install 'data-prep-toolkit-transforms[language]'\n",
"import pyarrow.parquet as pq\n",
"import pandas as pd"
]
Expand Down Expand Up @@ -330,8 +330,8 @@
"outputs": [],
"source": [
"##### **** To explore the output from eDedup, run the code below\n",
"table = pq.read_table('files-ededup/arxiv_org_2408.09869v5.pdf_application.parquet')\n",
"table.to_pandas()"
"#table = pq.read_table('files-ededup/arxiv_org_2408.09869v5.pdf_application.parquet')\n",
"#table.to_pandas()"
]
},
{
Expand Down

0 comments on commit 38039f6

Please sign in to comment.