diff --git a/.github/workflows/test-language-gneissweb_classification.yml b/.github/workflows/test-language-gneissweb_classification.yml new file mode 100644 index 000000000..16e942d60 --- /dev/null +++ b/.github/workflows/test-language-gneissweb_classification.yml @@ -0,0 +1,133 @@ +# +# DO NOT EDIT THIS FILE: it is generated from test-transform.template, Edit there and run make to change these files +# +name: Test - transforms/language/gneissweb_classification + +on: + workflow_dispatch: + push: + branches: + - "dev" + - "releases/**" + tags: + - "*" + paths: + - ".make.*" + - "transforms/.make.transforms" + - "transforms/language/gneissweb_classification/**" + - "data-processing-lib/**" + - "!transforms/language/gneissweb_classification/**/kfp_ray/**" # This is/will be tested in separate workflow + - "!data-processing-lib/**/test/**" + - "!data-processing-lib/**/test-data/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" + pull_request: + branches: + - "dev" + - "releases/**" + paths: + - ".make.*" + - "transforms/.make.transforms" + - "transforms/language/gneissweb_classification/**" + - "data-processing-lib/**" + - "!transforms/language/gneissweb_classification/**/kfp_ray/**" # This is/will be tested in separate workflow + - "!data-processing-lib/**/test/**" + - "!data-processing-lib/**/test-data/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" + +# Taken from https://stackoverflow.com/questions/66335225/how-to-cancel-previous-runs-in-the-pr-when-you-push-new-commitsupdate-the-curre +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + check_if_push_image: + # check whether the Docker images should be pushed to the remote repository + # The images are pushed if it is a merge to dev branch or a new tag is created. + # The latter being part of the release process. 
+ # The images tag is derived from the value of the DOCKER_IMAGE_VERSION variable set in the .make.versions file. + runs-on: ubuntu-22.04 + outputs: + publish_images: ${{ steps.version.outputs.publish_images }} + steps: + - id: version + run: | + publish_images='false' + if [[ ${GITHUB_REF} == refs/heads/dev && ${GITHUB_EVENT_NAME} != 'pull_request' && ${GITHUB_REPOSITORY} == IBM/data-prep-kit ]] ; + then + publish_images='true' + fi + if [[ ${GITHUB_REF} == refs/tags/* && ${GITHUB_REPOSITORY} == IBM/data-prep-kit ]] ; + then + publish_images='true' + fi + echo "publish_images=$publish_images" >> "$GITHUB_OUTPUT" + test-src: + runs-on: ubuntu-22.04 + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Free up space in github runner + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/local/.ghcup + sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true + df -h + - name: Test transform source in transforms/language/gneissweb_classification + run: | + if [ -e "transforms/language/gneissweb_classification/Makefile" ]; then + make -C transforms/language/gneissweb_classification DOCKER=docker test-src + else + echo "transforms/language/gneissweb_classification/Makefile not found - source testing disabled for this transform." 
+ fi + test-image: + needs: [check_if_push_image] + runs-on: ubuntu-22.04 + timeout-minutes: 120 + env: + DOCKER_REGISTRY_USER: ${{ secrets.DOCKER_REGISTRY_USER }} + DOCKER_REGISTRY_KEY: ${{ secrets.DOCKER_REGISTRY_KEY }} + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Free up space in github runner + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + sudo rm -rf /opt/ghc + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/lib/jvm /usr/local/.ghcup + sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true + df -h + - name: Test transform image in transforms/language/gneissweb_classification + run: | + if [ -e "transforms/language/gneissweb_classification/Makefile" ]; then + if [ -d "transforms/language/gneissweb_classification/spark" ]; then + make -C data-processing-lib/spark DOCKER=docker image + fi + make -C transforms/language/gneissweb_classification DOCKER=docker test-image + else + echo "transforms/language/gneissweb_classification/Makefile not found - testing disabled for this transform." + fi + - name: Print space + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + docker images + - name: Publish images + if: needs.check_if_push_image.outputs.publish_images == 'true' + run: | + if [ -e "transforms/language/gneissweb_classification/Makefile" ]; then + make -C transforms/language/gneissweb_classification publish + else + echo "transforms/language/gneissweb_classification/Makefile not found - publishing disabled for this transform." 
+ fi diff --git a/transforms/language/gneissweb_classification/Dockerfile.python b/transforms/language/gneissweb_classification/Dockerfile.python new file mode 100644 index 000000000..420edae58 --- /dev/null +++ b/transforms/language/gneissweb_classification/Dockerfile.python @@ -0,0 +1,46 @@ +FROM docker.io/python:3.11.11-slim-bullseye + +RUN pip install --upgrade --no-cache-dir pip + +# install pytest +RUN pip install --no-cache-dir pytest + +# Create a user and use it to run the transform +RUN useradd -ms /bin/bash dpk +USER dpk +WORKDIR /home/dpk +ARG DPK_WHEEL_FILE_NAME + +# Copy and install data processing libraries +# These are expected to be placed in the docker context before this is run (see the make image). +COPY --chown=dpk:root data-processing-dist/ data-processing-dist/ +RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME} + +# END OF STEPS destined for a data-prep-kit base image + +# set up environment required to install and use huggingface and fasttext +USER root +RUN apt update && apt install gcc g++ -y +RUN mkdir -p /home/dpk/.cache/huggingface/hub && chmod -R 777 /home/dpk/.cache/huggingface/hub +USER dpk + +COPY --chown=dpk:root dpk_gneissweb_classification/ dpk_gneissweb_classification/ +COPY --chown=dpk:root requirements.txt requirements.txt +RUN pip install --no-cache-dir -r requirements.txt + +# clean up apt +USER root +RUN apt-get remove gcc g++ -y \ + && apt clean \ + && rm -rf /var/cache/apt/archives/* /var/lib/apt/lists/* +USER dpk + + +# Set environment +ENV PYTHONPATH /home/dpk + +# Put these at the end since they seem to upset the docker cache. 
+ARG BUILD_DATE +ARG GIT_COMMIT +LABEL build-date=$BUILD_DATE +LABEL git-commit=$GIT_COMMIT diff --git a/transforms/language/gneissweb_classification/Dockerfile.ray b/transforms/language/gneissweb_classification/Dockerfile.ray new file mode 100644 index 000000000..42f12a118 --- /dev/null +++ b/transforms/language/gneissweb_classification/Dockerfile.ray @@ -0,0 +1,46 @@ +ARG BASE_IMAGE=docker.io/rayproject/ray:2.24.0-py311 + +FROM ${BASE_IMAGE} + +# see https://docs.openshift.com/container-platform/4.17/openshift_images/create-images.html#use-uid_create-images +USER root +RUN chown ray:root /home/ray && chmod 775 /home/ray +USER ray + +RUN pip install --upgrade --no-cache-dir pip + +# install pytest +RUN pip install --no-cache-dir pytest +ARG DPK_WHEEL_FILE_NAME + +# set up environment required to install and use huggingface and fasttext +USER root +RUN sudo apt update && sudo apt install gcc g++ -y +RUN mkdir -p /home/ray/.cache/huggingface/hub && chmod -R 777 /home/ray/.cache/huggingface/hub +USER ray + +# Copy and install data processing libraries +# These are expected to be placed in the docker context before this is run (see the make image). +COPY --chmod=775 --chown=ray:root data-processing-dist data-processing-dist +RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}[ray] + + +COPY --chmod=775 --chown=ray:root dpk_gneissweb_classification/ dpk_gneissweb_classification/ +COPY --chmod=775 --chown=ray:root requirements.txt requirements.txt +RUN pip install --no-cache-dir -r requirements.txt + +# clean up apt +USER root +RUN sudo apt remove gcc g++ -y \ + && sudo apt clean \ + && sudo rm -rf /var/cache/apt/archives/* /var/lib/apt/lists/* +USER ray + +# Set environment +ENV PYTHONPATH /home/ray + +# Put these at the end since they seem to upset the docker cache. 
+ARG BUILD_DATE +ARG GIT_COMMIT +LABEL build-date=$BUILD_DATE +LABEL git-commit=$GIT_COMMIT diff --git a/transforms/language/gneissweb_classification/Makefile b/transforms/language/gneissweb_classification/Makefile new file mode 100644 index 000000000..f3a088ff3 --- /dev/null +++ b/transforms/language/gneissweb_classification/Makefile @@ -0,0 +1,36 @@ +REPOROOT=../../.. +# Use make help, to see the available rules +include $(REPOROOT)/transforms/.make.cicd.targets + +# +# This is intended to be included across the Makefiles provided within +# a given transform's directory tree, so must use compatible syntax. +# +################################################################################ +# This defines the name of the transform and is used to match against +# expected files and is used to define the transform's image name. +TRANSFORM_NAME=$(shell basename `pwd`) + +################################################################################ + + + +run-cli-sample: + make venv + source venv/bin/activate && \ + $(PYTHON) -m dpk_$(TRANSFORM_NAME).transform_python \ + --data_local_config "{ 'input_folder' : 'test-data/input', 'output_folder' : 'output'}" \ + --gcls_model_credential "PUT YOUR OWN HUGGINGFACE CREDENTIAL" \ + --gcls_model_file_name "model.bin" \ + --gcls_model_url "facebook/fasttext-language-identification" \ + --gcls_content_column_name "text" + +run-cli-ray-sample: + make venv + source venv/bin/activate && \ + $(PYTHON) -m dpk_$(TRANSFORM_NAME).ray.transform \ + --run_locally True --data_local_config "{ 'input_folder' : 'test-data/input', 'output_folder' : 'output'}" \ + --gcls_model_credential "PUT YOUR OWN HUGGINGFACE CREDENTIAL" \ + --gcls_model_file_name "model.bin" \ + --gcls_model_url "facebook/fasttext-language-identification" \ + --gcls_content_column_name "text" diff --git a/transforms/language/gneissweb_classification/README.md b/transforms/language/gneissweb_classification/README.md new file mode 100644 index 000000000..4c22cf033 --- 
/dev/null
+++ b/transforms/language/gneissweb_classification/README.md
@@ -0,0 +1,79 @@
+# Gneissweb Classification Transform
+The Gneissweb Classification transform serves as a simple exemplar to demonstrate the development
+of a simple 1:1 transform.
+Please see the set of [transform project conventions](../../README.md#transform-project-conventions) for details on general project conventions, transform configuration, testing and IDE set up.
+
+## Summary
+This transform will classify each text, with a confidence score, using a fasttext classification model such as [ref](https://huggingface.co/facebook/fasttext-language-identification).
+
+## Configuration and command line Options
+
+The set of dictionary keys holding [ClassificationTransform](dpk_gneissweb_classification/transform.py)
+configuration for values are as follows:
+
+| Configuration Parameters | Default | Description |
+|------------|----------|--------------|
+| gcls_model_credential | _unset_ | specifies the credential used to get the model. This will be a huggingface token. [Guide to get huggingface token](https://huggingface.co/docs/hub/security-tokens) |
+| gcls_model_file_name | _unset_ | specifies the filename of the model to fetch, like `model.bin` |
+| gcls_model_url | _unset_ | specifies the url where the model is located. For fasttext, this will be the repo name of the model, like `facebook/fasttext-language-identification` |
+| gcls_content_column_name | `contents` | specifies the name of the column containing documents |
+| gcls_output_label_column_name | `label` | specifies the name of the output column to hold predicted classes |
+| gcls_output_score_column_name | `score` | specifies the name of the output column to hold the score of prediction |
+
+## Running
+
+### Launched Command Line Options
+The following command line arguments are available in addition to
+the options provided by
+the [launcher](../../../data-processing-lib/doc/launcher-options.md).
+The prefix gcls is the short name for Gneissweb CLaSsification.
+```
+  --gcls_model_credential GCLS_MODEL_CREDENTIAL   the credential used to get the model. This will be a huggingface token.
+  --gcls_model_file_name GCLS_MODEL_FILE_NAME   filename of the model to fetch, like `model.bin`
+  --gcls_model_url GCLS_MODEL_URL   the url where the model is located. For fasttext, this will be the repo name of the model, like `facebook/fasttext-language-identification`
+  --gcls_content_column_name GCLS_CONTENT_COLUMN_NAME   name of the column containing documents
+  --gcls_output_label_column_name GCLS_OUTPUT_LABEL_COLUMN_NAME   column name to store classification results
+  --gcls_output_score_column_name GCLS_OUTPUT_SCORE_COLUMN_NAME   column name to store the score of prediction
+```
+These correspond to the configuration keys described above.
+
+### Code example
+Here is a sample [notebook](gneissweb_classification.ipynb)
+
+## Troubleshooting guide
+
+For M1 Mac users, if you see the following error during the make command, `error: command '/usr/bin/clang' failed with exit code 1`, you should follow [this step](https://freeman.vc/notes/installing-fasttext-on-an-m1-mac)
+
+
+### Transforming data using the transform image
+
+To use the transform image to transform your data, please refer to the
+[running images quickstart](../../../doc/quick-start/run-transform-image.md),
+substituting the name of this transform image and runtime as appropriate.
+
+# Gneissweb Classification Ray Transform
+Please see the set of
+[transform project conventions](../../README.md#transform-project-conventions)
+for details on general project conventions, transform configuration,
+testing and IDE set up.
+
+## Summary
+This project wraps the gneissweb classification transform with a Ray runtime.
+
+## Configuration and command line Options
+
+Gneissweb Classification configuration and command line options are the same as for the base python transform.
+ +### Launched Command Line Options +In addition to those available to the transform as defined here, +the set of +[launcher options](../../../data-processing-lib/doc/launcher-options.md) are available. + +### Code example (Ray version) +Here is a sample [notebook](gneissweb_classification-ray.ipynb) + +### Transforming data using the transform image + +To use the transform image to transform your data, please refer to the +[running images quickstart](../../../doc/quick-start/run-transform-image.md), +substituting the name of this transform image and runtime as appropriate. diff --git a/transforms/language/gneissweb_classification/dpk_gneissweb_classification/classification_models.py b/transforms/language/gneissweb_classification/dpk_gneissweb_classification/classification_models.py new file mode 100644 index 000000000..f24a06487 --- /dev/null +++ b/transforms/language/gneissweb_classification/dpk_gneissweb_classification/classification_models.py @@ -0,0 +1,63 @@ +# (C) Copyright IBM Corp. 2024. +# Licensed under the Apache License, Version 2.0 (the “License”); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an “AS IS” BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+################################################################################
+
+import math
+from abc import ABCMeta, abstractmethod
+
+import fasttext
+import numpy as np
+from huggingface_hub import hf_hub_download
+import os
+from langcodes import standardize_tag
+
+
+class ClassificationModel(metaclass=ABCMeta):
+    @abstractmethod
+    def detect_label(self, text: str) -> tuple[str, float]:
+        pass
+
+
+class NoopModel(ClassificationModel):
+    def detect_label(self, text: str) -> tuple[str, float]:  # pylint: disable=unused-argument
+        return "en", 0.0
+
+
+class FastTextModel(ClassificationModel):
+    def __init__(self, url, file_name, credential):
+        model_path = hf_hub_download(repo_id=url, filename=file_name, token=credential)
+        self.nlp = fasttext.load_model(model_path)
+        self.url = url
+
+    def detect_label(self, text: str) -> tuple[str, float]:
+        if self.url == "facebook/fasttext-language-identification":
+            label, score = self.nlp.predict(
+                text.replace("\n", " "), 1
+            )  # replace newline to avoid ERROR: predict processes one line at a time (remove '\n') skipping the file
+            return standardize_tag(label[0].replace("__label__", "")), math.floor(score[0] * 1000) / 1000
+        elif self.url == "mlfoundations/fasttext-oh-eli5":
+            label, score = self.nlp.predict(" ".join(text.strip().splitlines()))
+            score = score[0]
+            if label[0] == "__label__cc":  # predict() returns a tuple of labels; compare its first element
+                score = 1 - score
+            return label[0].replace("__label__", ""), score
+
+        else:
+            label, score = self.nlp.predict(
+                text.replace("\n", " "), 1
+            )  # replace newline to avoid ERROR: predict processes one line at a time (remove '\n') skipping the file
+            return label[0].replace("__label__", ""), math.floor(score[0] * 1000) / 1000
+
+
+class ClassificationModelFactory:
+    @staticmethod
+    def create_model(url: str, file_name: str, credential: str) -> ClassificationModel:
+        return FastTextModel(url, file_name, credential)
diff --git 
a/transforms/language/gneissweb_classification/dpk_gneissweb_classification/local.py b/transforms/language/gneissweb_classification/dpk_gneissweb_classification/local.py new file mode 100644 index 000000000..c5de1a4d4 --- /dev/null +++ b/transforms/language/gneissweb_classification/dpk_gneissweb_classification/local.py @@ -0,0 +1,48 @@ +# (C) Copyright IBM Corp. 2024. +# Licensed under the Apache License, Version 2.0 (the “License”); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an “AS IS” BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +import os + +from data_processing.data_access import DataAccessLocal +from dpk_gneissweb_classification.transform import ( + ClassificationTransform, + content_column_name_key, + model_credential_key, + model_file_name_key, + model_url_key, +) + + +# create parameters +input_folder = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "test-data", "input")) + +classification_params = { + model_credential_key: "PUT YOUR OWN HUGGINGFACE CREDENTIAL", + model_file_name_key: "model.bin", + model_url_key:"facebook/fasttext-language-identification", + content_column_name_key: "text", +} +if __name__ == "__main__": + # Here we show how to run outside of the runtime + # Create and configure the transform. + transform = ClassificationTransform(classification_params) + # Use the local data access to read a parquet table. 
+ data_access = DataAccessLocal() + table, _ = data_access.get_table(os.path.join(input_folder, "test_01.parquet")) + print(f"input table: {table}") + # Transform the table + try: + table_list, metadata = transform.transform(table) + print(f"\noutput table: {table_list}") + print(f"output metadata : {metadata}") + except Exception as e: + print(f"Exception executing transform {e}") diff --git a/transforms/language/gneissweb_classification/dpk_gneissweb_classification/local_python.py b/transforms/language/gneissweb_classification/dpk_gneissweb_classification/local_python.py new file mode 100644 index 000000000..bc2845d9e --- /dev/null +++ b/transforms/language/gneissweb_classification/dpk_gneissweb_classification/local_python.py @@ -0,0 +1,54 @@ +# (C) Copyright IBM Corp. 2024. +# Licensed under the Apache License, Version 2.0 (the “License”); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an “AS IS” BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+################################################################################ + +import os +import sys + +from data_processing.runtime.pure_python import PythonTransformLauncher +from data_processing.utils import ParamsUtils +from dpk_gneissweb_classification.transform import ( + content_column_name_cli_param, + model_credential_cli_param, + model_file_name_cli_param, + model_url_cli_param, +) +from dpk_gneissweb_classification.transform_python import ClassificationPythonTransformConfiguration + + +# create parameters +input_folder = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "test-data", "input")) +output_folder = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "output")) +local_conf = { + "input_folder": input_folder, + "output_folder": output_folder, +} +code_location = {"github": "github", "commit_hash": "12345", "path": "path"} +params = { + # Data access. Only required parameters are specified + "data_local_config": ParamsUtils.convert_to_ast(local_conf), + # execution info + "runtime_pipeline_id": "pipeline_id", + "runtime_job_id": "job_id", + "runtime_code_location": ParamsUtils.convert_to_ast(code_location), + # classification params + model_credential_cli_param: "PUT YOUR OWN HUGGINGFACE CREDENTIAL", + model_file_name_cli_param: "model.bin", + model_url_cli_param: "facebook/fasttext-language-identification", + content_column_name_cli_param: "text", +} +if __name__ == "__main__": + # Set the simulated command line args + sys.argv = ParamsUtils.dict_to_req(d=params) + # create launcher + launcher = PythonTransformLauncher(runtime_config=ClassificationPythonTransformConfiguration()) + # Launch the ray actor(s) to process the input + launcher.launch() diff --git a/transforms/language/gneissweb_classification/dpk_gneissweb_classification/nlp.py b/transforms/language/gneissweb_classification/dpk_gneissweb_classification/nlp.py new file mode 100644 index 000000000..a4bfe3ac5 --- /dev/null +++ 
b/transforms/language/gneissweb_classification/dpk_gneissweb_classification/nlp.py @@ -0,0 +1,46 @@ +# (C) Copyright IBM Corp. 2024. +# Licensed under the Apache License, Version 2.0 (the “License”); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an “AS IS” BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +from typing import Any + +import pyarrow as pa +from data_processing.utils import TransformUtils, get_logger +from dpk_gneissweb_classification.classification_models import ClassificationModel + + +logger = get_logger(__name__) + + +def get_label_ds_pa( + table: pa.table, + nlp: ClassificationModel, + content_column_name: str, + output_label_column_name: str, + output_score_column_name: str, +) -> tuple[pa.table, dict[str, Any]]: + detected_label = pa.Table.from_pylist( + list( + map( + lambda r: {"label": r[0], "score": r[1]}, + map(nlp.detect_label, table[content_column_name].to_pylist()), + ) + ) + ) + stats = pa.table([detected_label["label"]], names=["label"]).group_by("label").aggregate([("label", "count")]) + stats_dict = {} + for batch in stats.to_batches(): + d = batch.to_pydict() + for label, count in zip(d["label"], d["label_count"]): + stats_dict[label] = count + result = TransformUtils.add_column(table=table, name=output_label_column_name, content=detected_label["label"]) + result = TransformUtils.add_column(table=result, name=output_score_column_name, content=detected_label["score"]) + return result, stats_dict diff --git 
a/transforms/language/gneissweb_classification/dpk_gneissweb_classification/ray/local.py b/transforms/language/gneissweb_classification/dpk_gneissweb_classification/ray/local.py new file mode 100644 index 000000000..a77a6bc76 --- /dev/null +++ b/transforms/language/gneissweb_classification/dpk_gneissweb_classification/ray/local.py @@ -0,0 +1,64 @@ +# (C) Copyright IBM Corp. 2024. +# Licensed under the Apache License, Version 2.0 (the “License”); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an “AS IS” BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +import os +import sys + +from data_processing.utils import ParamsUtils +from data_processing_ray.runtime.ray import RayTransformLauncher +from dpk_gneissweb_classification.ray.transform import ClassificationRayTransformConfiguration +from dpk_gneissweb_classification.transform import ( + content_column_name_cli_param, + model_credential_cli_param, + model_file_name_cli_param, + model_url_cli_param, + output_label_column_name_cli_param, + output_score_column_name_cli_param, +) + + +# create parameters +input_folder = os.path.abspath(os.path.join(os.path.dirname(__file__), "../..", "test-data", "input")) +output_folder = os.path.abspath(os.path.join(os.path.dirname(__file__), "../..", "output")) +local_conf = { + "input_folder": input_folder, + "output_folder": output_folder, +} +worker_options = {"num_cpus": 0.8} +code_location = {"github": "github", "commit_hash": "12345", "path": "path"} +params = { + # where to run + "run_locally": True, + # Data access. 
Only required parameters are specified + "data_local_config": ParamsUtils.convert_to_ast(local_conf), + # orchestrator + "runtime_worker_options": ParamsUtils.convert_to_ast(worker_options), + "runtime_num_workers": 3, + "runtime_pipeline_id": "pipeline_id", + "runtime_job_id": "job_id", + "runtime_creation_delay": 0, + "runtime_code_location": ParamsUtils.convert_to_ast(code_location), + # classification params + model_credential_cli_param: "PUT YOUR OWN HUGGINGFACE CREDENTIAL", + model_file_name_cli_param: "model.bin", + model_url_cli_param:"facebook/fasttext-language-identification", + content_column_name_cli_param: "text", + output_label_column_name_cli_param: "ft_label", + output_score_column_name_cli_param: "ft_score", +} +if __name__ == "__main__": + # Set the simulated command line args + sys.argv = ParamsUtils.dict_to_req(d=params) + # create launcher + launcher = RayTransformLauncher(ClassificationRayTransformConfiguration()) + # Launch the ray actor(s) to process the input + launcher.launch() diff --git a/transforms/language/gneissweb_classification/dpk_gneissweb_classification/ray/s3.py b/transforms/language/gneissweb_classification/dpk_gneissweb_classification/ray/s3.py new file mode 100644 index 000000000..af91ca4c0 --- /dev/null +++ b/transforms/language/gneissweb_classification/dpk_gneissweb_classification/ray/s3.py @@ -0,0 +1,73 @@ +# (C) Copyright IBM Corp. 2024. +# Licensed under the Apache License, Version 2.0 (the “License”); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an “AS IS” BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+################################################################################ + +import os +import sys + +from data_processing.utils import ParamsUtils +from data_processing_ray.runtime.ray import RayTransformLauncher +from dpk_gneissweb_classification.ray.transform import ClassificationRayTransformConfiguration +from dpk_gneissweb_classification.transform import ( + content_column_name_cli_param, + model_credential_cli_param, + model_file_name_cli_param, + model_url_cli_param, + output_label_column_name_cli_param, + output_score_column_name_cli_param, +) + + + +print(os.environ) +# create launcher +launcher = RayTransformLauncher(ClassificationRayTransformConfiguration()) +# create parameters +s3_cred = { + "access_key": "localminioaccesskey", + "secret_key": "localminiosecretkey", + "url": "http://localhost:9000", +} +s3_conf = { + "input_folder": "test/gneissweb_classification/input", + "output_folder": "test/gneissweb_classification/output", +} + + +worker_options = {"num_cpus": 0.8} +code_location = {"github": "github", "commit_hash": "12345", "path": "path"} +params = { + # where to run + "run_locally": True, + # Data access. 
Only required parameters are specified + "data_s3_cred": ParamsUtils.convert_to_ast(s3_cred), + "data_s3_config": ParamsUtils.convert_to_ast(s3_conf), + # orchestrator + "runtime_worker_options": ParamsUtils.convert_to_ast(worker_options), + "runtime_num_workers": 3, + "runtime_pipeline_id": "pipeline_id", + "runtime_job_id": "job_id", + "runtime_creation_delay": 0, + "runtime_code_location": ParamsUtils.convert_to_ast(code_location), + # classification params + model_credential_cli_param: "PUT YOUR OWN HUGGINGFACE CREDENTIAL", + model_file_name_cli_param: "model.bin", + model_url_cli_param:"facebook/fasttext-language-identification", + content_column_name_cli_param: "text", + output_label_column_name_cli_param: "ft_label", + output_score_column_name_cli_param: "ft_score", +} +sys.argv = ParamsUtils.dict_to_req(d=params) +# for arg in sys.argv: +# print(arg) + +# launch +launcher.launch() diff --git a/transforms/language/gneissweb_classification/dpk_gneissweb_classification/ray/transform.py b/transforms/language/gneissweb_classification/dpk_gneissweb_classification/ray/transform.py new file mode 100644 index 000000000..fbd56dd99 --- /dev/null +++ b/transforms/language/gneissweb_classification/dpk_gneissweb_classification/ray/transform.py @@ -0,0 +1,75 @@ +# (C) Copyright IBM Corp. 2024. +# Licensed under the Apache License, Version 2.0 (the “License”); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an “AS IS” BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+################################################################################ + +import sys + +from data_processing.utils import ParamsUtils, get_logger +from data_processing_ray.runtime.ray import RayTransformLauncher +from data_processing_ray.runtime.ray.runtime_configuration import ( + RayTransformRuntimeConfiguration, +) +from dpk_gneissweb_classification.transform import ClassificationTransformConfiguration + + +logger = get_logger(__name__) + + +class ClassificationRayTransformConfiguration(RayTransformRuntimeConfiguration): + """ + Implements the RayTransformConfiguration for Classification as required by the RayTransformLauncher. + Classification does not use a RayRuntime class so the superclass only needs the base + python-only configuration. + """ + + def __init__(self): + """ + Initialization + :param base_configuration - base configuration class + """ + super().__init__(transform_config=ClassificationTransformConfiguration()) + + +# Class used by the notebooks to ingest binary files and create parquet files +class Classification: + def __init__(self, **kwargs): + self.params = {} + for key in kwargs: + self.params[key] = kwargs[key] + # if input_folder and output_folder are specified, then assume it is represent data_local_config + try: + local_conf = {k: self.params[k] for k in ("input_folder", "output_folder")} + self.params["data_local_config"] = ParamsUtils.convert_to_ast(local_conf) + del self.params["input_folder"] + del self.params["output_folder"] + except: + pass + try: + worker_options = {k: self.params[k] for k in ("num_cpus", "memory")} + self.params["runtime_worker_options"] = ParamsUtils.convert_to_ast(worker_options) + del self.params["num_cpus"] + del self.params["memory"] + except: + pass + + def transform(self): + sys.argv = ParamsUtils.dict_to_req(d=(self.params)) + # create launcher + launcher = RayTransformLauncher(ClassificationRayTransformConfiguration()) + # launch + return_code = launcher.launch() + return return_code + + 
+if __name__ == "__main__": + launcher = RayTransformLauncher(ClassificationRayTransformConfiguration()) + logger.info("Launching classification transform") + launcher.launch() diff --git a/transforms/language/gneissweb_classification/dpk_gneissweb_classification/transform.py b/transforms/language/gneissweb_classification/dpk_gneissweb_classification/transform.py new file mode 100644 index 000000000..4825d16cd --- /dev/null +++ b/transforms/language/gneissweb_classification/dpk_gneissweb_classification/transform.py @@ -0,0 +1,171 @@ +# (C) Copyright IBM Corp. 2024. +# Licensed under the Apache License, Version 2.0 (the “License”); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an “AS IS” BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+################################################################################ + +from argparse import ArgumentParser, Namespace +from typing import Any + +import pyarrow as pa + +from data_processing.transform import AbstractTableTransform, TransformConfiguration +from data_processing.utils import CLIArgumentProvider, TransformUtils +from dpk_gneissweb_classification.classification_models import ClassificationModelFactory, ClassificationModel +from dpk_gneissweb_classification.nlp import get_label_ds_pa + + +short_name = "gcls" +cli_prefix = f"{short_name}_" +model_credential_key = "model_credential" +model_file_name_key = "model_file_name" +model_url_key = "model_url" +content_column_name_key = "content_column_name" +output_label_column_name_key = "output_label_column_name" +output_score_column_name_key = "output_score_column_name" +model_credential_cli_param = f"{cli_prefix}{model_credential_key}" +model_file_name_cli_param = f"{cli_prefix}{model_file_name_key}" +model_url_cli_param = f"{cli_prefix}{model_url_key}" +content_column_name_cli_param = f"{cli_prefix}{content_column_name_key}" +output_label_column_name_cli_param = f"{cli_prefix}{output_label_column_name_key}" +output_score_column_name_cli_param = f"{cli_prefix}{output_score_column_name_key}" + +default_content_column_name = "contents" +default_output_label_column_name = "lang" +default_output_score_column_name = "score" + + +class ClassificationTransform(AbstractTableTransform): + """ + Implements a simple copy of a pyarrow Table. + """ + + nlp_classfication: ClassificationModel + content_column_name: str + output_label_column_name: str + output_score_column_name: str + + def __init__(self, config: dict[str, Any]): + """ + Initialize based on the dictionary of configuration information. + This is generally called with configuration parsed from the CLI arguments defined + by the companion runtime, ClassificationTransformRuntime. 
If running inside the RayMutatingDriver, + these will be provided by that class with help from the RayMutatingDriver. + """ + # Make sure that the param name corresponds to the name used in apply_input_params method + # of ClassificationTransformConfiguration class + super().__init__(config) + self.nlp_classfication = self._get_nlp_classfication(config) + self.content_column_name = config.get(content_column_name_cli_param, default_content_column_name) + self.output_label_column_name = config.get(output_label_column_name_cli_param, default_output_label_column_name) + self.output_score_column_name = config.get(output_score_column_name_cli_param, default_output_score_column_name) + + @staticmethod + def _get_nlp_classfication(config) -> ClassificationModel: + nlp_classfication: ClassificationModel + + model_credential = config.get(model_credential_cli_param) + model_file_name = config.get(model_file_name_cli_param) + model_url = config.get(model_url_cli_param) + + if model_credential is None or len(model_credential) == 0: + raise ValueError("model_credential_cli_param is not specified.") + elif model_file_name is None or len(model_credential) == 0: + raise ValueError("model_file_name_cli_param is not specified.") + else: + nlp_classfication = ClassificationModelFactory.create_model(url=model_url, file_name = model_file_name, credential=model_credential) + + return nlp_classfication + + def transform(self, table: pa.Table, file_name: str | None = None) -> tuple[list[pa.Table], dict[str, Any]]: # pylint:disable=unused-argument + """ + Put Transform-specific to convert one Table to 0 or more tables. It also returns + a dictionary of execution statistics - arbitrary dictionary + This implementation makes no modifications so effectively implements a copy of the + input parquet to the output folder, without modification. 
+ """ + TransformUtils.validate_columns(table, [self.content_column_name]) + if self.output_label_column_name in table.schema.names: + raise Exception(f"column to store label ({self.output_label_column_name}) already exist") + if self.output_score_column_name in table.schema.names: + raise Exception( + f"column to store score of label ({self.output_score_column_name}) already exist" + ) + self.logger.debug(f"Transforming one table with {len(table)} rows") + table, stats = get_label_ds_pa( + table, + self.nlp_classfication, + self.content_column_name, + self.output_label_column_name, + self.output_score_column_name, + ) + self.logger.debug(f"Transformed one table with {len(table)} rows") + return [table], stats + + +class ClassificationTransformConfiguration(TransformConfiguration): + """ + Provides support for configuring and using the associated Transform class include + configuration with CLI args. + """ + + def __init__(self): + super().__init__( + name=short_name, + transform_class=ClassificationTransform, + ) + from data_processing.utils import get_logger + + self.logger = get_logger(__name__) + + def add_input_params(self, parser: ArgumentParser) -> None: + """ + Add Transform-specific arguments to the given parser. + This will be included in a dictionary used to initialize the ClassificationTransform. + By convention a common prefix should be used for all transform-specific CLI args + (e.g, noop_, pii_, etc.) 
+ """ + parser.add_argument( + f"--{model_credential_cli_param}", + required=True, + help="Credential to access huggingface model", + ) + parser.add_argument( + f"--{model_file_name_cli_param}", + type=str, + default="", + help="filename of model", + ) + parser.add_argument(f"--{model_url_cli_param}", help="Url to model") + parser.add_argument( + f"--{content_column_name_cli_param}", + default=default_content_column_name, + help="Column name to get content", + ) + parser.add_argument( + f"--{output_label_column_name_cli_param}", + default=default_output_label_column_name, + help="Column name to store label", + ) + parser.add_argument( + f"--{output_score_column_name_cli_param}", + default=default_output_score_column_name, + help="Column name to store the score", + ) + + def apply_input_params(self, args: Namespace) -> bool: + """ + Validate and apply the arguments that have been parsed + :param args: user defined arguments. + :return: True, if validate pass or False otherwise + """ + captured = CLIArgumentProvider.capture_parameters(args, cli_prefix, True) + self.params = self.params | captured + self.logger.info(f"parameters are : {self.params}") + return True diff --git a/transforms/language/gneissweb_classification/dpk_gneissweb_classification/transform_python.py b/transforms/language/gneissweb_classification/dpk_gneissweb_classification/transform_python.py new file mode 100644 index 000000000..381e1d172 --- /dev/null +++ b/transforms/language/gneissweb_classification/dpk_gneissweb_classification/transform_python.py @@ -0,0 +1,66 @@ +# (C) Copyright IBM Corp. 2024. +# Licensed under the Apache License, Version 2.0 (the “License”); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an “AS IS” BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ +import sys + +from data_processing.runtime.pure_python import PythonTransformLauncher +from data_processing.runtime.pure_python.runtime_configuration import ( + PythonTransformRuntimeConfiguration, +) +from data_processing.utils import ParamsUtils, get_logger +from dpk_gneissweb_classification.transform import ClassificationTransformConfiguration + + +logger = get_logger(__name__) + + +class ClassificationPythonTransformConfiguration(PythonTransformRuntimeConfiguration): + """ + Implements the PythonTransformConfiguration for classification as required by the PythonTransformLauncher. + Classification does not use a RayRuntime class so the superclass only needs the base + python-only configuration. 
+ """ + + def __init__(self): + """ + Initialization + :param base_configuration - base configuration class + """ + super().__init__(transform_config=ClassificationTransformConfiguration()) + + +class Classification: + def __init__(self, **kwargs): + self.params = {} + for key in kwargs: + self.params[key] = kwargs[key] + # if input_folder and output_folder are specified, then assume it is represent data_local_config + try: + local_conf = {k: self.params[k] for k in ("input_folder", "output_folder")} + self.params["data_local_config"] = ParamsUtils.convert_to_ast(local_conf) + del self.params["input_folder"] + del self.params["output_folder"] + except: + pass + + def transform(self): + sys.argv = ParamsUtils.dict_to_req(d=(self.params)) + # create launcher + launcher = PythonTransformLauncher(ClassificationPythonTransformConfiguration()) + # launch + return_code = launcher.launch() + return return_code + + +if __name__ == "__main__": + launcher = PythonTransformLauncher(ClassificationPythonTransformConfiguration()) + logger.info("Launching classification transform") + launcher.launch() diff --git a/transforms/language/gneissweb_classification/gneissweb_classification-ray.ipynb b/transforms/language/gneissweb_classification/gneissweb_classification-ray.ipynb new file mode 100644 index 000000000..a22ebae54 --- /dev/null +++ b/transforms/language/gneissweb_classification/gneissweb_classification-ray.ipynb @@ -0,0 +1,193 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "afd55886-5f5b-4794-838e-ef8179fb0394", + "metadata": {}, + "source": [ + "##### **** These pip installs need to be adapted to use the appropriate release level. Alternatively, The venv running the jupyter lab could be pre-configured with a requirement file that includes the right release. 
Example for transform developers working from git clone:\n", + "```\n", + "make venv \n", + "source venv/bin/activate \n", + "pip install jupyterlab\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "4c45c3c6-e4d7-4e61-8de6-32d61f2ce695", + "metadata": {}, + "outputs": [], + "source": [ + "%%capture\n", + "## This is here as a reference only\n", + "# Users and application developers must use the right tag for the latest from pypi\n", + "%pip install 'data-prep-toolkit[ray]'\n", + "%pip install 'data-prep-toolkit-transforms[gneissweb_classification]'" + ] + }, + { + "cell_type": "markdown", + "id": "407fd4e4-265d-4ec7-bbc9-b43158f5f1f3", + "metadata": { + "jp-MarkdownHeadingCollapsed": true + }, + "source": [ + "##### **** Configure the transform parameters. The set of dictionary keys holding DocIDTransform configuration for values are as follows: \n", + "| Configuration Parameters | Default | Description |\n", + "|------------|----------|--------------|\n", + "| gcls_model_credential | _unset_ | specifies the credential you use to get model. This will be huggingface token. [Guide to get huggingface token](https://huggingface.co/docs/hub/security-tokens) |\n", + "| gcls_model_file_name | _unset_ | specifies what filename of model you use to get model, like `model.bin` |\n", + "| gcls_model_url | _unset_ | specifies url that model locates. 
For fasttext, this will be repo nme of the model, like `facebook/fasttext-language-identification` |\n", + "| gcls_content_column_name | `contents` | specifies name of the column containing documents |\n", + "| gcls_output_label_column_name | `label` | specifies name of the output column to hold predicted classes |\n", + "| gcls_output_score_column_name | `score` | specifies name of the output column to hold score of prediction |" + ] + }, + { + "cell_type": "markdown", + "id": "ebf1f782-0e61-485c-8670-81066beb734c", + "metadata": {}, + "source": [ + "##### ***** Import required classes and modules" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "9669273a-8fcc-4b40-9b20-8df658e2ab58", + "metadata": {}, + "outputs": [], + "source": [ + "from dpk_gneissweb_classification.ray.transform import Classification" + ] + }, + { + "cell_type": "markdown", + "id": "7234563c-2924-4150-8a31-4aec98c1bf33", + "metadata": {}, + "source": [ + "##### ***** Setup runtime parameters for this transform" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "badafb96-64d2-4bb8-9f3e-b23713fd5c3f", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "09:56:06 INFO - parameters are : {'model_credential': 'PUT YOUR OWN HUGGINGFACE CREDENTIAL', 'model_file_name': 'model.bin', 'model_url': 'facebook/fasttext-language-identification', 'content_column_name': 'text', 'output_label_column_name': 'lang', 'output_score_column_name': 'score'}\n", + "09:56:06 INFO - pipeline id pipeline_id\n", + "09:56:06 INFO - code location None\n", + "09:56:06 INFO - number of workers 1 worker options {'num_cpus': 0.8, 'max_restarts': -1}\n", + "09:56:06 INFO - actor creation delay 0\n", + "09:56:06 INFO - job details {'job category': 'preprocessing', 'job name': 'gcls', 'job type': 'ray', 'job id': 'job_id'}\n", + "09:56:06 INFO - data factory data_ is using local data access: input_folder - test-data/input output_folder - output\n", 
+ "09:56:06 INFO - data factory data_ max_files -1, n_sample -1\n", + "09:56:06 INFO - data factory data_ Not using data sets, checkpointing False, max files -1, random samples -1, files to use ['.parquet'], files to checkpoint ['.parquet']\n", + "09:56:06 INFO - Running locally\n", + "2025-01-27 09:56:08,919\tINFO worker.py:1777 -- Started a local Ray instance. View the dashboard at \u001b[1m\u001b[32m127.0.0.1:8265 \u001b[39m\u001b[22m\n", + "\u001b[36m(orchestrate pid=97043)\u001b[0m 09:56:09 INFO - orchestrator started at 2025-01-27 09:56:09\n", + "\u001b[36m(orchestrate pid=97043)\u001b[0m 09:56:09 INFO - Number of files is 3, source profile {'max_file_size': 0.3023223876953125, 'min_file_size': 0.037346839904785156, 'total_file_size': 0.4433746337890625}\n", + "\u001b[36m(orchestrate pid=97043)\u001b[0m 09:56:09 INFO - Cluster resources: {'cpus': 10, 'gpus': 0, 'memory': 28.60002746619284, 'object_store': 2.0}\n", + "\u001b[36m(orchestrate pid=97043)\u001b[0m 09:56:09 INFO - Number of workers - 1 with {'num_cpus': 0.8, 'max_restarts': -1} each\n", + "\u001b[36m(RayTransformFileProcessor pid=97047)\u001b[0m Warning : `load_model` does not return WordVectorModel or SupervisedModel any more, but a `FastText` object which is very similar.\n", + "\u001b[36m(orchestrate pid=97043)\u001b[0m 09:56:12 INFO - Completed 1 files in 0.004 min\n", + "\u001b[36m(orchestrate pid=97043)\u001b[0m 09:56:12 INFO - Completed 2 files in 0.006 min\n", + "\u001b[36m(orchestrate pid=97043)\u001b[0m 09:56:12 INFO - Completed 2 files (66.667%) in 0.006 min. 
Waiting for completion\n", + "\u001b[36m(orchestrate pid=97043)\u001b[0m 09:56:12 INFO - Completed processing 3 files in 0.008 min\n", + "\u001b[36m(orchestrate pid=97043)\u001b[0m 09:56:12 INFO - done flushing in 0.001 sec\n", + "09:56:22 INFO - Completed execution in 0.26 min, execution result 0\n" + ] + }, + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "Classification(input_folder= \"test-data/input\",\n", + " output_folder= \"output\",\n", + " gcls_model_credential= \"PUT YOUR OWN HUGGINGFACE CREDENTIAL\",\n", + " gcls_model_file_name= \"model.bin\",\n", + " gcls_model_url= \"facebook/fasttext-language-identification\",\n", + " run_locally= True,\n", + " gcls_content_column_name= \"text\").transform()" + ] + }, + { + "cell_type": "markdown", + "id": "c3df5adf-4717-4a03-864d-9151cd3f134b", + "metadata": {}, + "source": [ + "##### **** The specified folder will include the transformed parquet files." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "7276fe84-6512-4605-ab65-747351e13a7c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['output/test_03.parquet',\n", + " 'output/test_02.parquet',\n", + " 'output/metadata.json',\n", + " 'output/test_01.parquet']" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import glob\n", + "glob.glob(\"output/*\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "845a75cf-f4a9-467d-87fa-ccbac1c9beb8", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "dpk-outer", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/transforms/language/gneissweb_classification/gneissweb_classification.ipynb b/transforms/language/gneissweb_classification/gneissweb_classification.ipynb new file mode 100644 index 000000000..17a5a2e7b --- /dev/null +++ b/transforms/language/gneissweb_classification/gneissweb_classification.ipynb @@ -0,0 +1,342 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "afd55886-5f5b-4794-838e-ef8179fb0394", + "metadata": {}, + "source": [ + "##### **** These pip installs need to be adapted to use the appropriate release level. Alternatively, The venv running the jupyter lab could be pre-configured with a requirement file that includes the right release. 
Example for transform developers working from git clone:\n", + "```\n", + "make venv \n", + "source venv/bin/activate \n", + "pip install jupyterlab\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "4c45c3c6-e4d7-4e61-8de6-32d61f2ce695", + "metadata": {}, + "outputs": [], + "source": [ + "%%capture\n", + "## This is here as a reference only\n", + "# Users and application developers must use the right tag for the latest from pypi\n", + "%pip install data-prep-toolkit\n", + "%pip install 'data-prep-toolkit-transforms[gneissweb_classificationo]'\n", + "%pip install pandas" + ] + }, + { + "cell_type": "markdown", + "id": "407fd4e4-265d-4ec7-bbc9-b43158f5f1f3", + "metadata": { + "jp-MarkdownHeadingCollapsed": true + }, + "source": [ + "##### **** Configure the transform parameters. The set of dictionary keys holding DocIDTransform configuration for values are as follows: \n", + "| Configuration Parameters | Default | Description |\n", + "|------------|----------|--------------|\n", + "| gcls_model_credential | _unset_ | specifies the credential you use to get model. This will be huggingface token. [Guide to get huggingface token](https://huggingface.co/docs/hub/security-tokens) |\n", + "| gcls_model_file_name | _unset_ | specifies what filename of model you use to get model, like `model.bin` |\n", + "| gcls_model_url | _unset_ | specifies url that model locates. 
For fasttext, this will be repo nme of the model, like `facebook/fasttext-language-identification` |\n", + "| gcls_content_column_name | `contents` | specifies name of the column containing documents |\n", + "| gcls_output_label_column_name | `label` | specifies name of the output column to hold predicted classes |\n", + "| gcls_output_score_column_name | `score` | specifies name of the output column to hold score of prediction |" + ] + }, + { + "cell_type": "markdown", + "id": "ebf1f782-0e61-485c-8670-81066beb734c", + "metadata": {}, + "source": [ + "##### ***** Import required classes and modules" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "9669273a-8fcc-4b40-9b20-8df658e2ab58", + "metadata": {}, + "outputs": [], + "source": [ + "from dpk_gneissweb_classification.transform_python import Classification" + ] + }, + { + "cell_type": "markdown", + "id": "7234563c-2924-4150-8a31-4aec98c1bf33", + "metadata": {}, + "source": [ + "##### ***** Setup runtime parameters for this transform" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "badafb96-64d2-4bb8-9f3e-b23713fd5c3f", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "09:52:55 INFO - parameters are : {'model_credential': 'PUT YOUR OWN HUGGINGFACE CREDENTIAL', 'model_file_name': 'model.bin', 'model_url': 'facebook/fasttext-language-identification', 'content_column_name': 'text', 'output_label_column_name': 'lang', 'output_score_column_name': 'score'}\n", + "09:52:55 INFO - pipeline id pipeline_id\n", + "09:52:55 INFO - code location None\n", + "09:52:55 INFO - data factory data_ is using local data access: input_folder - test-data/input output_folder - output\n", + "09:52:55 INFO - data factory data_ max_files -1, n_sample -1\n", + "09:52:55 INFO - data factory data_ Not using data sets, checkpointing False, max files -1, random samples -1, files to use ['.parquet'], files to checkpoint ['.parquet']\n", + "09:52:55 INFO - 
orchestrator gcls started at 2025-01-27 09:52:55\n", + "09:52:55 INFO - Number of files is 3, source profile {'max_file_size': 0.3023223876953125, 'min_file_size': 0.037346839904785156, 'total_file_size': 0.4433746337890625}\n", + "Warning : `load_model` does not return WordVectorModel or SupervisedModel any more, but a `FastText` object which is very similar.\n", + "09:52:57 INFO - Completed 1 files (33.33%) in 0.01 min\n", + "09:52:57 INFO - Completed 2 files (66.67%) in 0.011 min\n", + "09:52:57 INFO - Completed 3 files (100.0%) in 0.014 min\n", + "09:52:57 INFO - Done processing 3 files, waiting for flush() completion.\n", + "09:52:57 INFO - done flushing in 0.0 sec\n", + "09:52:57 INFO - Completed execution in 0.029 min, execution result 0\n" + ] + }, + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "Classification(input_folder= \"test-data/input\",\n", + " output_folder= \"output\",\n", + " gcls_model_credential= \"PUT YOUR OWN HUGGINGFACE CREDENTIAL\",\n", + " gcls_model_file_name= \"model.bin\",\n", + " gcls_model_url= \"facebook/fasttext-language-identification\",\n", + " gcls_content_column_name= \"text\").transform()" + ] + }, + { + "cell_type": "markdown", + "id": "c3df5adf-4717-4a03-864d-9151cd3f134b", + "metadata": {}, + "source": [ + "##### **** The specified folder will include the transformed parquet files." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "7276fe84-6512-4605-ab65-747351e13a7c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['output/test_03.parquet',\n", + " 'output/test_02.parquet',\n", + " 'output/metadata.json',\n", + " 'output/test_01.parquet']" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import glob\n", + "glob.glob(\"output/*\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "845a75cf-f4a9-467d-87fa-ccbac1c9beb8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " | text | \n", + "count() | \n", + "lang | \n", + "score | \n", + "
---|---|---|---|---|
0 | \n", + "- Notice of name-email change.doc | \n", + "6 | \n", + "en | \n", + "0.858 | \n", + "
1 | \n", + "- Nov13ENAOnly.doc | \n", + "2 | \n", + "de | \n", + "0.264 | \n", + "
2 | \n", + "- OHIO_C~1.XLS | \n", + "2 | \n", + "de | \n", + "0.603 | \n", + "
3 | \n", + "- Oneok(5-30)final.doc | \n", + "1 | \n", + "vi | \n", + "0.152 | \n", + "
4 | \n", + "- OpeningBrief.doc | \n", + "6 | \n", + "ko-Hang | \n", + "0.365 | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
195 | \n", + "- invite.doc | \n", + "2 | \n", + "ro | \n", + "0.717 | \n", + "
196 | \n", + "- issues wrt portland and calgary signing shor... | \n", + "2 | \n", + "en | \n", + "0.997 | \n", + "
197 | \n", + "- jan3102.XLS | \n", + "2 | \n", + "de | \n", + "0.399 | \n", + "
198 | \n", + "- job market.gif | \n", + "2 | \n", + "en | \n", + "0.791 | \n", + "
199 | \n", + "- kick~1.mpe | \n", + "4 | \n", + "eo | \n", + "0.253 | \n", + "
200 rows × 4 columns
\n", + "