diff --git a/.github/workflows/test-code-higher_order_syntactic_profiler.yml b/.github/workflows/test-code-higher_order_syntactic_profiler.yml
new file mode 100644
index 0000000000..7165e9466a
--- /dev/null
+++ b/.github/workflows/test-code-higher_order_syntactic_profiler.yml
@@ -0,0 +1,124 @@
+#
+# DO NOT EDIT THIS FILE: it is generated from test-transform.template. Edit that template and run make to regenerate these files.
+#
+name: Test - transforms/code/higher_order_syntactic_profiler
+
+on:
+ workflow_dispatch:
+ push:
+ branches:
+ - "dev"
+ - "releases/**"
+ tags:
+ - "*"
+ paths:
+ - "transforms/code/higher_order_syntactic_profiler/**"
+ - "data-processing-lib/**"
+      - "!transforms/code/higher_order_syntactic_profiler/**/kfp_ray/**" # This is/will be tested in a separate workflow
+ - "!data-processing-lib/**/test/**"
+ - "!data-processing-lib/**/test-data/**"
+ - "!**.md"
+ - "!**/doc/**"
+ - "!**/images/**"
+ - "!**.gitignore"
+ pull_request:
+ branches:
+ - "dev"
+ - "releases/**"
+ paths:
+ - "transforms/code/higher_order_syntactic_profiler/**"
+ - "data-processing-lib/**"
+      - "!transforms/code/higher_order_syntactic_profiler/**/kfp_ray/**" # This is/will be tested in a separate workflow
+ - "!data-processing-lib/**/test/**"
+ - "!data-processing-lib/**/test-data/**"
+ - "!**.md"
+ - "!**/doc/**"
+ - "!**/images/**"
+ - "!**.gitignore"
+
+jobs:
+ check_if_push_image:
+    # Check whether the Docker images should be pushed to the remote repository.
+    # The images are pushed on a merge to the dev branch or when a new tag is created,
+    # the latter being part of the release process.
+    # The image tag is derived from the value of the DOCKER_IMAGE_VERSION variable set in the .make.versions file.
+ runs-on: ubuntu-22.04
+ outputs:
+ publish_images: ${{ steps.version.outputs.publish_images }}
+ steps:
+ - id: version
+ run: |
+ publish_images='false'
+ if [[ ${GITHUB_REF} == refs/heads/dev && ${GITHUB_EVENT_NAME} != 'pull_request' && ${GITHUB_REPOSITORY} == IBM/data-prep-kit ]] ;
+ then
+ publish_images='true'
+ fi
+ if [[ ${GITHUB_REF} == refs/tags/* && ${GITHUB_REPOSITORY} == IBM/data-prep-kit ]] ;
+ then
+ publish_images='true'
+ fi
+ echo "publish_images=$publish_images" >> "$GITHUB_OUTPUT"
+ test-src:
+ runs-on: ubuntu-22.04
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v4
+ - name: Free up space in github runner
+ # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173
+ run: |
+ df -h
+ sudo rm -rf "/usr/local/share/boost"
+ sudo rm -rf "$AGENT_TOOLSDIRECTORY"
+ sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/local/.ghcup
+ sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true
+ df -h
+ - name: Test transform source in transforms/code/higher_order_syntactic_profiler
+ run: |
+ if [ -e "transforms/code/higher_order_syntactic_profiler/Makefile" ]; then
+ make -C transforms/code/higher_order_syntactic_profiler DOCKER=docker test-src
+ else
+ echo "transforms/code/higher_order_syntactic_profiler/Makefile not found - source testing disabled for this transform."
+ fi
+ test-image:
+ needs: [check_if_push_image]
+ runs-on: ubuntu-22.04
+ timeout-minutes: 120
+ env:
+ DOCKER_REGISTRY_USER: ${{ secrets.DOCKER_REGISTRY_USER }}
+ DOCKER_REGISTRY_KEY: ${{ secrets.DOCKER_REGISTRY_KEY }}
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v4
+ - name: Free up space in github runner
+ # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173
+ run: |
+ df -h
+ sudo rm -rf /opt/ghc
+ sudo rm -rf "/usr/local/share/boost"
+ sudo rm -rf "$AGENT_TOOLSDIRECTORY"
+ sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/lib/jvm /usr/local/.ghcup
+ sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true
+ df -h
+ - name: Test transform image in transforms/code/higher_order_syntactic_profiler
+ run: |
+ if [ -e "transforms/code/higher_order_syntactic_profiler/Makefile" ]; then
+ if [ -d "transforms/code/higher_order_syntactic_profiler/spark" ]; then
+ make -C data-processing-lib/spark DOCKER=docker image
+ fi
+ make -C transforms/code/higher_order_syntactic_profiler DOCKER=docker test-image
+ else
+ echo "transforms/code/higher_order_syntactic_profiler/Makefile not found - testing disabled for this transform."
+ fi
+ - name: Print space
+ # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173
+ run: |
+ df -h
+ docker images
+ - name: Publish images
+ if: needs.check_if_push_image.outputs.publish_images == 'true'
+ run: |
+ if [ -e "transforms/code/higher_order_syntactic_profiler/Makefile" ]; then
+ make -C transforms/code/higher_order_syntactic_profiler publish
+ else
+ echo "transforms/code/higher_order_syntactic_profiler/Makefile not found - publishing disabled for this transform."
+ fi
diff --git a/.github/workflows/test-code-semantic_profiler.yml b/.github/workflows/test-code-semantic_profiler.yml
new file mode 100644
index 0000000000..6c301ee386
--- /dev/null
+++ b/.github/workflows/test-code-semantic_profiler.yml
@@ -0,0 +1,124 @@
+#
+# DO NOT EDIT THIS FILE: it is generated from test-transform.template. Edit that template and run make to regenerate these files.
+#
+name: Test - transforms/code/semantic_profiler
+
+on:
+ workflow_dispatch:
+ push:
+ branches:
+ - "dev"
+ - "releases/**"
+ tags:
+ - "*"
+ paths:
+ - "transforms/code/semantic_profiler/**"
+ - "data-processing-lib/**"
+      - "!transforms/code/semantic_profiler/**/kfp_ray/**" # This is/will be tested in a separate workflow
+ - "!data-processing-lib/**/test/**"
+ - "!data-processing-lib/**/test-data/**"
+ - "!**.md"
+ - "!**/doc/**"
+ - "!**/images/**"
+ - "!**.gitignore"
+ pull_request:
+ branches:
+ - "dev"
+ - "releases/**"
+ paths:
+ - "transforms/code/semantic_profiler/**"
+ - "data-processing-lib/**"
+      - "!transforms/code/semantic_profiler/**/kfp_ray/**" # This is/will be tested in a separate workflow
+ - "!data-processing-lib/**/test/**"
+ - "!data-processing-lib/**/test-data/**"
+ - "!**.md"
+ - "!**/doc/**"
+ - "!**/images/**"
+ - "!**.gitignore"
+
+jobs:
+ check_if_push_image:
+    # Check whether the Docker images should be pushed to the remote repository.
+    # The images are pushed on a merge to the dev branch or when a new tag is created,
+    # the latter being part of the release process.
+    # The image tag is derived from the value of the DOCKER_IMAGE_VERSION variable set in the .make.versions file.
+ runs-on: ubuntu-22.04
+ outputs:
+ publish_images: ${{ steps.version.outputs.publish_images }}
+ steps:
+ - id: version
+ run: |
+ publish_images='false'
+ if [[ ${GITHUB_REF} == refs/heads/dev && ${GITHUB_EVENT_NAME} != 'pull_request' && ${GITHUB_REPOSITORY} == IBM/data-prep-kit ]] ;
+ then
+ publish_images='true'
+ fi
+ if [[ ${GITHUB_REF} == refs/tags/* && ${GITHUB_REPOSITORY} == IBM/data-prep-kit ]] ;
+ then
+ publish_images='true'
+ fi
+ echo "publish_images=$publish_images" >> "$GITHUB_OUTPUT"
+ test-src:
+ runs-on: ubuntu-22.04
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v4
+ - name: Free up space in github runner
+ # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173
+ run: |
+ df -h
+ sudo rm -rf "/usr/local/share/boost"
+ sudo rm -rf "$AGENT_TOOLSDIRECTORY"
+ sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/local/.ghcup
+ sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true
+ df -h
+ - name: Test transform source in transforms/code/semantic_profiler
+ run: |
+ if [ -e "transforms/code/semantic_profiler/Makefile" ]; then
+ make -C transforms/code/semantic_profiler DOCKER=docker test-src
+ else
+ echo "transforms/code/semantic_profiler/Makefile not found - source testing disabled for this transform."
+ fi
+ test-image:
+ needs: [check_if_push_image]
+ runs-on: ubuntu-22.04
+ timeout-minutes: 120
+ env:
+ DOCKER_REGISTRY_USER: ${{ secrets.DOCKER_REGISTRY_USER }}
+ DOCKER_REGISTRY_KEY: ${{ secrets.DOCKER_REGISTRY_KEY }}
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v4
+ - name: Free up space in github runner
+ # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173
+ run: |
+ df -h
+ sudo rm -rf /opt/ghc
+ sudo rm -rf "/usr/local/share/boost"
+ sudo rm -rf "$AGENT_TOOLSDIRECTORY"
+ sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/lib/jvm /usr/local/.ghcup
+ sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true
+ df -h
+ - name: Test transform image in transforms/code/semantic_profiler
+ run: |
+ if [ -e "transforms/code/semantic_profiler/Makefile" ]; then
+ if [ -d "transforms/code/semantic_profiler/spark" ]; then
+ make -C data-processing-lib/spark DOCKER=docker image
+ fi
+ make -C transforms/code/semantic_profiler DOCKER=docker test-image
+ else
+ echo "transforms/code/semantic_profiler/Makefile not found - testing disabled for this transform."
+ fi
+ - name: Print space
+ # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173
+ run: |
+ df -h
+ docker images
+ - name: Publish images
+ if: needs.check_if_push_image.outputs.publish_images == 'true'
+ run: |
+ if [ -e "transforms/code/semantic_profiler/Makefile" ]; then
+ make -C transforms/code/semantic_profiler publish
+ else
+ echo "transforms/code/semantic_profiler/Makefile not found - publishing disabled for this transform."
+ fi
diff --git a/.make.versions b/.make.versions
index 93e4efcb13..1c737b9efd 100644
--- a/.make.versions
+++ b/.make.versions
@@ -111,6 +111,11 @@ HTML2PARQUET_PYTHON_VERSION=$(DPK_VERSION)
DPK_TRANSFORMS_VERSION=$(DPK_VERSION)
+HOSP_PYTHON_VERSION=$(DPK_VERSION)
+HOSP_RAY_VERSION=$(DPK_VERSION)
+SP_PYTHON_VERSION=$(DPK_VERSION)
+SP_RAY_VERSION=$(DPK_VERSION)
+
################## ################## ################## ################## ################## ##################
# Begin versions that the repo depends on.
diff --git a/transforms/code/higher_order_syntactic_profiler/Makefile b/transforms/code/higher_order_syntactic_profiler/Makefile
new file mode 100644
index 0000000000..e8acb2e36d
--- /dev/null
+++ b/transforms/code/higher_order_syntactic_profiler/Makefile
@@ -0,0 +1,78 @@
+REPOROOT=../../..
+# Use make help, to see the available rules
+include $(REPOROOT)/.make.defaults
+
+setup::
+	@# Help: Recursively make $@ in all subdirs
+ $(MAKE) RULE=$@ .recurse
+
+clean::
+	@# Help: Recursively make $@ in all subdirs
+ $(MAKE) RULE=$@ .recurse
+
+build::
+	@# Help: Recursively make $@ in all subdirs
+	$(MAKE) RULE=$@ .recurse
+
+venv::
+	@# Help: Recursively make $@ in all subdirs
+ $(MAKE) RULE=$@ .recurse
+
+image::
+ @# Help: Recursively make $@ in all subdirs
+ @$(MAKE) RULE=$@ .recurse
+
+set-versions:
+	@# Help: Recursively make $@ in all subdirs
+ @$(MAKE) RULE=$@ .recurse
+
+publish::
+ @# Help: Recursively make $@ in all subdirs
+ @$(MAKE) RULE=$@ .recurse
+
+test-image::
+ @# Help: Recursively make $@ in all subdirs
+ @$(MAKE) RULE=$@ .recurse
+
+test::
+ @# Help: Recursively make $@ in all subdirs
+ @$(MAKE) RULE=$@ .recurse
+
+test-src::
+ @# Help: Recursively make $@ in all subdirs
+ $(MAKE) RULE=$@ .recurse
+
+kind-load-image::
+ @# Help: Recursively make $@ in all subdirs
+ $(MAKE) RULE=$@ .recurse
+
+docker-load-image::
+ @# Help: Recursively make $@ in all subdirs
+ $(MAKE) RULE=$@ .recurse
+
+docker-save-image::
+ @# Help: Recursively make $@ in all subdirs
+ $(MAKE) RULE=$@ .recurse
+
+.PHONY: workflow-venv
+workflow-venv:
+ if [ -e kfp_ray ]; then \
+ $(MAKE) -C kfp_ray workflow-venv; \
+ fi
+
+.PHONY: workflow-test
+workflow-test:
+ if [ -e kfp_ray ]; then \
+ $(MAKE) -C kfp_ray workflow-test; \
+ fi
+
+.PHONY: workflow-upload
+workflow-upload:
+ if [ -e kfp_ray ]; then \
+ $(MAKE) -C kfp_ray workflow-upload; \
+ fi
+
+.PHONY: workflow-build
+workflow-build:
+ if [ -e kfp_ray ]; then \
+ $(MAKE) -C kfp_ray workflow-build; \
+ fi
\ No newline at end of file
diff --git a/transforms/code/higher_order_syntactic_profiler/README.md b/transforms/code/higher_order_syntactic_profiler/README.md
new file mode 100644
index 0000000000..512630714c
--- /dev/null
+++ b/transforms/code/higher_order_syntactic_profiler/README.md
@@ -0,0 +1,12 @@
+# HOSP Transform
+The HOSP transform enables the profiling of a given code dataset based on higher order
+syntactic and semantic concepts as specified by the user. It also generates a profiling
+report in HTML, based on the output table. Per the set of
+[transform project conventions](../../README.md#transform-project-conventions)
+the following runtimes are available:
+
+* [python](python/README.md) - provides the base python-based transformation
+implementation.
+* [ray](ray/README.md) - enables running the base python transformation
+in a Ray runtime.
+
diff --git a/transforms/code/higher_order_syntactic_profiler/python/.dockerignore b/transforms/code/higher_order_syntactic_profiler/python/.dockerignore
new file mode 100644
index 0000000000..f7275bbbd0
--- /dev/null
+++ b/transforms/code/higher_order_syntactic_profiler/python/.dockerignore
@@ -0,0 +1 @@
+venv/
diff --git a/transforms/code/higher_order_syntactic_profiler/python/Dockerfile b/transforms/code/higher_order_syntactic_profiler/python/Dockerfile
new file mode 100644
index 0000000000..8f444ec156
--- /dev/null
+++ b/transforms/code/higher_order_syntactic_profiler/python/Dockerfile
@@ -0,0 +1,41 @@
+FROM docker.io/python:3.10.14-slim-bullseye
+
+RUN pip install --upgrade --no-cache-dir pip
+
+# install pytest
+RUN pip install --no-cache-dir pytest
+
+# Create a user and use it to run the transform
+RUN useradd -ms /bin/bash dpk
+USER dpk
+WORKDIR /home/dpk
+
+# Copy and install data processing libraries
+# These are expected to be placed in the docker context before this is run (see the make image).
+COPY --chown=dpk:root data-processing-lib-python/ data-processing-lib-python/
+RUN cd data-processing-lib-python && pip install --no-cache-dir -e .
+
+# END OF STEPS destined for a data-prep-kit base image
+
+COPY --chown=dpk:root src/ src/
+COPY --chown=dpk:root pyproject.toml pyproject.toml
+RUN pip install --no-cache-dir -e .
+
+# copy transform main() entry point to the image
+COPY ./src/hosp_transform_python.py .
+
+# copy some of the samples in
+COPY ./src/hosp_local.py local/
+
+# copy test
+COPY test/ test/
+COPY test-data/ test-data/
+
+# Set environment
+ENV PYTHONPATH /home/dpk
+
+# Put these at the end since they seem to upset the docker cache.
+ARG BUILD_DATE
+ARG GIT_COMMIT
+LABEL build-date=$BUILD_DATE
+LABEL git-commit=$GIT_COMMIT
diff --git a/transforms/code/higher_order_syntactic_profiler/python/Makefile b/transforms/code/higher_order_syntactic_profiler/python/Makefile
new file mode 100644
index 0000000000..ac9d7d5fa3
--- /dev/null
+++ b/transforms/code/higher_order_syntactic_profiler/python/Makefile
@@ -0,0 +1,66 @@
+# Define the root of the local git clone for the common rules to be able
+# know where they are running from.
+REPOROOT=../../../..
+# Include a library of common .transform.* targets which most
+# transforms should be able to reuse. However, feel free
+# to override/redefine the rules below.
+
+# $(REPOROOT)/.make.versions file contains the versions
+include $(REPOROOT)/transforms/.make.transforms
+
+TRANSFORM_NAME=hosp
+
+
+venv:: .transforms.python-venv
+
+test:: .transforms.python-test
+
+clean:: .transforms.clean
+
+image:: .transforms.python-image
+
+test-src:: .transforms.test-src
+
+setup:: .transforms.setup
+
+build:: build-dist image
+
+publish: publish-image
+
+publish-image:: .transforms.publish-image-python
+
+setup:: .transforms.setup
+
+# distribution versions is the same as image version.
+set-versions:
+ $(MAKE) TRANSFORM_PYTHON_VERSION=$(HOSP_PYTHON_VERSION) TOML_VERSION=$(HOSP_PYTHON_VERSION) .transforms.set-versions
+
+build-dist:: .defaults.build-dist
+
+publish-dist:: .defaults.publish-dist
+
+test-image:: .transforms.python-test-image
+
+# Ensure RUN_ARGS has a default value
+RUN_ARGS ?= ""
+
+run-cli-sample: .transforms.run-cli-python-sample
+
+run-local-sample: .transforms.run-local-sample
+
+run-local-python-sample: .transforms.run-local-python-sample
+
+# run-local-python-sample:
+# $(MAKE) RUN_FILE=sp_local_python.py \
+# .transforms.run-local-python-sample
+
+# RUN_ARGS="--sp_ikb 'Contents' --language 'Language'" \
+#run-s3-ray-sample: .transforms.run-s3-ray-sample
+
+minio-start: .minio-start
+
+kind-load-image:: .transforms.kind-load-image
+
+docker-load-image: .defaults.docker-load-image
+
+docker-save-image: .defaults.docker-save-image
diff --git a/transforms/code/higher_order_syntactic_profiler/python/README.md b/transforms/code/higher_order_syntactic_profiler/python/README.md
new file mode 100644
index 0000000000..677c13de18
--- /dev/null
+++ b/transforms/code/higher_order_syntactic_profiler/python/README.md
@@ -0,0 +1,70 @@
+# HOSP Transform
+Please see the set of
+[transform project conventions](../../../README.md#transform-project-conventions)
+for details on general project conventions, transform configuration,
+testing and IDE set up.
+
+## Summary
+This transform implements a higher order concept profiler for a given code dataset.
+A user can specify the concepts that are of interest to the downstream use case. These
+concepts can be a complex combination of syntactic and semantic criteria. Based on this,
+the input table containing the UBSRs, base syntactic, and semantic concepts is queried to generate
+the required results. The current implementation provides a single metric, the code-to-comment ratio (CCR).
+However, this is easily extensible. Examples of other higher order concepts are the cyclomatic complexity
+of all Python samples in the dataset and the line coverage of all samples in a given semantic category.
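+
+As an illustration, the CCR metric is the ratio of a snippet's lines of code to its
+lines of comments. Below is a minimal sketch of that arithmetic; the LOC counts are
+hypothetical, while in the transform they are derived from the UAST metadata fields
+`loc_snippet` and `loc_original_code`:
+
+```python
+def code_to_comment_ratio(loc_snippet: int, comment_loc: int) -> float | None:
+    # With no comment lines the ratio is undefined; the transform records None.
+    if comment_loc <= 0:
+        return None
+    return loc_snippet / comment_loc
+
+print(code_to_comment_ratio(80, 20))  # 4.0, i.e. four code lines per comment line
+```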
+
+## Configuration and command line Options
+
+The set of dictionary keys holding [HOSPTransform](src/hosp_transform.py)
+configuration for values are as follows:
+
+* _hosp_metrics_list_ - specifies the list of metrics that the user requires in the profiling report.
+The set of supported metrics is predefined, and the implementation logic for each metric has to be
+in place in the code beforehand.
+
+
+## Running
+
+### Launched Command Line Options
+The following command line arguments are available in addition to
+the options provided by
+the [python launcher](../../../../data-processing-lib/doc/python-launcher-options.md).
+```
+ --hosp_metrics_list HOSP_METRICS_LIST
+
+ List of metrics specified by the user for the profiling report.
+```
+
+| Parameter | Default | Description |
+|------------|----------|--------------|
+| `HOSP_METRICS_LIST` | `CCR` | Metrics to be calculated for profiling. Multiple metrics can be entered, separated by spaces. `CCR` is currently the only valid metric. |
+
+These correspond to the configuration keys described above.
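+
+For example, a hypothetical local invocation requesting only the CCR metric (the
+folder paths are illustrative) might look like:
+
+```shell
+python src/hosp_transform_python.py \
+    --data_local_config "{'input_folder': 'test-data/input', 'output_folder': 'output'}" \
+    --hosp_metrics_list CCR
+```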
+
+### Running the samples
+To run the samples, use the following `make` targets
+
+* `run-cli-sample` - runs src/hosp_transform_python.py using command line args
+* `run-local-sample` - runs src/hosp_local.py
+
+These targets will activate the virtual environment and set up any configuration needed.
+Use the `-n` option of `make` to see the detail of what is done to run the sample.
+
+For example,
+```shell
+make run-cli-sample
+...
+```
+Then
+
+```shell
+ls output
+```
+to see the results of the transform.
+
+### Transforming data using the transform image
+
+To use the transform image to transform your data, please refer to the
+[running images quickstart](../../../../doc/quick-start/run-transform-image.md),
+substituting the name of this transform image and runtime as appropriate.
+
diff --git a/transforms/code/higher_order_syntactic_profiler/python/pyproject.toml b/transforms/code/higher_order_syntactic_profiler/python/pyproject.toml
new file mode 100644
index 0000000000..a996e16cf7
--- /dev/null
+++ b/transforms/code/higher_order_syntactic_profiler/python/pyproject.toml
@@ -0,0 +1,55 @@
+[project]
+name = "dpk_hosp_transform_python"
+version = "0.2.1.dev0"
+requires-python = ">=3.10"
+description = "Higher Order Syntactic Profiler Python Transform"
+license = {text = "Apache-2.0"}
+readme = {file = "README.md", content-type = "text/markdown"}
+authors = [
+ { name = "Aishwariya Chakraborty", email = "aishwariya.chakraborty1@ibm.com" },
+]
+dependencies = [
+ "data-prep-toolkit==0.2.1.dev0",
+ "networkx==3.0.0",
+ "jinja2==3.1.2",
+ "plotly==5.15.0",
+ "matplotlib"
+
+]
+
+[build-system]
+requires = ["setuptools>=68.0.0", "wheel", "setuptools_scm[toml]>=7.1.0"]
+build-backend = "setuptools.build_meta"
+
+[project.optional-dependencies]
+dev = [
+ "twine",
+ "pytest>=7.3.2",
+ "pytest-dotenv>=0.5.2",
+ "pytest-env>=1.0.0",
+ "pre-commit>=3.3.2",
+ "pytest-cov>=4.1.0",
+ "pytest-mock>=3.10.0",
+ "moto==5.0.5",
+ "markupsafe==2.0.1",
+]
+
+[options]
+package_dir = ["src","test"]
+
+[options.packages.find]
+where = ["src/"]
+
+[tool.pytest.ini_options]
+# Currently we use low coverage since we have to run tests separately (see makefile)
+#addopts = "--cov --cov-report term-missing --cov-fail-under 25"
+markers = ["unit: unit tests", "integration: integration tests"]
+
+[tool.coverage.run]
+include = ["src/*"]
+
+[tool.setuptools.packages.find]
+where = ["src"]
+
+[tool.setuptools.package-data]
+'src' = ['template.html']
\ No newline at end of file
diff --git a/transforms/code/higher_order_syntactic_profiler/python/src/UAST.py b/transforms/code/higher_order_syntactic_profiler/python/src/UAST.py
new file mode 100644
index 0000000000..6406a7aee2
--- /dev/null
+++ b/transforms/code/higher_order_syntactic_profiler/python/src/UAST.py
@@ -0,0 +1,270 @@
+# (C) Copyright IBM Corp. 2024.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+import json
+import networkx
+import matplotlib.pyplot as plt
+
+class UASTNode:
+ """
+ Represents a node in the Universal Abstract Syntax Tree (UAST).
+
+ Attributes:
+ id (int): The unique identifier of the node.
+ code_snippet (str): The line(s) of code associated with the node.
+ node_type (str): The type of the node.
+ parents (list): The list of parent nodes.
+ children (list): The list of child nodes.
+ metadata (dict): The associated information/metadata of the node
+ start_point (tuple(int, int)): The start line number and byte of the line of the node.
+ end_point (tuple(int, int)): The end line number and byte of the node.
+ """
+
+    def __init__(self,
+                 id: int = 0,
+                 code_snippet: str = None,
+                 node_type: str = None,
+                 parents: list = None,
+                 children: list = None,
+                 metadata: dict = None,
+                 start_point: tuple[int, int] = (None, None),
+                 end_point: tuple[int, int] = (None, None)) -> None:
+
+        self.id = id
+        self.code_snippet = code_snippet
+        self.node_type = node_type
+        # Use None defaults to avoid sharing one mutable list/dict across nodes.
+        self.parents = parents if parents is not None else []
+        self.children = children if children is not None else []
+        self.metadata = metadata if metadata is not None else {}
+        self.start_point = start_point
+        self.end_point = end_point
+
+ def __str__(self) -> str:
+ return f"ID: {self.id}, Type: {self.node_type}, Snippet: {repr(self.code_snippet)}, Parents: {self.parents}, Children: {self.children}, Metadata = {self.metadata}"
+
+ def __repr__(self) -> str:
+ return f"ID: {self.id}, Type: {self.node_type}, Snippet: {repr(self.code_snippet)}, Parents: {self.parents}, Children: {self.children}, Metadata = {self.metadata}"
+
+ def __eq__(self, other) -> bool:
+ return self.id == other.id and self.code_snippet == other.code_snippet and self.node_type == other.node_type and self.parents == other.parents and self.children == other.children and self.metadata == other.metadata and self.start_point == other.start_point and self.end_point == other.end_point
+
+class UASTEdge:
+ """
+ Represents an edge in the UAST (Universal Abstract Syntax Tree).
+
+ Attributes:
+ start_id (int): The ID of the starting node of the edge.
+ end_id (int): The ID of the ending node of the edge.
+ directed_relation (str): The directed relation between the nodes.
+ metadata (dict): The metadata information associated with the edge.
+ """
+
+    def __init__(self,
+                 start_id: int = None,
+                 end_id: int = None,
+                 directed_relation: str = None,
+                 metadata: dict = None):
+
+        self.start_id = start_id
+        self.end_id = end_id
+        self.directed_relation = directed_relation
+        # Avoid a shared mutable default argument for the metadata dict.
+        self.metadata = metadata if metadata is not None else {}
+
+    def __str__(self) -> str:
+        return f"Start: {self.start_id}, End: {self.end_id}, Relation: {self.directed_relation}, Metadata: {self.metadata}"
+
+    def __repr__(self) -> str:
+        return f"Start: {self.start_id}, End: {self.end_id}, Relation: {self.directed_relation}, Metadata: {self.metadata}"
+
+    def __eq__(self, other) -> bool:
+        return self.start_id == other.start_id and self.end_id == other.end_id and self.directed_relation == other.directed_relation and self.metadata == other.metadata
+
+    def __hash__(self) -> int:
+        # The metadata dict is unhashable, so hash only the immutable fields.
+        return hash((self.start_id, self.end_id, self.directed_relation))
+
+class UAST:
+ """
+ Represents a graph of a Universal Abstract Syntax Tree (UAST).
+
+ Attributes:
+ nodes (dict[int, UASTNode]): A dictionary mapping node IDs to UASTNode objects.
+ edges (list[UASTEdge]): A list of UASTEdge objects representing the edges between nodes.
+ assigned_id (int): The ID to be assigned to the next node added to the UAST.
+
+ Methods:
+ __init__(): Initializes an empty UAST object.
+ __len__(): Returns the number of nodes in the UAST.
+ __str__(): Returns a string representation of the UAST.
+ __repr__(): Returns a string representation of the UAST.
+ __eq__(other): Checks if the UAST is equal to another UAST.
+ add_node(node): Adds a node to the UAST.
+ _create_root(): Creates a root node for the UAST.
+ create_node(node_type, code_snippet, start_point, end_point): Creates a new node and adds it to the UAST, also returns the node object.
+ add_edge(node1, node2, directed_relation, metadata): Adds an edge between two nodes in the UAST.
+ get_node(id): Retrieves a node from the UAST based on its ID.
+ get_nodes_of_type(node_type): Retrieves the ID of all nodes of the input type
+ get_children(node): Retrieves the children of a node in the UAST.
+ get_parents(node): Retrieves the parent of a node in the UAST.
+ print_graph(id): Prints the UAST starting from the specified node ID.
+ save_to_file(file_path): Saves the UAST to a file in JSON format.
+ load_from_file(file_path): Loads the UAST from a file in JSON format.
+ visualize(): Visualizes the graph using NetworkX
+ """
+ def __init__(self):
+ self.nodes : dict[int,UASTNode] = dict()
+ self.edges : list[UASTEdge] = list()
+ self.assigned_id : int = 0
+ self.nodes_of_type : dict = dict()
+ self.root = self._create_root()
+
+ def __len__(self) -> int:
+ return len(self.nodes)
+
+ def __str__(self) -> str:
+ return f"Nodes: {self.nodes} \nEdges: {self.edges}"
+
+ def __repr__(self) -> str:
+ return f"Nodes: {self.nodes} \nEdges: {self.edges}"
+
+ def __eq__(self, other) -> bool:
+ return self.nodes == other.nodes and self.edges == other.edges
+
+ def add_node(self, node : UASTNode) -> None:
+ self.nodes[self.assigned_id] = node
+ self.assigned_id += 1
+ if node.node_type not in self.nodes_of_type :
+ self.nodes_of_type[node.node_type] = list()
+ self.nodes_of_type[node.node_type].append(node.id)
+ return
+
+ def _create_root(self) -> UASTNode:
+ return self.create_node(node_type = "uast_root", code_snippet = "root", metadata= {"info" : "links to all"}, start_point = (-1,0), end_point = (-1,3))
+
+ def create_node(self,
+ node_type : str = None,
+ code_snippet : str = None,
+ metadata : dict = dict(),
+ start_point : tuple[int,int] = (None, None),
+ end_point : tuple[int,int] = (None, None)) -> UASTNode:
+
+ node = UASTNode(id = self.assigned_id, node_type = node_type, code_snippet = code_snippet, metadata = metadata, start_point = start_point, end_point = end_point, children= list(), parents = list())
+ self.add_node(node)
+ return node
+
+ def add_edge(self, node1 : UASTNode = None, node2 : UASTNode = None, directed_relation : str = None, metadata : dict = dict())-> UASTEdge:
+ edge = UASTEdge(start_id = node1.id, end_id = node2.id, directed_relation = directed_relation, metadata = metadata)
+ node2.parents.append(node1.id)
+ node1.children.append(node2.id)
+ self.edges.append(edge)
+ return edge
+
+ def get_node(self, id : int) -> UASTNode:
+ return self.nodes[id]
+
+ def get_nodes_of_type(self, node_type : str) -> list[int]:
+ return self.nodes_of_type[node_type]
+
+ def get_children(self, node : UASTNode) -> list[int]:
+ return node.children
+
+ def get_parents(self, node : UASTNode) -> int:
+ return node.parents
+
+ def print_graph(self, id):
+ if id not in self.nodes:
+ return
+ visited = set()
+
+ def dfs(id, visited):
+ visited.add(id)
+ print(self.nodes[id])
+ for child in self.nodes[id].children:
+ if child not in visited:
+ dfs(child, visited)
+
+ dfs(id, visited)
+ del visited
+
+
+    def save_to_file(self, file_path):
+        # Ensure children/parents are plain lists so the nodes serialize cleanly.
+        for v in self.nodes.values():
+            v.children = list(v.children)
+            v.parents = list(v.parents)
+
+        data = {
+            "nodes": {str(k): v.__dict__ for k, v in self.nodes.items()},
+            "edges": [edge.__dict__ for edge in self.edges]
+        }
+
+        with open(file_path, 'w') as f:
+            json.dump(data, f, indent=4)
+
+        return
+
+    def get_json(self):
+        # Ensure children/parents are plain lists so the nodes serialize cleanly.
+        for v in self.nodes.values():
+            v.children = list(v.children)
+            v.parents = list(v.parents)
+
+        data = {
+            "nodes": {str(k): v.__dict__ for k, v in self.nodes.items()},
+            "edges": [edge.__dict__ for edge in self.edges]
+        }
+
+        return data
+
+ def load_from_json_string(self, obj: str):
+ data = json.loads(obj)
+ self.nodes = {int(k): UASTNode(**v) for k, v in data["nodes"].items()}
+ self.edges = [UASTEdge(**edge) for edge in data["edges"]]
+ self.assigned_id = max(self.nodes.keys()) + 1
+ for node in self.nodes.values():
+ node.start_point = tuple(node.start_point)
+ node.end_point = tuple(node.end_point)
+ return
+
+ def load_from_file(self, file_path):
+ with open(file_path, 'r') as f:
+ data = json.load(f)
+ self.nodes = {int(k): UASTNode(**v) for k, v in data["nodes"].items()}
+ self.edges = [UASTEdge(**edge) for edge in data["edges"]]
+ self.assigned_id = max(self.nodes.keys()) + 1
+ for node in self.nodes.values():
+ node.start_point = tuple(node.start_point)
+ node.end_point = tuple(node.end_point)
+ return
+
+ def visualize(self):
+ edges_viz = []
+ labeldict = {}
+ for edge in self.edges:
+ edges_viz.append([edge.start_id, edge.end_id])
+ labeldict[edge.start_id] = self.nodes[edge.start_id].node_type
+ labeldict[edge.end_id] = self.nodes[edge.end_id].node_type
+ print(labeldict)
+ plt.figure(figsize=(10,10))
+ plt.rcParams["font.size"] = 20
+ G = networkx.Graph()
+ G.add_edges_from(edges_viz)
+ pos = networkx.spring_layout(G)
+ networkx.draw_networkx_labels(G, pos, labels= labeldict, font_size= 12, )
+ networkx.draw_networkx_nodes(G, pos, nodelist= self.nodes.keys(), node_size= 300)
+ networkx.draw_networkx_edges(G, pos, edgelist= edges_viz)
+ plt.show()
+ return
diff --git a/transforms/code/higher_order_syntactic_profiler/python/src/UAST_parser.py b/transforms/code/higher_order_syntactic_profiler/python/src/UAST_parser.py
new file mode 100644
index 0000000000..6b1fd07bb1
--- /dev/null
+++ b/transforms/code/higher_order_syntactic_profiler/python/src/UAST_parser.py
@@ -0,0 +1,254 @@
+# (C) Copyright IBM Corp. 2024.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+from UAST import UAST
+import json
+from tree_sitter import Tree
+import os
+"""
+Initialize the parser with a path for rules and grammar.
+"""
+class UASTParser():
+ def __init__(self):
+ self.language : str = None
+ self.uast : UAST = None
+ self.rules : dict = None
+ self.cached_rules = dict()
+
+ # Load UAST Grammar
+ self.grammar_path = "../../python/src/grammar/UAST_Grammar.json"
+ if not os.path.exists(self.grammar_path):
+ print("Current working directory:", os.getcwd())
+ raise FileNotFoundError(f"UAST Grammar file not found at {self.grammar_path}. Please ensure it exists.")
+
+ with open(self.grammar_path, "r") as grammar_file:
+ self.grammar = json.load(grammar_file)
+
+ # Rule directory and file
+ self.rule_directory = "../../python/src/ruleset/"
+ if not os.path.isdir(self.rule_directory):
+ print("Current working directory:", os.getcwd())
+ raise FileNotFoundError(f"Ruleset directory not found at {self.rule_directory}. Please ensure it exists.")
+
+ self.rule_file_name: str = "UAST_rules_"
+
+ self.AST : Tree = None
+ # self.offset : int = None
+ # self.prev_line : int = -1
+ self.extracted : str = None
+ self.function_info = dict()
+ self.class_info = dict()
+ self.user_defined_entity = {"uast_function": "self.function_info[snippet] = id",
+ "uast_class": "self.class_info[snippet] = id"}
+
+
+ def set_rule_dir_path(self, path: str):
+ self.rule_directory = path
+
+ def set_grammar_path(self, path : str):
+ self.grammar_path = path
+ self.grammar = json.load(open(self.grammar_path, "r"))
+
+ # set language for the parser
+ def set_language(self, language : str):
+ self.language = language
+
+ if (language not in self.cached_rules):
+ rules_cache = json.load(open(self.rule_directory + self.rule_file_name + self.language + '.json', "r"))
+ self.cached_rules[language] = rules_cache
+
+ self.rules = self.cached_rules[language]
+
+ # initialise a DFS traversal on the AST and an empty UAST.
+    def parse(self, AST, code_snippet):
+        if self.language is None:
+            print("Language not loaded")
+            return
+ self.AST = AST
+ self.uast = UAST()
+ self.uast.root.metadata["language"] = self.language
+ self.uast.root.metadata["loc_snippet"] = self.count_loc(code_snippet, self.language)
+ self._dfs(AST_node = self.AST.root_node, parent = self.uast.root)
+ '''
+ # commenting this block temporarily
+ # Call the new modularized function to calculate the code-to-comment ratio
+ code_to_comment_ratio = self.calculate_code_to_comment_ratio(self.uast.root)
+ # Add the code_to_comment_ratio to the root node's metadata
+ self.uast.root.metadata["code_to_comment_ratio"] = code_to_comment_ratio
+ '''
+ return self.uast
+
+ def calculate_code_to_comment_ratio(self, root_node):
+ # Get the loc_snippet from the root node's metadata
+ loc_snippet = root_node.metadata.get("loc_snippet", 0)
+
+ # Sum all loc_original_code for uast_comment nodes
+ total_comment_loc = 0
+
+ # Recursive function to sum comment LOC
+ def sum_comment_loc(node):
+ nonlocal total_comment_loc
+
+ # Check if the node is a comment node
+ if node.node_type == "uast_comment":
+ total_comment_loc += node.metadata.get("loc_original_code", 0)
+
+ # Traverse the children, ensuring we get the actual node objects
+ for child_id in node.children:
+ child_node = self.uast.get_node(child_id) # Fetch the actual child node using self.uast
+ sum_comment_loc(child_node) # Recursively sum for the child node
+
+ # Start summing loc_original_code from the root node
+ sum_comment_loc(root_node)
+
+ # Calculate the code-to-comment ratio (handling division by zero)
+ if total_comment_loc > 0:
+ return loc_snippet / total_comment_loc
+ else:
+ return None # Handle no comments
+
+ def count_lo_comments(self, code_snippet):
+ lines = code_snippet.split('\n')
+ loc_count = 0
+ for line in lines:
+ stripped_line = line.strip()
+ # Count all lines except blank ones
+ if stripped_line:
+ loc_count += 1
+ return loc_count
+
+ def count_loc(self, code_snippet, language):
+ # Define the comment markers for each language
+ language_comment_markers = {
+ "c": ('//', '/*', '*/'),
+ "java": ('//', '/*', '*/'),
+ "C#": ('//', '/*', '*/'),
+ "c_sharp": ('//', '/*', '*/'),
+ "cpp": ('//', '/*', '*/'),
+ "objc": ('//', '/*', '*/'),
+ "rust": ('//', '/*', '*/'),
+ "go": ('//', '/*', '*/'),
+ "kotlin": ('//', '/*', '*/'),
+ "VHDL": ('--', None, None),
+ "py": ('#', '"""', '"""'),
+ "js": ('//', '/*', '*/'),
+ "dart": ('//', '/*', '*/'),
+ "QML": ('//', None, None),
+ "typescript": ('//', '/*', '*/'),
+ "perl": ('#', None, None),
+ "haskell": ('--', '{-', '-}'),
+ "elm": ('--', '{-', '-}'),
+ "agda": ('--', '{-', '-}'),
+ "d": ('//', '/*', '*/'),
+ "nim": ('#', '##', None),
+ "ocaml": ('(*', '(*', '*)'),
+ "scala": ('//', '/*', '*/')
+ }
+
+ single_line_comment, multi_line_comment_start, multi_line_comment_end = language_comment_markers.get(language, (None, None, None))
+
+ if not single_line_comment:
+ raise ValueError(f"Unsupported language: {language}")
+
+ lines = code_snippet.split('\n')
+ loc_count = 0
+ inside_multiline_comment = False
+
+ for line in lines:
+ stripped_line = line.strip()
+
+ # Skip empty lines
+ if not stripped_line:
+ continue
+
+ # Handle multi-line comments
+ if multi_line_comment_start and multi_line_comment_end:
+ if inside_multiline_comment:
+ # Check if the line contains the end of a multi-line comment
+ if multi_line_comment_end in stripped_line:
+ inside_multiline_comment = False
+ continue
+ elif multi_line_comment_start in stripped_line:
+ # If the line starts a multi-line comment
+ inside_multiline_comment = True
+ continue
+
+ # Skip single-line comments
+ if stripped_line.startswith(single_line_comment):
+ continue
+
+ # If the line is neither a comment nor blank, count it as LOC
+ loc_count += 1
+
+ return loc_count
+
+ def _add_user_defined(self, node):
+ id = node.id
+ type = node.node_type
+
+ if node.code_snippet is not None:
+ snippet = node.code_snippet.replace(type, '').strip()
+ # Add further processing with the snippet
+ else:
+ # Handle the case where code_snippet is None
+ snippet = ""
+ # You can log a warning or take other appropriate action
+ print(f"Warning: node.code_snippet is None for node type: {type}")
+
+ if (type in self.user_defined_entity):
+ exec(self.user_defined_entity[type])
+ node.metadata["user_defined"] = True
+
+ del id
+ del type
+ del snippet
+ return
+
+ # Traversing through the AST to create nodes recursively.
+ def _dfs(self, AST_node, parent) :
+ if (AST_node.type in self.rules) :
+ ast_snippet = AST_node.text.decode("utf8")
+ node_type = self.rules[AST_node.type]["uast_node_type"]
+ exec_string = self.rules[AST_node.type]["extractor"]
+ uast_snippet = self._extract(ast_snippet = ast_snippet, node_type = node_type, exec_string = exec_string)
+
+ if node_type == "uast_comment":
+ loc_original_code = self.count_lo_comments(ast_snippet)
+ else:
+ loc_original_code = self.count_loc(ast_snippet, self.language)
+
+ node = self.uast.create_node(
+ node_type = node_type,
+ code_snippet = uast_snippet,
+                # To disable storage of the original code, remove the "original_code" entry below.
+ metadata = {
+ "original_code" : ast_snippet,
+ "loc_original_code": loc_original_code
+ },
+ )
+ self._add_user_defined(node)
+ self.uast.add_edge(node1 = parent, node2 = node, directed_relation = "parent_node")
+ parent = node
+
+ for child in AST_node.children:
+ self._dfs(AST_node= child, parent = parent)
+
+ def _extract(self, ast_snippet, node_type, exec_string):
+ code_snippet = ast_snippet
+ try:
+ exec(exec_string)
+ except Exception as e:
+ print(e)
+ try:
+ return self.grammar[node_type]["keyword"] + " " + self.extracted
+ except Exception as e:
+ print(e)
\ No newline at end of file
diff --git a/transforms/code/higher_order_syntactic_profiler/python/src/hosp_local.py b/transforms/code/higher_order_syntactic_profiler/python/src/hosp_local.py
new file mode 100644
index 0000000000..4d66f5bd71
--- /dev/null
+++ b/transforms/code/higher_order_syntactic_profiler/python/src/hosp_local.py
@@ -0,0 +1,35 @@
+# (C) Copyright IBM Corp. 2024.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+import os
+
+from data_processing.data_access import DataAccessLocal
+from hosp_transform import HigherOrderSyntacticProfilerTransform
+
+
+# create parameters
+input_folder = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "test-data", "input"))
+
+hosp_params = {"metrics_list": ["CCR"]}
+
+if __name__ == "__main__":
+ # Here we show how to run outside of the runtime
+ # Create and configure the transform.
+ transform = HigherOrderSyntacticProfilerTransform(hosp_params)
+ # Use the local data access to read a parquet table.
+ data_access = DataAccessLocal()
+ table, _ = data_access.get_table(os.path.join(input_folder, "test.parquet"))
+ print(f"input table: {table}")
+ # Transform the table
+ table_list, metadata = transform.transform(table)
+ print(f"\noutput table: {table_list}")
+ print(f"output metadata : {metadata}")
diff --git a/transforms/code/higher_order_syntactic_profiler/python/src/hosp_local_python.py b/transforms/code/higher_order_syntactic_profiler/python/src/hosp_local_python.py
new file mode 100644
index 0000000000..f6981e99bd
--- /dev/null
+++ b/transforms/code/higher_order_syntactic_profiler/python/src/hosp_local_python.py
@@ -0,0 +1,46 @@
+# (C) Copyright IBM Corp. 2024.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+import os
+import sys
+
+from data_processing.runtime.pure_python import PythonTransformLauncher
+from data_processing.utils import ParamsUtils
+from hosp_transform_python import HigherOrderSyntacticProfilerPythonTransformConfiguration
+
+
+# create parameters
+input_folder = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "test-data", "input"))
+output_folder = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "output"))
+local_conf = {
+ "input_folder": input_folder,
+ "output_folder": output_folder,
+}
+code_location = {"github": "github", "commit_hash": "12345", "path": "path"}
+params = {
+ # Data access. Only required parameters are specified
+ "data_local_config": ParamsUtils.convert_to_ast(local_conf),
+ # execution info
+ "runtime_pipeline_id": "pipeline_id",
+ "runtime_job_id": "job_id",
+ "runtime_code_location": ParamsUtils.convert_to_ast(code_location),
+ # hosp params
+ "hosp_metrics_list": ["CCR"]
+
+}
+if __name__ == "__main__":
+ # Set the simulated command line args
+ sys.argv = ParamsUtils.dict_to_req(d=params)
+ # create launcher
+ launcher = PythonTransformLauncher(runtime_config=HigherOrderSyntacticProfilerPythonTransformConfiguration())
+ # Launch the ray actor(s) to process the input
+ launcher.launch()
diff --git a/transforms/code/higher_order_syntactic_profiler/python/src/hosp_local_python_multiprocessor.py b/transforms/code/higher_order_syntactic_profiler/python/src/hosp_local_python_multiprocessor.py
new file mode 100644
index 0000000000..f5861ce134
--- /dev/null
+++ b/transforms/code/higher_order_syntactic_profiler/python/src/hosp_local_python_multiprocessor.py
@@ -0,0 +1,46 @@
+# (C) Copyright IBM Corp. 2024.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+import os
+import sys
+
+from data_processing.runtime.pure_python import PythonTransformLauncher
+from data_processing.utils import ParamsUtils
+from hosp_transform_python import HigherOrderSyntacticProfilerPythonTransformConfiguration
+
+
+# create parameters
+input_folder = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "test-data", "input"))
+output_folder = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "output"))
+local_conf = {
+ "input_folder": input_folder,
+ "output_folder": output_folder,
+}
+code_location = {"github": "github", "commit_hash": "12345", "path": "path"}
+params = {
+ # Data access. Only required parameters are specified
+ "data_local_config": ParamsUtils.convert_to_ast(local_conf),
+ # execution info
+ "runtime_pipeline_id": "pipeline_id",
+ "runtime_job_id": "job_id",
+ "runtime_code_location": ParamsUtils.convert_to_ast(code_location),
+ # "runtime_num_processors": 2,
+ # hosp params
+ "hosp_metrics_list": ["CCR"]
+}
+if __name__ == "__main__":
+ # Set the simulated command line args
+ sys.argv = ParamsUtils.dict_to_req(d=params)
+ # create launcher
+ launcher = PythonTransformLauncher(runtime_config=HigherOrderSyntacticProfilerPythonTransformConfiguration())
+ # Launch the ray actor(s) to process the input
+ launcher.launch()
diff --git a/transforms/code/higher_order_syntactic_profiler/python/src/hosp_transform.py b/transforms/code/higher_order_syntactic_profiler/python/src/hosp_transform.py
new file mode 100644
index 0000000000..aa5dad2404
--- /dev/null
+++ b/transforms/code/higher_order_syntactic_profiler/python/src/hosp_transform.py
@@ -0,0 +1,167 @@
+# (C) Copyright IBM Corp. 2024.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+import time
+from argparse import ArgumentParser, Namespace
+from typing import Any
+import csv
+from pathlib import Path
+
+import pyarrow as pa
+import pyarrow.parquet as pq
+from data_processing.transform import AbstractTableTransform, TransformConfiguration
+from data_processing.utils import CLIArgumentProvider
+from UAST import *
+from report import *
+
+
+short_name = "hosp"
+cli_prefix = f"{short_name}_"
+
+metrics_list = "metrics_list"
+hosp_metrics_cli_param = f"{cli_prefix}{metrics_list}"
+
+base_constructs = ['Library', 'Language', 'Concepts']
+
+
+def uast_read(jsonstring):
+    """
+    Reads an input JSON string into a UAST object.
+    """
+ uast = UAST()
+ if jsonstring is not None and jsonstring != 'null':
+ uast.load_from_json_string(jsonstring)
+ return uast
+ return None
+
+def extract_ccr(uast):
+    """
+    Calculates the code-to-comment ratio given a UAST object as input.
+    """
+ if uast is not None:
+ total_comment_loc = 0
+ for node_idx in uast.nodes:
+ node = uast.get_node(node_idx)
+ if node.node_type == 'uast_comment':
+ total_comment_loc += node.metadata.get("loc_original_code", 0)
+ elif node.node_type == 'uast_root':
+ loc_snippet = node.metadata.get("loc_snippet", 0)
+ if total_comment_loc > 0:
+ return loc_snippet / total_comment_loc
+ else:
+ return None
+ return None
+
+def generate_report(table: pa.Table, metrics_list):
+    """
+    Generates the profiler report from the given table and the user-specified metrics list.
+    """
+ columns = base_constructs + metrics_list
+ script_dir = Path(__file__).parent.resolve()
+ template_file = str(script_dir / 'template.html')
+ output_file = str(script_dir / 'output.html')
+ report = Report(template_file)
+ count = 0
+ for column in columns:
+ plot = Plot(table, column)
+ plot_html = plot.generate_distribution_plot()
+ report.add_metric(count, column, plot_html)
+ count+=1
+ report.save(output_file)
+
+
+
+class HigherOrderSyntacticProfilerTransform(AbstractTableTransform):
+    """
+    Computes the user-requested higher order metrics (currently only CCR) over a
+    pyarrow Table, appending each metric as a new column.
+    """
+
+ def __init__(self, config: dict[str, Any]):
+ """
+ Initialize based on the dictionary of configuration information.
+ This is generally called with configuration parsed from the CLI arguments defined
+ by the companion runtime, HigherOrderSyntacticProfilerTransformRuntime. If running inside the RayMutatingDriver,
+ these will be provided by that class with help from the RayMutatingDriver.
+ """
+ # Make sure that the param name corresponds to the name used in apply_input_params method
+ # of HigherOrderSyntacticProfilerTransformConfiguration class
+ super().__init__(config)
+        self.metrics_list = config.get(metrics_list, ["CCR"])
+
+
+    def transform(self, table: pa.Table, file_name: str = None) -> tuple[list[pa.Table], dict[str, Any]]:
+        """
+        Converts one Table to 0 or more tables, along with a dictionary of execution
+        statistics. This implementation appends one column per requested metric
+        (currently only CCR) and, when the required columns are present, generates
+        the HTML profiling report.
+        """
+        self.logger.debug(f"Transforming one table with {len(table)} rows")
+        new_table = table
+        if self.metrics_list is not None:
+            for metric in self.metrics_list:
+                if metric == "CCR":
+                    self.logger.info(f"Generating {metric} values")
+                    uasts = [uast_read(uast_json) for uast_json in new_table['UAST'].to_pylist()]
+                    ccrs = [extract_ccr(uast) for uast in uasts]
+                    new_table = new_table.append_column(metric, pa.array(ccrs))
+        if 'UAST' in new_table.schema.names and 'Concepts' in new_table.schema.names:
+            generate_report(new_table, self.metrics_list)
+        self.logger.debug(f"Transformed one table with {len(new_table)} rows")
+        metadata = {"nfiles": 1, "nrows": len(new_table)}
+        return [new_table], metadata
+
+
+class HigherOrderSyntacticProfilerTransformConfiguration(TransformConfiguration):
+
+ """
+    Provides support for configuring and using the associated Transform class,
+    including configuration with CLI args.
+ """
+
+ def __init__(self):
+ super().__init__(
+ name=short_name,
+ transform_class=HigherOrderSyntacticProfilerTransform,
+ # remove_from_metadata=[pwd_key],
+ )
+ from data_processing.utils import get_logger
+
+ self.logger = get_logger(__name__)
+
+ def add_input_params(self, parser: ArgumentParser) -> None:
+ """
+ Add Transform-specific arguments to the given parser.
+ This will be included in a dictionary used to initialize the HigherOrderSyntacticProfilerTransform.
+        By convention a common prefix should be used for all transform-specific CLI args
+        (e.g., hosp_, pii_, etc.)
+ """
+
+ # Add argument for a list of strings
+ parser.add_argument(
+ f"--{hosp_metrics_cli_param}",
+ type=str,
+ nargs='+', # Accept one or more strings
+ default=["CCR"], # Set a default value as a list
+ help="List of higher order syntactic profiling metrics (default: ['CCR'])",
+ )
+
+
+ def apply_input_params(self, args: Namespace) -> bool:
+ """
+ Validate and apply the arguments that have been parsed
+ :param args: user defined arguments.
+ :return: True, if validate pass or False otherwise
+ """
+ captured = CLIArgumentProvider.capture_parameters(args, cli_prefix, False)
+ self.params = self.params | captured
+ self.logger.info(f"hosp parameters are : {self.params}")
+ return True
diff --git a/transforms/code/higher_order_syntactic_profiler/python/src/hosp_transform_python.py b/transforms/code/higher_order_syntactic_profiler/python/src/hosp_transform_python.py
new file mode 100644
index 0000000000..1c419765c0
--- /dev/null
+++ b/transforms/code/higher_order_syntactic_profiler/python/src/hosp_transform_python.py
@@ -0,0 +1,45 @@
+# (C) Copyright IBM Corp. 2024.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+import time
+
+from data_processing.runtime.pure_python import PythonTransformLauncher
+from data_processing.runtime.pure_python.runtime_configuration import (
+ PythonTransformRuntimeConfiguration,
+)
+from data_processing.utils import get_logger
+from hosp_transform import HigherOrderSyntacticProfilerTransformConfiguration
+
+
+logger = get_logger(__name__)
+
+
+class HigherOrderSyntacticProfilerPythonTransformConfiguration(PythonTransformRuntimeConfiguration):
+ """
+ Implements the PythonTransformConfiguration for HigherOrderSyntacticProfiler as required by the PythonTransformLauncher.
+ HigherOrderSyntacticProfiler does not use a RayRuntime class so the superclass only needs the base
+ python-only configuration.
+ """
+
+ def __init__(self):
+ """
+ Initialization
+ :param base_configuration - base configuration class
+ """
+ super().__init__(transform_config=HigherOrderSyntacticProfilerTransformConfiguration())
+
+
+if __name__ == "__main__":
+ # launcher = HigherOrderSyntacticProfilerRayLauncher()
+ launcher = PythonTransformLauncher(HigherOrderSyntacticProfilerPythonTransformConfiguration())
+ logger.info("Launching hosp transform")
+ launcher.launch()
diff --git a/transforms/code/higher_order_syntactic_profiler/python/src/output.html b/transforms/code/higher_order_syntactic_profiler/python/src/output.html
new file mode 100644
index 0000000000..630383180a
--- /dev/null
+++ b/transforms/code/higher_order_syntactic_profiler/python/src/output.html
@@ -0,0 +1,169 @@
+<!-- Generated profiler report page; the full HTML markup is not reproduced here. -->
+<!-- Title: Profiler Report -->
+<!-- Heading: Syntactic and Semantic Profile -->
+<!-- Intro: "This report presents the detailed profiling report of the input dataset." -->
diff --git a/transforms/code/higher_order_syntactic_profiler/python/test-data/expected/metadata.json b/transforms/code/higher_order_syntactic_profiler/python/test-data/expected/metadata.json
new file mode 100644
index 0000000000..42bb81a07a
--- /dev/null
+++ b/transforms/code/higher_order_syntactic_profiler/python/test-data/expected/metadata.json
@@ -0,0 +1,46 @@
+{
+ "pipeline": "pipeline_id",
+ "job details": {
+ "job category": "preprocessing",
+ "job name": "HigherOrderSyntacticProfiler",
+ "job type": "ray",
+ "job id": "job_id",
+ "start_time": "2024-03-01 15:17:56",
+ "end_time": "2024-03-01 15:17:57",
+ "status": "success"
+ },
+ "code": [null],
+ "job_input_params": {
+ "sleep": 0,
+ "checkpointing": false,
+ "max_files": -1,
+ "number of workers": 1,
+ "worker options": {
+ "num_cpus": 0.8
+ },
+ "actor creation delay": 0
+ },
+ "execution_stats": {
+ "cpus": 10,
+ "gpus": 0,
+ "memory": 14.031964112073183,
+ "object_store": 2.0
+ },
+ "job_output_stats": {
+ "source_files": 1,
+ "source_size": 16534,
+ "result_files": 1,
+ "result_size": 16534,
+ "table_processing": 0.012392997741699219,
+ "nfiles": 1,
+ "nrows": 5
+ },
+ "source": {
+ "name": "test-data/data_processing/ray/hosp/input",
+ "type": "path"
+ },
+ "target": {
+ "name": "/tmp/HOSP4o9gv2bq",
+ "type": "path"
+ }
+}
diff --git a/transforms/code/higher_order_syntactic_profiler/python/test-data/expected/test.parquet b/transforms/code/higher_order_syntactic_profiler/python/test-data/expected/test.parquet
new file mode 100644
index 0000000000..e0dda5bf66
Binary files /dev/null and b/transforms/code/higher_order_syntactic_profiler/python/test-data/expected/test.parquet differ
diff --git a/transforms/code/higher_order_syntactic_profiler/python/test-data/input/test.parquet b/transforms/code/higher_order_syntactic_profiler/python/test-data/input/test.parquet
new file mode 100644
index 0000000000..7a4080d86f
Binary files /dev/null and b/transforms/code/higher_order_syntactic_profiler/python/test-data/input/test.parquet differ
diff --git a/transforms/code/higher_order_syntactic_profiler/python/test/test_hosp.py b/transforms/code/higher_order_syntactic_profiler/python/test/test_hosp.py
new file mode 100644
index 0000000000..9ef6dfa79e
--- /dev/null
+++ b/transforms/code/higher_order_syntactic_profiler/python/test/test_hosp.py
@@ -0,0 +1,45 @@
+# (C) Copyright IBM Corp. 2024.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+import os
+
+import pyarrow as pa
+from data_processing.test_support import get_tables_in_folder
+from data_processing.test_support.transform.table_transform_test import (
+ AbstractTableTransformTest,
+)
+from hosp_transform import HigherOrderSyntacticProfilerTransform, metrics_list
+
+
+# table = pa.Table.from_pydict({"name": pa.array(["Tom"]), "age": pa.array([23])})
+# expected_table = table  # We're a profiler after all.
+# expected_metadata_list = [{"nfiles": 1, "nrows": 1}, {}] # transform() result # flush() result
+
+
+class TestHigherOrderSyntacticProfilerTransform(AbstractTableTransformTest):
+ """
+ Extends the super-class to define the test data for the tests defined there.
+ The name of this class MUST begin with the word Test so that pytest recognizes it as a test class.
+ """
+
+ def get_test_transform_fixtures(self) -> list[tuple]:
+ src_file_dir = os.path.abspath(os.path.dirname(__file__))
+ input_dir = os.path.join(src_file_dir, "../test-data/input")
+ expected_dir = os.path.join(src_file_dir, "../test-data/expected")
+ input_tables = get_tables_in_folder(input_dir)
+ expected_tables = get_tables_in_folder(expected_dir)
+
+ expected_metadata_list = [{"nfiles": 1, "nrows": len(expected_tables[0])}, {}]
+ config = {metrics_list: ["CCR"]}
+ fixtures = [
+ (HigherOrderSyntacticProfilerTransform(config), input_tables, expected_tables, expected_metadata_list),
+ ]
+ return fixtures
diff --git a/transforms/code/higher_order_syntactic_profiler/python/test/test_hosp_python.py b/transforms/code/higher_order_syntactic_profiler/python/test/test_hosp_python.py
new file mode 100644
index 0000000000..9554d57983
--- /dev/null
+++ b/transforms/code/higher_order_syntactic_profiler/python/test/test_hosp_python.py
@@ -0,0 +1,48 @@
+# (C) Copyright IBM Corp. 2024.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+import os
+
+from data_processing.runtime.pure_python import PythonTransformLauncher
+from data_processing.test_support.launch.transform_test import (
+ AbstractTransformLauncherTest,
+)
+from hosp_transform import hosp_metrics_cli_param
+from hosp_transform_python import HigherOrderSyntacticProfilerPythonTransformConfiguration
+
+
+class TestPythonHigherOrderSyntacticProfilerTransform(AbstractTransformLauncherTest):
+ """
+ Extends the super-class to define the test data for the tests defined there.
+ The name of this class MUST begin with the word Test so that pytest recognizes it as a test class.
+ """
+
+ def get_test_transform_fixtures(self) -> list[tuple]:
+ src_file_dir = os.path.abspath(os.path.dirname(__file__))
+ fixtures = []
+
+ launcher = PythonTransformLauncher(HigherOrderSyntacticProfilerPythonTransformConfiguration())
+ input_dir = os.path.join(src_file_dir, "../test-data/input")
+ expected_dir = os.path.join(src_file_dir, "../test-data/expected")
+
+ transform_config = {hosp_metrics_cli_param: ["CCR"]}
+ fixtures.append(
+ (
+ launcher,
+ transform_config,
+ input_dir,
+ expected_dir,
+ [], # optional list of column names to ignore in comparing test-generated with expected.
+ )
+ )
+
+ return fixtures
diff --git a/transforms/code/higher_order_syntactic_profiler/ray/.dockerignore b/transforms/code/higher_order_syntactic_profiler/ray/.dockerignore
new file mode 100644
index 0000000000..f7275bbbd0
--- /dev/null
+++ b/transforms/code/higher_order_syntactic_profiler/ray/.dockerignore
@@ -0,0 +1 @@
+venv/
diff --git a/transforms/code/higher_order_syntactic_profiler/ray/.gitignore b/transforms/code/higher_order_syntactic_profiler/ray/.gitignore
new file mode 100644
index 0000000000..3ea7fd4abb
--- /dev/null
+++ b/transforms/code/higher_order_syntactic_profiler/ray/.gitignore
@@ -0,0 +1,38 @@
+test-data/output
+output/*
+/output/
+data-processing-lib/
+
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+
+# Distribution / packaging
+bin/
+build/
+develop-eggs/
+dist/
+eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+.tox/
+htmlcov
+.coverage
+.cache
+nosetests.xml
+coverage.xml
\ No newline at end of file
diff --git a/transforms/code/higher_order_syntactic_profiler/ray/Dockerfile b/transforms/code/higher_order_syntactic_profiler/ray/Dockerfile
new file mode 100644
index 0000000000..fcc3b3cbb8
--- /dev/null
+++ b/transforms/code/higher_order_syntactic_profiler/ray/Dockerfile
@@ -0,0 +1,42 @@
+ARG BASE_IMAGE=docker.io/rayproject/ray:2.24.0-py310
+FROM ${BASE_IMAGE}
+
+RUN pip install --upgrade --no-cache-dir pip
+
+# install pytest
+RUN pip install --no-cache-dir pytest
+
+# Copy and install data processing libraries
+# These are expected to be placed in the docker context before this is run (see the make image target).
+COPY --chown=ray:users data-processing-lib-python/ data-processing-lib-python/
+RUN cd data-processing-lib-python && pip install --no-cache-dir -e .
+COPY --chown=ray:users data-processing-lib-ray/ data-processing-lib-ray/
+RUN cd data-processing-lib-ray && pip install --no-cache-dir -e .
+COPY --chown=ray:users python-transform/ python-transform/
+RUN cd python-transform && pip install --no-cache-dir -e .
+
+#COPY requirements.txt requirements.txt
+#RUN pip install --no-cache-dir -r requirements.txt
+
+COPY --chown=ray:users src/ src/
+COPY --chown=ray:users pyproject.toml pyproject.toml
+RUN pip install --no-cache-dir -e .
+
+# copy the main() entry point to the image
+COPY ./src/hosp_transform_ray.py .
+
+# copy some of the samples in
+COPY ./src/hosp_local_ray.py local/
+
+# copy test
+COPY test/ test/
+COPY test-data/ test-data/
+
+# Set environment
+ENV PYTHONPATH /home/ray
+
+# Put these at the end since they seem to upset the docker cache.
+ARG BUILD_DATE
+ARG GIT_COMMIT
+LABEL build-date=$BUILD_DATE
+LABEL git-commit=$GIT_COMMIT
diff --git a/transforms/code/higher_order_syntactic_profiler/ray/Makefile b/transforms/code/higher_order_syntactic_profiler/ray/Makefile
new file mode 100644
index 0000000000..4bc8cd28c1
--- /dev/null
+++ b/transforms/code/higher_order_syntactic_profiler/ray/Makefile
@@ -0,0 +1,58 @@
+# Define the root of the local git clone for the common rules to be able
+# know where they are running from.
+REPOROOT=../../../..
+# Include a library of common .transform.* targets which most
+# transforms should be able to reuse. However, feel free
+# to override/redefine the rules below.
+
+include $(REPOROOT)/transforms/.make.transforms
+
+TRANSFORM_NAME=hosp
+
+BASE_IMAGE=${RAY_BASE_IMAGE}
+venv:: .transforms.ray-venv
+
+test:: .transforms.ray-test
+
+clean:: .transforms.clean
+
+image:: .transforms.ray-image
+
+test-src:: .transforms.test-src
+
+setup:: .transforms.setup
+
+test-image:: .transforms.ray-test-image
+
+build:: build-dist image
+
+publish: publish-image
+
+publish-image:: .transforms.publish-image-ray
+
+# set the version of python transform that this depends on.
+set-versions:
+ $(MAKE) TRANSFORM_PYTHON_VERSION=${HOSP_PYTHON_VERSION} TOML_VERSION=$(HOSP_RAY_VERSION) .transforms.set-versions
+
+build-dist:: .defaults.build-dist
+
+publish-dist:: .defaults.publish-dist
+
+# Ensure RUN_ARGS has a default value
+RUN_ARGS ?= ""
+
+run-cli-sample: .transforms.run-cli-ray-sample
+
+run-local-sample: .transforms.run-local-ray-sample
+
+# run-s3-sample: .transforms.run-s3-ray-sample
+
+minio-start: .minio-start
+
+kind-load-image:: .transforms.kind-load-image
+
+docker-load-image: .defaults.docker-load-image
+
+docker-save-image: .defaults.docker-save-image
diff --git a/transforms/code/higher_order_syntactic_profiler/ray/README.md b/transforms/code/higher_order_syntactic_profiler/ray/README.md
new file mode 100644
index 0000000000..038c830f98
--- /dev/null
+++ b/transforms/code/higher_order_syntactic_profiler/ray/README.md
@@ -0,0 +1,46 @@
+# HOSP Ray Transform
+Please see the set of
+[transform project conventions](../../../README.md#transform-project-conventions)
+for details on general project conventions, transform configuration,
+testing and IDE set up.
+
+## Summary
+This project wraps the [hosp transform](../python) with a Ray runtime.
+
+## Configuration and command line Options
+
+HOSP configuration and command line options are the same as for the [base python](../python) transform.
+
+## Running
+
+### Launched Command Line Options
+In addition to those available to the transform as defined [here](../python/README.md),
+the set of
+[ray launcher options](../../../../data-processing-lib/doc/ray-launcher-options.md) is available.
+
+### Running the samples
+To run the samples, use the following `make` targets:
+
+* `run-cli-sample` - runs src/hosp_transform_ray.py using command line args
+* `run-local-sample` - runs src/hosp_local_ray.py
+
+
+These targets will activate the virtual environment and set up any configuration needed.
+Use the `-n` option of `make` to see the detail of what is done to run the sample.
+
+For example,
+```shell
+make run-cli-sample
+...
+```
+Then
+```shell
+ls output
+```
+to see the results of the transform.
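+
+The same run can also be driven from Python. The sketch below mirrors
+src/hosp_local_ray.py from this transform; only the required parameters are
+shown, and the paths may need adjusting for your environment.
+
+```python
+import sys
+
+from data_processing.utils import ParamsUtils
+from data_processing_ray.runtime.ray import RayTransformLauncher
+from hosp_transform_ray import HigherOrderSyntacticProfilerRayTransformConfiguration
+
+params = {
+    "run_locally": True,  # start a local Ray cluster
+    # data access: read the local test data, write to output/
+    "data_local_config": ParamsUtils.convert_to_ast(
+        {"input_folder": "test-data/input", "output_folder": "output"}
+    ),
+    # hosp params
+    "hosp_metrics_list": ["CCR"],
+}
+# simulate the command line and launch
+sys.argv = ParamsUtils.dict_to_req(d=params)
+RayTransformLauncher(HigherOrderSyntacticProfilerRayTransformConfiguration()).launch()
+```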
+
+### Transforming data using the transform image
+
+To use the transform image to transform your data, please refer to the
+[running images quickstart](../../../../doc/quick-start/run-transform-image.md),
+substituting the name of this transform image and runtime as appropriate.
diff --git a/transforms/code/higher_order_syntactic_profiler/ray/pyproject.toml b/transforms/code/higher_order_syntactic_profiler/ray/pyproject.toml
new file mode 100644
index 0000000000..96a5bb28ab
--- /dev/null
+++ b/transforms/code/higher_order_syntactic_profiler/ray/pyproject.toml
@@ -0,0 +1,45 @@
+[project]
+name = "dpk_hosp_transform_ray"
+version = "0.2.1.dev0"
+requires-python = ">=3.10"
+description = "HOSP Ray Transform"
+license = {text = "Apache-2.0"}
+readme = {file = "README.md", content-type = "text/markdown"}
+authors = [
+ { name = "Aishwariya Chakraborty", email = "aishwariya.chakraborty1@ibm.com" },
+]
+dependencies = [
+ "dpk-hosp-transform-python==0.2.1.dev0",
+ "data-prep-toolkit-ray==0.2.1.dev0",
+]
+
+[build-system]
+requires = ["setuptools>=68.0.0", "wheel", "setuptools_scm[toml]>=7.1.0"]
+build-backend = "setuptools.build_meta"
+
+[project.optional-dependencies]
+dev = [
+ "twine",
+ "pytest>=7.3.2",
+ "pytest-dotenv>=0.5.2",
+ "pytest-env>=1.0.0",
+ "pre-commit>=3.3.2",
+ "pytest-cov>=4.1.0",
+ "pytest-mock>=3.10.0",
+ "moto==5.0.5",
+ "markupsafe==2.0.1",
+]
+
+[options]
+package_dir = ["src","test"]
+
+[options.packages.find]
+where = ["src/"]
+
+[tool.pytest.ini_options]
+# Currently we use low coverage since we have to run tests separately (see makefile)
+#addopts = "--cov --cov-report term-missing --cov-fail-under 25"
+markers = ["unit: unit tests", "integration: integration tests"]
+
+[tool.coverage.run]
+include = ["src/*"]
diff --git a/transforms/code/higher_order_syntactic_profiler/ray/src/hosp_local_ray.py b/transforms/code/higher_order_syntactic_profiler/ray/src/hosp_local_ray.py
new file mode 100644
index 0000000000..d639aa0fcc
--- /dev/null
+++ b/transforms/code/higher_order_syntactic_profiler/ray/src/hosp_local_ray.py
@@ -0,0 +1,51 @@
+# (C) Copyright IBM Corp. 2024.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+import os
+import sys
+
+from data_processing.utils import ParamsUtils
+from data_processing_ray.runtime.ray import RayTransformLauncher
+from hosp_transform_ray import HigherOrderSyntacticProfilerRayTransformConfiguration
+
+
+# create parameters
+input_folder = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "test-data", "input"))
+output_folder = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "output"))
+local_conf = {
+ "input_folder": input_folder,
+ "output_folder": output_folder,
+}
+worker_options = {"num_cpus": 0.8}
+code_location = {"github": "github", "commit_hash": "12345", "path": "path"}
+params = {
+ # where to run
+ "run_locally": True,
+ # Data access. Only required parameters are specified
+ "data_local_config": ParamsUtils.convert_to_ast(local_conf),
+ # orchestrator
+ "runtime_worker_options": ParamsUtils.convert_to_ast(worker_options),
+ "runtime_num_workers": 3,
+ "runtime_pipeline_id": "pipeline_id",
+ "runtime_job_id": "job_id",
+ "runtime_creation_delay": 0,
+ "runtime_code_location": ParamsUtils.convert_to_ast(code_location),
+ # hosp params
+ "hosp_metrics_list": ["CCR"]
+}
+if __name__ == "__main__":
+ # Set the simulated command line args
+ sys.argv = ParamsUtils.dict_to_req(d=params)
+ # create launcher
+ launcher = RayTransformLauncher(HigherOrderSyntacticProfilerRayTransformConfiguration())
+ # Launch the ray actor(s) to process the input
+ launcher.launch()
diff --git a/transforms/code/higher_order_syntactic_profiler/ray/src/hosp_transform_ray.py b/transforms/code/higher_order_syntactic_profiler/ray/src/hosp_transform_ray.py
new file mode 100644
index 0000000000..fabf28e8ba
--- /dev/null
+++ b/transforms/code/higher_order_syntactic_profiler/ray/src/hosp_transform_ray.py
@@ -0,0 +1,43 @@
+# (C) Copyright IBM Corp. 2024.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+from data_processing.utils import CLIArgumentProvider, get_logger
+from data_processing_ray.runtime.ray import RayTransformLauncher
+from data_processing_ray.runtime.ray.runtime_configuration import (
+ RayTransformRuntimeConfiguration,
+)
+from hosp_transform import HigherOrderSyntacticProfilerTransformConfiguration
+
+
+logger = get_logger(__name__)
+
+
+class HigherOrderSyntacticProfilerRayTransformConfiguration(RayTransformRuntimeConfiguration):
+ """
+ Implements the RayTransformConfiguration for HOSP as required by the RayTransformLauncher.
+ HOSP does not use a RayRuntime class so the superclass only needs the base
+ python-only configuration.
+ """
+
+ def __init__(self):
+ """
+ Initialization
+ :param base_configuration - base configuration class
+ """
+ super().__init__(transform_config=HigherOrderSyntacticProfilerTransformConfiguration())
+
+
+if __name__ == "__main__":
+ # launcher = HOSPRayLauncher()
+ launcher = RayTransformLauncher(HigherOrderSyntacticProfilerRayTransformConfiguration())
+ logger.info("Launching hosp transform")
+ launcher.launch()
diff --git a/transforms/code/higher_order_syntactic_profiler/ray/test-data/expected/metadata.json b/transforms/code/higher_order_syntactic_profiler/ray/test-data/expected/metadata.json
new file mode 100644
index 0000000000..42bb81a07a
--- /dev/null
+++ b/transforms/code/higher_order_syntactic_profiler/ray/test-data/expected/metadata.json
@@ -0,0 +1,46 @@
+{
+ "pipeline": "pipeline_id",
+ "job details": {
+ "job category": "preprocessing",
+ "job name": "HigherOrderSyntacticProfiler",
+ "job type": "ray",
+ "job id": "job_id",
+ "start_time": "2024-03-01 15:17:56",
+ "end_time": "2024-03-01 15:17:57",
+ "status": "success"
+ },
+ "code": [null],
+ "job_input_params": {
+ "sleep": 0,
+ "checkpointing": false,
+ "max_files": -1,
+ "number of workers": 1,
+ "worker options": {
+ "num_cpus": 0.8
+ },
+ "actor creation delay": 0
+ },
+ "execution_stats": {
+ "cpus": 10,
+ "gpus": 0,
+ "memory": 14.031964112073183,
+ "object_store": 2.0
+ },
+ "job_output_stats": {
+ "source_files": 1,
+ "source_size": 16534,
+ "result_files": 1,
+ "result_size": 16534,
+ "table_processing": 0.012392997741699219,
+ "nfiles": 1,
+ "nrows": 5
+ },
+ "source": {
+ "name": "test-data/data_processing/ray/hosp/input",
+ "type": "path"
+ },
+ "target": {
+ "name": "/tmp/HOSP4o9gv2bq",
+ "type": "path"
+ }
+}
diff --git a/transforms/code/higher_order_syntactic_profiler/ray/test-data/expected/test.parquet b/transforms/code/higher_order_syntactic_profiler/ray/test-data/expected/test.parquet
new file mode 100644
index 0000000000..8f89f008ae
Binary files /dev/null and b/transforms/code/higher_order_syntactic_profiler/ray/test-data/expected/test.parquet differ
diff --git a/transforms/code/higher_order_syntactic_profiler/ray/test-data/input/test.parquet b/transforms/code/higher_order_syntactic_profiler/ray/test-data/input/test.parquet
new file mode 100644
index 0000000000..f9ac1f0247
Binary files /dev/null and b/transforms/code/higher_order_syntactic_profiler/ray/test-data/input/test.parquet differ
diff --git a/transforms/code/higher_order_syntactic_profiler/ray/test/test_hosp_ray.py b/transforms/code/higher_order_syntactic_profiler/ray/test/test_hosp_ray.py
new file mode 100644
index 0000000000..16defe6f32
--- /dev/null
+++ b/transforms/code/higher_order_syntactic_profiler/ray/test/test_hosp_ray.py
@@ -0,0 +1,47 @@
+# (C) Copyright IBM Corp. 2024.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+import os
+
+from data_processing.test_support.launch.transform_test import (
+ AbstractTransformLauncherTest,
+)
+from data_processing_ray.runtime.ray import RayTransformLauncher
+from hosp_transform import hosp_metrics_cli_param
+from hosp_transform_ray import HigherOrderSyntacticProfilerRayTransformConfiguration
+
+
+class TestRayHigherOrderSyntacticProfilerTransform(AbstractTransformLauncherTest):
+ """
+ Extends the super-class to define the test data for the tests defined there.
+ The name of this class MUST begin with the word Test so that pytest recognizes it as a test class.
+ """
+
+ def get_test_transform_fixtures(self) -> list[tuple]:
+ src_file_dir = os.path.abspath(os.path.dirname(__file__))
+ fixtures = []
+
+ launcher = RayTransformLauncher(HigherOrderSyntacticProfilerRayTransformConfiguration())
+ input_dir = os.path.join(src_file_dir, "../test-data/input")
+ expected_dir = os.path.join(src_file_dir, "../test-data/expected")
+ runtime_config = {"run_locally": True}
+ transform_config = {hosp_metrics_cli_param: ["CCR"]}
+ fixtures.append(
+ (
+ launcher,
+ transform_config | runtime_config,
+ input_dir,
+ expected_dir,
+ [], # optional list of column names to ignore in comparing test-generated with expected.
+ )
+ )
+ return fixtures
diff --git a/transforms/code/semantic_profiler/Makefile b/transforms/code/semantic_profiler/Makefile
new file mode 100644
index 0000000000..a98281e4d6
--- /dev/null
+++ b/transforms/code/semantic_profiler/Makefile
@@ -0,0 +1,78 @@
+REPOROOT=../../..
+# Use make help, to see the available rules
+include $(REPOROOT)/.make.defaults
+
+setup::
+ @# Help: Recursively make $@ all subdirs
+ $(MAKE) RULE=$@ .recurse
+
+clean::
+ @# Help: Recursively make $@ all subdirs
+ $(MAKE) RULE=$@ .recurse
+
+build::
+ @# Help: Recursively make $@ in subdirs
+ $(MAKE) RULE=$@ .recurse
+venv::
+ @# Help: Recursively make $@ in subdirs
+ $(MAKE) RULE=$@ .recurse
+
+image::
+ @# Help: Recursively make $@ in all subdirs
+ @$(MAKE) RULE=$@ .recurse
+
+set-versions:
+ @# Help: Recursively $@ in all subdirs
+ @$(MAKE) RULE=$@ .recurse
+
+publish::
+ @# Help: Recursively make $@ in all subdirs
+ @$(MAKE) RULE=$@ .recurse
+
+test-image::
+ @# Help: Recursively make $@ in all subdirs
+ @$(MAKE) RULE=$@ .recurse
+
+test::
+ @# Help: Recursively make $@ in all subdirs
+ @$(MAKE) RULE=$@ .recurse
+
+test-src::
+ @# Help: Recursively make $@ in all subdirs
+ $(MAKE) RULE=$@ .recurse
+
+kind-load-image::
+ @# Help: Recursively make $@ in all subdirs
+ $(MAKE) RULE=$@ .recurse
+
+docker-load-image::
+ @# Help: Recursively make $@ in all subdirs
+ $(MAKE) RULE=$@ .recurse
+
+docker-save-image::
+ @# Help: Recursively make $@ in all subdirs
+ $(MAKE) RULE=$@ .recurse
+
+.PHONY: workflow-venv
+workflow-venv:
+ if [ -e kfp_ray ]; then \
+ $(MAKE) -C kfp_ray workflow-venv; \
+ fi
+
+.PHONY: workflow-test
+workflow-test:
+ if [ -e kfp_ray ]; then \
+ $(MAKE) -C kfp_ray workflow-test; \
+ fi
+
+.PHONY: workflow-upload
+workflow-upload:
+ if [ -e kfp_ray ]; then \
+ $(MAKE) -C kfp_ray workflow-upload; \
+ fi
+
+.PHONY: workflow-build
+workflow-build:
+ if [ -e kfp_ray ]; then \
+ $(MAKE) -C kfp_ray workflow-build; \
+ fi
diff --git a/transforms/code/semantic_profiler/README.md b/transforms/code/semantic_profiler/README.md
new file mode 100644
index 0000000000..9090a9b20a
--- /dev/null
+++ b/transforms/code/semantic_profiler/README.md
@@ -0,0 +1,12 @@
+# SP Transform
+The SP transform performs the semantic profiling of code snippets in a dataset. This
+is done based on the libraries and their categorization obtained from an Internal
+Knowledge Base (IKB), which is generated offline using LLMs. Per the set of
+[transform project conventions](../../README.md#transform-project-conventions)
+the following runtimes are available:
+
+* [python](python/README.md) - provides the base python-based transformation
+implementation.
+* [ray](ray/README.md) - enables the running of the base python transformation
+in a Ray runtime.
+
diff --git a/transforms/code/semantic_profiler/python/.dockerignore b/transforms/code/semantic_profiler/python/.dockerignore
new file mode 100644
index 0000000000..f7275bbbd0
--- /dev/null
+++ b/transforms/code/semantic_profiler/python/.dockerignore
@@ -0,0 +1 @@
+venv/
diff --git a/transforms/code/semantic_profiler/python/Dockerfile b/transforms/code/semantic_profiler/python/Dockerfile
new file mode 100644
index 0000000000..65760f9387
--- /dev/null
+++ b/transforms/code/semantic_profiler/python/Dockerfile
@@ -0,0 +1,41 @@
+FROM docker.io/python:3.10.14-slim-bullseye
+
+RUN pip install --upgrade --no-cache-dir pip
+
+# install pytest
+RUN pip install --no-cache-dir pytest
+
+# Create a user and use it to run the transform
+RUN useradd -ms /bin/bash dpk
+USER dpk
+WORKDIR /home/dpk
+
+# Copy and install data processing libraries
+# These are expected to be placed in the docker context before this is run (see the make image target).
+COPY --chown=dpk:root data-processing-lib-python/ data-processing-lib-python/
+RUN cd data-processing-lib-python && pip install --no-cache-dir -e .
+
+# END OF STEPS destined for a data-prep-kit base image
+
+COPY --chown=dpk:root src/ src/
+COPY --chown=dpk:root pyproject.toml pyproject.toml
+RUN pip install --no-cache-dir -e .
+
+# copy transform main() entry point to the image
+COPY ./src/sp_transform_python.py .
+
+# copy some of the samples in
+COPY ./src/sp_local.py local/
+
+# copy test
+COPY test/ test/
+COPY test-data/ test-data/
+
+# Set environment
+ENV PYTHONPATH /home/dpk
+
+# Put these at the end since they seem to upset the docker cache.
+ARG BUILD_DATE
+ARG GIT_COMMIT
+LABEL build-date=$BUILD_DATE
+LABEL git-commit=$GIT_COMMIT
diff --git a/transforms/code/semantic_profiler/python/Makefile b/transforms/code/semantic_profiler/python/Makefile
new file mode 100644
index 0000000000..ea01570e5b
--- /dev/null
+++ b/transforms/code/semantic_profiler/python/Makefile
@@ -0,0 +1,66 @@
+# Define the root of the local git clone for the common rules to be able
+# know where they are running from.
+REPOROOT=../../../..
+# Include a library of common .transform.* targets which most
+# transforms should be able to reuse. However, feel free
+# to override/redefine the rules below.
+
+# $(REPOROOT)/.make.versions file contains the versions
+include $(REPOROOT)/transforms/.make.transforms
+
+TRANSFORM_NAME=sp
+
+
+venv:: .transforms.python-venv
+
+test:: .transforms.python-test
+
+clean:: .transforms.clean
+
+image:: .transforms.python-image
+
+test-src:: .transforms.test-src
+
+setup:: .transforms.setup
+
+build:: build-dist image
+
+publish: publish-image
+
+publish-image:: .transforms.publish-image-python
+
+# distribution versions is the same as image version.
+set-versions:
+ $(MAKE) TRANSFORM_PYTHON_VERSION=$(SP_PYTHON_VERSION) TOML_VERSION=$(SP_PYTHON_VERSION) .transforms.set-versions
+
+build-dist:: .defaults.build-dist
+
+publish-dist:: .defaults.publish-dist
+
+test-image:: .transforms.python-test-image
+
+# Ensure RUN_ARGS has a default value
+RUN_ARGS ?= ""
+
+run-cli-sample: .transforms.run-cli-python-sample
+
+run-local-sample: .transforms.run-local-sample
+
+run-local-python-sample: .transforms.run-local-python-sample
+
+# run-local-python-sample:
+# $(MAKE) RUN_FILE=sp_local_python.py \
+# .transforms.run-local-python-sample
+
+# RUN_ARGS="--sp_ikb 'Contents' --language 'Language'" \
+#run-s3-ray-sample: .transforms.run-s3-ray-sample
+
+minio-start: .minio-start
+
+kind-load-image:: .transforms.kind-load-image
+
+docker-load-image: .defaults.docker-load-image
+
+docker-save-image: .defaults.docker-save-image
diff --git a/transforms/code/semantic_profiler/python/README.md b/transforms/code/semantic_profiler/python/README.md
new file mode 100644
index 0000000000..5d9bd3c7d5
--- /dev/null
+++ b/transforms/code/semantic_profiler/python/README.md
@@ -0,0 +1,73 @@
+# SP Transform
+Please see the set of
+[transform project conventions](../../../README.md#transform-project-conventions)
+for details on general project conventions, transform configuration,
+testing and IDE set up.
+
+## Summary
+This transform implements semantic profiling of a code dataset. Given an input dataset
+as a pyarrow table with the UBSRs of code data points, this transform extracts the libraries
+and obtains their semantic mapping by consulting the IKB. The semantic concepts obtained per data
+point are then added as a new column in the input dataset. Those libraries which are not present in the
+IKB are recorded in a separate 'null_libs' file for offline processing. This file is passed as an input
+to the [offline path](src/offline_path/), which reads the libraries and obtains their semantic categories
+from a predefined set by prompting an LLM. The examples passed into the prompt are present in the [examples folder](src/examples/).
+
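+The IKB consultation described above amounts to a keyed lookup. The sketch
+below is an illustration only, not the transform's actual code; the helper
+names and signatures are assumptions, while the CSV columns (Library,
+Language, Category) match the IKB files shipped in src/ikb/.
+
+```python
+import csv
+
+
+def load_ikb(ikb_file: str) -> dict[tuple[str, str], str]:
+    """Map (library, language) -> semantic category from the IKB CSV."""
+    with open(ikb_file, newline="") as f:
+        return {(r["Library"], r["Language"]): r["Category"] for r in csv.DictReader(f)}
+
+
+def profile(libraries: list[str], language: str, ikb: dict, null_libs: list) -> list[str]:
+    """Return the categories of known libraries; record unknown ones for offline processing."""
+    concepts = []
+    for lib in libraries:
+        category = ikb.get((lib, language))
+        if category is None:
+            null_libs.append((lib, language))  # later written to the null_libs file
+        else:
+            concepts.append(category)
+    return concepts
+```
+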
+## Configuration and command line Options
+
+The set of dictionary keys holding [SPTransform](src/sp_transform.py)
+configuration for values are as follows:
+
+* _sp_ikb_file_ - This is the path to the IKB file which is a CSV file and by default located in the [IKB](src/ikb/) folder.
+ It contains three columns - Library, Language, Category. The set of categories is defined in the
+ [concept map file](src/concept_map/).
+* _sp_null_libs_file_ - This is the path to the null_libs file which is also a CSV file containing two columns -
+ Library, Language. Its default value is src/ikb/null_libs.csv.
+
+## Running
+
+### Launched Command Line Options
+The following command line arguments are available in addition to
+the options provided by
+the [python launcher](../../../../data-processing-lib/doc/python-launcher-options.md).
+```
+ --sp_ikb_file SP_IKB_FILE
+ Path to the IKB file
+ --sp_null_libs_file SP_NULL_LIBS_FILE
+ Path to the file to store the libraries for which no match could be found in the IKB
+```
+
+| Parameter | Default | Description |
+|------------|----------|--------------|
+| `sp_ikb_file` | `src/ikb/ikb_model.csv` | Path to the IKB file. |
+| `sp_null_libs_file` | `src/ikb/null_libs.csv` | Path to the file in which libraries with no matching entries in the IKB are recorded. |
+
+These correspond to the configuration keys described above.
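+
+For programmatic runs, these keys can be passed through the pure python
+launcher in the same way the HOSP samples elsewhere in this repository do.
+A minimal sketch, assuming the runtime configuration class exported by
+src/sp_transform_python.py is named `SPPythonTransformConfiguration`
+(the actual name may differ):
+
+```python
+import sys
+
+from data_processing.runtime.pure_python import PythonTransformLauncher
+from data_processing.utils import ParamsUtils
+from sp_transform_python import SPPythonTransformConfiguration  # assumed class name
+
+params = {
+    # data access: read the local test data, write to output/
+    "data_local_config": ParamsUtils.convert_to_ast(
+        {"input_folder": "test-data/input", "output_folder": "output"}
+    ),
+    # SP configuration keys described above
+    "sp_ikb_file": "src/ikb/ikb_model.csv",
+    "sp_null_libs_file": "src/ikb/null_libs.csv",
+}
+# simulate the command line and launch
+sys.argv = ParamsUtils.dict_to_req(d=params)
+PythonTransformLauncher(SPPythonTransformConfiguration()).launch()
+```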
+
+### Running the samples
+To run the samples, use the following `make` targets:
+
+* `run-cli-sample` - runs src/sp_transform.py using command line args
+* `run-local-sample` - runs src/sp_local.py
+
+These targets will activate the virtual environment and set up any configuration needed.
+Use the `-n` option of `make` to see the detail of what is done to run the sample.
+
+For example,
+```shell
+make run-cli-sample
+...
+```
+Then
+
+```shell
+ls output
+```
+to see the results of the transform.
+
+### Transforming data using the transform image
+
+To use the transform image to transform your data, please refer to the
+[running images quickstart](../../../../doc/quick-start/run-transform-image.md),
+substituting the name of this transform image and runtime as appropriate.
+
diff --git a/transforms/code/semantic_profiler/python/pyproject.toml b/transforms/code/semantic_profiler/python/pyproject.toml
new file mode 100644
index 0000000000..01ea445f10
--- /dev/null
+++ b/transforms/code/semantic_profiler/python/pyproject.toml
@@ -0,0 +1,50 @@
+[project]
+name = "dpk_sp_transform_python"
+version = "0.2.1.dev0"
+requires-python = ">=3.10"
+description = "SemanticProfiler Python Transform"
+license = {text = "Apache-2.0"}
+readme = {file = "README.md", content-type = "text/markdown"}
+authors = [
+ { name = "Aishwariya Chakraborty", email = "aishwariya.chakraborty1@ibm.com" },
+]
+dependencies = [
+ "data-prep-toolkit==0.2.1.dev0",
+]
+
+[build-system]
+requires = ["setuptools>=68.0.0", "wheel", "setuptools_scm[toml]>=7.1.0"]
+build-backend = "setuptools.build_meta"
+
+[project.optional-dependencies]
+dev = [
+ "twine",
+ "pytest>=7.3.2",
+ "pytest-dotenv>=0.5.2",
+ "pytest-env>=1.0.0",
+ "pre-commit>=3.3.2",
+ "pytest-cov>=4.1.0",
+ "pytest-mock>=3.10.0",
+ "moto==5.0.5",
+ "markupsafe==2.0.1",
+]
+
+[options]
+package_dir = ["src","test"]
+
+[options.packages.find]
+where = ["src/"]
+
+[tool.pytest.ini_options]
+# Currently we use low coverage since we have to run tests separately (see makefile)
+#addopts = "--cov --cov-report term-missing --cov-fail-under 25"
+markers = ["unit: unit tests", "integration: integration tests"]
+
+[tool.coverage.run]
+include = ["src/*"]
+
+[tool.setuptools.packages.find]
+where = ["src"]
+
+[tool.setuptools.package-data]
+'src' = ['*.csv']
\ No newline at end of file
diff --git a/transforms/code/semantic_profiler/python/src/concept_map/updated_concept_list.csv b/transforms/code/semantic_profiler/python/src/concept_map/updated_concept_list.csv
new file mode 100644
index 0000000000..685d62d3d9
--- /dev/null
+++ b/transforms/code/semantic_profiler/python/src/concept_map/updated_concept_list.csv
@@ -0,0 +1,14 @@
+Category
+Algorithms and Data Structures
+Database Management
+File Handling
+Networking and Messaging
+Graphical User Interface Design
+Security
+Scheduling and Concurrency
+Logging and Monitoring
+Web Development
+Mathematics and Numerics
+Code Analysis and Linting
+Testing
+Data Serialization
\ No newline at end of file
diff --git a/transforms/code/semantic_profiler/python/src/examples/examples-i.csv b/transforms/code/semantic_profiler/python/src/examples/examples-i.csv
new file mode 100644
index 0000000000..639735b66b
--- /dev/null
+++ b/transforms/code/semantic_profiler/python/src/examples/examples-i.csv
@@ -0,0 +1,27 @@
+Library,Language
+algorithms,Python
+asyncio,Python
+arrow,Python
+authlib,Python
+webassets,Python
+scipy,Python
+pymysql,Python
+mimetypes,Python
+logging,Python
+flake8,Python
+mamba,Python
+marshmallow,Python
+tkinter,Python
+com.leansoft.bigqueue,Java
+com.cisco.commons.networking,Java
+net.time4j,Java
+org.apache.shiro,Java
+java.net.http,Java
+org.apache.commons.math4,Java
+ch.vorburger.mariaDB4j,Java
+com.google.jimfs,Java
+java.logging,Java
+org.sonar,Java
+org.junit,Java
+com.cedarsoftware:json-io,Java
+java.desktop,Java
diff --git a/transforms/code/semantic_profiler/python/src/examples/examples-o.csv b/transforms/code/semantic_profiler/python/src/examples/examples-o.csv
new file mode 100644
index 0000000000..b7eb9397a1
--- /dev/null
+++ b/transforms/code/semantic_profiler/python/src/examples/examples-o.csv
@@ -0,0 +1,27 @@
+Library,Language,Category
+algorithms,Python,Algorithms and Data Structures
+asyncio,Python,Networking and Messaging
+arrow,Python,Scheduling and Concurrency
+authlib,Python,Security
+webassets,Python,Web Development
+scipy,Python,Mathematics and Numerics
+pymysql,Python,Database Management
+mimetypes,Python,File Handling
+logging,Python,Logging and Monitoring
+flake8,Python,Code Analysis and Linting
+mamba,Python,Testing
+marshmallow,Python,Data Serialization
+tkinter,Python,Graphical User Interface Design
+com.leansoft.bigqueue,Java,Algorithms and Data Structures
+com.cisco.commons.networking,Java,Networking and Messaging
+net.time4j,Java,Scheduling and Concurrency
+org.apache.shiro,Java,Security
+java.net.http,Java,Web Development
+org.apache.commons.math4,Java,Mathematics and Numerics
+ch.vorburger.mariaDB4j,Java,Database Management
+com.google.jimfs,Java,File Handling
+java.logging,Java,Logging and Monitoring
+org.sonar,Java,Code Analysis and Linting
+org.junit,Java,Testing
+com.cedarsoftware:json-io,Java,Data Serialization
+java.desktop,Java,Graphical User Interface Design
\ No newline at end of file
diff --git a/transforms/code/semantic_profiler/python/src/ikb/ikb_model.csv b/transforms/code/semantic_profiler/python/src/ikb/ikb_model.csv
new file mode 100644
index 0000000000..bda9d2a667
--- /dev/null
+++ b/transforms/code/semantic_profiler/python/src/ikb/ikb_model.csv
@@ -0,0 +1,1021 @@
+Library,Language,Category
+dynamic_bitset,Cpp,Algorithms and Data Structures
+tries,Cpp,Algorithms and Data Structures
+algorithm,Cpp,Algorithms and Data Structures
+uni-algo,Cpp,Algorithms and Data Structures
+boost.asio,Cpp,Networking and Messaging
+cpp-netlib,Cpp,Networking and Messaging
+zmq,Cpp,Networking and Messaging
+azmq,Cpp,Networking and Messaging
+thread-pool,Cpp,Scheduling and Concurrency
+chrono,Cpp,Scheduling and Concurrency
+concurrencpp,Cpp,Scheduling and Concurrency
+time,Cpp,Scheduling and Concurrency
+libressl,Cpp,Security
+libgcrypt,Cpp,Security
+nettle,Cpp,Security
+digestpp,Cpp,Security
+libonion,Cpp,Web Development
+cpp-httplib,Cpp,Web Development
+jwt-cpp,Cpp,Security
+libfv,Cpp,Mathematics and Numerics
+blaze,Cpp,Mathematics and Numerics
+cnl,Cpp,Mathematics and Numerics
+eigen,Cpp,Mathematics and Numerics
+linalg,Cpp,Mathematics and Numerics
+clickhouse,Cpp,Database Management
+leveldb,Cpp,Database Management
+libpqxx,Cpp,Database Management
+sqlite,Cpp,Database Management
+filesystem,Cpp,File Handling
+llfio,Cpp,File Handling
+glob,Cpp,File Handling
+tinydir,Cpp,File Handling
+spdlog,Cpp,Logging and Monitoring
+boost.log,Cpp,Logging and Monitoring
+glog,Cpp,Logging and Monitoring
+reckless,Cpp,Algorithms and Data Structures
+clang-tidy,Cpp,Code Analysis and Linting
+clangd,Cpp,Code Analysis and Linting
+cquery,Cpp,Code Analysis and Linting
+cppcheck,Cpp,Code Analysis and Linting
+boost.test,Cpp,Testing
+benchmark,Cpp,Testing
+cpputest,Cpp,Testing
+ctest,Cpp,Testing
+dlib,Cpp,Algorithms and Data Structures
+blitz,Cpp,Algorithms and Data Structures
+armadillo,Cpp,Algorithms and Data Structures
+oneapi/dal,Cpp,Database Management
+frozen,Cpp,Data Serialization
+glaze,Cpp,Data Serialization
+cppcodec,Cpp,Data Serialization
+boost.serialization,Cpp,Data Serialization
+infra,Cpp,Networking and Messaging
+workflow,Cpp,Scheduling and Concurrency
+taskflow,Cpp,Scheduling and Concurrency
+libthrift,Cpp,Networking and Messaging
+cegui,Cpp,Graphical User Interface Design
+wxwidgets,Cpp,Graphical User Interface Design
+gtk,Cpp,Graphical User Interface Design
+nanogui,Cpp,Graphical User Interface Design
+com.leansoft.bigqueue,Java,Algorithms and Data Structures
+com.liveramp.hyperminhash,Java,Algorithms and Data Structures
+org.pcollections,Java,Algorithms and Data Structures
+org.ojalgo,Java,Algorithms and Data Structures
+com.cisco.commons.networking,Java,Networking and Messaging
+io.netty,Java,Networking and Messaging
+org.apache.kafka,Java,Networking and Messaging
+com.rabbitmq,Java,Networking and Messaging
+net.time4j,Java,Scheduling and Concurrency
+org.jobrunr:jobrunr,Java,Scheduling and Concurrency
+org.quartz,Java,Scheduling and Concurrency
+org.knowm.sundial,Java,Scheduling and Concurrency
+org.apache.shiro,Java,Security
+org.bouncycastle,Java,Security
+jdk.crypto.cryptoki,Java,Security
+jdk.security,Java,Security
+java.net.http,Java,Web Development
+jdk.httpserver,Java,Web Development
+io.activej.codegen,Java,Code Analysis and Linting
+ninja,Java,Code Analysis and Linting
+org.apache.commons.math4,Java,Mathematics and Numerics
+org.apache.commons.numbers,Java,Mathematics and Numerics
+org.apache.commons.rng,Java,Mathematics and Numerics
+com.mathLibrary,Java,Mathematics and Numerics
+ch.vorburger.mariaDB4j,Java,Database Management
+java.sql,Java,Database Management
+redis.clients.jedis,Java,Database Management
+org.jooq,Java,Database Management
+com.google.jimfs,Java,File Handling
+java.io,Java,File Handling
+java.nio.file,Java,File Handling
+org.apache.commons.vfs2,Java,File Handling
+java.logging,Java,Logging and Monitoring
+jdk.jconsole,Java,IT Automation
+java.util.logging,Java,Logging and Monitoring
+org.slf4j.Logger,Java,Logging and Monitoring
+org.sonar,Java,Code Analysis and Linting
+fr.inria.gforge.spoon,Java,Code Analysis and Linting
+com.puppycrawl.tools.checkstyle,Java,Code Analysis and Linting
+net.sourceforge.pmd,Java,Code Analysis and Linting
+org.junit,Java,Testing
+com.intuit.karate,Java,Testing
+org.mockito,Java,Testing
+org.apache.jmeter,Java,Testing
+org.influxdb,Java,Data Analysis
+org.apache.spark,Java,Data Analysis
+org.apache.flink,Java,Data Analysis
+weka,Java,Data Analysis
+com.cedarsoftware:json-io,Java,Data Serialization
+com.google.flatbuffers,Java,Data Serialization
+org.msgpack,Java,Data Serialization
+com.esotericsoftware.kryo,Java,Data Serialization
+jenkins.model.Jenkins,Java,IT Automation
+org.apache.maven,Java,IT Automation
+org.gradle,Java,IT Automation
+com.microsoft.terraform,Java,IT Automation
+java.desktop,Java,Graphical User Interface Design
+java.awt,Java,Graphical User Interface Design
+org.openjfx,Java,Graphical User Interface Design
+org.eclipse.swt,Java,Graphical User Interface Design
+ngraph.graph,JavaScript,Algorithms and Data Structures
+buckets,JavaScript,Algorithms and Data Structures
+mori,JavaScript,Algorithms and Data Structures
+graphlib,JavaScript,Algorithms and Data Structures
+socket.io,JavaScript,Networking and Messaging
+request,JavaScript,Web Development
+amqplib,JavaScript,Networking and Messaging
+mqtt,JavaScript,Networking and Messaging
+fullcalendar,JavaScript,Graphical User Interface Design
+later,JavaScript,Scheduling and Concurrency
+date-fns,JavaScript,Mathematics and Numerics
+Moment,JavaScript,Mathematics and Numerics
+helmet,JavaScript,Security
+bcrypt,JavaScript,Security
+js-xss,JavaScript,Security
+xss-filters,JavaScript,Security
+vue,JavaScript,Graphical User Interface Design
+react,JavaScript,Graphical User Interface Design
+express,JavaScript,Web Development
+angular,JavaScript,Graphical User Interface Design
+Polynomial,JavaScript,Mathematics and Numerics
+Numeral-js,JavaScript,Mathematics and Numerics
+accounting,JavaScript,Mathematics and Numerics
+odometer,JavaScript,Mathematics and Numerics
+datavore,JavaScript,Data Analysis
+DB,JavaScript,Database Management
+sql,JavaScript,Database Management
+NeDB,JavaScript,Database Management
+jStorage,JavaScript,Database Management
+store,JavaScript,Database Management
+cross-storage,JavaScript,File Handling
+localForage,JavaScript,File Handling
+console.log-wrapper,JavaScript,Logging and Monitoring
+storybook,JavaScript,Graphical User Interface Design
+minilog,JavaScript,Logging and Monitoring
+loglevel,JavaScript,Logging and Monitoring
+eslint,JavaScript,Code Analysis and Linting
+jshint,JavaScript,Code Analysis and Linting
+tslint,JavaScript,Code Analysis and Linting
+sonarqube,JavaScript,Code Analysis and Linting
+jest,JavaScript,Testing
+Cypress,JavaScript,Testing
+jasmine,JavaScript,Testing
+qunit,JavaScript,Testing
+fabric,JavaScript,Web Development
+d3,JavaScript,Graphical User Interface Design
+three,JavaScript,Graphical User Interface Design
+sigma,JavaScript,Graphical User Interface Design
+tempo,JavaScript,Graphical User Interface Design
+jsfmt,JavaScript,Data Serialization
+fecha,JavaScript,Data Serialization
+protobufjs,JavaScript,Data Serialization
+shelljs,JavaScript,IT Automation
+forever,JavaScript,Scheduling and Concurrency
+node-cron,JavaScript,Scheduling and Concurrency
+jenkins,JavaScript,IT Automation
+react,JavaScript,Web Development
+vue,JavaScript,Web Development
+electron,JavaScript,Web Development
+angular,JavaScript,Web Development
+stdgpu,C,Algorithms and Data Structures
+urdfdom,C,Algorithms and Data Structures
+cxxgraph,C,Algorithms and Data Structures
+metis,C,Algorithms and Data Structures
+nanomsg,C,Networking and Messaging
+curl,C,Web Development
+librabbitmq,C,Networking and Messaging
+mosquitto,C,Networking and Messaging
+uv,C,Scheduling and Concurrency
+time,C,Scheduling and Concurrency
+pth,C,Scheduling and Concurrency
+pthread,C,Scheduling and Concurrency
+OpenSSL,C,Security
+GnuTLS,C,Security
+libsodium,C,Security
+libgcrypt,C,Security
+facil.io,C,File Handling
+kcgi,C,Web Development
+KLone,C,Web Development
+civetweb,C,Web Development
+apophenia,C,Data Analysis
+cmathl,C,Mathematics and Numerics
+GSL,C,Mathematics and Numerics
+SLEPc,C,Mathematics and Numerics
+DuckDB,C,Database Management
+MySQL,C,Database Management
+sophia,C,Database Management
+SQLite,C,Database Management
+stdio,C,File Handling
+POSIX,C,IT Automation
+HDF5,C,File Handling
+fstream,C,File Handling
+syslog,C,Logging and Monitoring
+spdlog,C,Logging and Monitoring
+collectd,C,Data Analysis
+nagios-plugins,C,IT Automation
+libclang,C,Code Analysis and Linting
+Cppcheck,C,Code Analysis and Linting
+libclang-tidy,C,Code Analysis and Linting
+Infer,C,Code Analysis and Linting
+CMocka,C,Testing
+MinUnit,C,Testing
+Valgrind,C,Testing
+Check,C,Testing
+gsl-lite,C,Mathematics and Numerics
+libcsv,C,Data Analysis
+dataframe,C,Data Analysis
+iqa,C,Data Analysis
+libyaml,C,Data Serialization
+libfmt,C,Data Serialization
+flatbuffers,C,Data Serialization
+msgpack-c,C,Data Serialization
+nix_api_util,C,IT Automation
+libcircmetrics,C,Logging and Monitoring
+etcd-api,C,Networking and Messaging
+cetcd,C,Networking and Messaging
+microui,C,Graphical User Interface Design
+tinyfiledialogs,C,Graphical User Interface Design
+luigi,C,IT Automation
+GTK,C,Graphical User Interface Design
+Akade.IndexedSet,C#,Algorithms and Data Structures
+Akka.DistributedData,C#,Algorithms and Data Structures
+dotnet-mgcb-compute,C#,Mathematics and Numerics
+QuantConnect.Algorithm.CSharp,C#,Algorithms and Data Structures
+Microsoft.AspNetCore.Connections,C#,Networking and Messaging
+System.Net.Http.WinHttpHandler,C#,Web Development
+Microsoft.AspNetCore.WebUtilities,C#,Web Development
+MessagePipe,C#,Networking and Messaging
+Microsoft.SemanticKernel.Plugins.MsGraph,C#,Algorithms and Data Structures
+System.Threading.Tasks,C#,Scheduling and Concurrency
+Hangfire,C#,Scheduling and Concurrency
+OrchardCore.PublishLater,C#,Scheduling and Concurrency
+CefSharp.WinForm.Net.Core,C#,Graphical User Interface Design
+System.DirectoryServices.AccountManagement,C#,IT Automation
+System.Security.Permissions,C#,Security
+System.Security.AccessControl,C#,Security
+@pavelsavara/dotnet-runtime,C#,IT Automation
+@abp/ng.oauth,C#,Security
+@abp/core,C#,Web Development
+@abp/ng.components,C#,Web Development
+SharpDX.Mathematics,C#,Mathematics and Numerics
+AvaloniaMath,C#,Mathematics and Numerics
+WpfMath,C#,Mathematics and Numerics
+NCalcSync,C#,Mathematics and Numerics
+microsoft.entityframeworkcore.tools,C#,Database Management
+Dapper,C#,Database Management
+Microsoft.Azure.Management.PostgreSQL,C#,Database Management
+Microsoft.Azure.Management.CosmosDB,C#,Database Management
+Reloaded.Mod.Loader.IO,C#,File Handling
+DICOMcloud,C#,Data Analysis
+Aurio,C#,Graphical User Interface Design
+SeekableS3Stream,C#,File Handling
+Microsoft.Extensions.Logging,C#,Logging and Monitoring
+Microsoft.Azure.Management.Profiles.hybrid_2019_03_01.Monitor,C#,IT Automation
+Azure.Monitor.OpenTelemetry.AspNetCore,C#,Logging and Monitoring
+Microsoft.AspNetCore.Identity,C#,Security
+roslyn,C#,Code Analysis and Linting
+Microsoft.Toolkit.Uwp.PlatformSpecificAnalyzer,C#,Code Analysis and Linting
+Uno.Microsoft.Toolkit.Uwp.PlatformSpecificAnalyzer,C#,Code Analysis and Linting
+Microsoft.CST.ApplicationInspector.Common,C#,Code Analysis and Linting
+Microsoft.AspNetCore.TestHost,C#,Testing
+Microsoft.AspNetCore.Mvc.Testing,C#,Testing
+Microsoft.AspNetCore.SignalR.Specification.Tests,C#,Testing
+KIF,C#,Algorithms and Data Structures
+Microsoft.Data.Analysis,C#,Data Analysis
+Azure.Media.VideoAnalyzer.Edge,C#,Data Analysis
+Google.Cloud.Trace.V1,C#,Logging and Monitoring
+ClosedXML.Report,C#,Data Serialization
+System.Formats,C#,Data Serialization
+System.IO.Ports,C#,File Handling
+System.Text.Json,C#,Data Serialization
+App.Metrics.Formatters.Graphite,C#,Logging and Monitoring
+Microsoft.Crank.AzureDevOpsWorker,C#,IT Automation
+AWSSDK.DevOpsGuru,C#,IT Automation
+Microsoft.SourceLink.AzureDevOpsServer.Git,C#,IT Automation
+Saritasa.Tools.Messages.TestRuns,C#,Testing
+SSRD.IdentityUI,C#,Security
+bashforms,C#,Graphical User Interface Design
+NSCI,C#,Algorithms and Data Structures
+WSCT.GUI,C#,Graphical User Interface Design
+lock-free,D,Algorithms and Data Structures
+liblfdsd,D,Algorithms and Data Structures
+bitranged,D,Algorithms and Data Structures
+dstruct,D,Algorithms and Data Structures
+vibe-d,D,Web Development
+hunt-net,D,Networking and Messaging
+nbuff,D,Algorithms and Data Structures
+collie,D,Algorithms and Data Structures
+photon,D,Algorithms and Data Structures
+scheduled,D,Scheduling and Concurrency
+meta,D,Code Analysis and Linting
+ctini,D,Security
+hunt-security,D,Security
+hunt-shiro,D,Security
+secured,D,Security
+csprng,D,Security
+pgator-backend,D,Web Development
+hunt-cache,D,Data Analysis
+formoshlep,D,Data Analysis
+web-config,D,Web Development
+simple-math,D,Mathematics and Numerics
+evalex,D,Mathematics and Numerics
+dualnumbers,D,Mathematics and Numerics
+tau,D,Mathematics and Numerics
+mysql-native,D,Database Management
+derelict-pq,D,Database Management
+ddbc,D,Database Management
+dpq2,D,Database Management
+inifiled,D,File Handling
+fswatch,D,File Handling
+tinyfiledialogs,D,Graphical User Interface Design
+thepath,D,File Handling
+hunt,D,Testing
+gogga,D,Data Analysis
+dlog,D,Logging and Monitoring
+colorlog,D,Logging and Monitoring
+code_checker,D,Code Analysis and Linting
+dfmt,D,Data Serialization
+dscanner,D,Code Analysis and Linting
+dparse,D,Algorithms and Data Structures
+silly,D,Algorithms and Data Structures
+unit-threaded,D,Testing
+fluent-asserts,D,Testing
+dests,D,Algorithms and Data Structures
+magpie,D,Algorithms and Data Structures
+dvec,D,Mathematics and Numerics
+d-tree,D,Algorithms and Data Structures
+d_dataframes,D,Data Analysis
+jsonizer,D,Data Serialization
+mir-ion,D,Algorithms and Data Structures
+protobuf,D,Data Serialization
+siryul,D,Security
+iup,D,Graphical User Interface Design
+declui,D,Graphical User Interface Design
+d_imgui,D,Graphical User Interface Design
+dlangui,D,Graphical User Interface Design
+libgit2,D,Database Management
+yamkeys,D,Security
+lua-jit-d,D,IT Automation
+led,D,Graphical User Interface Design
+array-tool,Rust,Algorithms and Data Structures
+petgraph,Rust,Algorithms and Data Structures
+heapless,Rust,Algorithms and Data Structures
+argon2,Rust,Security
+mio,Rust,Networking and Messaging
+actix-rt,Rust,Scheduling and Concurrency
+socket2,Rust,Networking and Messaging
+crossbeam-channel,Rust,Networking and Messaging
+cron,Rust,Scheduling and Concurrency
+crossbeam-deque,Rust,Algorithms and Data Structures
+smolscale,Rust,Data Analysis
+job_scheduler,Rust,Scheduling and Concurrency
+zeroize,Rust,Security
+rocket,Rust,Web Development
+rpassword,Rust,Security
+trust-dns-resolver,Rust,Networking and Messaging
+@farmfe/core,Rust,IT Automation
+wasmer-clif-fork-frontend,Rust,Web Development
+seed,Rust,Graphical User Interface Design
+@farmfe/cli,Rust,IT Automation
+num-traits,Rust,Mathematics and Numerics
+num,Rust,Mathematics and Numerics
+num-bigint,Rust,Mathematics and Numerics
+cgmath,Rust,Mathematics and Numerics
+rusqlite,Rust,Database Management
+redis,Rust,Database Management
+diesel,Rust,Database Management
+postgres,Rust,Database Management
+fs_extra,Rust,File Handling
+toml,Rust,Data Serialization
+tempfile,Rust,File Handling
+zip,Rust,File Handling
+log,Rust,Logging and Monitoring
+env_logger,Rust,Logging and Monitoring
+tracing,Rust,Logging and Monitoring
+slog,Rust,Logging and Monitoring
+@cubejs-backend/linter,Rust,Code Analysis and Linting
+selene-lib,Rust,Data Analysis
+ast-grep,Rust,Code Analysis and Linting
+cargo-crev,Rust,Code Analysis and Linting
+assert_cmd,Rust,Testing
+quickcheck,Rust,Testing
+proptest,Rust,Testing
+wasm-bindgen-test,Rust,Testing
+rls-analysis,Rust,Code Analysis and Linting
+rstats,Rust,Data Analysis
+amadeus-commoncrawl,Rust,Data Analysis
+opendp,Rust,Data Analysis
+serde,Rust,Data Serialization
+serde_json,Rust,Data Serialization
+serde_yaml,Rust,Data Serialization
+bincode,Rust,Data Serialization
+lsio,Rust,File Handling
+shuttle-runtime,Rust,IT Automation
+rustc_data_structures,Rust,Algorithms and Data Structures
+compiler_base_span,Rust,Algorithms and Data Structures
+slint,Rust,Algorithms and Data Structures
+qinpel-wiz,Rust,Algorithms and Data Structures
+arc,Rust,Algorithms and Data Structures
+cushy,Rust,Algorithms and Data Structures
+tumblr/XExtensionItem,Objective-C,Algorithms and Data Structures
+TBQuadTree,Objective-C,Algorithms and Data Structures
+POSDataStructures,Objective-C,Algorithms and Data Structures
+PESGraph,Objective-C,Algorithms and Data Structures
+AFNetworking,Objective-C,Networking and Messaging
+CocoaAsyncSocket,Objective-C,Networking and Messaging
+Atlas,Objective-C,Graphical User Interface Design
+RestKit,Objective-C,Web Development
+SZServerTimeManager,Objective-C,Scheduling and Concurrency
+CalendarLib,Objective-C,Scheduling and Concurrency
+Selene,Objective-C,Security
+ZMJGanttChart,Objective-C,Graphical User Interface Design
+AWSCognitoIdentityProviderASF,Objective-C,Security
+gObfuscator,Objective-C,Security
+Lockbox,Objective-C,Security
+STPrivilegedTask,Objective-C,IT Automation
+vtx,Objective-C,Algorithms and Data Structures
+ColendiWebViewSDK,Objective-C,Web Development
+@abp/bootstrap-daterangepicker,Objective-C,Web Development
+@abp/ng.oauth,Objective-C,Security
+vMAT,Objective-C,Mathematics and Numerics
+crlibm,Objective-C,Mathematics and Numerics
+MCKNumerics,Objective-C,Mathematics and Numerics
+ACMatrix,Objective-C,Mathematics and Numerics
+DKDBManager,Objective-C,Database Management
+FlexileDatabase,Objective-C,Database Management
+KKDSqlite,Objective-C,Database Management
+SNDBManager,Objective-C,Database Management
+APSmartStorage,Objective-C,File Handling
+zipzap,Objective-C,File Handling
+AliyunOSSiOS,Objective-C,File Handling
+YTKKeyValueStore,Objective-C,Data Serialization
+github.com/CocoaLumberjack/CocoaLumberjack,Objective-C,Logging and Monitoring
+VENVersionTracker,Objective-C,IT Automation
+NSLogger,Objective-C,Logging and Monitoring
+NetworkEye,Objective-C,Networking and Messaging
+nq-test-react-native-maps,Objective-C,Graphical User Interface Design
+KIF,Objective-C,Testing
+facebookarchive/xctool,Objective-C,Code Analysis and Linting
+xctool,Objective-C,Code Analysis and Linting
+KRGreyTheory,Objective-C,Mathematics and Numerics
+DataGrinch,Objective-C,Data Analysis
+XsdaKit,Objective-C,Data Serialization
+cordova-pgyer-dandelion,Objective-C,Web Development
+sbjson,Objective-C,Data Serialization
+FXParser,Objective-C,Data Analysis
+CSV,Objective-C,Data Analysis
+NSMutableData+MultipartFormData,Objective-C,File Handling
+Masonry,Objective-C,Graphical User Interface Design
+Chameleon,Objective-C,Graphical User Interface Design
+Nimbus,Objective-C,Graphical User Interface Design
+GPUImage,Objective-C,Graphical User Interface Design
+infer,Objective-C,Code Analysis and Linting
+OCLint,Objective-C,Code Analysis and Linting
+sonatype,Objective-C,IT Automation
+sigrid,Objective-C,IT Automation
+fastlane,Objective-C,IT Automation
+hammerspoon,Objective-C,Graphical User Interface Design
+punic,Objective-C,IT Automation
+jenkins-mobile-pipeline-shared-libraries,Objective-C,IT Automation
+brotli,Ocaml,Data Compression
+dtoa,Ocaml,Algorithms and Data Structures
+bin_tree,Ocaml,Algorithms and Data Structures
+base_trie,Ocaml,Algorithms and Data Structures
+apero-net,Ocaml,Networking and Messaging
+conduit,Ocaml,Networking and Messaging
+netamqp,Ocaml,Networking and Messaging
+posix-mqueue,Ocaml,File Handling
+bap-primus-exploring-scheduler,Ocaml,Scheduling and Concurrency
+builder,Ocaml,IT Automation
+daypack-lib,Ocaml,Data Analysis
+riot,Ocaml,Web Development
+tls,Ocaml,Security
+osx-acl,Ocaml,Security
+content_security_policy,Ocaml,Security
+aws-sts,Ocaml,Security
+async_websocket,Ocaml,Web Development
+benchpress-server,Ocaml,Web Development
+builder-web,Ocaml,Web Development
+cduce_ws,Ocaml,Web Development
+posix-math,Ocaml,Mathematics and Numerics
+smol,Ocaml,Data Serialization
+crlibm,Ocaml,Mathematics and Numerics
+lem,Ocaml,Code Analysis and Linting
+caqti,Ocaml,Database Management
+dbforge,Ocaml,Database Management
+irmin,Ocaml,Database Management
+links-mysql,Ocaml,Database Management
+bitlib,Ocaml,Algorithms and Data Structures
+chamelon,Ocaml,Web Development
+fpath,Ocaml,File Handling
+fileutils,Ocaml,File Handling
+bolt,Ocaml,Algorithms and Data Structures
+dolog,Ocaml,Logging and Monitoring
+easy_logging,Ocaml,Logging and Monitoring
+loga,Ocaml,Logging and Monitoring
+bisect_ppx,Ocaml,Code Analysis and Linting
+calli,Ocaml,Algorithms and Data Structures
+clangml-transforms,Ocaml,Algorithms and Data Structures
+dolmen_bin,Ocaml,Algorithms and Data Structures
+base_quickcheck,Ocaml,Testing
+caravan,Ocaml,Web Development
+kaputt,Ocaml,Algorithms and Data Structures
+ounit2,Ocaml,Testing
+conformist,Ocaml,Code Analysis and Linting
+dataframe,Ocaml,Data Analysis
+dsfo,Ocaml,Data Analysis
+llama_midi,Ocaml,Graphical User Interface Design
+atdgen,Ocaml,Code Analysis and Linting
+bitpack_serializer,Ocaml,Data Serialization
+coq-serapi,Ocaml,Algorithms and Data Structures
+grpc,Ocaml,Networking and Messaging
+bap-build,Ocaml,IT Automation
+argsh,Ocaml,IT Automation
+conf-automake,Ocaml,IT Automation
+dtools,Ocaml,IT Automation
+bogue,Ocaml,Algorithms and Data Structures
+unison-gui,Ocaml,Graphical User Interface Design
+imguiml,Ocaml,Graphical User Interface Design
+altgr-ergo,Ocaml,Algorithms and Data Structures
+bk-tree,Haskell,Algorithms and Data Structures
+algebraic-graphs,Haskell,Algorithms and Data Structures
+recursion-schemes,Haskell,Algorithms and Data Structures
+AvlTree,Haskell,Algorithms and Data Structures
+grenade,Haskell,Security
+network-conduit,Haskell,Networking and Messaging
+streamly,Haskell,Algorithms and Data Structures
+hedgehog,Haskell,Testing
+haxl,Haskell,Web Development
+amazonka-scheduler,Haskell,Scheduling and Concurrency
+massiv-scheduler,Haskell,Scheduling and Concurrency
+gogol-datafusion,Haskell,Data Analysis
+tamarin-prover-theory,Haskell,Mathematics and Numerics
+tamarin-prover,Haskell,Mathematics and Numerics
+yst,Haskell,Data Analysis
+fireward,Haskell,Security
+snap-core,Haskell,Web Development
+snap-server,Haskell,Web Development
+gogol-pagespeed,Haskell,Web Development
+gogol-indexing,Haskell,Data Analysis
+pandoc,Haskell,Data Serialization
+Agda,Haskell,Mathematics and Numerics
+math-functions,Haskell,Mathematics and Numerics
+commodities,Haskell,Data Analysis
+gogol-spanner,Haskell,Database Management
+gogol-sqladmin,Haskell,Database Management
+gogol-datastore,Haskell,Database Management
+dbmigrations,Haskell,Database Management
+bytestring,Haskell,File Handling
+io-streams,Haskell,File Handling
+regions,Haskell,Algorithms and Data Structures
+amazonka-kinesis-video-webrtc-storage,Haskell,Data Analysis
+tensorflow-logging,Haskell,Logging and Monitoring
+wai-extra,Haskell,Web Development
+co-log,Haskell,Logging and Monitoring
+gogol-cloudmonitoring,Haskell,IT Automation
+pandoc,Haskell,Data Serialization
+cassava,Haskell,Data Analysis
+commonmark,Haskell,Data Serialization
+auto,Haskell,Code Analysis and Linting
+amazonka-devops-guru,Haskell,IT Automation
+deptrack-devops,Haskell,IT Automation
+gogol-testing,Haskell,Testing
+LogicGrowsOnTrees,Haskell,Algorithms and Data Structures
+gogol-datafusion,Haskell,Data Analysis
+vty-ui,Haskell,Graphical User Interface Design
+YampaSynth,Haskell,Algorithms and Data Structures
+master-plan,Haskell,IT Automation
+stan,Haskell,Data Analysis
+hlint,Haskell,Code Analysis and Linting
+liquidhaskell,Haskell,Code Analysis and Linting
+ghc,Haskell,IT Automation
+purescript,Haskell,Code Analysis and Linting
+ghcide-test-utils,Haskell,Testing
+hls-test-utils,Haskell,Testing
+yesod-test,Haskell,Testing
+statistics,Haskell,Mathematics and Numerics
+statistics-skinny,Haskell,Mathematics and Numerics
+ajhc,Haskell,Code Analysis and Linting
+fortran-src,Haskell,Algorithms and Data Structures
+BitVector,Nim,Algorithms and Data Structures
+rbtree,Nim,Algorithms and Data Structures
+binaryheap,Nim,Algorithms and Data Structures
+algorithm,Nim,Algorithms and Data Structures
+nativesockets,Nim,Networking and Messaging
+net,Nim,Networking and Messaging
+nimrdkafka,Nim,Networking and Messaging
+mqtt,Nim,Networking and Messaging
+monotimes,Nim,Scheduling and Concurrency
+times,Nim,Scheduling and Concurrency
+osproc,Nim,IT Automation
+schedules,Nim,Scheduling and Concurrency
+nimcrypt,Nim,Security
+seccomp,Nim,Security
+nimpass,Nim,Security
+quickcrypt,Nim,Security
+nerve,Nim,Networking and Messaging
+palladian,Nim,Web Development
+staticserver,Nim,Web Development
+phoon,Nim,Web Development
+seqmath,Nim,Mathematics and Numerics
+extmath,Nim,Mathematics and Numerics
+geometrymath,Nim,Mathematics and Numerics
+neo,Nim,Database Management
+niledb,Nim,Database Management
+couchdb,Nim,Database Management
+zfdbms,Nim,Database Management
+pdba,Nim,Database Management
+osfiles,Nim,File Handling
+fileinput,Nim,File Handling
+filetype,Nim,File Handling
+stor,Nim,File Handling
+octolog,Nim,Logging and Monitoring
+morelogging,Nim,Logging and Monitoring
+promexplorer,Nim,Data Analysis
+metrics,Nim,Data Analysis
+nimfmt,Nim,Code Analysis and Linting
+coco,Nim,Code Analysis and Linting
+treesitter,Nim,Code Analysis and Linting
+nimalyzer,Nim,Code Analysis and Linting
+testify,Nim,Testing
+nimtest,Nim,Testing
+testutils,Nim,Testing
+halonium,Nim,Networking and Messaging
+nimdata,Nim,Data Analysis
+datamancer,Nim,Data Analysis
+nimdataframe,Nim,Data Analysis
+mpfit,Nim,Mathematics and Numerics
+tomlserialization,Nim,Data Serialization
+protobufserialization,Nim,Data Serialization
+bson,Nim,Data Serialization
+eminim,Nim,Algorithms and Data Structures
+autome,Nim,IT Automation
+monit,Nim,Logging and Monitoring
+autonim,Nim,IT Automation
+nake,Nim,IT Automation
+nimblegui,Nim,Graphical User Interface Design
+nigui,Nim,Graphical User Interface Design
+sigui,Nim,Graphical User Interface Design
+rdgui,Nim,Graphical User Interface Design
+de.sciss:fingertree_2.11,Scala,Algorithms and Data Structures
+org.scalameta:semanticdb-scalac-core_2.11.12,Scala,Code Analysis and Linting
+org.axle-lang:axle-algorithms_2.11,Scala,Algorithms and Data Structures
+de.sciss:strugatzki_2.10,Scala,Algorithms and Data Structures
+org.apache.spark:spark-network-common_2.11,Scala,Networking and Messaging
+com.github.molecule-labs:molecule-net_2.9.3,Scala,Networking and Messaging
+org.elasticmq,Scala,Database Management
+com.typesafe.akka:akka-stream_2.12,Scala,Networking and Messaging
+com.miguno.akka:akka-mock-scheduler_2.11,Scala,Scheduling and Concurrency
+com.enragedginger:akka-quartz-scheduler_2.11,Scala,Scheduling and Concurrency
+edu.gemini:lucuma-typed-scheduler_sjs1_3,Scala,Scheduling and Concurrency
+io.getkyo:kyo-scheduler_2.13,Scala,Scheduling and Concurrency
+dev.zio:zio-json_3,Scala,Data Serialization
+dev.zio:zio-json_2.12,Scala,Data Serialization
+recheck,Scala,Code Analysis and Linting
+org.beangle.security:beangle-security-core,Scala,Security
+com.softwaremill.sttp:async-http-client-backend-future_2.12,Scala,Web Development
+com.softwaremill.sttp:akka-http-backend_2.12,Scala,Web Development
+com.eed3si9n:gigahorse-okhttp_2.12,Scala,Web Development
+com.softwaremill.sttp.client3:slf4j-backend_2.12,Scala,Logging and Monitoring
+com.github.vagmcs:optimus_2.11,Scala,Mathematics and Numerics
+com.github.vagmcs:optimus-solver-oj_2.11,Scala,Mathematics and Numerics
+io.github.scalamath:vecmatlib,Scala,Mathematics and Numerics
+io.github.scalamath:cmplxlib,Scala,Mathematics and Numerics
+com.typesafe.slick:slick_2.11,Scala,Database Management
+org.tpolecat:doobie-core_2.12,Scala,Database Management
+org.reactivemongo:reactivemongo_2.11,Scala,Database Management
+org.tpolecat:doobie-postgres_2.12,Scala,Database Management
+org.specs2:specs2_2.11,Scala,Testing
+com.github.pathikrit:better-files_2.12,Scala,File Handling
+com.github.scala-incubator.io:scala-io-file_2.10,Scala,File Handling
+de.sciss:audiofile_2.11,Scala,Data Analysis
+com.typesafe.scala-logging:scala-logging_2.12,Scala,Logging and Monitoring
+com.typesafe.scala-logging:scala-logging-slf4j_2.11,Scala,Logging and Monitoring
+org.clapper:grizzled-slf4j_2.11,Scala,Logging and Monitoring
+com.outr:scribe_2.12,Scala,Data Serialization
+org.psywerx.hairyfotr.linter,Scala,Code Analysis and Linting
+scala.meta.parsers,Scala,Algorithms and Data Structures
+org.scalastyle,Scala,Code Analysis and Linting
+com.sksamuel.scapegoat,Scala,Code Analysis and Linting
+org.scala-js:scalajs-test-bridge_2.13,Scala,Testing
+org.scala-js:scalajs-test-interface_2.12,Scala,Testing
+com.typesafe.play:play-test_2.11,Scala,Testing
+org.scalatest:scalatest_2.9.1,Scala,Testing
+org.finra.megasparkdiff:mega-spark-diff,Scala,Data Analysis
+com.github.vicpara:exploratory-data-analysis_2.10,Scala,Data Analysis
+org.emmalanguage:emma,Scala,Data Analysis
+org.emmalanguage:emma-benchmarks,Scala,Data Analysis
+org.simplex3d:simplex3d-data-format_2.10,Scala,Data Serialization
+org.wvlet.airframe:airframe-tablet_2.13.0-RC2,Scala,Data Serialization
+org.gnieh:fs2-data-text_2.13,Scala,Data Serialization
+com.fasterxml.jackson.module:jackson-module-scala_2.12,Scala,Data Serialization
+tech.orkestra:orkestra-core_sjs0.6_2.12,Scala,IT Automation
+com.goyeau:orchestra-cron_2.12,Scala,Scheduling and Concurrency
+com.aamend.spark:archetype,Scala,IT Automation
+io.kevinlee:sbt-devoops-github-core_2.12_1.0,Scala,IT Automation
+de.sciss:dotterweide-ui_2.11,Scala,Graphical User Interface Design
+org.scala-lang.modules.scala-swing,Scala,Graphical User Interface Design
+io.github.kacperfkorban.guinep-web,Scala,Web Development
+io.github.mimoguz.layeredfonticon-core,Scala,Graphical User Interface Design
+piecemeal,Dart,Algorithms and Data Structures
+collection,Dart,Algorithms and Data Structures
+pointycastle,Dart,Security
+graphs,Dart,Algorithms and Data Structures
+connectivity_plus,Dart,Networking and Messaging
+cached_network_image,Dart,File Handling
+connectivity,Dart,Networking and Messaging
+firebase_messaging,Dart,Networking and Messaging
+reflutter,Dart,Web Development
+server_universe,Dart,Web Development
+create-fullstack-app-cli,Dart,IT Automation
+angel_graphql,Dart,Web Development
+flutter_local_notifications,Dart,Graphical User Interface Design
+cron,Dart,Scheduling and Concurrency
+timer_builder,Dart,Scheduling and Concurrency
+syncfusion_flutter_calendar,Dart,Graphical User Interface Design
+google_sign_in,Dart,Security
+mqtt_client,Dart,Networking and Messaging
+angel_security,Dart,Security
+envied,Dart,Code Analysis and Linting
+math_expressions,Dart,Mathematics and Numerics
+more,Dart,Algorithms and Data Structures
+ml_linalg,Dart,Mathematics and Numerics
+fixed,Dart,Algorithms and Data Structures
+sqflite,Dart,Database Management
+cloud_firestore,Dart,Database Management
+postgres,Dart,Database Management
+hive,Dart,Database Management
+path_provider,Dart,File Handling
+image,Dart,Graphical User Interface Design
+glob,Dart,File Handling
+file,Dart,File Handling
+logging,Dart,Logging and Monitoring
+logger,Dart,Logging and Monitoring
+ansicolor,Dart,Logging and Monitoring
+pretty_dio_logger,Dart,Logging and Monitoring
+flutter_lints,Dart,Code Analysis and Linting
+pedantic_mono,Dart,Code Analysis and Linting
+carapacik_lints,Dart,Code Analysis and Linting
+velvet_custom_lints,Dart,Code Analysis and Linting
+test,Dart,Testing
+unittest,Dart,Testing
+build_test,Dart,Testing
+mocktail,Dart,Testing
+grizzly_array,Dart,Algorithms and Data Structures
+flutter_insights,Dart,Data Analysis
+packhorse,Dart,IT Automation
+plugin_mappintelligence,Dart,IT Automation
+yaml,Dart,Data Serialization
+http_parser,Dart,Web Development
+built_value,Dart,Data Serialization
+bson,Dart,Data Serialization
+unleash,Dart,IT Automation
+docrunner,Dart,IT Automation
+cobertura,Dart,Code Analysis and Linting
+bitwarden_secrets,Dart,Security
+magical_widget,Dart,Graphical User Interface Design
+flutter_auto_gui,Dart,Graphical User Interface Design
+gui_shape,Dart,Graphical User Interface Design
+rinf,Dart,Algorithms and Data Structures
+collections,Python,Algorithms and Data Structures
+heapq,Python,Algorithms and Data Structures
+algorithms,Python,Algorithms and Data Structures
+sortedcontainers,Python,Algorithms and Data Structures
+asyncio,Python,Networking and Messaging
+socket,Python,Networking and Messaging
+kafka-python,Python,Networking and Messaging
+dramatiq,Python,Networking and Messaging
+arrow,Python,Scheduling and Concurrency
+dateutil,Python,Scheduling and Concurrency
+threading-framework,Python,Scheduling and Concurrency
+schedule,Python,Scheduling and Concurrency
+authlib,Python,Security
+pyjwt,Python,Security
+django-allauth,Python,Security
+cryptography,Python,Security
+webassets,Python,Web Development
+html2text,Python,Web Development
+websockets,Python,Web Development
+tornado,Python,Web Development
+scipy,Python,Mathematics and Numerics
+numpy,Python,Mathematics and Numerics
+statsmodels,Python,Mathematics and Numerics
+sympy,Python,Mathematics and Numerics
+pymysql,Python,Database Management
+psycopg,Python,Database Management
+pymongo,Python,Database Management
+pickledb,Python,Database Management
+mimetypes,Python,File Handling
+pathlib,Python,File Handling
+python-magic,Python,File Handling
+watchdog,Python,Scheduling and Concurrency
+logging,Python,Logging and Monitoring
+structlog,Python,Logging and Monitoring
+loguru,Python,Logging and Monitoring
+psutil,Python,System Administration
+flake8,Python,Code Analysis and Linting
+pyflakes,Python,Code Analysis and Linting
+pycodestyle,Python,Code Analysis and Linting
+pylint,Python,Code Analysis and Linting
+mamba,Python,Testing
+pytest,Python,Testing
+unittest,Python,Testing
+selenium,Python,Web Development
+pandas,Python,Data Analysis
+optimus,Python,Data Analysis
+schema,Python,Data Analysis
+pydantic,Python,Data Serialization
+marshmallow,Python,Data Serialization
+pysimdjson,Python,Data Serialization
+json,Python,Data Serialization
+prophy,Python,Data Analysis
+ansible,Python,IT Automation
+pyinfra,Python,IT Automation
+fabric,Python,IT Automation
+borg,Python,System Administration
+tkinter,Python,Graphical User Interface Design
+pyglet,Python,Graphical User Interface Design
+pyqt,Python,Graphical User Interface Design
+kivy,Python,Graphical User Interface Design
+Graph,Perl,Algorithms and Data Structures
+MetaMap-DataStructures,Perl,Algorithms and Data Structures
+Array-Circular,Perl,Algorithms and Data Structures
+Tree-R,Perl,Algorithms and Data Structures
+NetAddr-MAC,Perl,Networking and Messaging
+Net-OpenSSH,Perl,Networking and Messaging
+Parse-IPCommand,Perl,Networking and Messaging
+Net-SSH2,Perl,Networking and Messaging
+docpad-plugin-scheduling,Perl,Scheduling and Concurrency
+Async-Event-Interval,Perl,Scheduling and Concurrency
+Schedule-SGELK,Perl,Scheduling and Concurrency
+Mojolicious-Plugin-Cron-Scheduler,Perl,Scheduling and Concurrency
+DBIx-Class-BcryptColumn,Perl,Security
+Crypt-DRBG,Perl,Security
+WWW-KeePassRest,Perl,Web Development
+Plack-Middleware-SecureHeaders,Perl,Security
+Mojolicious,Perl,Web Development
+Dancer2,Perl,Web Development
+Catalyst,Perl,Web Development
+Kossy,Perl,Web Development
+SPVM-Math,Perl,Mathematics and Numerics
+App-Math-Tutor,Perl,Mathematics and Numerics
+Math-RPN-Tiny,Perl,Mathematics and Numerics
+Math-Sidef,Perl,Mathematics and Numerics
+DBD-mysql,Perl,Database Management
+Redis,Perl,Database Management
+github.com/percona/percona-toolkit,Perl,Database Management
+Database-Abstraction,Perl,Database Management
+Path-Tiny,Perl,File Handling
+File-Util,Perl,File Handling
+PDF-API2,Perl,Data Serialization
+IO-All,Perl,File Handling
+CPAN-Testers-Schema,Perl,Data Analysis
+Log-Report,Perl,Logging and Monitoring
+Log-Contextual,Perl,Logging and Monitoring
+event-tracer,Perl,Logging and Monitoring
+Perl-Lint,Perl,Code Analysis and Linting
+Perl-Critic,Perl,Code Analysis and Linting
+B-Lint,Perl,Code Analysis and Linting
+Perl-Analyzer,Perl,Code Analysis and Linting
+Test-Strict,Perl,Testing
+Math-BigInt,Perl,Mathematics and Numerics
+Test-MockModule,Perl,Testing
+Test-Without-Module,Perl,Testing
+CLIPSeqTools,Perl,Data Analysis
+App-RecordStream,Perl,Data Analysis
+Data::Table,Perl,Data Analysis
+PDL::Dataframe,Perl,Data Analysis
+wxPerl,Perl,Graphical User Interface Design
+Perl-Tk,Perl,Graphical User Interface Design
+Prima,Perl,Graphical User Interface Design
+Perl/KDE,Perl,Graphical User Interface Design
+AnyData,Perl,Data Serialization
+Data-Format-Pretty-YAML,Perl,Data Serialization
+TOML-Tiny,Perl,Data Serialization
+CatalystX-Controller-ExtJS-REST-SimpleExcel,Perl,Web Development
+Rex,Perl,IT Automation
+com.viliussutkus89:SampleLibraryForSonatypePromotionTesting,Perl,IT Automation
+Jenkins::API,Perl,IT Automation
+Minilla,Perl,IT Automation
+@discordjs/collection,TypeScript,Algorithms and Data Structures
+js-sdsl,TypeScript,Algorithms and Data Structures
+typescript-collections,TypeScript,Algorithms and Data Structures
+fast-array-diff,TypeScript,Algorithms and Data Structures
+libp2p,TypeScript,Networking and Messaging
+@multiformats/multiaddr,TypeScript,Networking and Messaging
+@ethersproject/networks,TypeScript,Networking and Messaging
+nats,TypeScript,Networking and Messaging
+@types/node-schedule,TypeScript,Scheduling and Concurrency
+agenda,TypeScript,Scheduling and Concurrency
+@nestjs/schedule,TypeScript,Scheduling and Concurrency
+@solid-primitives/scheduled,TypeScript,Scheduling and Concurrency
+helmet,TypeScript,Security
+snyk,TypeScript,Security
+express-rate-limit,TypeScript,Web Development
+jssha,TypeScript,Security
+vite,TypeScript,Web Development
+vue-template-compiler,TypeScript,Web Development
+@testing-library/user-event,TypeScript,Testing
+antd,TypeScript,Graphical User Interface Design
+random-js,TypeScript,Mathematics and Numerics
+math-expression-evaluator,TypeScript,Mathematics and Numerics
+normal-distribution,TypeScript,Mathematics and Numerics
+@mathigon/fermat,TypeScript,Mathematics and Numerics
+mongodb,TypeScript,Database Management
+sequelize,TypeScript,Database Management
+firebase,TypeScript,Database Management
+typeorm,TypeScript,Database Management
+rollup-plugin-dts,TypeScript,Code Analysis and Linting
+tsx,TypeScript,Code Analysis and Linting
+ts-node-dev,TypeScript,Code Analysis and Linting
+serve,TypeScript,Web Development
+@oclif/errors,TypeScript,Error Handling
+@storybook/addon-console,TypeScript,Graphical User Interface Design
+conventional-changelog-writer,TypeScript,IT Automation
+git-raw-commits,TypeScript,IT Automation
+@codemirror/lint,TypeScript,Code Analysis and Linting
+@start/plugin-lib-eslint,TypeScript,Code Analysis and Linting
+remark-lint-fenced-code-flag-case,TypeScript,Code Analysis and Linting
+tslint-rxjs-subject-restrictions-rule,TypeScript,Code Analysis and Linting
+jest,TypeScript,Testing
+ts-jest,TypeScript,Testing
+babel-jest,TypeScript,Testing
+vitest,TypeScript,Testing
+data-forge,TypeScript,Data Analysis
+vue-component-meta,TypeScript,Graphical User Interface Design
+@opticss/element-analysis,TypeScript,Graphical User Interface Design
+@antv/l7-scene,TypeScript,Graphical User Interface Design
+table,TypeScript,Data Analysis
+form-data-encoder,TypeScript,File Handling
+ion-js,TypeScript,Web Development
+@nsis/language-data,TypeScript,Data Analysis
+docker-compose,TypeScript,IT Automation
+commitlint-azure-pipelines-cli,TypeScript,IT Automation
+azure-devops-node-api,TypeScript,IT Automation
+@karmaniverous/get-dotenv,TypeScript,File Handling
+happy-dom,TypeScript,Graphical User Interface Design
+react-png-tooltip,TypeScript,Graphical User Interface Design
+infamous,TypeScript,Graphical User Interface Design
+lume,TypeScript,Web Development
+github.com/davecgh/go-spew,Go,Algorithms and Data Structures
+github.com/google/btree,Go,Algorithms and Data Structures
+github.com/lann/ps,Go,Algorithms and Data Structures
+github.com/cespare/xxhash/v2,Go,Algorithms and Data Structures
+golang.org/x/net,Go,Networking and Messaging
+github.com/vishvananda/netns,Go,Networking and Messaging
+github.com/nats-io/nats,Go,Networking and Messaging
+github.com/jackc/pgproto3/v2,Go,Database Management
+k8s.io/kubernetes,Go,IT Automation
+github.com/go-co-op/gocron,Go,Scheduling and Concurrency
+atomicgo.dev/schedule,Go,Scheduling and Concurrency
+github.com/jasonlvhit/gocron,Go,Scheduling and Concurrency
+github.com/google/uuid,Go,Algorithms and Data Structures
+github.com/golang-jwt/jwt/v4,Go,Security
+github.com/microcosm-cc/bluemonday,Go,Security
+github.com/99designs/keyring,Go,Security
+github.com/gin-gonic/gin,Go,Web Development
+github.com/go-redis/cache/v8,Go,Database Management
+github.com/gorilla/sessions,Go,Web Development
+github.com/labstack/echo/v4,Go,Web Development
+gopkg.in/inf.v0,Go,Algorithms and Data Structures
+github.com/go-corelibs/maths,Go,Mathematics and Numerics
+github.com/go-inf/inf,Go,Algorithms and Data Structures
+github.com/pkg/math,Go,Mathematics and Numerics
+github.com/go-sql-driver/mysql,Go,Database Management
+github.com/lib/pq,Go,Database Management
+go.mongodb.org/mongo-driver,Go,Database Management
+go.etcd.io/bbolt,Go,Database Management
+github.com/pelletier/go-toml/v2,Go,Data Serialization
+github.com/joho/godotenv,Go,File Handling
+cloud.google.com/go/storage,Go,Database Management
+github.com/minio/minio-go/v7,Go,Database Management
+github.com/sirupsen/logrus,Go,Logging and Monitoring
+go.uber.org/zap,Go,Logging and Monitoring
+github.com/go-logr/logr,Go,Logging and Monitoring
+go.opentelemetry.io/otel,Go,Logging and Monitoring
+golang.org/x/lint,Go,Code Analysis and Linting
+github.com/golangci/lint-1,Go,Code Analysis and Linting
+github.com/mvdan/lint,Go,Code Analysis and Linting
+github.com/golang/lint,Go,Code Analysis and Linting
+github.com/stretchr/testify,Go,Testing
+github.com/google/go-cmp,Go,Code Analysis and Linting
+gopkg.in/check.v1,Go,Testing
+github.com/onsi/ginkgo,Go,Testing
+github.com/rocketlaunchr/dataframe-go,Go,Data Analysis
+github.com/fjukstad/walrus,Go,Algorithms and Data Structures
+github.com/hokiegeek/hgtealib,Go,Algorithms and Data Structures
+github.com/forchain/bitcoinbigdata,Go,Data Analysis
+github.com/google/orderedcode,Go,Code Analysis and Linting
+github.com/ipfs/go-block-format,Go,File Handling
+github.com/linkedin/goavro/v2,Go,Data Serialization
+github.com/minio/sio,Go,File Handling
+github.com/power-devops/perfstat,Go,Logging and Monitoring
+github.com/gruntwork-io/terratest,Go,Testing
+go.mozilla.org/sops/v3,Go,Security
+github.com/vladimirvivien/gexe,Go,Algorithms and Data Structures
+qtypes,Go,Algorithms and Data Structures
+github.com/ctessum/gobra,Go,Algorithms and Data Structures
+github.com/yogischogi/ui2go,Go,Graphical User Interface Design
+github.com/bhojpur/gui,Go,Graphical User Interface Design
diff --git a/transforms/code/semantic_profiler/python/src/ikb/null_libs.csv b/transforms/code/semantic_profiler/python/src/ikb/null_libs.csv
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/transforms/code/semantic_profiler/python/src/offline_path/generate_ikb.py b/transforms/code/semantic_profiler/python/src/offline_path/generate_ikb.py
new file mode 100644
index 0000000000..eb966a77c8
--- /dev/null
+++ b/transforms/code/semantic_profiler/python/src/offline_path/generate_ikb.py
@@ -0,0 +1,123 @@
+import os
+import argparse
+import csv
+import pyarrow.csv as pv
+from io import StringIO
+from watsonxai import generateResponseWatsonx
+
+
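+# Read a CSV file with pyarrow and re-serialize it into a single string.
+# Note: rows are re-joined naively with commas, so this assumes field values
+# contain no embedded commas (true for the library/language/category data here).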
+def getStringFromCSV(file):
+ table = pv.read_csv(file)
+ csv_buffer = StringIO()
+ column_names = table.column_names
+ csv_buffer.write(','.join(column_names) + '\n')
+ for row in range(table.num_rows):
+ row_data = [str(table[column][row].as_py()) for column in column_names]
+ csv_buffer.write(','.join(row_data) + '\n')
+ return csv_buffer.getvalue()
+
+
+
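+# Batch the null-library rows into prompt-sized chunks: each chunk repeats the
+# CSV header and carries up to 30 rows, so every LLM call receives a
+# self-contained CSV snippet.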
+def gen_combined_strings(file_data):
+ file_data = file_data.splitlines()
+ headers = file_data[0]
+ null_libraries = file_data[1:]
+ combined_strings = []
+ combined_string = ""
+ for idx, entry in enumerate(null_libraries, start=1):
+ if combined_string == "":
+ combined_string += f"{headers.strip()}\n"
+ combined_string += f"{entry}\n"
+ if idx % 30 == 0 or idx == len(null_libraries):
+ combined_strings.append(combined_string)
+ combined_string = ""
+ return combined_strings
+
+
+if __name__ == "__main__":
+
+ parser = argparse.ArgumentParser(description='Generate IKB.')
+ parser.add_argument('--null_libs_file', type=str, help='Path to null libraries file.', default=os.getenv('NULL_LIBS_FILE', '../ikb/null_libs.csv'))
+ parser.add_argument('--cmap_file', type=str, help='Path to concept map file.', default=os.getenv('CMAP_FILE', '../concept_map/updated_concept_list.csv'))
+ parser.add_argument('--input_examples_file', type=str, help='Path to input examples file.', default=os.getenv('EXAMPLES_I_FILE', '../examples/examples-i.csv'))
+ parser.add_argument('--output_examples_file', type=str, help='Path to output examples file.', default=os.getenv('EXAMPLES_O_FILE', '../examples/examples-o.csv'))
+ parser.add_argument('--extracted_data_file', type=str, help='Path to file in which LLM output will be stored.', default=os.getenv('EXTRACTED_DATA_FILE', '../ikb/extracted_data.csv'))
+ parser.add_argument('--api_type', type=str, help='API Type', default=os.getenv('API_TYPE', 'WatsonxAI'))
+ parser.add_argument('--api_key', type=str, help='API key', default=os.getenv('API_KEY', ''))
+ parser.add_argument('--api_endpoint', type=str, help='API endpoint', default=os.getenv('API_ENDPOINT', 'https://us-south.ml.cloud.ibm.com'))
+ parser.add_argument('--project_id', type=str, help='Project ID', default=os.getenv('PROJECT_ID', ''))
+ parser.add_argument('--model_id', type=str, help='LLM model ID', default=os.getenv('MODEL_ID', 'meta-llama/llama-3-70b-instruct'))
+
+ args = parser.parse_args()
+ concepts = getStringFromCSV(args.cmap_file)
+ input_examples = getStringFromCSV(args.input_examples_file)
+ output_examples = getStringFromCSV(args.output_examples_file)
+
+ null_libs_file_data = getStringFromCSV(args.null_libs_file)
+ combined_strings = gen_combined_strings(null_libs_file_data)
+
+ endtoken = ""
+ prompt_name = "My-prompt"
+ prompt_template = '''You are responsible for classifying programming language packages based on their functionality into one of the following STRICT categories:
+ ''' + concepts + '''
+
+ Instructions:
+
+ 1. Input: A CSV containing two columns:
+ a. Library – the name of the package
+ b. Language – the programming language of the package
+ Your task is to append a third column called Category where you will classify the package's primary function into one of the following categories.\n
+
+ 2. Output: The updated CSV with the new Category column.
+
+ 3. Categorization Guidelines:
+ a. Classify each package based on its primary functionality.
+ b. Only use categories from the given list. Do not invent or modify categories.
+
+ 4. Output format: Provide the updated CSV data in the exact format as shown below:
+ a. Columns: Library, Language, Category
+ b. End the response with to indicate completion.
+ c. Do not include any double quotes in the output.
+
+ 5. Only use categories from the given list. Do not invent or modify categories.
+
+ 6. Strictly do not provide any explanations or commentary or notes before and/or after the table.
+
+ Examples:
+ INPUT:
+ ''' + str(input_examples) + "OUTPUT:\n" + str(output_examples).strip("\n")+"\n"
+
+ headers = ["Library", "Language", "Category"]
+ file_exists = os.path.exists(args.extracted_data_file)
+ if not file_exists:
+ with open(args.extracted_data_file, mode='w', newline='') as f:
+ csv_writer = csv.writer(f, quoting=csv.QUOTE_NONE, escapechar='\\')
+ csv_writer.writerow(headers)
+
+
+ for combined_string in combined_strings:
+ input_template = prompt_template + f"\n\nINPUT: {combined_string} \nOUTPUT: "
+ if args.api_type == 'WatsonxAI':
+ response = generateResponseWatsonx(args.api_key, args.api_endpoint, args.model_id, args.project_id, input_template)
+ # Guard against an empty end token: str.split raises on an empty separator.
+ data = response.split(endtoken)[0] if endtoken else response
+ csv_content = data.splitlines()
+ not_first_row = 0
+ with open(args.extracted_data_file, mode='a', newline='') as f:
+ csv_writer = csv.writer(f, quoting=csv.QUOTE_NONE, escapechar='\\')
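+ # The model echoes the CSV header as its first line; the not_first_row flag
+ # skips it so that only data rows are appended to the extracted-data file.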
+ for line in csv_content:
+ if not_first_row:
+ row = line.split(',')
+ csv_writer.writerow(row)
+ not_first_row = 1
diff --git a/transforms/code/semantic_profiler/python/src/offline_path/watsonxai.py b/transforms/code/semantic_profiler/python/src/offline_path/watsonxai.py
new file mode 100644
index 0000000000..e346c18944
--- /dev/null
+++ b/transforms/code/semantic_profiler/python/src/offline_path/watsonxai.py
@@ -0,0 +1,25 @@
+from ibm_watsonx_ai.metanames import GenTextParamsMetaNames as GenParams
+from ibm_watsonx_ai.foundation_models import ModelInference
+from ibm_watsonx_ai import Credentials
+
+
+def generateResponseWatsonx(api_key, api_endpoint, model_id, project_id, input_template):
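+ # Issue a single greedy-decoding generation request to watsonx.ai;
+ # MAX_NEW_TOKENS caps the length of the returned text.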
+ credentials = Credentials(api_key=api_key, url=api_endpoint)
+ parameters = {
+ GenParams.DECODING_METHOD: "greedy",
+ GenParams.MAX_NEW_TOKENS: 100,
+ GenParams.STOP_SEQUENCES: [""]
+ }
+ model = ModelInference(
+ model_id=model_id,
+ params=parameters,
+ credentials=credentials,
+ project_id=project_id)
+ response = model.generate_text(input_template)
+ return response
diff --git a/transforms/code/semantic_profiler/python/src/sp_helper.py b/transforms/code/semantic_profiler/python/src/sp_helper.py
new file mode 100644
index 0000000000..3986abda4a
--- /dev/null
+++ b/transforms/code/semantic_profiler/python/src/sp_helper.py
@@ -0,0 +1,91 @@
+import pyarrow.csv as pacsv
+import csv
+
+
+
+class TrieNode:
+ '''
+ Implements one node of a Trie datastructure
+ '''
+ def __init__(self):
+ self.children = {}
+ self.is_end_of_word = False
+ self.data = None
+
+class Trie:
+ '''
+ Implements a Trie datastructure for efficient retrieval of concepts from the IKB.
+ '''
+ def __init__(self):
+ self.root = TrieNode()
+
+ def insert(self, library_name, programming_language, functionality):
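+ # Walk the trie one character of the library name at a time, creating
+ # nodes as needed; the terminal node stores the language and category.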
+ node = self.root
+ for char in library_name:
+ if char not in node.children:
+ node.children[char] = TrieNode()
+ node = node.children[char]
+ node.data = {}
+ node.data['Category'] = functionality
+ node.data['Language'] = programming_language
+ node.is_end_of_word = True
+
+ def search(self, library_name, programming_language):
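+ # Note: programming_language is accepted for interface symmetry but is not
+ # currently used; lookups are keyed on the library name only.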
+ node = self.root
+ for char in library_name:
+ if char not in node.children:
+ return None
+ node = node.children[char]
+ if node.is_end_of_word and node.data:
+ return node.data
+ return None
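+
+# Illustrative usage (not part of the transform flow): after
+# trie.insert("numpy", "Python", "Mathematics and Numerics"),
+# trie.search("numpy", "Python") returns
+# {'Category': 'Mathematics and Numerics', 'Language': 'Python'}.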
+
+
+class knowledge_base:
+ '''
+ Implements the internal knowledge base.
+ '''
+ def __init__(self, ikb_file, null_libs_file):
+ self.knowledge_base_file = ikb_file
+ self.null_file = null_libs_file
+ self.knowledge_base_table = None
+ self.knowledge_base_trie = None
+ # Keep this set per-instance: as a mutable class attribute it would be
+ # shared across all knowledge_base objects.
+ self.entries_with_null_coverage = set()
+
+ def load_ikb_trie(self):
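+ # Keys are lower-cased library names; concept_extractor lower-cases its
+ # queries to match.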
+ self.knowledge_base_table = pacsv.read_csv(self.knowledge_base_file)
+ self.knowledge_base_trie = Trie()
+ library_column = self.knowledge_base_table.column('Library').to_pylist()
+ language_column = self.knowledge_base_table.column('Language').to_pylist()
+ category_column = self.knowledge_base_table.column('Category').to_pylist()
+ for library, language, category in zip(library_column, language_column, category_column):
+ self.knowledge_base_trie.insert(str.lower(library), language, category)
+
+ def write_null_files(self):
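+ # Append every (library, language) pair that had no IKB match to the
+ # null-libs CSV, then clear the buffer so entries are not written twice.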
+ with open(self.null_file, 'a+', newline='', encoding='utf-8') as csvfile:
+ writer = csv.writer(csvfile)
+ for entry in self.entries_with_null_coverage:
+ writer.writerow([entry[0], entry[1]])
+ self.entries_with_null_coverage = set()
+
+
+def concept_extractor(libraries,language,ikb):
+ '''
+ Given a set of libraries and the corresponding programming language along with the IKB trie, this function
+ returns the matching concept(s) as a comma separated list joined into a string.
+ '''
+ concept_coverage = set()
+ libraries = [item.strip() for item in libraries.split(",")]
+ for library in libraries:
+ if library:
+ extracted_base_name = str.lower(library)
+ matched_entry = ikb.knowledge_base_trie.search(extracted_base_name, language)
+ if matched_entry:
+ concept_coverage.add(matched_entry['Category'].strip())
+ else:
+ ikb.entries_with_null_coverage.add((library,language))
+ return ','.join(sorted(concept_coverage))
\ No newline at end of file
diff --git a/transforms/code/semantic_profiler/python/src/sp_local.py b/transforms/code/semantic_profiler/python/src/sp_local.py
new file mode 100644
index 0000000000..33a863f7a7
--- /dev/null
+++ b/transforms/code/semantic_profiler/python/src/sp_local.py
@@ -0,0 +1,35 @@
+# (C) Copyright IBM Corp. 2024.
+# Licensed under the Apache License, Version 2.0 (the “License”);
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an “AS IS” BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+import os
+
+from data_processing.data_access import DataAccessLocal
+from sp_transform import SemanticProfilerTransform
+
+
+# create parameters
+input_folder = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "test-data", "input"))
+
+sp_params = {"ikb_file": "src/ikb/ikb_model.csv", "null_libs_file": "src/ikb/null_libs.csv"}
+
+if __name__ == "__main__":
+ # Here we show how to run outside of the runtime
+ # Create and configure the transform.
+ transform = SemanticProfilerTransform(sp_params)
+ # Use the local data access to read a parquet table.
+ data_access = DataAccessLocal()
+ table, _ = data_access.get_table(os.path.join(input_folder, "test.parquet"))
+ print(f"input table: {table}")
+ # Transform the table
+ table_list, metadata = transform.transform(table)
+ print(f"\noutput table: {table_list}")
+ print(f"output metadata : {metadata}")
diff --git a/transforms/code/semantic_profiler/python/src/sp_local_python.py b/transforms/code/semantic_profiler/python/src/sp_local_python.py
new file mode 100644
index 0000000000..be468d761d
--- /dev/null
+++ b/transforms/code/semantic_profiler/python/src/sp_local_python.py
@@ -0,0 +1,47 @@
+# (C) Copyright IBM Corp. 2024.
+# Licensed under the Apache License, Version 2.0 (the “License”);
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an “AS IS” BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+import os
+import sys
+
+from data_processing.runtime.pure_python import PythonTransformLauncher
+from data_processing.utils import ParamsUtils
+from sp_transform_python import SemanticProfilerPythonTransformConfiguration
+
+
+# create parameters
+input_folder = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "test-data", "input"))
+output_folder = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "output"))
+local_conf = {
+ "input_folder": input_folder,
+ "output_folder": output_folder,
+}
+code_location = {"github": "github", "commit_hash": "12345", "path": "path"}
+params = {
+ # Data access. Only required parameters are specified
+ "data_local_config": ParamsUtils.convert_to_ast(local_conf),
+ # execution info
+ "runtime_pipeline_id": "pipeline_id",
+ "runtime_job_id": "job_id",
+ "runtime_code_location": ParamsUtils.convert_to_ast(code_location),
+ # sp params
+ "sp_ikb_file": "src/ikb/ikb_model.csv",
+ "sp_null_libs_file": "src/ikb/null_libs.csv"
+
+}
+if __name__ == "__main__":
+ # Set the simulated command line args
+ sys.argv = ParamsUtils.dict_to_req(d=params)
+ # create launcher
+ launcher = PythonTransformLauncher(runtime_config=SemanticProfilerPythonTransformConfiguration())
+ # Launch the ray actor(s) to process the input
+ launcher.launch()
diff --git a/transforms/code/semantic_profiler/python/src/sp_local_python_multiprocessor.py b/transforms/code/semantic_profiler/python/src/sp_local_python_multiprocessor.py
new file mode 100644
index 0000000000..607c42770b
--- /dev/null
+++ b/transforms/code/semantic_profiler/python/src/sp_local_python_multiprocessor.py
@@ -0,0 +1,47 @@
+# (C) Copyright IBM Corp. 2024.
+# Licensed under the Apache License, Version 2.0 (the “License”);
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an “AS IS” BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+import os
+import sys
+
+from data_processing.runtime.pure_python import PythonTransformLauncher
+from data_processing.utils import ParamsUtils
+from sp_transform_python import SemanticProfilerPythonTransformConfiguration
+
+
+# create parameters
+input_folder = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "test-data", "input"))
+output_folder = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "output"))
+local_conf = {
+ "input_folder": input_folder,
+ "output_folder": output_folder,
+}
+code_location = {"github": "github", "commit_hash": "12345", "path": "path"}
+params = {
+ # Data access. Only required parameters are specified
+ "data_local_config": ParamsUtils.convert_to_ast(local_conf),
+ # execution info
+ "runtime_pipeline_id": "pipeline_id",
+ "runtime_job_id": "job_id",
+ "runtime_code_location": ParamsUtils.convert_to_ast(code_location),
+ # "runtime_num_processors": 2,
+ # sp params
+ "sp_ikb_file": "src/ikb/ikb_model.csv",
+ "sp_null_libs_file": "src/ikb/null_libs.csv"
+}
+if __name__ == "__main__":
+ # Set the simulated command line args
+ sys.argv = ParamsUtils.dict_to_req(d=params)
+ # create launcher
+ launcher = PythonTransformLauncher(runtime_config=SemanticProfilerPythonTransformConfiguration())
+ # Launch the ray actor(s) to process the input
+ launcher.launch()
diff --git a/transforms/code/semantic_profiler/python/src/sp_transform.py b/transforms/code/semantic_profiler/python/src/sp_transform.py
new file mode 100644
index 0000000000..9069ca8053
--- /dev/null
+++ b/transforms/code/semantic_profiler/python/src/sp_transform.py
@@ -0,0 +1,124 @@
+# (C) Copyright IBM Corp. 2024.
+# Licensed under the Apache License, Version 2.0 (the “License”);
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an “AS IS” BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+
+from argparse import ArgumentParser, Namespace
+from typing import Any
+
+import pyarrow as pa
+from data_processing.transform import AbstractTableTransform, TransformConfiguration
+from data_processing.utils import CLIArgumentProvider
+from sp_helper import knowledge_base, concept_extractor
+
+
+short_name = "sp"
+cli_prefix = f"{short_name}_"
+
+ikb_file = "ikb_file"
+null_libs_file = "null_libs_file"
+
+ikb_file_cli_param = f"{cli_prefix}{ikb_file}"
+null_libs_file_cli_param = f"{cli_prefix}{null_libs_file}"
+
+
+
+class SemanticProfilerTransform(AbstractTableTransform):
+ """
+ Implements the semantic profiler transform on a pyarrow table
+ """
+
+ def __init__(self, config: dict[str, Any]):
+ """
+ Initialize based on the dictionary of configuration information.
+ This is generally called with configuration parsed from the CLI arguments defined
+ by the companion runtime, SemanticProfilerTransformRuntime. If running inside the RayMutatingDriver,
+ these will be provided by that class.
+ """
+ # Make sure that the param name corresponds to the name used in apply_input_params method
+ # of SemanticProfilerTransformConfiguration class
+ super().__init__(config)
+ self.ikb_file = config.get("ikb_file", "../src/ikb/ikb_model.csv")
+ self.null_libs_file = config.get("null_libs_file", "../src/ikb/null_libs.csv")
+
+ def transform(self, table: pa.Table, file_name: str = None) -> tuple[list[pa.Table], dict[str, Any]]:
+ """
+ Converts one input Table to zero or more output tables, together with
+ a dictionary of arbitrary execution statistics.
+ This implementation takes a pyarrow table (output of the USBR transform) as input and obtains the
+ semantic mapping of each datapoint from the Internal Knowledge Base. These semantic concepts are added
+ to the input table as a new column, and the table is returned as output. Datapoints for which no
+ semantic mapping is found are written to the "null_libs.csv" file.
+ """
+ self.logger.debug(f"Transforming one table with {len(table)} rows")
+ ikb = knowledge_base(self.ikb_file, self.null_libs_file)
+ ikb.load_ikb_trie()
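+ # Note: the IKB trie is rebuilt on every transform() call; this is cheap
+ # for a small CSV-backed knowledge base but could be hoisted if needed.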
+ libraries = table.column('Library').to_pylist()
+ language = table.column('Language').to_pylist()
+ concepts = [concept_extractor(lib, lang, ikb) for lib, lang in zip(libraries, language)]
+ new_col = pa.array(concepts)
+ table = table.append_column('Concepts', new_col)
+ ikb.write_null_files()
+ # Add some sample metadata.
+ self.logger.debug(f"Transformed one table with {len(table)} rows")
+ metadata = {"nfiles": 1, "nrows": len(table)}
+ return [table], metadata
+
+
+class SemanticProfilerTransformConfiguration(TransformConfiguration):
+
+ """
+ Provides support for configuring and using the associated Transform class, including
+ configuration with CLI args.
+ """
+
+ def __init__(self):
+ super().__init__(
+ name=short_name,
+ transform_class=SemanticProfilerTransform,
+ )
+ from data_processing.utils import get_logger
+
+ self.logger = get_logger(__name__)
+
+ def add_input_params(self, parser: ArgumentParser) -> None:
+ """
+ Add Transform-specific arguments to the given parser.
+ This will be included in a dictionary used to initialize the SemanticProfilerTransform.
+ By convention a common prefix should be used for all transform-specific CLI args
+ (e.g., sp_, pii_, etc.)
+ """
+
+ parser.add_argument(
+ f"--{ikb_file_cli_param}",
+ type=str,
+ default=None,
+ help="Default IKB file",
+ )
+
+ parser.add_argument(
+ f"--{null_libs_file_cli_param}",
+ type=str,
+ default=None,
+ help="Default Null Libraries file",
+ )
+
+
+ def apply_input_params(self, args: Namespace) -> bool:
+ """
+ Validate and apply the arguments that have been parsed
+ :param args: user defined arguments.
+ :return: True, if validate pass or False otherwise
+ """
+ captured = CLIArgumentProvider.capture_parameters(args, cli_prefix, False)
+ self.params = self.params | captured
+ self.logger.info(f"sp parameters are : {self.params}")
+ return True
diff --git a/transforms/code/semantic_profiler/python/src/sp_transform_python.py b/transforms/code/semantic_profiler/python/src/sp_transform_python.py
new file mode 100644
index 0000000000..d353283647
--- /dev/null
+++ b/transforms/code/semantic_profiler/python/src/sp_transform_python.py
@@ -0,0 +1,45 @@
+# (C) Copyright IBM Corp. 2024.
+# Licensed under the Apache License, Version 2.0 (the “License”);
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an “AS IS” BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+import time
+
+from data_processing.runtime.pure_python import PythonTransformLauncher
+from data_processing.runtime.pure_python.runtime_configuration import (
+ PythonTransformRuntimeConfiguration,
+)
+from data_processing.utils import get_logger
+from sp_transform import SemanticProfilerTransformConfiguration
+
+
+logger = get_logger(__name__)
+
+
+class SemanticProfilerPythonTransformConfiguration(PythonTransformRuntimeConfiguration):
+ """
+ Implements the PythonTransformConfiguration for SemanticProfiler as required by the PythonTransformLauncher.
+ SemanticProfiler does not use a RayRuntime class so the superclass only needs the base
+ python-only configuration.
+ """
+
+ def __init__(self):
+ """
+ Initialization
+ :param base_configuration - base configuration class
+ """
+ super().__init__(transform_config=SemanticProfilerTransformConfiguration())
+
+
+if __name__ == "__main__":
+ launcher = PythonTransformLauncher(SemanticProfilerPythonTransformConfiguration())
+ logger.info("Launching sp transform")
+ launcher.launch()
diff --git a/transforms/code/semantic_profiler/python/test-data/expected/metadata.json b/transforms/code/semantic_profiler/python/test-data/expected/metadata.json
new file mode 100644
index 0000000000..8797e64fa6
--- /dev/null
+++ b/transforms/code/semantic_profiler/python/test-data/expected/metadata.json
@@ -0,0 +1,46 @@
+{
+ "pipeline": "pipeline_id",
+ "job details": {
+ "job category": "preprocessing",
+ "job name": "SemanticProfiler",
+ "job type": "ray",
+ "job id": "job_id",
+ "start_time": "2024-03-01 15:17:56",
+ "end_time": "2024-03-01 15:17:57",
+ "status": "success"
+ },
+ "code": [null],
+ "job_input_params": {
+ "sleep": 0,
+ "checkpointing": false,
+ "max_files": -1,
+ "number of workers": 1,
+ "worker options": {
+ "num_cpus": 0.8
+ },
+ "actor creation delay": 0
+ },
+ "execution_stats": {
+ "cpus": 10,
+ "gpus": 0,
+ "memory": 14.031964112073183,
+ "object_store": 2.0
+ },
+ "job_output_stats": {
+ "source_files": 1,
+ "source_size": 16534,
+ "result_files": 1,
+ "result_size": 16534,
+ "table_processing": 0.012392997741699219,
+ "nfiles": 1,
+ "nrows": 5
+ },
+ "source": {
+ "name": "test-data/data_processing/ray/sp/input",
+ "type": "path"
+ },
+ "target": {
+ "name": "/tmp/SP4o9gv2bq",
+ "type": "path"
+ }
+}
diff --git a/transforms/code/semantic_profiler/python/test-data/expected/test.parquet b/transforms/code/semantic_profiler/python/test-data/expected/test.parquet
new file mode 100644
index 0000000000..748db85ba5
Binary files /dev/null and b/transforms/code/semantic_profiler/python/test-data/expected/test.parquet differ
diff --git a/transforms/code/semantic_profiler/python/test-data/input/test.parquet b/transforms/code/semantic_profiler/python/test-data/input/test.parquet
new file mode 100644
index 0000000000..f9ac1f0247
Binary files /dev/null and b/transforms/code/semantic_profiler/python/test-data/input/test.parquet differ
diff --git a/transforms/code/semantic_profiler/python/test/test_sp.py b/transforms/code/semantic_profiler/python/test/test_sp.py
new file mode 100644
index 0000000000..1728589455
--- /dev/null
+++ b/transforms/code/semantic_profiler/python/test/test_sp.py
@@ -0,0 +1,45 @@
+# (C) Copyright IBM Corp. 2024.
+# Licensed under the Apache License, Version 2.0 (the “License”);
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an “AS IS” BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+import os
+
+import pyarrow as pa
+from data_processing.test_support import get_tables_in_folder
+from data_processing.test_support.transform.table_transform_test import (
+ AbstractTableTransformTest,
+)
+from sp_transform import SemanticProfilerTransform, null_libs_file, ikb_file
+
+
+
+
+class TestSemanticProfilerTransform(AbstractTableTransformTest):
+ """
+ Extends the super-class to define the test data for the tests defined there.
+ The name of this class MUST begin with the word Test so that pytest recognizes it as a test class.
+ """
+
+ def get_test_transform_fixtures(self) -> list[tuple]:
+ src_file_dir = os.path.abspath(os.path.dirname(__file__))
+ input_dir = os.path.join(src_file_dir, "../test-data/input")
+ expected_dir = os.path.join(src_file_dir, "../test-data/expected")
+ input_tables = get_tables_in_folder(input_dir)
+ expected_tables = get_tables_in_folder(expected_dir)
+
+ expected_metadata_list = [{"nfiles": 1, "nrows": len(expected_tables[0])}, {}]
+ config = {ikb_file: os.path.join(src_file_dir,"../src/ikb/ikb_model.csv"), null_libs_file: os.path.join(src_file_dir,"../src/ikb/null_libs.csv")}
+ fixtures = [
+ (SemanticProfilerTransform(config), input_tables, expected_tables, expected_metadata_list),
+ ]
+ return fixtures
diff --git a/transforms/code/semantic_profiler/python/test/test_sp_python.py b/transforms/code/semantic_profiler/python/test/test_sp_python.py
new file mode 100644
index 0000000000..191978dbc8
--- /dev/null
+++ b/transforms/code/semantic_profiler/python/test/test_sp_python.py
@@ -0,0 +1,48 @@
+# (C) Copyright IBM Corp. 2024.
+# Licensed under the Apache License, Version 2.0 (the “License”);
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an “AS IS” BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+import os
+
+from data_processing.runtime.pure_python import PythonTransformLauncher
+from data_processing.test_support.launch.transform_test import (
+ AbstractTransformLauncherTest,
+)
+from sp_transform import ikb_file_cli_param, null_libs_file_cli_param
+from sp_transform_python import SemanticProfilerPythonTransformConfiguration
+
+
+class TestPythonSemanticProfilerTransform(AbstractTransformLauncherTest):
+ """
+ Extends the super-class to define the test data for the tests defined there.
+ The name of this class MUST begin with the word Test so that pytest recognizes it as a test class.
+ """
+
+ def get_test_transform_fixtures(self) -> list[tuple]:
+ src_file_dir = os.path.abspath(os.path.dirname(__file__))
+ fixtures = []
+
+ launcher = PythonTransformLauncher(SemanticProfilerPythonTransformConfiguration())
+ input_dir = os.path.join(src_file_dir, "../test-data/input")
+ expected_dir = os.path.join(src_file_dir, "../test-data/expected")
+
+ transform_config = {ikb_file_cli_param: os.path.join(src_file_dir, "../src/ikb/ikb_model.csv"), null_libs_file_cli_param: os.path.join(src_file_dir, "../src/ikb/null_libs.csv")}
+ fixtures.append(
+ (
+ launcher,
+ transform_config,
+ input_dir,
+ expected_dir,
+ [], # optional list of column names to ignore in comparing test-generated with expected.
+ )
+ )
+
+ return fixtures
diff --git a/transforms/code/semantic_profiler/ray/.dockerignore b/transforms/code/semantic_profiler/ray/.dockerignore
new file mode 100644
index 0000000000..f7275bbbd0
--- /dev/null
+++ b/transforms/code/semantic_profiler/ray/.dockerignore
@@ -0,0 +1 @@
+venv/
diff --git a/transforms/code/semantic_profiler/ray/.gitignore b/transforms/code/semantic_profiler/ray/.gitignore
new file mode 100644
index 0000000000..3ea7fd4abb
--- /dev/null
+++ b/transforms/code/semantic_profiler/ray/.gitignore
@@ -0,0 +1,38 @@
+test-data/output
+output/*
+/output/
+data-processing-lib/
+
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+
+# Distribution / packaging
+bin/
+build/
+develop-eggs/
+dist/
+eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+.tox/
+htmlcov
+.coverage
+.cache
+nosetests.xml
+coverage.xml
\ No newline at end of file
diff --git a/transforms/code/semantic_profiler/ray/Dockerfile b/transforms/code/semantic_profiler/ray/Dockerfile
new file mode 100644
index 0000000000..df7d2a5a44
--- /dev/null
+++ b/transforms/code/semantic_profiler/ray/Dockerfile
@@ -0,0 +1,42 @@
+ARG BASE_IMAGE=docker.io/rayproject/ray:2.24.0-py310
+FROM ${BASE_IMAGE}
+
+RUN pip install --upgrade --no-cache-dir pip
+
+# install pytest
+RUN pip install --no-cache-dir pytest
+
+# Copy and install data processing libraries
+# These are expected to be placed in the docker context before this is run (see the make image).
+COPY --chown=ray:users data-processing-lib-python/ data-processing-lib-python/
+RUN cd data-processing-lib-python && pip install --no-cache-dir -e .
+COPY --chown=ray:users data-processing-lib-ray/ data-processing-lib-ray/
+RUN cd data-processing-lib-ray && pip install --no-cache-dir -e .
+COPY --chown=ray:users python-transform/ python-transform/
+RUN cd python-transform && pip install --no-cache-dir -e .
+
+#COPY requirements.txt requirements.txt
+#RUN pip install --no-cache-dir -r requirements.txt
+
+COPY --chown=ray:users src/ src/
+COPY --chown=ray:users pyproject.toml pyproject.toml
+RUN pip install --no-cache-dir -e .
+
+# copy the main() entry point to the image
+COPY ./src/sp_transform_ray.py .
+
+# copy some of the samples in
+COPY ./src/sp_local_ray.py local/
+
+# copy test
+COPY test/ test/
+COPY test-data/ test-data/
+
+# Set environment
+ENV PYTHONPATH /home/ray
+
+# Put these at the end since they seem to upset the docker cache.
+ARG BUILD_DATE
+ARG GIT_COMMIT
+LABEL build-date=$BUILD_DATE
+LABEL git-commit=$GIT_COMMIT
diff --git a/transforms/code/semantic_profiler/ray/Makefile b/transforms/code/semantic_profiler/ray/Makefile
new file mode 100644
index 0000000000..c4ddf5f0a4
--- /dev/null
+++ b/transforms/code/semantic_profiler/ray/Makefile
@@ -0,0 +1,58 @@
+# Define the root of the local git clone for the common rules to be able
+# know where they are running from.
+REPOROOT=../../../..
+# Include a library of common .transform.* targets which most
+# transforms should be able to reuse. However, feel free
+# to override/redefine the rules below.
+
+include $(REPOROOT)/transforms/.make.transforms
+
+TRANSFORM_NAME=sp
+
+BASE_IMAGE=${RAY_BASE_IMAGE}
+venv:: .transforms.ray-venv
+
+test:: .transforms.ray-test
+
+clean:: .transforms.clean
+
+image:: .transforms.ray-image
+
+test-src:: .transforms.test-src
+
+setup:: .transforms.setup
+
+test-image:: .transforms.ray-test-image
+
+build:: build-dist image
+
+publish: publish-image
+
+publish-image:: .transforms.publish-image-ray
+
+# set the version of python transform that this depends on.
+set-versions:
+ $(MAKE) TRANSFORM_PYTHON_VERSION=${SP_PYTHON_VERSION} TOML_VERSION=$(SP_RAY_VERSION) .transforms.set-versions
+
+build-dist:: .defaults.build-dist
+
+publish-dist:: .defaults.publish-dist
+
+# Ensure RUN_ARGS has a default value
+RUN_ARGS ?= ""
+
+run-cli-sample: .transforms.run-cli-ray-sample
+
+run-local-sample: .transforms.run-local-ray-sample
+
+# run-s3-sample: .transforms.run-s3-ray-sample
+
+minio-start: .minio-start
+
+kind-load-image:: .transforms.kind-load-image
+
+docker-load-image: .defaults.docker-load-image
+
+docker-save-image: .defaults.docker-save-image
diff --git a/transforms/code/semantic_profiler/ray/README.md b/transforms/code/semantic_profiler/ray/README.md
new file mode 100644
index 0000000000..a6a10810c9
--- /dev/null
+++ b/transforms/code/semantic_profiler/ray/README.md
@@ -0,0 +1,45 @@
+# SP Ray Transform
+Please see the set of
+[transform project conventions](../../../README.md#transform-project-conventions)
+for details on general project conventions, transform configuration,
+testing and IDE set up.
+
+## Summary
+This project wraps the [sp transform](../python) with a Ray runtime.
+
+## Configuration and Command Line Options
+
+SP configuration and command line options are the same as for the [base python](../python) transform.
+
+## Running
+
+### Launched Command Line Options
+In addition to the options available to the transform as defined [here](../python/README.md),
+the set of
+[ray launcher options](../../../../data-processing-lib/doc/ray-launcher-options.md) is also available.
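+
+For example, a local invocation combining launcher and transform options might look
+like the following sketch (paths and values are illustrative; the `sp_*` options mirror
+those used in `src/sp_local_ray.py`):
+```shell
+python src/sp_transform_ray.py \
+    --run_locally True \
+    --data_local_config "{'input_folder': 'test-data/input', 'output_folder': 'output'}" \
+    --sp_ikb_file src/ikb/ikb_model.csv \
+    --sp_null_libs_file src/ikb/null_libs.csv
+```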
+
+### Running the samples
+To run the samples, use the following `make` targets:
+
+* `run-cli-sample` - runs src/sp_transform_ray.py using command line args
+* `run-local-sample` - runs src/sp_local_ray.py
+
+These targets will activate the virtual environment and set up any configuration needed.
+Use the `-n` option of `make` to see the detail of what is done to run the sample.
+
+For example,
+```shell
+make run-cli-sample
+...
+```
+Then
+```shell
+ls output
+```
+to see the results of the transform.
+
+### Transforming data using the transform image
+
+To use the transform image to transform your data, please refer to the
+[running images quickstart](../../../../doc/quick-start/run-transform-image.md),
+substituting the name of this transform image and runtime as appropriate.
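+
+As a rough sketch (the image name, tag, and mount points below are illustrative; the
+quickstart has the exact values), a containerized run follows the usual pattern:
+```shell
+docker run --rm \
+    -v $(pwd)/input:/input \
+    -v $(pwd)/output:/output \
+    <sp-ray-image>:<tag> \
+    python sp_transform_ray.py \
+    --data_local_config "{'input_folder': '/input', 'output_folder': '/output'}"
+```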
diff --git a/transforms/code/semantic_profiler/ray/pyproject.toml b/transforms/code/semantic_profiler/ray/pyproject.toml
new file mode 100644
index 0000000000..e175be027f
--- /dev/null
+++ b/transforms/code/semantic_profiler/ray/pyproject.toml
@@ -0,0 +1,45 @@
+[project]
+name = "dpk_sp_transform_ray"
+version = "0.2.1.dev0"
+requires-python = ">=3.10"
+description = "SP Ray Transform"
+license = {text = "Apache-2.0"}
+readme = {file = "README.md", content-type = "text/markdown"}
+authors = [
+ { name = "Aishwariya Chakraborty", email = "aishwariya.chakraborty1@ibm.com" },
+]
+dependencies = [
+ "dpk-sp-transform-python==0.2.1.dev0",
+ "data-prep-toolkit-ray==0.2.1.dev0",
+]
+
+[build-system]
+requires = ["setuptools>=68.0.0", "wheel", "setuptools_scm[toml]>=7.1.0"]
+build-backend = "setuptools.build_meta"
+
+[project.optional-dependencies]
+dev = [
+ "twine",
+ "pytest>=7.3.2",
+ "pytest-dotenv>=0.5.2",
+ "pytest-env>=1.0.0",
+ "pre-commit>=3.3.2",
+ "pytest-cov>=4.1.0",
+ "pytest-mock>=3.10.0",
+ "moto==5.0.5",
+ "markupsafe==2.0.1",
+]
+
+[options]
+package_dir = ["src","test"]
+
+[options.packages.find]
+where = ["src/"]
+
+[tool.pytest.ini_options]
+# Currently we use low coverage since we have to run tests separately (see makefile)
+#addopts = "--cov --cov-report term-missing --cov-fail-under 25"
+markers = ["unit: unit tests", "integration: integration tests"]
+
+[tool.coverage.run]
+include = ["src/*"]
diff --git a/transforms/code/semantic_profiler/ray/src/concept_map/updated_concept_list.csv b/transforms/code/semantic_profiler/ray/src/concept_map/updated_concept_list.csv
new file mode 100644
index 0000000000..685d62d3d9
--- /dev/null
+++ b/transforms/code/semantic_profiler/ray/src/concept_map/updated_concept_list.csv
@@ -0,0 +1,14 @@
+Category
+Algorithms and Data Structures
+Database Management
+File Handling
+Networking and Messaging
+Graphical User Interface Design
+Security
+Scheduling and Concurrency
+Logging and Monitoring
+Web Development
+Mathematics and Numerics
+Code Analysis and Linting
+Testing
+Data Serialization
\ No newline at end of file
diff --git a/transforms/code/semantic_profiler/ray/src/examples/examples-i.csv b/transforms/code/semantic_profiler/ray/src/examples/examples-i.csv
new file mode 100644
index 0000000000..639735b66b
--- /dev/null
+++ b/transforms/code/semantic_profiler/ray/src/examples/examples-i.csv
@@ -0,0 +1,27 @@
+Library,Language
+algorithms,Python
+asyncio,Python
+arrow,Python
+authlib,Python
+webassets,Python
+scipy,Python
+pymysql,Python
+mimetypes,Python
+logging,Python
+flake8,Python
+mamba,Python
+marshmallow,Python
+tkinter,Python
+com.leansoft.bigqueue,Java
+com.cisco.commons.networking,Java
+net.time4j,Java
+org.apache.shiro,Java
+java.net.http,Java
+org.apache.commons.math4,Java
+ch.vorburger.mariaDB4j,Java
+com.google.jimfs,Java
+java.logging,Java
+org.sonar,Java
+org.junit,Java
+com.cedarsoftware:json-io,Java
+java.desktop,Java
diff --git a/transforms/code/semantic_profiler/ray/src/examples/examples-o.csv b/transforms/code/semantic_profiler/ray/src/examples/examples-o.csv
new file mode 100644
index 0000000000..b7eb9397a1
--- /dev/null
+++ b/transforms/code/semantic_profiler/ray/src/examples/examples-o.csv
@@ -0,0 +1,27 @@
+Library,Language,Category
+algorithms,Python,Algorithms and Data Structures
+asyncio,Python,Networking and Messaging
+arrow,Python,Scheduling and Concurrency
+authlib,Python,Security
+webassets,Python,Web Development
+scipy,Python,Mathematics and Numerics
+pymysql,Python,Database Management
+mimetypes,Python,File Handling
+logging,Python,Logging and Monitoring
+flake8,Python,Code Analysis and Linting
+mamba,Python,Testing
+marshmallow,Python,Data Serialization
+tkinter,Python,Graphical User Interface Design
+com.leansoft.bigqueue,Java,Algorithms and Data Structures
+com.cisco.commons.networking,Java,Networking and Messaging
+net.time4j,Java,Scheduling and Concurrency
+org.apache.shiro,Java,Security
+java.net.http,Java,Web Development
+org.apache.commons.math4,Java,Mathematics and Numerics
+ch.vorburger.mariaDB4j,Java,Database Management
+com.google.jimfs,Java,File Handling
+java.logging,Java,Logging and Monitoring
+org.sonar,Java,Code Analysis and Linting
+org.junit,Java,Testing
+com.cedarsoftware:json-io,Java,Data Serialization
+java.desktop,Java,Graphical User Interface Design
\ No newline at end of file
diff --git a/transforms/code/semantic_profiler/ray/src/ikb/extracted_data.csv b/transforms/code/semantic_profiler/ray/src/ikb/extracted_data.csv
new file mode 100644
index 0000000000..6fda787c83
--- /dev/null
+++ b/transforms/code/semantic_profiler/ray/src/ikb/extracted_data.csv
@@ -0,0 +1,39 @@
+dynamic_bitset,Cpp,Algorithms and Data Structures
+tries,Cpp,Algorithms and Data Structures
+algorithm,Cpp,Algorithms and Data Structures
+uni-algo,Cpp,Algorithms and Data Structures
+boost.asio,Cpp,Networking and Messaging
+cpp-netlib,Cpp,Networking and Messaging
+zmq,Cpp,Networking and Messaging
+azmq,Cpp,Networking and Messaging
+thread-pool,Cpp,Scheduling and Concurrency
+chrono,Cpp,Scheduling and Concurrency
+concurrencpp,Cpp,Scheduling and Concurrency
+time,Cpp,Scheduling and Concurrency
+libressl,Cpp,Security
+"dynamic_bitset","Cpp","Algorithms and Data Structures"
+"tries","Cpp","Algorithms and Data Structures"
+"algorithm","Cpp","Algorithms and Data Structures"
+"uni-algo","Cpp","Algorithms and Data Structures"
+"boost.asio","Cpp","Networking and Messaging"
+"cpp-netlib","Cpp","Networking and Messaging"
+"zmq","Cpp","Networking and Messaging"
+"azmq","Cpp","Networking and Messaging"
+"thread-pool","Cpp","Scheduling and Concurrency"
+"chrono","Cpp","Scheduling and Concurrency"
+"concurrencpp","Cpp","Scheduling and Concurrency"
+"time","Cpp","Scheduling and Concurrency"
+"libressl","Cpp","Security"
+"dynamic_bitset","Cpp","Algorithms and Data Structures"
+"tries","Cpp","Algorithms and Data Structures"
+"algorithm","Cpp","Algorithms and Data Structures"
+"uni-algo","Cpp","Algorithms and Data Structures"
+"boost.asio","Cpp","Networking and Messaging"
+"cpp-netlib","Cpp","Networking and Messaging"
+"zmq","Cpp","Networking and Messaging"
+"azmq","Cpp","Networking and Messaging"
+"thread-pool","Cpp","Scheduling and Concurrency"
+"chrono","Cpp","Scheduling and Concurrency"
+"concurrencpp","Cpp","Scheduling and Concurrency"
+"time","Cpp","Scheduling and Concurrency"
+"libressl","Cpp","Security"
diff --git a/transforms/code/semantic_profiler/ray/src/ikb/ikb_model.csv b/transforms/code/semantic_profiler/ray/src/ikb/ikb_model.csv
new file mode 100644
index 0000000000..bda9d2a667
--- /dev/null
+++ b/transforms/code/semantic_profiler/ray/src/ikb/ikb_model.csv
@@ -0,0 +1,1021 @@
+Library,Language,Category
+dynamic_bitset,Cpp,Algorithms and Data Structures
+tries,Cpp,Algorithms and Data Structures
+algorithm,Cpp,Algorithms and Data Structures
+uni-algo,Cpp,Algorithms and Data Structures
+boost.asio,Cpp,Networking and Messaging
+cpp-netlib,Cpp,Networking and Messaging
+zmq,Cpp,Networking and Messaging
+azmq,Cpp,Networking and Messaging
+thread-pool,Cpp,Scheduling and Concurrency
+chrono,Cpp,Scheduling and Concurrency
+concurrencpp,Cpp,Scheduling and Concurrency
+time,Cpp,Scheduling and Concurrency
+libressl,Cpp,Security
+libgcrypt,Cpp,Security
+nettle,Cpp,Security
+digestpp,Cpp,Security
+libonion,Cpp,Web Development
+cpp-httplib,Cpp,Web Development
+jwt-cpp,Cpp,Security
+libfv,Cpp,Mathematics and Numerics
+blaze,Cpp,Mathematics and Numerics
+cnl,Cpp,Mathematics and Numerics
+eigen,Cpp,Mathematics and Numerics
+linalg,Cpp,Mathematics and Numerics
+clickhouse,Cpp,Database Management
+leveldb,Cpp,Database Management
+libpqxx,Cpp,Database Management
+sqlite,Cpp,Database Management
+filesystem,Cpp,File Handling
+llfio,Cpp,File Handling
+glob,Cpp,File Handling
+tinydir,Cpp,File Handling
+spdlog,Cpp,Logging and Monitoring
+boost.log,Cpp,Logging and Monitoring
+glog,Cpp,Logging and Monitoring
+reckless,Cpp,Algorithms and Data Structures
+clang-tidy,Cpp,Code Analysis and Linting
+clangd,Cpp,Code Analysis and Linting
+cquery,Cpp,Code Analysis and Linting
+cppcheck,Cpp,Code Analysis and Linting
+boost.test,Cpp,Testing
+benchmark,Cpp,Testing
+cpputest,Cpp,Testing
+ctest,Cpp,Testing
+dlib,Cpp,Algorithms and Data Structures
+blitz,Cpp,Algorithms and Data Structures
+armadillo,Cpp,Algorithms and Data Structures
+oneapi/dal,Cpp,Database Management
+frozen,Cpp,Data Serialization
+glaze,Cpp,Data Serialization
+cppcodec,Cpp,Data Serialization
+boost.serialization,Cpp,Data Serialization
+infra,Cpp,Networking and Messaging
+workflow,Cpp,Scheduling and Concurrency
+taskflow,Cpp,Scheduling and Concurrency
+libthrift,Cpp,Networking and Messaging
+cegui,Cpp,Graphical User Interface Design
+wxwidgets,Cpp,Graphical User Interface Design
+gtk,Cpp,Graphical User Interface Design
+nanogui,Cpp,Graphical User Interface Design
+com.leansoft.bigqueue,Java,Algorithms and Data Structures
+com.liveramp.hyperminhash,Java,Algorithms and Data Structures
+org.pcollections,Java,Algorithms and Data Structures
+org.ojalgo,Java,Algorithms and Data Structures
+com.cisco.commons.networking,Java,Networking and Messaging
+io.netty,Java,Networking and Messaging
+org.apache.kafka,Java,Networking and Messaging
+com.rabbitmq,Java,Networking and Messaging
+net.time4j,Java,Scheduling and Concurrency
+org.jobrunr:jobrunr,Java,Scheduling and Concurrency
+org.quartz,Java,Scheduling and Concurrency
+org.knowm.sundial,Java,Scheduling and Concurrency
+org.apache.shiro,Java,Security
+org.bouncycastle,Java,Security
+jdk.crypto.cryptoki,Java,Security
+jdk.security,Java,Security
+java.net.http,Java,Web Development
+jdk.httpserver,Java,Web Development
+io.activej.codegen,Java,Code Analysis and Linting
+ninja,Java,Code Analysis and Linting
+org.apache.commons.math4,Java,Mathematics and Numerics
+org.apache.commons.numbers,Java,Mathematics and Numerics
+org.apache.commons.rng,Java,Mathematics and Numerics
+com.mathLibrary,Java,Mathematics and Numerics
+ch.vorburger.mariaDB4j,Java,Database Management
+java.sql,Java,Database Management
+redis.clients.jedis,Java,Database Management
+org.jooq,Java,Database Management
+com.google.jimfs,Java,File Handling
+java.io,Java,File Handling
+java.nio.file,Java,File Handling
+org.apache.commons.vfs2,Java,File Handling
+java.logging,Java,Logging and Monitoring
+jdk.jconsole,Java,IT Automation
+java.util.logging,Java,Logging and Monitoring
+org.slf4j.Logger,Java,Logging and Monitoring
+org.sonar,Java,Code Analysis and Linting
+fr.inria.gforge.spoon,Java,Code Analysis and Linting
+com.puppycrawl.tools.checkstyle,Java,Code Analysis and Linting
+net.sourceforge.pmd,Java,Code Analysis and Linting
+org.junit,Java,Testing
+com.intuit.karate,Java,Testing
+org.mockito,Java,Testing
+org.apache.jmeter,Java,Testing
+org.influxdb,Java,Data Analysis
+org.apache.spark,Java,Data Analysis
+org.apache.flink,Java,Data Analysis
+weka,Java,Data Analysis
+com.cedarsoftware:json-io,Java,Data Serialization
+com.google.flatbuffers,Java,Data Serialization
+org.msgpack,Java,Data Serialization
+com.esotericsoftware.kryo,Java,Data Serialization
+jenkins.model.Jenkins,Java,IT Automation
+org.apache.maven,Java,IT Automation
+org.gradle,Java,IT Automation
+com.microsoft.terraform,Java,IT Automation
+java.desktop,Java,Graphical User Interface Design
+java.awt,Java,Graphical User Interface Design
+org.openjfx,Java,Graphical User Interface Design
+org.eclipse.swt,Java,Graphical User Interface Design
+ngraph.graph,JavaScript,Algorithms and Data Structures
+buckets,JavaScript,Algorithms and Data Structures
+mori,JavaScript,Algorithms and Data Structures
+graphlib,JavaScript,Algorithms and Data Structures
+socket.io,JavaScript,Networking and Messaging
+request,JavaScript,Web Development
+amqplib,JavaScript,Networking and Messaging
+mqtt,JavaScript,Networking and Messaging
+fullcalendar,JavaScript,Graphical User Interface Design
+later,JavaScript,Scheduling and Concurrency
+date-fns,JavaScript,Mathematics and Numerics
+Moment,JavaScript,Mathematics and Numerics
+helmet,JavaScript,Security
+bcrypt,JavaScript,Security
+js-xss,JavaScript,Security
+xss-filters,JavaScript,Security
+vue,JavaScript,Graphical User Interface Design
+react,JavaScript,Graphical User Interface Design
+express,JavaScript,Web Development
+angular,JavaScript,Graphical User Interface Design
+Polynomial,JavaScript,Mathematics and Numerics
+Numeral-js,JavaScript,Mathematics and Numerics
+accounting,JavaScript,Mathematics and Numerics
+odometer,JavaScript,Mathematics and Numerics
+datavore,JavaScript,Data Analysis
+DB,JavaScript,Database Management
+sql,JavaScript,Database Management
+NeDB,JavaScript,Database Management
+jStorage,JavaScript,Database Management
+store,JavaScript,Database Management
+cross-storage,JavaScript,File Handling
+localForage,JavaScript,File Handling
+console.log-wrapper,JavaScript,Logging and Monitoring
+storybook,JavaScript,Graphical User Interface Design
+minilog,JavaScript,Logging and Monitoring
+loglevel,JavaScript,Logging and Monitoring
+eslint,JavaScript,Code Analysis and Linting
+jshint,JavaScript,Code Analysis and Linting
+tslint,JavaScript,Code Analysis and Linting
+sonarqube,JavaScript,Code Analysis and Linting
+jest,JavaScript,Testing
+Cypress,JavaScript,Testing
+jasmine,JavaScript,Testing
+qunit,JavaScript,Testing
+fabric,JavaScript,Web Development
+d3,JavaScript,Graphical User Interface Design
+three,JavaScript,Graphical User Interface Design
+sigma,JavaScript,Graphical User Interface Design
+tempo,JavaScript,Graphical User Interface Design
+jsfmt,JavaScript,Data Serialization
+fecha,JavaScript,Data Serialization
+protobufjs,JavaScript,Data Serialization
+shelljs,JavaScript,IT Automation
+forever,JavaScript,Scheduling and Concurrency
+node-cron,JavaScript,Scheduling and Concurrency
+jenkins,JavaScript,IT Automation
+react,JavaScript,Web Development
+vue,JavaScript,Web Development
+electron,JavaScript,Web Development
+angular,JavaScript,Web Development
+stdgpu,C,Algorithms and Data Structures
+urdfdom,C,Algorithms and Data Structures
+cxxgraph,C,Algorithms and Data Structures
+metis,C,Algorithms and Data Structures
+nanomsg,C,Networking and Messaging
+curl,C,Web Development
+librabbitmq,C,Networking and Messaging
+mosquitto,C,Networking and Messaging
+uv,C,Scheduling and Concurrency
+time,C,Scheduling and Concurrency
+pth,C,Scheduling and Concurrency
+pthread,C,Scheduling and Concurrency
+OpenSSL,C,Security
+GnuTLS,C,Security
+libsodium,C,Security
+libgcrypt,C,Security
+facil.io,C,File Handling
+kcgi,C,Web Development
+KLone,C,Web Development
+civetweb,C,Web Development
+apophenia,C,Data Analysis
+cmathl,C,Mathematics and Numerics
+GSL,C,Mathematics and Numerics
+SLEPc,C,Mathematics and Numerics
+DuckDB,C,Database Management
+MySQL,C,Database Management
+sophia,C,Database Management
+SQLite,C,Database Management
+stdio,C,File Handling
+POSIX,C,IT Automation
+HDF5,C,File Handling
+fstream,C,File Handling
+syslog,C,Logging and Monitoring
+spdlog,C,Logging and Monitoring
+collectd,C,Data Analysis
+nagios-plugins,C,IT Automation
+libclang,C,Code Analysis and Linting
+Cppcheck,C,Code Analysis and Linting
+libclang-tidy,C,Code Analysis and Linting
+Infer,C,Code Analysis and Linting
+CMocka,C,Testing
+MinUnit,C,Testing
+Valgrind,C,Testing
+Check,C,Testing
+gsl-lite,C,Mathematics and Numerics
+libcsv,C,Data Analysis
+dataframe,C,Data Analysis
+iqa,C,Data Analysis
+libyaml,C,Data Serialization
+libfmt,C,Data Serialization
+flatbuffers,C,Data Serialization
+msgpack-c,C,Data Serialization
+nix_api_util,C,IT Automation
+libcircmetrics,C,Logging and Monitoring
+etcd-api,C,Networking and Messaging
+cetcd,C,Networking and Messaging
+microui,C,Graphical User Interface Design
+tinyfiledialogs,C,Graphical User Interface Design
+luigi ,C,IT Automation
+GTK,C,Graphical User Interface Design
+Akade.IndexedSet,C#,Algorithms and Data Structures
+Akka.DistributedData,C#,Algorithms and Data Structures
+dotnet-mgcb-compute,C#,Mathematics and Numerics
+QuantConnect.Algorithm.CSharp,C#,Algorithms and Data Structures
+Microsoft.AspNetCore.Connections,C#,Networking and Messaging
+System.Net.Http.WinHttpHandler,C#,Web Development
+Microsoft.AspNetCore.WebUtilities,C#,Web Development
+MessagePipe,C#,Networking and Messaging
+Microsoft.SemanticKernel.Plugins.MsGraph,C#,Algorithms and Data Structures
+System.Threading.Tasks,C#,Scheduling and Concurrency
+Hangfire,C#,Scheduling and Concurrency
+OrchardCore.PublishLater,C#,Scheduling and Concurrency
+CefSharp.WinForm.Net.Core,C#,Graphical User Interface Design
+System.DirectoryServices.AccountManagement,C#,IT Automation
+System.Security.Permissions,C#,Security
+System.Security.AccessControl,C#,Security
+@pavelsavara/dotnet-runtime,C#,IT Automation
+@abp/ng.oauth,C#,Security
+@abp/core,C#,Web Development
+@abp/ng.components,C#,Web Development
+SharpDX.Mathematics,C#,Mathematics and Numerics
+AvaloniaMath,C#,Mathematics and Numerics
+WpfMath,C#,Mathematics and Numerics
+NCalcSync,C#,Mathematics and Numerics
+microsoft.entityframeworkcore.tools,C#,Database Management
+Dapper,C#,Database Management
+Microsoft.Azure.Management.PostgreSQL,C#,Database Management
+Microsoft.Azure.Management.CosmosDB,C#,Database Management
+Reloaded.Mod.Loader.IO,C#,File Handling
+DICOMcloud,C#,Data Analysis
+Aurio,C#,Graphical User Interface Design
+SeekableS3Stream,C#,File Handling
+Microsoft.Extensions.Logging,C#,Logging and Monitoring
+Microsoft.Azure.Management.Profiles.hybrid_2019_03_01.Monitor,C#,IT Automation
+Azure.Monitor.OpenTelemetry.AspNetCore,C#,Logging and Monitoring
+Microsoft.AspNetCore.Identity,C#,Security
+roslyn,C#,Code Analysis and Linting
+Microsoft.Toolkit.Uwp.PlatformSpecificAnalyzer,C#,Code Analysis and Linting
+Uno.Microsoft.Toolkit.Uwp.PlatformSpecificAnalyzer,C#,Code Analysis and Linting
+Microsoft.CST.ApplicationInspector.Common,C#,Code Analysis and Linting
+Microsoft.AspNetCore.TestHost,C#,Testing
+Microsoft.AspNetCore.Mvc.Testing,C#,Testing
+Microsoft.AspNetCore.SignalR.Specification.Tests,C#,Testing
+KIF,C#,Algorithms and Data Structures
+Microsoft.Data.Analysis,C#,Data Analysis
+Azure.Media.VideoAnalyzer.Edge,C#,Data Analysis
+Google.Cloud.Trace.V1,C#,Logging and Monitoring
+ClosedXML.Report,C#,Data Serialization
+System.Formats,C#,Data Serialization
+System.IO.Ports,C#,File Handling
+System.Text.Json,C#,Data Serialization
+App.Metrics.Formatters.Graphite,C#,Logging and Monitoring
+Microsoft.Crank.AzureDevOpsWorker,C#,IT Automation
+AWSSDK.DevOpsGuru,C#,IT Automation
+Microsoft.SourceLink.AzureDevOpsServer.Git,C#,IT Automation
+Saritasa.Tools.Messages.TestRuns,C#,Testing
+SSRD.IdentityUI,C#,Security
+bashforms,C#,Graphical User Interface Design
+NSCI,C#,Algorithms and Data Structures
+WSCT.GUI,C#,Graphical User Interface Design
+lock-free,D,Algorithms and Data Structures
+liblfdsd,D,Algorithms and Data Structures
+bitranged,D,Algorithms and Data Structures
+dstruct,D,Algorithms and Data Structures
+vibe-d,D,Web Development
+hunt-net,D,Networking and Messaging
+nbuff,D,Algorithms and Data Structures
+collie,D,Algorithms and Data Structures
+photon,D,Algorithms and Data Structures
+scheduled,D,Scheduling and Concurrency
+meta,D,Code Analysis and Linting
+ctini,D,Security
+hunt-security,D,Security
+hunt-shiro,D,Security
+secured,D,Security
+csprng,D,Security
+pgator-backend,D,Web Development
+hunt-cache,D,Data Analysis
+formoshlep,D,Data Analysis
+web-config,D,Web Development
+simple-math,D,Mathematics and Numerics
+evalex,D,Mathematics and Numerics
+dualnumbers,D,Mathematics and Numerics
+tau,D,Mathematics and Numerics
+mysql-native,D,Database Management
+derelict-pq,D,Database Management
+ddbc,D,Database Management
+dpq2,D,Database Management
+inifiled,D,File Handling
+fswatch,D,File Handling
+tinyfiledialogs,D,Graphical User Interface Design
+thepath,D,File Handling
+hunt,D,Testing
+gogga,D,Data Analysis
+dlog,D,Logging and Monitoring
+colorlog,D,Logging and Monitoring
+code_checker,D,Code Analysis and Linting
+dfmt,D,Data Serialization
+dscanner,D,Code Analysis and Linting
+dparse,D,Algorithms and Data Structures
+silly,D,Algorithms and Data Structures
+unit-threaded,D,Testing
+fluent-asserts,D,Testing
+dests,D,Algorithms and Data Structures
+magpie,D,Algorithms and Data Structures
+dvec,D,Mathematics and Numerics
+d-tree,D,Algorithms and Data Structures
+d_dataframes,D,Data Analysis
+jsonizer,D,Data Serialization
+mir-ion,D,Algorithms and Data Structures
+protobuf,D,Data Serialization
+siryul,D,Security
+iup,D,Graphical User Interface Design
+declui,D,Graphical User Interface Design
+d_imgui,D,Graphical User Interface Design
+dlangui,D,Graphical User Interface Design
+libgit2,D,Database Management
+yamkeys,D,Security
+lua-jit-d,D,IT Automation
+led,D,Graphical User Interface Design
+array-tool,Rust,Algorithms and Data Structures
+petgraph,Rust,Algorithms and Data Structures
+heapless,Rust,Algorithms and Data Structures
+argon2,Rust,Security
+mio,Rust,Networking and Messaging
+actix-rt,Rust,Scheduling and Concurrency
+socket2,Rust,Networking and Messaging
+crossbeam-channel,Rust,Networking and Messaging
+cron,Rust,Scheduling and Concurrency
+crossbeam-deque,Rust,Algorithms and Data Structures
+smolscale,Rust,Data Analysis
+job_scheduler,Rust,Scheduling and Concurrency
+zeroize,Rust,Security
+rocket,Rust,Web Development
+rpassword,Rust,Security
+trust-dns-resolver,Rust,Networking and Messaging
+@farmfe/core,Rust,IT Automation
+wasmer-clif-fork-frontend,Rust,Web Development
+seed,Rust,Graphical User Interface Design
+@farmfe/cli,Rust,IT Automation
+num-traits,Rust,Mathematics and Numerics
+num,Rust,Mathematics and Numerics
+num-bigint,Rust,Mathematics and Numerics
+cgmath,Rust,Mathematics and Numerics
+rusqlite,Rust,Database Management
+redis,Rust,Database Management
+diesel,Rust,Database Management
+postgres,Rust,Database Management
+fs_extra,Rust,File Handling
+toml,Rust,Data Serialization
+tempfile,Rust,File Handling
+zip,Rust,File Handling
+log,Rust,Logging and Monitoring
+env_logger,Rust,Logging and Monitoring
+tracing,Rust,Logging and Monitoring
+slog,Rust,Logging and Monitoring
+@cubejs-backend/linter,Rust,Code Analysis and Linting
+selene-lib,Rust,Data Analysis
+ast-grep,Rust,Code Analysis and Linting
+cargo-crev,Rust,Code Analysis and Linting
+assert_cmd,Rust,Testing
+quickcheck,Rust,Testing
+proptest,Rust,Testing
+wasm-bindgen-test,Rust,Testing
+rls-analysis,Rust,Code Analysis and Linting
+rstats,Rust,Data Analysis
+amadeus-commoncrawl,Rust,Data Analysis
+opendp,Rust,Data Analysis
+serde,Rust,Data Serialization
+serde_json,Rust,Data Serialization
+serde_yaml,Rust,Data Serialization
+bincode,Rust,Data Serialization
+lsio,Rust,File Handling
+shuttle-runtime,Rust,IT Automation
+rustc_data_structures,Rust,Algorithms and Data Structures
+compiler_base_span,Rust,Algorithms and Data Structures
+slint,Rust,Algorithms and Data Structures
+qinpel-wiz,Rust,Algorithms and Data Structures
+arc,Rust,Algorithms and Data Structures
+cushy,Rust,Algorithms and Data Structures
+tumblr/XExtensionItem,Objective-C,Algorithms and Data Structures
+TBQuadTree,Objective-C,Algorithms and Data Structures
+POSDataStructures,Objective-C,Algorithms and Data Structures
+PESGraph,Objective-C,Algorithms and Data Structures
+AFNetworking,Objective-C,Networking and Messaging
+CocoaAsyncSocket,Objective-C,Networking and Messaging
+Atlas,Objective-C,Graphical User Interface Design
+RestKit,Objective-C,Web Development
+SZServerTimeManager,Objective-C,Scheduling and Concurrency
+CalendarLib,Objective-C,Scheduling and Concurrency
+Selene,Objective-C,Security
+ZMJGanttChart,Objective-C,Graphical User Interface Design
+AWSCognitoIdentityProviderASF,Objective-C,Security
+gObfuscator,Objective-C,Security
+Lockbox,Objective-C,Security
+STPrivilegedTask,Objective-C,IT Automation
+vtx,Objective-C,Algorithms and Data Structures
+ColendiWebViewSDK,Objective-C,Web Development
+@abp/bootstrap-daterangepicker,Objective-C,Web Development
+@abp/ng.oauth,Objective-C,Security
+vMAT,Objective-C,Mathematics and Numerics
+crlibm,Objective-C,Mathematics and Numerics
+MCKNumerics,Objective-C,Mathematics and Numerics
+ACMatrix,Objective-C,Mathematics and Numerics
+DKDBManager,Objective-C,Database Management
+FlexileDatabase,Objective-C,Database Management
+KKDSqlite,Objective-C,Database Management
+SNDBManager,Objective-C,Database Management
+APSmartStorage,Objective-C,File Handling
+zipzap,Objective-C,File Handling
+AliyunOSSiOS,Objective-C,File Handling
+YTKKeyValueStore,Objective-C,Data Serialization
+github.com/github.com/CocoaLumberjack/CocoaLumberjack,Objective-C,Logging and Monitoring
+VENVersionTracker,Objective-C,IT Automation
+NSLogger,Objective-C,Logging and Monitoring
+NetworkEye,Objective-C,Networking and Messaging
+nq-test-react-native-maps,Objective-C,Graphical User Interface Design
+KIF,Objective-C,Testing
+facebookarchive/xctool,Objective-C,Code Analysis and Linting
+xctool,Objective-C,Code Analysis and Linting
+KRGreyTheory,Objective-C,Mathematics and Numerics
+DataGrinch,Objective-C,Data Analysis
+XsdaKit,Objective-C,Data Serialization
+cordova-pgyer-dandelion,Objective-C,Web Development
+sbjson,Objective-C,Data Serialization
+FXParser,Objective-C,Data Analysis
+CSV,Objective-C,Data Analysis
+NSMutableData+MultipartFormData,Objective-C,File Handling
+Masonry,Objective-C,Graphical User Interface Design
+Chameleon,Objective-C,Graphical User Interface Design
+Nimbus,Objective-C,Graphical User Interface Design
+GPUImage,Objective-C,Graphical User Interface Design
+infer,Objective-C,Code Analysis and Linting
+OCLint,Objective-C,Code Analysis and Linting
+sonatype,Objective-C,IT Automation
+sigrid,Objective-C,IT Automation
+fastlane,Objective-C,IT Automation
+hammerspoon,Objective-C,Graphical User Interface Design
+punic,Objective-C,IT Automation
+jenkins-mobile-pipeline-shared-libraries,Objective-C,IT Automation
+brotli,Ocaml,Data Compression
+dtoa,Ocaml,Algorithms and Data Structures
+bin_tree,Ocaml,Algorithms and Data Structures
+base_trie,Ocaml,Algorithms and Data Structures
+apero-net,Ocaml,Networking and Messaging
+conduit,Ocaml,Networking and Messaging
+netamqp,Ocaml,Networking and Messaging
+posix-mqueue,Ocaml,File Handling
+bap-primus-exploring-scheduler,Ocaml,Scheduling and Concurrency
+builder,Ocaml,IT Automation
+daypack-lib,Ocaml,Data Analysis
+riot,Ocaml,Web Development
+tls,Ocaml,Security
+osx-acl,Ocaml,Security
+content_security_policy,Ocaml,Security
+aws-sts,Ocaml,Security
+async_websocket,Ocaml,Web Development
+benchpress-server,Ocaml,Web Development
+builder-web,Ocaml,Web Development
+cduce_ws,Ocaml,Web Development
+posix-math,Ocaml,Mathematics and Numerics
+smol,Ocaml,Data Serialization
+crlibm,Ocaml,Mathematics and Numerics
+lem,Ocaml,Code Analysis and Linting
+caqti,Ocaml,Database Management
+dbforge,Ocaml,Database Management
+irmin,Ocaml,Database Management
+links-mysql,Ocaml,Database Management
+bitlib,Ocaml,Algorithms and Data Structures
+chamelon,Ocaml,Web Development
+fpath,Ocaml,File Handling
+fileutils,Ocaml,File Handling
+bolt,Ocaml,Algorithms and Data Structures
+dolog,Ocaml,Logging and Monitoring
+easy_logging,Ocaml,Logging and Monitoring
+loga,Ocaml,Logging and Monitoring
+bisect_ppx,Ocaml,Code Analysis and Linting
+calli,Ocaml,Algorithms and Data Structures
+clangml-transforms,Ocaml,Algorithms and Data Structures
+dolmen_bin,Ocaml,Algorithms and Data Structures
+base_quickcheck,Ocaml,Testing
+caravan,Ocaml,Web Development
+kaputt,Ocaml,Algorithms and Data Structures
+ounit2,Ocaml,Testing
+conformist,Ocaml,Code Analysis and Linting
+dataframe,Ocaml,Data Analysis
+dsfo,Ocaml,Data Analysis
+llama_midi,Ocaml,Graphical User Interface Design
+atdgen,Ocaml,Code Analysis and Linting
+bitpack_serializer,Ocaml,Data Serialization
+coq-serapi,Ocaml,Algorithms and Data Structures
+grpc,Ocaml,Networking and Messaging
+bap-build,Ocaml,IT Automation
+argsh,Ocaml,IT Automation
+conf-automake,Ocaml,IT Automation
+dtools,Ocaml,IT Automation
+bogue,Ocaml,Algorithms and Data Structures
+unison-gui,Ocaml,Graphical User Interface Design
+imguiml,Ocaml,Graphical User Interface Design
+altgr-ergo,Ocaml,Algorithms and Data Structures
+bk-tree,Haskell,Algorithms and Data Structures
+algebraic-graphs,Haskell,Algorithms and Data Structures
+recursion-schemes,Haskell,Algorithms and Data Structures
+AvlTree,Haskell,Algorithms and Data Structures
+grenade,Haskell,Security
+network-conduit,Haskell,Networking and Messaging
+streamly,Haskell,Algorithms and Data Structures
+hedgehog,Haskell,Testing
+haxl,Haskell,Web Development
+amazonka-scheduler,Haskell,Scheduling and Concurrency
+massiv-scheduler,Haskell,Scheduling and Concurrency
+gogol-datafusion,Haskell,Data Analysis
+tamarin-prover-theory,Haskell,Mathematics and Numerics
+tamarin-prover,Haskell,Mathematics and Numerics
+yst,Haskell,Data Analysis
+fireward,Haskell,Security
+snap-core,Haskell,Web Development
+snap-server,Haskell,Web Development
+gogol-pagespeed,Haskell,Web Development
+gogol-indexing,Haskell,Data Analysis
+pandoc,Haskell,Data Serialization
+Agda,Haskell,Mathematics and Numerics
+math-functions,Haskell,Mathematics and Numerics
+commodities,Haskell,Data Analysis
+gogol-spanner,Haskell,Database Management
+gogol-sqladmin,Haskell,Database Management
+gogol-datastore,Haskell,Database Management
+dbmigrations,Haskell,Database Management
+bytestring,Haskell,File Handling
+io-streams,Haskell,File Handling
+regions,Haskell,Algorithms and Data Structures
+amazonka-kinesis-video-webrtc-storage,Haskell,Data Analysis
+tensorflow-logging,Haskell,Logging and Monitoring
+wai-extra,Haskell,Web Development
+co-log,Haskell,Logging and Monitoring
+gogol-cloudmonitoring,Haskell,IT Automation
+pandoc,Haskell,Data Serialization
+cassava,Haskell,Data Analysis
+commonmark,Haskell,Data Serialization
+auto,Haskell,Code Analysis and Linting
+amazonka-devops-guru,Haskell,IT Automation
+deptrack-devops,Haskell,IT Automation
+gogol-testing,Haskell,Testing
+LogicGrowsOnTrees,Haskell,Algorithms and Data Structures
+gogol-datafusion,Haskell,Data Analysis
+vty-ui,Haskell,Graphical User Interface Design
+YampaSynth,Haskell,Algorithms and Data Structures
+master-plan,Haskell,IT Automation
+stan,Haskell,Data Analysis
+hlint,Haskell,Code Analysis and Linting
+liquidhaskell,Haskell,Code Analysis and Linting
+ghc,Haskell,IT Automation
+purescript,Haskell,Code Analysis and Linting
+ghcide-test-utils,Haskell,Testing
+hls-test-utils,Haskell,Testing
+yesod-test,Haskell,Testing
+statistics,Haskell,Mathematics and Numerics
+statistics-skinny,Haskell,Mathematics and Numerics
+ajhc,Haskell,Code Analysis and Linting
+fortran-src,Haskell,Algorithms and Data Structures
+BitVector,Nim,Algorithms and Data Structures
+rbtree,Nim,Algorithms and Data Structures
+binaryheap,Nim,Algorithms and Data Structures
+algorithm,Nim,Algorithms and Data Structures
+nativesockets,Nim,Networking and Messaging
+net,Nim,Networking and Messaging
+nimrdkafka,Nim,Networking and Messaging
+mqtt,Nim,Networking and Messaging
+monotimes,Nim,Scheduling and Concurrency
+times,Nim,Scheduling and Concurrency
+osproc,Nim,IT Automation
+schedules,Nim,Scheduling and Concurrency
+nimcrypt,Nim,Security
+seccomp,Nim,Security
+nimpass,Nim,Security
+quickcrypt,Nim,Security
+nerve,Nim,Networking and Messaging
+palladian,Nim,Web Development
+staticserver,Nim,Web Development
+phoon,Nim,Web Development
+seqmath,Nim,Mathematics and Numerics
+extmath,Nim,Mathematics and Numerics
+geometrymath,Nim,Mathematics and Numerics
+neo,Nim,Database Management
+niledb,Nim,Database Management
+couchdb,Nim,Database Management
+zfdbms,Nim,Database Management
+pdba,Nim,Database Management
+osfiles,Nim,File Handling
+fileinput,Nim,File Handling
+filetype,Nim,File Handling
+stor,Nim,File Handling
+octolog,Nim,Logging and Monitoring
+morelogging,Nim,Logging and Monitoring
+promexplorer,Nim,Data Analysis
+metrics,Nim,Data Analysis
+nimfmt,Nim,Code Analysis and Linting
+coco,Nim,Code Analysis and Linting
+treesitter,Nim,Code Analysis and Linting
+nimalyzer,Nim,Code Analysis and Linting
+testify,Nim,Testing
+nimtest,Nim,Testing
+testutils,Nim,Testing
+halonium,Nim,Networking and Messaging
+nimdata,Nim,Data Analysis
+datamancer,Nim,Data Analysis
+nimdataframe,Nim,Data Analysis
+mpfit,Nim,Mathematics and Numerics
+tomlserialization,Nim,Data Serialization
+protobufserialization,Nim,Data Serialization
+bson,Nim,Data Serialization
+eminim,Nim,Algorithms and Data Structures
+autome,Nim,IT Automation
+monit,Nim,Logging and Monitoring
+autonim,Nim,IT Automation
+nake,Nim,IT Automation
+nimblegui,Nim,Graphical User Interface Design
+nigui,Nim,Graphical User Interface Design
+sigui,Nim,Graphical User Interface Design
+rdgui,Nim,Graphical User Interface Design
+de.sciss:fingertree_2.11,Scala,Algorithms and Data Structures
+org.scalameta:semanticdb-scalac-core_2.11.12,Scala,Code Analysis and Linting
+org.axle-lang:axle-algorithms_2.11,Scala,Algorithms and Data Structures
+de.sciss:strugatzki_2.10,Scala,Algorithms and Data Structures
+org.apache.spark:spark-network-common_2.11,Scala,Networking and Messaging
+com.github.molecule-labs:molecule-net_2.9.3,Scala,Networking and Messaging
+org.elasticmq,Scala,Database Management
+com.typesafe.akka:akka-stream_2.12,Scala,Networking and Messaging
+com.miguno.akka:akka-mock-scheduler_2.11,Scala,Scheduling and Concurrency
+com.enragedginger:akka-quartz-scheduler_2.11,Scala,Scheduling and Concurrency
+edu.gemini:lucuma-typed-scheduler_sjs1_3,Scala,Scheduling and Concurrency
+io.getkyo:kyo-scheduler_2.13,Scala,Scheduling and Concurrency
+dev.zio:zio-json_3,Scala,Data Serialization
+dev.zio:zio-json_2.12,Scala,Data Serialization
+recheck,Scala,Code Analysis and Linting
+org.beangle.security:beangle-security-core,Scala,Security
+com.softwaremill.sttp:async-http-client-backend-future_2.12,Scala,Web Development
+com.softwaremill.sttp:akka-http-backend_2.12,Scala,Web Development
+com.eed3si9n:gigahorse-okhttp_2.12,Scala,Web Development
+com.softwaremill.sttp.client3:slf4j-backend_2.12,Scala,Logging and Monitoring
+com.github.vagmcs:optimus_2.11,Scala,Mathematics and Numerics
+com.github.vagmcs:optimus-solver-oj_2.11,Scala,Mathematics and Numerics
+io.github.scalamath:vecmatlib,Scala,Mathematics and Numerics
+io.github.scalamath:cmplxlib,Scala,Mathematics and Numerics
+com.typesafe.slick:slick_2.11,Scala,Database Management
+org.tpolecat:doobie-core_2.12,Scala,Database Management
+org.reactivemongo:reactivemongo_2.11,Scala,Database Management
+org.tpolecat:doobie-postgres_2.12,Scala,Database Management
+org.specs2:specs2_2.11,Scala,Testing
+com.github.pathikrit:better-files_2.12,Scala,File Handling
+com.github.scala-incubator.io:scala-io-file_2.10,Scala,File Handling
+de.sciss:audiofile_2.11,Scala,Data Analysis
+com.typesafe.scala-logging:scala-logging_2.12,Scala,Logging and Monitoring
+com.typesafe.scala-logging:scala-logging-slf4j_2.11,Scala,Logging and Monitoring
+org.clapper:grizzled-slf4j_2.11,Scala,Logging and Monitoring
+com.outr:scribe_2.12,Scala,Data Serialization
+org.psywerx.hairyfotr.linter,Scala,Code Analysis and Linting
+scala.meta.parsers,Scala,Algorithms and Data Structures
+org.scalastyle,Scala,Code Analysis and Linting
+com.sksamuel.scapegoat,Scala,Code Analysis and Linting
+org.scala-js:scalajs-test-bridge_2.13,Scala,Testing
+org.scala-js:scalajs-test-interface_2.12,Scala,Testing
+com.typesafe.play:play-test_2.11,Scala,Testing
+org.scalatest:scalatest_2.9.1,Scala,Testing
+org.finra.megasparkdiff:mega-spark-diff,Scala,Data Analysis
+com.github.vicpara:exploratory-data-analysis_2.10,Scala,Data Analysis
+org.emmalanguage:emma,Scala,Data Analysis
+org.emmalanguage:emma-benchmarks,Scala,Data Analysis
+org.simplex3d:simplex3d-data-format_2.10,Scala,Data Serialization
+org.wvlet.airframe:airframe-tablet_2.13.0-RC2,Scala,Data Serialization
+org.gnieh:fs2-data-text_2.13,Scala,Data Serialization
+com.fasterxml.jackson.module:jackson-module-scala_2.12,Scala,Data Serialization
+tech.orkestra:orkestra-core_sjs0.6_2.12,Scala,IT Automation
+com.goyeau:orchestra-cron_2.12,Scala,Scheduling and Concurrency
+com.aamend.spark:archetype,Scala,IT Automation
+io.kevinlee:sbt-devoops-github-core_2.12_1.0,Scala,IT Automation
+de.sciss:dotterweide-ui_2.11,Scala,Graphical User Interface Design
+org.scala-lang.modules.scala-swing,Scala,Graphical User Interface Design
+io.github.kacperfkorban.guinep-web,Scala,Web Development
+io.github.mimoguz.layeredfonticon-core,Scala,Graphical User Interface Design
+piecemeal,Dart,Algorithms and Data Structures
+collection,Dart,Algorithms and Data Structures
+pointycastle,Dart,Security
+graphs,Dart,Algorithms and Data Structures
+connectivity_plus,Dart,Networking and Messaging
+cached_network_image,Dart,File Handling
+connectivity,Dart,Networking and Messaging
+firebase_messaging,Dart,Networking and Messaging
+reflutter,Dart,Web Development
+server_universe,Dart,Web Development
+create-fullstack-app-cli,Dart,IT Automation
+angel_graphql,Dart,Web Development
+flutter_local_notifications,Dart,Graphical User Interface Design
+cron,Dart,Scheduling and Concurrency
+timer_builder,Dart,Scheduling and Concurrency
+syncfusion_flutter_calendar,Dart,Graphical User Interface Design
+google_sign_in,Dart,Security
+mqtt_client,Dart,Networking and Messaging
+angel_security,Dart,Security
+envied,Dart,Code Analysis and Linting
+math_expressions,Dart,Mathematics and Numerics
+more,Dart,Algorithms and Data Structures
+ml_linalg,Dart,Mathematics and Numerics
+fixed,Dart,Algorithms and Data Structures
+sqflite,Dart,Database Management
+cloud_firestore,Dart,Database Management
+postgres,Dart,Database Management
+hive,Dart,Database Management
+path_provider,Dart,File Handling
+image,Dart,Graphical User Interface Design
+glob,Dart,File Handling
+file,Dart,File Handling
+logging,Dart,Logging and Monitoring
+logger,Dart,Logging and Monitoring
+ansicolor,Dart,Logging and Monitoring
+pretty_dio_logger,Dart,Logging and Monitoring
+flutter_lints,Dart,Code Analysis and Linting
+pedantic_mono,Dart,Code Analysis and Linting
+carapacik_lints,Dart,Code Analysis and Linting
+velvet_custom_lints,Dart,Code Analysis and Linting
+test,Dart,Testing
+unittest,Dart,Testing
+build_test,Dart,Testing
+mocktail,Dart,Testing
+grizzly_array,Dart,Algorithms and Data Structures
+flutter_insights,Dart,Data Analysis
+packhorse,Dart,IT Automation
+plugin_mappintelligence,Dart,IT Automation
+yaml,Dart,Data Serialization
+http_parser,Dart,Web Development
+built_value,Dart,Data Serialization
+bson,Dart,Data Serialization
+unleash,Dart,IT Automation
+docrunner,Dart,IT Automation
+cobertura,Dart,Code Analysis and Linting
+bitwarden_secrets,Dart,Security
+magical_widget,Dart,Graphical User Interface Design
+flutter_auto_gui,Dart,Graphical User Interface Design
+gui_shape,Dart,Graphical User Interface Design
+rinf,Dart,Algorithms and Data Structures
+collections,Python,Algorithms and Data Structures
+heapq,Python,Algorithms and Data Structures
+algorithms,Python,Algorithms and Data Structures
+sortedcontainers,Python,Algorithms and Data Structures
+asyncio,Python,Networking and Messaging
+socket,Python,Networking and Messaging
+kafka-python,Python,Networking and Messaging
+dramatiq,Python,Networking and Messaging
+arrow,Python,Scheduling and Concurrency
+dateutil,Python,Scheduling and Concurrency
+threading-framework,Python,Scheduling and Concurrency
+schedule,Python,Scheduling and Concurrency
+authlib,Python,Security
+pyjwt,Python,Security
+django-allauth,Python,Security
+cryptography,Python,Security
+webassets,Python,Web Development
+html2text,Python,Web Development
+websockets,Python,Web Development
+tornado,Python,Web Development
+scipy,Python,Mathematics and Numerics
+numpy,Python,Mathematics and Numerics
+statsmodel,Python,Mathematics and Numerics
+sympy,Python,Mathematics and Numerics
+pymysql,Python,Database Management
+psycopg,Python,Database Management
+pymongo,Python,Database Management
+pickledb,Python,Database Management
+mimetypes,Python,File Handling
+pathlib,Python,File Handling
+python-magic,Python,File Handling
+wqtchdog,Python,Scheduling and Concurrency
+logging,Python,Logging and Monitoring
+structlog,Python,Logging and Monitoring
+loguru,Python,Logging and Monitoring
+psutil,Python,System Administration
+flake8,Python,Code Analysis and Linting
+pyflakes,Python,Code Analysis and Linting
+pycodestyle,Python,Code Analysis and Linting
+pylint,Python,Code Analysis and Linting
+mamba,Python,Testing
+pytest,Python,Testing
+unittest,Python,Testing
+selenium,Python,Web Development
+pandas,Python,Data Analysis
+optimus,Python,Data Analysis
+schema,Python,Data Analysis
+pydantic,Python,Data Serialization
+marshmallow,Python,Data Serialization
+pysimdjson,Python,Data Serialization
+json,Python,Data Serialization
+prophy,Python,Data Analysis
+ansible,Python,IT Automation
+pyinfra,Python,IT Automation
+fabric,Python,IT Automation
+borg,Python,System Administration
+tkinter,Python,Graphical User Interface Design
+pyglet,Python,Graphical User Interface Design
+pyqt,Python,Graphical User Interface Design
+kivy,Python,Graphical User Interface Design
+Graph,Perl,Algorithms and Data Structures
+MetaMap-DataStructures,Perl,Algorithms and Data Structures
+Array-Circular,Perl,Algorithms and Data Structures
+Tree-R,Perl,Algorithms and Data Structures
+NetAddr-MAC,Perl,Networking and Messaging
+Net-OpenSSH,Perl,Networking and Messaging
+Parse-IPCommand,Perl,Networking and Messaging
+Net-SSH2,Perl,Networking and Messaging
+docpad-plugin-scheduling,Perl,Scheduling and Concurrency
+Async-Event-Interval,Perl,Scheduling and Concurrency
+Schedule-SGELK,Perl,Scheduling and Concurrency
+Mojolicious-Plugin-Cron-Scheduler,Perl,Scheduling and Concurrency
+DBIx-Class-BcryptColumn,Perl,Security
+Crypt-DRBG,Perl,Security
+WWW-KeePassRest,Perl,Web Development
+Plack-Middleware-SecureHeaders,Perl,Security
+Mojolicious,Perl,Web Development
+Dancer2,Perl,Web Development
+Catalyst,Perl,Web Development
+Kossy,Perl,Web Development
+SPVM-Math,Perl,Mathematics and Numerics
+App-Math-Tutor,Perl,Mathematics and Numerics
+Math-RPN-Tiny,Perl,Mathematics and Numerics
+Math-Sidef,Perl,Mathematics and Numerics
+DBD-mysql,Perl,Database Management
+Redis,Perl,Database Management
+github.com/percona/percona-toolkit,Perl,Database Management
+Database-Abstraction,Perl,Database Management
+Path-Tiny,Perl,File Handling
+File-Util,Perl,File Handling
+PDF-API2,Perl,Data Serialization
+IO-All,Perl,File Handling
+CPAN-Testers-Schema,Perl,Data Analysis
+Log-Report,Perl,Logging and Monitoring
+Log-Contextual,Perl,Logging and Monitoring
+event-tracer,Perl,Logging and Monitoring
+Perl-Lint,Perl,Code Analysis and Linting
+Perl-Critic,Perl,Code Analysis and Linting
+B-Lint,Perl,Code Analysis and Linting
+Perl-Analyzer,Perl,Code Analysis and Linting
+Test-Strict,Perl,Testing
+Math-BigInt,Perl,Mathematics and Numerics
+Test-MockModule,Perl,Testing
+Test-Without-Module,Perl,Testing
+CLIPSeqTools,Perl,Data Analysis
+App-RecordStream,Perl,Data Analysis
+Data::Table,Perl,Data Analysis
+PDL::Dataframe,Perl,Data Analysis
+wxPerl,Perl,Graphical User Interface Design
+Perl-Tk,Perl,Graphical User Interface Design
+Prima,Perl,Graphical User Interface Design
+Perl/KDE,Perl,Graphical User Interface Design
+AnyData,Perl,Data Serialization
+Data-Format-Pretty-YAML,Perl,Data Serialization
+TOML-Tiny,Perl,Data Serialization
+CatalystX-Controller-ExtJS-REST-SimpleExcel,Perl,Web Development
+Rex,Perl,IT Automation
+com.viliussutkus89:SampleLibraryForSonatypePromotionTesting,Perl,IT Automation
+Jenkins::API,Perl,IT Automation
+Minilla,Perl,IT Automation
+@discordjs/collection,TypeScript,Algorithms and Data Structures
+js-sdsl,TypeScript,Algorithms and Data Structures
+typescript-collections,TypeScript,Algorithms and Data Structures
+fast-array-diff,TypeScript,Algorithms and Data Structures
+libp2p,TypeScript,Networking and Messaging
+@multiformats/multiaddr,TypeScript,Networking and Messaging
+@ethersproject/networks,TypeScript,Networking and Messaging
+nats,TypeScript,Networking and Messaging
+@types/node-schedule,TypeScript,Scheduling and Concurrency
+agenda,TypeScript,Scheduling and Concurrency
+@nestjs/schedule,TypeScript,Scheduling and Concurrency
+@solid-primitives/scheduled,TypeScript,Scheduling and Concurrency
+helmet,TypeScript,Security
+snyk,TypeScript,Security
+express-rate-limit,TypeScript,Web Development
+jssha,TypeScript,Security
+vite,TypeScript,Web Development
+vue-template-compiler,TypeScript,Web Development
+@testing-library/user-event,TypeScript,Testing
+antd,TypeScript,Graphical User Interface Design
+random-js,TypeScript,Mathematics and Numerics
+math-expression-evaluator,TypeScript,Mathematics and Numerics
+normal-distribution,TypeScript,Mathematics and Numerics
+@mathigon/fermat,TypeScript,Mathematics and Numerics
+mongodb,TypeScript,Database Management
+sequelize,TypeScript,Database Management
+firebase,TypeScript,Database Management
+typeorm,TypeScript,Database Management
+rollup-plugin-dts,TypeScript,Code Analysis and Linting
+tsx,TypeScript,Code Analysis and Linting
+ts-node-dev,TypeScript,Code Analysis and Linting
+serve,TypeScript,Web Development
+@oclif/errors,TypeScript,Error Handling
+@storybook/addon-console,TypeScript,Graphical User Interface Design
+conventional-changelog-writer,TypeScript,IT Automation
+git-raw-commits,TypeScript,IT Automation
+@codemirror/lint,TypeScript,Code Analysis and Linting
+@start/plugin-lib-eslint,TypeScript,Code Analysis and Linting
+remark-lint-fenced-code-flag-case,TypeScript,Code Analysis and Linting
+tslint-rxjs-subject-restrictions-rule,TypeScript,Code Analysis and Linting
+jest,TypeScript,Testing
+ts-jest,TypeScript,Testing
+babel-jest,TypeScript,Testing
+vitest,TypeScript,Testing
+data-forge,TypeScript,Data Analysis
+vue-component-meta,TypeScript,Graphical User Interface Design
+@opticss/element-analysis,TypeScript,Graphical User Interface Design
+@antv/l7-scene,TypeScript,Graphical User Interface Design
+table,TypeScript,Data Analysis
+form-data-encoder,TypeScript,File Handling
+ion-js,TypeScript,Web Development
+@nsis/language-data,TypeScript,Data Analysis
+docker-compose,TypeScript,IT Automation
+commitlint-azure-pipelines-cli,TypeScript,IT Automation
+azure-devops-node-api,TypeScript,IT Automation
+@karmaniverous/get-dotenv,TypeScript,File Handling
+happy-dom,TypeScript,Graphical User Interface Design
+react-png-tooltip,TypeScript,Graphical User Interface Design
+infamous,TypeScript,Graphical User Interface Design
+lume,TypeScript,Web Development
+github.com/davecgh/go-spew,Go,Algorithms and Data Structures
+github.com/google/btree,Go,Algorithms and Data Structures
+github.com/lann/ps,Go,Algorithms and Data Structures
+github.com/cespare/xxhash/v2,Go,Algorithms and Data Structures
+golang.org/x/net,Go,Networking and Messaging
+github.com/vishvananda/netns,Go,Networking and Messaging
+github.com/nats-io/nats,Go,Networking and Messaging
+github.com/jackc/pgproto3/v2,Go,Database Management
+k8s.io/kubernetes,Go,IT Automation
+github.com/go-co-op/gocron,Go,Scheduling and Concurrency
+atomicgo.dev/schedule,Go,Scheduling and Concurrency
+github.com/jasonlvhit/gocron,Go,Scheduling and Concurrency
+github.com/google/uuid,Go,Algorithms and Data Structures
+github.com/golang-jwt/jwt/v4,Go,Security
+github.com/microcosm-cc/bluemonday,Go,Security
+github.com/99designs/keyring,Go,Security
+github.com/gin-gonic/gin,Go,Web Development
+github.com/go-redis/cache/v8,Go,Database Management
+github.com/gorilla/sessions,Go,Web Development
+github.com/labstack/echo/v4,Go,Web Development
+gopkg.in/inf.v0,Go,Algorithms and Data Structures
+github.com/go-corelibs/maths,Go,Mathematics and Numerics
+github.com/go-inf/inf,Go,Algorithms and Data Structures
+github.com/pkg/math,Go,Mathematics and Numerics
+github.com/go-sql-driver/mysql,Go,Database Management
+github.com/lib/pq,Go,Database Management
+go.mongodb.org/mongo-driver,Go,Database Management
+go.etcd.io/bbolt,Go,Database Management
+github.com/pelletier/go-toml/v2,Go,Data Serialization
+github.com/joho/godotenv,Go,File Handling
+cloud.google.com/go/storage,Go,Database Management
+github.com/minio/minio-go/v7,Go,Database Management
+github.com/sirupsen/logrus,Go,Logging and Monitoring
+go.uber.org/zap,Go,Logging and Monitoring
+github.com/go-logr/logr,Go,Logging and Monitoring
+go.opentelemetry.io/otel,Go,Logging and Monitoring
+golang.org/x/lint,Go,Code Analysis and Linting
+github.com/golangci/lint-1,Go,Code Analysis and Linting
+github.com/mvdan/lint,Go,Code Analysis and Linting
+github.com/golang/lint,Go,Code Analysis and Linting
+github.com/stretchr/testify,Go,Testing
+github.com/google/go-cmp,Go,Code Analysis and Linting
+gopkg.in/check.v1,Go,Testing
+github.com/onsi/ginkgo,Go,Testing
+github.com/rocketlaunchr/dataframe-go,Go,Data Analysis
+github.com/fjukstad/walrus,Go,Algorithms and Data Structures
+github.com/hokiegeek/hgtealib,Go,Algorithms and Data Structures
+github.com/forchain/bitcoinbigdata,Go,Data Analysis
+github.com/google/orderedcode,Go,Code Analysis and Linting
+github.com/ipfs/go-block-format,Go,File Handling
+github.com/linkedin/goavro/v2,Go,Data Serialization
+github.com/minio/sio,Go,File Handling
+github.com/power-devops/perfstat,Go,Logging and Monitoring
+github.com/gruntwork-io/terratest,Go,Testing
+go.mozilla.org/sops/v3,Go,Security
+github.com/vladimirvivien/gexe,Go,Algorithms and Data Structures
+qtypes,Go,Algorithms and Data Structures
+github.com/ctessum/gobra,Go,Algorithms and Data Structures
+github.com/yogischogi/ui2go,Go,Graphical User Interface Design
+github.com/bhojpur/gui,Go,Graphical User Interface Design
diff --git a/transforms/code/semantic_profiler/ray/src/ikb/null_libs.csv b/transforms/code/semantic_profiler/ray/src/ikb/null_libs.csv
new file mode 100644
index 0000000000..821e3406f8
--- /dev/null
+++ b/transforms/code/semantic_profiler/ray/src/ikb/null_libs.csv
@@ -0,0 +1,14 @@
+Library,Language
+dynamic_bitset,Cpp
+tries,Cpp
+algorithm,Cpp
+uni-algo,Cpp
+boost.asio,Cpp
+cpp-netlib,Cpp
+zmq,Cpp
+azmq,Cpp
+thread-pool,Cpp
+chrono,Cpp
+concurrencpp,Cpp
+time,Cpp
+libressl,Cpp
\ No newline at end of file
diff --git a/transforms/code/semantic_profiler/ray/src/offline_path/generate_ikb.py b/transforms/code/semantic_profiler/ray/src/offline_path/generate_ikb.py
new file mode 100644
index 0000000000..04e5a7c01c
--- /dev/null
+++ b/transforms/code/semantic_profiler/ray/src/offline_path/generate_ikb.py
@@ -0,0 +1,110 @@
+import os
+import argparse
+import pyarrow as pa
+import pyarrow.csv as pv
+from io import BytesIO
+from watsonxai import generateResponseWatsonx
+
+
+def gen_combined_strings(list_str):
+    # Batch the rows into CSV chunks of at most 30 entries, each prefixed with
+    # the header so every chunk sent to the LLM is a self-contained table.
+    combined_strings = []
+    combined_string = "Library,Language,Category\n"
+    for idx, entry in enumerate(list_str, start=1):
+        entry_string = ",".join(str(value) for value in entry.values())
+        combined_string += f"{entry_string}\n"
+        if idx % 30 == 0 or idx == len(list_str):
+            combined_strings.append(combined_string)
+            combined_string = "Library,Language,Category\n"
+    return combined_strings
+
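+# Example (illustrative): with 65 null-library rows, gen_combined_strings
+# returns three CSV chunks of 30, 30 and 5 rows, keeping each LLM request
+# small enough to be answered within the generation token budget.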
+
+def sanitize_table(table):
+    # Strip stray double quotes from every cell so that appended chunks parse
+    # consistently regardless of how the LLM quoted its CSV output.
+    sanitized_columns = []
+    for column in table.columns:
+        sanitized_data = [str(val).replace('"', '') for val in column.to_pylist()]
+        sanitized_columns.append(pa.array(sanitized_data))
+    return pa.table(sanitized_columns, names=table.column_names)
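+
+# For example, an LLM row returned as "zmq","Cpp","Networking and Messaging"
+# is normalized to zmq,Cpp,Networking and Messaging before being appended to
+# the extracted-data file.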
+
+parser = argparse.ArgumentParser(description='Generate IKB.')
+parser.add_argument('--null_libs_file', type=str, help='Path to null libraries file.', default=os.getenv('NULL_LIBS_FILE', '../ikb/null_libs.csv'))
+parser.add_argument('--cmap_file', type=str, help='Path to concept map file.', default=os.getenv('CMAP_FILE', '../concept_map/updated_concept_list.csv'))
+parser.add_argument('--input_examples_file', type=str, help='Path to input examples file.', default=os.getenv('EXAMPLES_I_FILE', '../examples/examples-i.csv'))
+parser.add_argument('--output_examples_file', type=str, help='Path to output examples file.', default=os.getenv('EXAMPLES_O_FILE', '../examples/examples-o.csv'))
+parser.add_argument('--extracted_data_file', type=str, help='Path to file in which LLM output will be stored.', default=os.getenv('EXTRACTED_DATA_FILE', '../ikb/extracted_data.csv'))
+parser.add_argument('--api_type', type=str, help='API Type', default=os.getenv('API_TYPE', 'WatsonxAI'))
+parser.add_argument('--api_key', type=str, help='API key', default=os.getenv('API_KEY', ''))
+parser.add_argument('--api_endpoint', type=str, help='API endpoint', default=os.getenv('API_ENDPOINT', 'https://us-south.ml.cloud.ibm.com'))
+parser.add_argument('--project_id', type=str, help='Project ID', default=os.getenv('PROJECT_ID', ''))
+parser.add_argument('--model_id', type=str, help='LLM model ID', default=os.getenv('MODEL_ID', 'meta-llama/llama-3-70b-instruct'))
+
+args = parser.parse_args()
+concepts_list = pv.read_csv(args.cmap_file).column('Category').to_pylist()
+concepts = ', '.join(concepts_list)
+
+# Round-trip the example files through pyarrow and decode the bytes so the
+# few-shot examples can be spliced into the prompt as plain CSV text.
+csv_buffer_i = BytesIO()
+pv.write_csv(pv.read_csv(args.input_examples_file), csv_buffer_i)
+input_examples = csv_buffer_i.getvalue().decode("utf-8")
+
+csv_buffer_o = BytesIO()
+pv.write_csv(pv.read_csv(args.output_examples_file), csv_buffer_o)
+output_examples = csv_buffer_o.getvalue().decode("utf-8")
+
+# null_libs.csv carries a "Library,Language" header row; skip it so the header
+# is not classified as data, then convert the table into a list of row dicts,
+# e.g. [{'Library': 'zmq', 'Language': 'Cpp'}, ...].
+cols = ['Library', 'Language']
+table = pv.read_csv(args.null_libs_file, read_options=pv.ReadOptions(column_names=cols, skip_rows=1))
+null_library_names = table.to_pylist()
+combined_strings = gen_combined_strings(null_library_names)
+endtoken = ""
+
+prompt_name = "My-prompt"
+prompt_template = '''You are responsible for classifying programming language packages based on their functionality into one of the following STRICT categories:
+ ''' + concepts + '''
+
+ Instructions:
+
+ 1. Input: A CSV containing two columns:
+ a. Library – the name of the package
+ b. Language – the programming language of the package
+ Your task is to append a third column called Category where you will classify the package's primary function into one of the following categories.\n
+
+ 2. Output: The updated CSV with the new Category column.
+
+ 3. Categorization Guidelines:
+ a. Classify each package based on its primary functionality.
+ b. Only use categories from the given list. Do not invent or modify categories.
+
+ 4. Output format: Provide the updated CSV data in the exact format as shown below:
+ a. Columns: Library, Language, Category
+ b. End the response with to indicate completion.
+
+ 5. Only use categories from the given list. Do not invent or modify categories.
+
+ 6. Strictly do not provide any explanations or commentary or notes before and/or after the table.
+
+ Examples:
+ INPUT:
+ ''' + str(input_examples) + "OUTPUT:\n" + str(output_examples).strip("\n")+"\n"
+
+
+for combined_string in combined_strings:
+    input_template = prompt_template + f"\n\nINPUT: {combined_string} \nOUTPUT: "
+    if args.api_type == 'WatsonxAI':
+        response = generateResponseWatsonx(args.api_key, args.api_endpoint, args.model_id, args.project_id, input_template)
+    else:
+        raise ValueError(f"Unsupported API type: {args.api_type}")
+    # Trim anything after the end token if one is configured; str.split raises
+    # ValueError on an empty separator, so guard against the empty default.
+    data = response.split(endtoken)[0] if endtoken else response
+    csv_file = BytesIO(data.strip().encode('utf-8'))
+    table = pv.read_csv(csv_file)
+    table = sanitize_table(table)
+    # Append this chunk's rows to the extracted-data CSV without repeating the header.
+    with open(args.extracted_data_file, mode='ab') as f:
+        pv.write_csv(table, f, write_options=pv.WriteOptions(include_header=False))
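+
+# Typical invocation (sketch; every flag can also be supplied through the
+# environment variables named in the argparse defaults above):
+#   python generate_ikb.py --api_key "$API_KEY" --project_id "$PROJECT_ID"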
diff --git a/transforms/code/semantic_profiler/ray/src/offline_path/watsonxai.py b/transforms/code/semantic_profiler/ray/src/offline_path/watsonxai.py
new file mode 100644
index 0000000000..bb27137c2b
--- /dev/null
+++ b/transforms/code/semantic_profiler/ray/src/offline_path/watsonxai.py
@@ -0,0 +1,25 @@
+from ibm_watsonx_ai.metanames import GenTextParamsMetaNames as GenParams
+from ibm_watsonx_ai.foundation_models import ModelInference
+from ibm_watsonx_ai import Credentials
+
+
+
+
+def generateResponseWatsonx(api_key, api_endpoint, model_id, project_id, input_template):
+ credentials = Credentials(api_key=api_key, url=api_endpoint)
+ parameters = {
+ GenParams.DECODING_METHOD: "greedy",
+ GenParams.MAX_NEW_TOKENS: 1000,
+        GenParams.STOP_SEQUENCES: ["<end>"]  # assumed sentinel; must match `endtoken` in the extraction script
+ }
+ model = ModelInference(
+ model_id=model_id,
+ params=parameters,
+ credentials=credentials,
+ project_id=project_id)
+ response = model.generate_text(input_template)
+ return response
+
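+# Illustrative call (credentials, project ID, and prompt are placeholders):
+#   text = generateResponseWatsonx("MY_API_KEY", "https://us-south.ml.cloud.ibm.com",
+#                                  "meta-llama/llama-3-70b-instruct", "MY_PROJECT_ID", prompt)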
diff --git a/transforms/code/semantic_profiler/ray/src/sp_local_ray.py b/transforms/code/semantic_profiler/ray/src/sp_local_ray.py
new file mode 100644
index 0000000000..4e9e498b1a
--- /dev/null
+++ b/transforms/code/semantic_profiler/ray/src/sp_local_ray.py
@@ -0,0 +1,54 @@
+# (C) Copyright IBM Corp. 2024.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+import os
+import sys
+
+from data_processing.utils import ParamsUtils
+from data_processing_ray.runtime.ray import RayTransformLauncher
+from sp_transform_ray import SemanticProfilerRayTransformConfiguration
+
+
+# create parameters
+input_folder = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "test-data", "input"))
+output_folder = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "output"))
+local_conf = {
+ "input_folder": input_folder,
+ "output_folder": output_folder,
+}
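+# The launcher reads parquet files from test-data/input and writes the processed
+# files plus a metadata.json run summary to ../output.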
+worker_options = {"num_cpus": 0.8}
+code_location = {"github": "github", "commit_hash": "12345", "path": "path"}
+params = {
+ # where to run
+ "run_locally": True,
+ # Data access. Only required parameters are specified
+ "data_local_config": ParamsUtils.convert_to_ast(local_conf),
+ # orchestrator
+ "runtime_worker_options": ParamsUtils.convert_to_ast(worker_options),
+ "runtime_num_workers": 3,
+ "runtime_pipeline_id": "pipeline_id",
+ "runtime_job_id": "job_id",
+ "runtime_creation_delay": 0,
+ "runtime_code_location": ParamsUtils.convert_to_ast(code_location),
+    # semantic profiler params
+ "sp_ikb_file": "src/ikb/ikb_model.csv",
+ "sp_null_libs_file": "src/ikb/null_libs.csv"
+}
+if __name__ == "__main__":
+ # Set the simulated command line args
+ sys.argv = ParamsUtils.dict_to_req(d=params)
+ # create launcher
+ launcher = RayTransformLauncher(SemanticProfilerRayTransformConfiguration())
+ # Launch the ray actor(s) to process the input
+ launcher.launch()
diff --git a/transforms/code/semantic_profiler/ray/src/sp_transform_ray.py b/transforms/code/semantic_profiler/ray/src/sp_transform_ray.py
new file mode 100644
index 0000000000..b54cb2536c
--- /dev/null
+++ b/transforms/code/semantic_profiler/ray/src/sp_transform_ray.py
@@ -0,0 +1,45 @@
+# (C) Copyright IBM Corp. 2024.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+from data_processing.utils import CLIArgumentProvider, get_logger
+from data_processing_ray.runtime.ray import RayTransformLauncher
+from data_processing_ray.runtime.ray.runtime_configuration import (
+ RayTransformRuntimeConfiguration,
+)
+from sp_transform import SemanticProfilerTransformConfiguration
+
+
+logger = get_logger(__name__)
+
+
+class SemanticProfilerRayTransformConfiguration(RayTransformRuntimeConfiguration):
+ """
+    Implements the RayTransformRuntimeConfiguration for SP as required by the RayTransformLauncher.
+    SP does not need a Ray-specific runtime class, so the superclass is given only the
+    base, python-only transform configuration.
+ """
+
+ def __init__(self):
+ """
+ Initialization
+ :param base_configuration - base configuration class
+ """
+ super().__init__(transform_config=SemanticProfilerTransformConfiguration())
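+        # A Ray-specific runtime could also be supplied to the superclass here
+        # (e.g. a runtime_class argument; name assumed); SP uses the default runtime.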
+
+
+if __name__ == "__main__":
+    # create the launcher for the semantic profiler transform
+ launcher = RayTransformLauncher(SemanticProfilerRayTransformConfiguration())
+ logger.info("Launching sp transform")
+ launcher.launch()
diff --git a/transforms/code/semantic_profiler/ray/test-data/expected/metadata.json b/transforms/code/semantic_profiler/ray/test-data/expected/metadata.json
new file mode 100644
index 0000000000..eed590d79b
--- /dev/null
+++ b/transforms/code/semantic_profiler/ray/test-data/expected/metadata.json
@@ -0,0 +1,46 @@
+{
+ "pipeline": "pipeline_id",
+ "job details": {
+ "job category": "preprocessing",
+ "job name": "NOOP",
+ "job type": "ray",
+ "job id": "job_id",
+ "start_time": "2024-03-01 15:17:56",
+ "end_time": "2024-03-01 15:17:57",
+ "status": "success"
+ },
+ "code": [null],
+ "job_input_params": {
+ "sleep": 0,
+ "checkpointing": false,
+ "max_files": -1,
+ "number of workers": 1,
+ "worker options": {
+ "num_cpus": 0.8
+ },
+ "actor creation delay": 0
+ },
+ "execution_stats": {
+ "cpus": 10,
+ "gpus": 0,
+ "memory": 14.031964112073183,
+ "object_store": 2.0
+ },
+ "job_output_stats": {
+ "source_files": 1,
+ "source_size": 16534,
+ "result_files": 1,
+ "result_size": 16534,
+ "table_processing": 0.012392997741699219,
+ "nfiles": 1,
+ "nrows": 5
+ },
+ "source": {
+ "name": "test-data/data_processing/ray/noop/input",
+ "type": "path"
+ },
+ "target": {
+ "name": "/tmp/NOOP4o9gv2bq",
+ "type": "path"
+ }
+}
diff --git a/transforms/code/semantic_profiler/ray/test-data/expected/test.parquet b/transforms/code/semantic_profiler/ray/test-data/expected/test.parquet
new file mode 100644
index 0000000000..748db85ba5
Binary files /dev/null and b/transforms/code/semantic_profiler/ray/test-data/expected/test.parquet differ
diff --git a/transforms/code/semantic_profiler/ray/test-data/input/test.parquet b/transforms/code/semantic_profiler/ray/test-data/input/test.parquet
new file mode 100644
index 0000000000..f9ac1f0247
Binary files /dev/null and b/transforms/code/semantic_profiler/ray/test-data/input/test.parquet differ
diff --git a/transforms/code/semantic_profiler/ray/test/test_sp_ray.py b/transforms/code/semantic_profiler/ray/test/test_sp_ray.py
new file mode 100644
index 0000000000..3b7daa8905
--- /dev/null
+++ b/transforms/code/semantic_profiler/ray/test/test_sp_ray.py
@@ -0,0 +1,52 @@
+# (C) Copyright IBM Corp. 2024.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+import os
+
+from data_processing.test_support.launch.transform_test import (
+ AbstractTransformLauncherTest,
+)
+from data_processing_ray.runtime.ray import RayTransformLauncher
+from sp_transform import ikb_file_cli_param, null_libs_file_cli_param
+from sp_transform_ray import SemanticProfilerRayTransformConfiguration
+
+
+class TestRaySemanticProfilerTransform(AbstractTransformLauncherTest):
+ """
+ Extends the super-class to define the test data for the tests defined there.
+ The name of this class MUST begin with the word Test so that pytest recognizes it as a test class.
+ """
+
+ def get_test_transform_fixtures(self) -> list[tuple]:
+ src_file_dir = os.path.abspath(os.path.dirname(__file__))
+ fixtures = []
+
+ launcher = RayTransformLauncher(SemanticProfilerRayTransformConfiguration())
+ input_dir = os.path.join(src_file_dir, "../test-data/input")
+ expected_dir = os.path.join(src_file_dir, "../test-data/expected")
+ runtime_config = {"run_locally": True}
+        transform_config = {
+            ikb_file_cli_param: os.path.join(src_file_dir, "../src/ikb/ikb_model.csv"),
+            null_libs_file_cli_param: os.path.join(src_file_dir, "../src/ikb/null_libs.csv"),
+        }
+ fixtures.append(
+ (
+ launcher,
+ transform_config | runtime_config,
+ input_dir,
+ expected_dir,
+ [], # optional list of column names to ignore in comparing test-generated with expected.
+ )
+ )
+ return fixtures
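+
+# Run from transforms/code/semantic_profiler/ray with, e.g., `pytest test/test_sp_ray.py`.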