diff --git a/examples/data/pen_digit_test.pkl b/examples/data/pen_digit_test.pkl deleted file mode 100644 index abefa32..0000000 Binary files a/examples/data/pen_digit_test.pkl and /dev/null differ diff --git a/examples/data/pen_digit_train.pkl b/examples/data/pen_digit_train.pkl deleted file mode 100644 index 6a7ca55..0000000 Binary files a/examples/data/pen_digit_train.pkl and /dev/null differ diff --git a/examples/example.ipynb b/examples/example.ipynb deleted file mode 100644 index 90e3efc..0000000 --- a/examples/example.ipynb +++ /dev/null @@ -1,286 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "da33c65c-dabe-4ce4-8725-7b2cea275c9c", - "metadata": {}, - "source": [ - "### Installation instruction:\n", - "- pip install -i https://test.pypi.org/simple/ signature-mahalanobis-knn==0.1.1\n", - "- Install sktime (modified to fix a bug, see issue 1591), esig, iisignature for signature computations: \n", - " - pip install git+https://github.com/sz85512678/sktime.git\n", - " - pip install esig iisignature\n", - "- (Optional) Obtain test data from https://github.com/sz85512678/signature_mahalanobis_knn/tree/main/data" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "98f9253d-105b-4591-b74d-2cb93f803dfd", - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import importlib\n", - "\n", - "import pandas as pd\n", - "\n", - "from sig_anomaly import SigMahaKNN" - ] - }, - { - "cell_type": "markdown", - "id": "90d069f1-b480-4196-a292-0b7ff487a469", - "metadata": {}, - "source": [ - "### Using anomaly detection on streams" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "b07723b1-2896-4222-9ca1-ae3240ecdb63", - "metadata": {}, - "outputs": [], - "source": [ - "DATA_DIR = \"./data/\"\n", - "digit = 1\n", - "\n", - "train_df = pd.read_pickle(DATA_DIR + \"pen_digit_train.pkl\")\n", - "test_df = pd.read_pickle(DATA_DIR + \"pen_digit_test.pkl\")\n", - "corpus = train_df[train_df[\"Digit\"] == digit][\"Stream\"].values\n", - "test_inlier = test_df[test_df[\"Digit\"] == digit][\"Stream\"].values\n", - "test_outlier = test_df[test_df[\"Digit\"] != digit][\"Stream\"].values" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "b23f5fd7-405b-4405-91ca-411456e81728", - "metadata": {}, - "outputs": [], - "source": [ - "sig_maha_knn = SigMahaKNN.SignatureMahalanobisKNN()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cdf60db6-8479-4326-8970-f4475dab2cc3", - "metadata": {}, - "outputs": [], - "source": [ - "sig_maha_knn.fit(corpus)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4bc6f3f0-36e8-446d-943d-d7c0b7583c5c", - "metadata": {}, - "outputs": [], - "source": [ - "%%time\n", - "inlier_dists = sig_maha_knn.conformance(test_inlier)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e3ab80da-ed49-4e6e-b669-7f1946c1e636", - "metadata": {}, - "outputs": [], - "source": [ - "%%time\n", - "outlier_dists = sig_maha_knn.conformance(test_outlier)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c958f03f-1818-4c01-800f-78c4bbbb368f", - "metadata": {}, - "outputs": [], - "source": [ - "%%time\n", - "auc = sig_maha_knn.compute_auc_given_dists(inlier_dists, outlier_dists)\n", - "print(\"Auc is\", auc)" - ] - }, - { - "cell_type": "markdown", - "id": "24bc63f8-874c-4d1e-9255-397f632478c8", - "metadata": {}, - "source": [ - "### Using the variance norm as a standalong object" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b826afcc-b66e-4b74-b132-3063015d9a4e", - "metadata": {}, - "outputs": [], - "source": [ - "from sktime.transformations.panel.signature_based import SignatureTransformer\n", - "from joblib import Parallel, delayed" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "id": "8117ca13-7ca2-4204-a294-a3a07a9eca68", - "metadata": {}, - "outputs": [], - "source": [ - "mahalanobis = Distance.Mahalanobis()" - ] - }, - { - "cell_type": "code", - "execution_count": 44, - "id": "ec7a5154-9bb9-431b-84bb-7f44bfabcbc7", - "metadata": {}, - "outputs": [], - "source": [ - "signature_transform = SignatureTransformer(\n", - " augmentation_list=None,\n", - " window_name=\"global\",\n", - " window_depth=None,\n", - " window_length=None,\n", - " window_step=None,\n", - " rescaling=None,\n", - " sig_tfm=\"signature\",\n", - " depth=2,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 48, - "id": "06fded8c-0576-413c-840d-ea897988fc39", - "metadata": {}, - "outputs": [], - "source": [ - "sigs = Parallel(n_jobs=-2)(\n", - " delayed(signature_transform.fit_transform)(corpus[i]) for i in range(len(corpus))\n", - ")\n", - "sigs = pd.concat(sigs)" - ] - }, - { - "cell_type": "code", - "execution_count": 49, - "id": "e88c72bb-fb28-4bd0-ac51-ea4250c60664", - "metadata": {}, - "outputs": [], - "source": [ - "mahalanobis.fit(sigs)" - ] - }, - { - "cell_type": "code", - "execution_count": 54, - "id": "92267731-855b-40fa-ba99-c2a4a409cb7a", - "metadata": {}, - "outputs": [], - "source": [ - "y1 = test_inlier[0]\n", - "sig_y1 = signature_transform.fit_transform(y1)" - ] - }, - { - "cell_type": "code", - "execution_count": 55, - "id": "af911415-63d9-43f1-9610-6a82f3a121a4", - "metadata": {}, - "outputs": [], - "source": [ - "y2 = corpus[0]\n", - "sig_y2 = signature_transform.fit_transform(y2)" - ] - }, - { - "cell_type": "code", - "execution_count": 57, - "id": "9e8089b4-0f34-46ea-b1a3-9c9e57979d4d", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
0
00.012913
\n", - "
" - ], - "text/plain": [ - " 0\n", - "0 0.012913" - ] - }, - "execution_count": 57, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "mahalanobis.distance(sig_y1, sig_y2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "04d87bf8-8b04-4da3-9e0b-8e5826ab85ca", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.17" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -}