diff --git a/annotation_import/audio.ipynb b/annotation_import/audio.ipynb
index 4e05fee..10383eb 100644
--- a/annotation_import/audio.ipynb
+++ b/annotation_import/audio.ipynb
@@ -1,18 +1,16 @@
{
- "nbformat": 4,
- "nbformat_minor": 5,
- "metadata": {},
"cells": [
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
- "",
- " ",
+ "\n",
+ " \n",
"\n"
- ],
- "cell_type": "markdown"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"\n",
@@ -24,10 +22,10 @@
"\n",
""
- ],
- "cell_type": "markdown"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"# Audio Annotation Import\n",
@@ -53,111 +51,188 @@
"* Model-assisted labeling - used to provide pre-annotated data for your labelers. This reduces the total time needed to label your assets properly. Model-assisted labeling does not submit the labels automatically; they must be reviewed and submitted by a labeler.\n",
"* Label Import - used to provide ground truth labels. These can in turn be compared against prediction labels, or used as benchmarks to see how your labelers are doing.\n",
"\n"
- ],
- "cell_type": "markdown"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"* For information on what types of annotations are supported per data type, refer to this documentation:\n",
" * https://docs.labelbox.com/docs/model-assisted-labeling#option-1-import-via-python-annotation-types-recommended"
- ],
- "cell_type": "markdown"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"* Notes:\n",
" * Wait until the import job is complete before opening the Editor to make sure all annotations are imported properly."
- ],
- "cell_type": "markdown"
+ ]
},
{
- "metadata": {},
- "source": "%pip install -q \"labelbox[data]\"",
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
- "execution_count": null
+ "source": [
+ "%pip install -q \"labelbox[data]\""
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"# Setup"
- ],
- "cell_type": "markdown"
+ ]
},
{
- "metadata": {},
- "source": "import labelbox as lb\nimport uuid\nimport labelbox.types as lb_types",
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
- "execution_count": null
+ "source": [
+ "import labelbox as lb\n",
+ "import uuid\n",
+ "import labelbox.types as lb_types"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"# Replace with your API key\n",
"See the guide: [Create an API key](https://docs.labelbox.com/docs/create-an-api-key)"
- ],
- "cell_type": "markdown"
+ ]
},
{
- "metadata": {},
- "source": "# Add your api key\nAPI_KEY = \"\"\nclient = lb.Client(api_key=API_KEY)",
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
- "execution_count": null
+ "source": [
+ "# Add your api key\n",
+ "API_KEY = \"\"\n",
+ "client = lb.Client(api_key=API_KEY)"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"## Supported annotations for Audio"
- ],
- "cell_type": "markdown"
+ ]
},
{
- "metadata": {},
- "source": "##### Classification free text #####\n\ntext_annotation = lb_types.ClassificationAnnotation(\n name=\"text_audio\",\n value=lb_types.Text(answer=\"free text audio annotation\"),\n)\n\ntext_annotation_ndjson = {\n \"name\": \"text_audio\",\n \"answer\": \"free text audio annotation\",\n}",
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
- "execution_count": null
+ "source": [
+ "##### Classification free text #####\n",
+ "\n",
+ "text_annotation = 
lb_types.ClassificationAnnotation(\n", + " name=\"text_audio\",\n", + " value=lb_types.Text(answer=\"free text audio annotation\"),\n", + ")\n", + "\n", + "text_annotation_ndjson = {\n", + " \"name\": \"text_audio\",\n", + " \"answer\": \"free text audio annotation\",\n", + "}" + ] }, { - "metadata": {}, - "source": "##### Checklist Classification #######\n\nchecklist_annotation = lb_types.ClassificationAnnotation(\n name=\"checklist_audio\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n ]),\n)\n\nchecklist_annotation_ndjson = {\n \"name\":\n \"checklist_audio\",\n \"answers\": [\n {\n \"name\": \"first_checklist_answer\"\n },\n {\n \"name\": \"second_checklist_answer\"\n },\n ],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "##### Checklist Classification #######\n", + "\n", + "checklist_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"checklist_audio\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n", + " lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n", + " ]),\n", + ")\n", + "\n", + "checklist_annotation_ndjson = {\n", + " \"name\":\n", + " \"checklist_audio\",\n", + " \"answers\": [\n", + " {\n", + " \"name\": \"first_checklist_answer\"\n", + " },\n", + " {\n", + " \"name\": \"second_checklist_answer\"\n", + " },\n", + " ],\n", + "}" + ] }, { - "metadata": {}, - "source": "######## Radio Classification ######\n\nradio_annotation = lb_types.ClassificationAnnotation(\n name=\"radio_audio\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"second_radio_answer\")),\n)\n\nradio_annotation_ndjson = {\n \"name\": \"radio_audio\",\n \"answer\": {\n \"name\": \"first_radio_answer\"\n },\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "######## Radio Classification ######\n", + "\n", + "radio_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"radio_audio\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"second_radio_answer\")),\n", + ")\n", + "\n", + "radio_annotation_ndjson = {\n", + " \"name\": \"radio_audio\",\n", + " \"answer\": {\n", + " \"name\": \"first_radio_answer\"\n", + " },\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Upload Annotations - putting it all together " - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 1: Import data rows into Catalog" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Create one Labelbox dataset\n\nglobal_key = \"sample-audio-1.mp3\" + str(uuid.uuid4())\n\nasset = {\n \"row_data\":\n \"https://storage.googleapis.com/labelbox-datasets/audio-sample-data/sample-audio-1.mp3\",\n \"global_key\":\n global_key,\n}\n\ndataset = client.create_dataset(name=\"audio_annotation_import_demo_dataset\")\ntask = dataset.create_data_rows([asset])\ntask.wait_till_done()\nprint(\"Errors:\", task.errors)\nprint(\"Failed data rows: \", task.failed_data_rows)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Create one Labelbox dataset\n", + "\n", + "global_key = \"sample-audio-1.mp3\" + str(uuid.uuid4())\n", + "\n", + "asset = {\n", + " 
\"row_data\":\n", + " \"https://storage.googleapis.com/labelbox-datasets/audio-sample-data/sample-audio-1.mp3\",\n", + " \"global_key\":\n", + " global_key,\n", + "}\n", + "\n", + "dataset = client.create_dataset(name=\"audio_annotation_import_demo_dataset\")\n", + "task = dataset.create_data_rows([asset])\n", + "task.wait_till_done()\n", + "print(\"Errors:\", task.errors)\n", + "print(\"Failed data rows: \", task.failed_data_rows)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 2: Create/select an ontology\n", @@ -165,135 +240,232 @@ "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool names and classification instructions should match the `name` fields in your annotations to ensure the correct feature schemas are matched.\n", "\n", "For example, when we create the text annotation, we provided the `name` as `text_audio`. Now, when we setup our ontology, we must ensure that the name of the tool is also `text_audio`. The same alignment must hold true for the other tools and classifications we create in our ontology." - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "ontology_builder = lb.OntologyBuilder(classifications=[\n lb.Classification(class_type=lb.Classification.Type.TEXT,\n name=\"text_audio\"),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_audio\",\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_audio\",\n options=[\n lb.Option(value=\"first_radio_answer\"),\n lb.Option(value=\"second_radio_answer\"),\n ],\n ),\n])\n\nontology = client.create_ontology(\n \"Ontology Audio Annotations\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Audio,\n)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "ontology_builder = lb.OntologyBuilder(classifications=[\n", + " lb.Classification(class_type=lb.Classification.Type.TEXT,\n", + " name=\"text_audio\"),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"checklist_audio\",\n", + " options=[\n", + " lb.Option(value=\"first_checklist_answer\"),\n", + " lb.Option(value=\"second_checklist_answer\"),\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"radio_audio\",\n", + " options=[\n", + " lb.Option(value=\"first_radio_answer\"),\n", + " lb.Option(value=\"second_radio_answer\"),\n", + " ],\n", + " ),\n", + "])\n", + "\n", + "ontology = client.create_ontology(\n", + " \"Ontology Audio Annotations\",\n", + " ontology_builder.asdict(),\n", + " media_type=lb.MediaType.Audio,\n", + ")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "\n", "## Step 3: Create a labeling project\n", "Connect the ontology to the labeling project" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Create Labelbox project\nproject = client.create_project(name=\"audio_project\",\n media_type=lb.MediaType.Audio)\n\n# Setup your ontology\nproject.setup_editor(\n ontology) # Connect your ontology and editor to your project", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Create Labelbox project\n", + "project = client.create_project(name=\"audio_project\",\n", + " 
+ "\n",
+ "# Setup your ontology\n",
+ "project.connect_ontology(\n",
+ " ontology) # Connect your ontology and editor to your project"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"## Step 4: Send a batch of data rows to the project"
- ],
- "cell_type": "markdown"
+ ]
},
{
- "metadata": {},
- "source": "# Setup Batches\n\n# Create a batch to send to your MAL project\nbatch = project.create_batch(\n \"first-batch-audio-demo\", # Each batch in a project must have a unique name\n global_keys=[\n global_key\n ], # Paginated collection of data row objects, list of data row ids or global keys\n priority=5, # priority between 1 (highest) - 5 (lowest)\n)\n\nprint(\"Batch: \", batch)",
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
- "execution_count": null
+ "source": [
+ "# Setup Batches\n",
+ "\n",
+ "# Create a batch to send to your MAL project\n",
+ "batch = project.create_batch(\n",
+ " \"first-batch-audio-demo\", # Each batch in a project must have a unique name\n",
+ " global_keys=[\n",
+ " global_key\n",
+ " ], # Paginated collection of data row objects, list of data row ids or global keys\n",
+ " priority=5, # priority between 1 (highest) - 5 (lowest)\n",
+ ")\n",
+ "\n",
+ "print(\"Batch: \", batch)"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"## Step 5: Create the annotations payload\n",
"Create the annotations payload using the snippets of code above.\n",
"\n",
"Labelbox supports two formats for the annotations payload: NDJSON and Python annotation types."
- ],
- "cell_type": "markdown"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"#### Python annotation\n",
"Here we create the complete label payload using only the Python annotation format. There is one annotation for each of the annotations we created above. "
- ],
- "cell_type": "markdown"
+ ]
},
{
- "metadata": {},
- "source": "label = []\nlabel.append(\n lb_types.Label(\n data={\"global_key\": global_key},\n annotations=[text_annotation, checklist_annotation, radio_annotation],\n ))",
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
- "execution_count": null
+ "source": [
+ "label = []\n",
+ "label.append(\n",
+ " lb_types.Label(\n",
+ " data={\"global_key\": global_key},\n",
+ " annotations=[text_annotation, checklist_annotation, radio_annotation],\n",
+ " ))"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"### NDJSON annotations \n",
"Here we create the complete label payload using only the NDJSON format. There is one annotation for each of the annotations we created above.\n",
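+ "\n",
+ "For example (a sketch of the shape the next cell produces, using the free-text classification above), each NDJSON entry becomes a flat dictionary that references its data row by global key:\n",
+ "\n",
+ "```python\n",
+ "{\"name\": \"text_audio\", \"answer\": \"free text audio annotation\", \"dataRow\": {\"globalKey\": global_key}}\n",
+ "```"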
- ],
- "cell_type": "markdown"
+ ]
},
{
- "metadata": {},
- "source": "label_ndjson = []\nfor annotations in [\n text_annotation_ndjson,\n checklist_annotation_ndjson,\n radio_annotation_ndjson,\n]:\n annotations.update({\"dataRow\": {\"globalKey\": global_key}})\n label_ndjson.append(annotations)",
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
- "execution_count": null
+ "source": [
+ "label_ndjson = []\n",
+ "for annotations in [\n",
+ " text_annotation_ndjson,\n",
+ " checklist_annotation_ndjson,\n",
+ " radio_annotation_ndjson,\n",
+ "]:\n",
+ " annotations.update({\"dataRow\": {\"globalKey\": global_key}})\n",
+ " label_ndjson.append(annotations)"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"### Step 6: Upload annotations to a project as pre-labels or complete labels"
- ],
- "cell_type": "markdown"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"#### Model Assisted Labeling (MAL)\n",
"For the purposes of this tutorial, run only one of the two payload formats at a time (NDJSON or Python annotation types). Delete the previous labels before uploading labels that use the second method (NDJSON)."
- ],
- "cell_type": "markdown"
+ ]
},
{
- "metadata": {},
- "source": "# Upload our label using Model-Assisted Labeling\nupload_job = lb.MALPredictionImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=f\"mal_job-{str(uuid.uuid4())}\",\n predictions=label,\n)\n\nupload_job.wait_until_done()\nprint(\"Errors:\", upload_job.errors)\nprint(\"Status of uploads: \", upload_job.statuses)",
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
- "execution_count": null
+ "source": [
+ "# Upload our label using Model-Assisted Labeling\n",
+ "upload_job = lb.MALPredictionImport.create_from_objects(\n",
+ " client=client,\n",
+ " project_id=project.uid,\n",
+ " name=f\"mal_job-{str(uuid.uuid4())}\",\n",
+ " predictions=label,\n",
+ ")\n",
+ "\n",
+ "upload_job.wait_until_done()\n",
+ "print(\"Errors:\", upload_job.errors)\n",
+ "print(\"Status of uploads: \", upload_job.statuses)"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"#### Label Import"
- ],
- "cell_type": "markdown"
+ ]
},
{
- "metadata": {},
- "source": "# Upload label for this data row in project\nupload_job = lb.LabelImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"label_import_job\" + str(uuid.uuid4()),\n labels=label,\n)\n\nupload_job.wait_until_done()\nprint(\"Errors:\", upload_job.errors)\nprint(\"Status of uploads: \", upload_job.statuses)",
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
- "execution_count": null
+ "source": [
+ "# Upload label for this data row in project\n",
+ "upload_job = lb.LabelImport.create_from_objects(\n",
+ " client=client,\n",
+ " project_id=project.uid,\n",
+ " name=\"label_import_job\" + str(uuid.uuid4()),\n",
+ " labels=label,\n",
+ ")\n",
+ "\n",
+ "upload_job.wait_until_done()\n",
+ "print(\"Errors:\", upload_job.errors)\n",
+ "print(\"Status of uploads: \", upload_job.statuses)"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"### Optional deletions for cleanup "
- ],
- "cell_type": "markdown"
+ ]
},
{
- "metadata": {},
- "source": "# project.delete()\n# dataset.delete()",
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
- "execution_count": null
+ "source": [
+ "# project.delete()\n",
+ "# dataset.delete()"
+ ]
}
- ]
-}
\ No newline at end of file
+ 
], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/annotation_import/conversational.ipynb b/annotation_import/conversational.ipynb index d3b626e..815efc6 100644 --- a/annotation_import/conversational.ipynb +++ b/annotation_import/conversational.ipynb @@ -202,7 +202,7 @@ }, { "metadata": {}, - "source": "# Create Labelbox project\nproject = client.create_project(\n name=\"Conversational Text Annotation Import Demo\",\n media_type=lb.MediaType.Conversational,\n)\n\n# Setup your ontology\nproject.setup_editor(\n ontology) # Connect your ontology and editor to your project", + "source": "# Create Labelbox project\nproject = client.create_project(\n name=\"Conversational Text Annotation Import Demo\",\n media_type=lb.MediaType.Conversational,\n)\n\n# Setup your ontology\nproject.connect_ontology(\n ontology) # Connect your ontology and editor to your project", "cell_type": "code", "outputs": [], "execution_count": null diff --git a/annotation_import/conversational_LLM.ipynb b/annotation_import/conversational_LLM.ipynb index 03edde1..835a511 100644 --- a/annotation_import/conversational_LLM.ipynb +++ b/annotation_import/conversational_LLM.ipynb @@ -215,7 +215,7 @@ }, { "metadata": {}, - "source": "# Create Labelbox project\nproject = client.create_project(\n name=\"Conversational Text Annotation Import Demo (Pairwise comparison)\",\n media_type=lb.MediaType.Conversational,\n)\n\n# Setup your ontology\nproject.setup_editor(\n ontology) # Connect your ontology and editor to your project", + "source": "# Create Labelbox project\nproject = client.create_project(\n name=\"Conversational Text Annotation Import Demo (Pairwise comparison)\",\n media_type=lb.MediaType.Conversational,\n)\n\n# Setup your ontology\nproject.connect_ontology(\n ontology) # Connect your ontology and editor to your project", "cell_type": "code", "outputs": [], "execution_count": null diff --git a/annotation_import/dicom.ipynb b/annotation_import/dicom.ipynb index 1b39378..3c17c11 100644 --- a/annotation_import/dicom.ipynb +++ b/annotation_import/dicom.ipynb @@ -154,7 +154,7 @@ }, { "metadata": {}, - "source": "# Project defaults to batch mode with benchmark quality settings if this argument is not provided\n# Queue mode will be deprecated once dataset mode is deprecated\n\nproject = client.create_project(name=\"dicom_project_demo\",\n media_type=lb.MediaType.Dicom)\n\n## connect ontology to your project\nproject.setup_editor(ontology)", + "source": "# Project defaults to batch mode with benchmark quality settings if this argument is not provided\n# Queue mode will be deprecated once dataset mode is deprecated\n\nproject = client.create_project(name=\"dicom_project_demo\",\n media_type=lb.MediaType.Dicom)\n\n## connect ontology to your project\nproject.connect_ontology(ontology)", "cell_type": "code", "outputs": [], "execution_count": null diff --git a/annotation_import/html.ipynb b/annotation_import/html.ipynb index 5d99add..efc11ba 100644 --- a/annotation_import/html.ipynb +++ b/annotation_import/html.ipynb @@ -193,7 +193,7 @@ }, { "metadata": {}, - "source": "# Create Labelbox project\nproject = client.create_project(name=\"HTML Import Annotation Demo\",\n media_type=lb.MediaType.Html)\n\n# Setup your ontology\nproject.setup_editor(\n ontology) # Connect your ontology and editor to your project", + "source": "# Create Labelbox project\nproject = client.create_project(name=\"HTML Import Annotation Demo\",\n media_type=lb.MediaType.Html)\n\n# Setup your 
ontology\nproject.connect_ontology(\n ontology) # Connect your ontology and editor to your project", "cell_type": "code", "outputs": [], "execution_count": null diff --git a/annotation_import/image.ipynb b/annotation_import/image.ipynb index 5671099..fa66196 100644 --- a/annotation_import/image.ipynb +++ b/annotation_import/image.ipynb @@ -327,7 +327,7 @@ }, { "metadata": {}, - "source": "# Project defaults to batch mode with benchmark quality settings if this argument is not provided\n# Queue mode will be deprecated once dataset mode is deprecated\nproject = client.create_project(name=\"Image Annotation Import Demo\",\n media_type=lb.MediaType.Image)\n\nproject.setup_editor(ontology)", + "source": "# Project defaults to batch mode with benchmark quality settings if this argument is not provided\n# Queue mode will be deprecated once dataset mode is deprecated\nproject = client.create_project(name=\"Image Annotation Import Demo\",\n media_type=lb.MediaType.Image)\n\nproject.connect_ontology(ontology)", "cell_type": "code", "outputs": [], "execution_count": null diff --git a/annotation_import/pdf.ipynb b/annotation_import/pdf.ipynb index 2621a4d..59964d1 100644 --- a/annotation_import/pdf.ipynb +++ b/annotation_import/pdf.ipynb @@ -1,18 +1,16 @@ { - "nbformat": 4, - "nbformat_minor": 1, - "metadata": {}, "cells": [ { + "cell_type": "markdown", "metadata": {}, "source": [ - "", - " ", + "\n", + " \n", "\n" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -24,17 +22,17 @@ "\n", "" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "# PDF Annotation Import" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -56,137 +54,538 @@ "- Bounding box \n", "- Entities \n", "- Relationships (only supported for MAL imports)" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Setup" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "%pip install -q \"labelbox[data]\"", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "%pip install -q \"labelbox[data]\"" + ] }, { - "metadata": {}, - "source": "import uuid\nimport json\nimport requests\nimport labelbox as lb\nimport labelbox.types as lb_types", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "import uuid\n", + "import json\n", + "import requests\n", + "import labelbox as lb\n", + "import labelbox.types as lb_types" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Replace with your API key\n", "Guides on https://docs.labelbox.com/docs/create-an-api-key" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Add your api key\nAPI_KEY = \"\"\nclient = lb.Client(api_key=API_KEY)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Add your api key\n", + "API_KEY = \"\"\n", + "client = lb.Client(api_key=API_KEY)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Supported Annotations" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "########## Entity ##########\n\n# Annotation Types\nentities_annotations = lb_types.ObjectAnnotation(\n name=\"named_entity\",\n value=lb_types.DocumentEntity(\n name=\"named_entity\",\n textSelections=[\n 
lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n ],\n ),\n)\n\n# NDJSON\nentities_annotations_ndjson = {\n \"name\":\n \"named_entity\",\n \"textSelections\": [{\n \"tokenIds\": [\"\",],\n \"groupId\": \"\",\n \"page\": 1,\n }],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "########## Entity ##########\n", + "\n", + "# Annotation Types\n", + "entities_annotations = lb_types.ObjectAnnotation(\n", + " name=\"named_entity\",\n", + " value=lb_types.DocumentEntity(\n", + " name=\"named_entity\",\n", + " textSelections=[\n", + " lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n", + " ],\n", + " ),\n", + ")\n", + "\n", + "# NDJSON\n", + "entities_annotations_ndjson = {\n", + " \"name\":\n", + " \"named_entity\",\n", + " \"textSelections\": [{\n", + " \"tokenIds\": [\"\",],\n", + " \"groupId\": \"\",\n", + " \"page\": 1,\n", + " }],\n", + "}" + ] }, { - "metadata": {}, - "source": "########### Radio Classification #########\n\n# Annotation types\nradio_annotation = lb_types.ClassificationAnnotation(\n name=\"radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\")),\n)\n# NDJSON\nradio_annotation_ndjson = {\n \"name\": \"radio_question\",\n \"answer\": {\n \"name\": \"first_radio_answer\"\n },\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "########### Radio Classification #########\n", + "\n", + "# Annotation types\n", + "radio_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\")),\n", + ")\n", + "# NDJSON\n", + "radio_annotation_ndjson = {\n", + " \"name\": \"radio_question\",\n", + " \"answer\": {\n", + " \"name\": \"first_radio_answer\"\n", + " },\n", + "}" + ] }, { - "metadata": {}, - "source": "############ Checklist Classification ###########\n\n# Annotation types\nchecklist_annotation = lb_types.ClassificationAnnotation(\n name=\"checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n ]),\n)\n\n# NDJSON\nchecklist_annotation_ndjson = {\n \"name\":\n \"checklist_question\",\n \"answer\": [\n {\n \"name\": \"first_checklist_answer\"\n },\n {\n \"name\": \"second_checklist_answer\"\n },\n ],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "############ Checklist Classification ###########\n", + "\n", + "# Annotation types\n", + "checklist_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n", + " lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n", + " ]),\n", + ")\n", + "\n", + "# NDJSON\n", + "checklist_annotation_ndjson = {\n", + " \"name\":\n", + " \"checklist_question\",\n", + " \"answer\": [\n", + " {\n", + " \"name\": \"first_checklist_answer\"\n", + " },\n", + " {\n", + " \"name\": \"second_checklist_answer\"\n", + " },\n", + " ],\n", + "}" + ] }, { - "metadata": {}, - "source": "############ Bounding Box ###########\n\nbbox_annotation = lb_types.ObjectAnnotation(\n name=\"bounding_box\", # must match your ontology feature\"s 
name\n value=lb_types.DocumentRectangle(\n start=lb_types.Point(x=102.771, y=135.3), # x = left, y = top\n end=lb_types.Point(x=518.571,\n y=245.143), # x= left + width , y = top + height\n page=0,\n unit=lb_types.RectangleUnit.POINTS,\n ),\n)\n\nbbox_annotation_ndjson = {\n \"name\": \"bounding_box\",\n \"bbox\": {\n \"top\": 135.3,\n \"left\": 102.771,\n \"height\": 109.843,\n \"width\": 415.8\n },\n \"page\": 0,\n \"unit\": \"POINTS\",\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "############ Bounding Box ###########\n", + "\n", + "bbox_annotation = lb_types.ObjectAnnotation(\n", + " name=\"bounding_box\", # must match your ontology feature\"s name\n", + " value=lb_types.DocumentRectangle(\n", + " start=lb_types.Point(x=102.771, y=135.3), # x = left, y = top\n", + " end=lb_types.Point(x=518.571,\n", + " y=245.143), # x= left + width , y = top + height\n", + " page=0,\n", + " unit=lb_types.RectangleUnit.POINTS,\n", + " ),\n", + ")\n", + "\n", + "bbox_annotation_ndjson = {\n", + " \"name\": \"bounding_box\",\n", + " \"bbox\": {\n", + " \"top\": 135.3,\n", + " \"left\": 102.771,\n", + " \"height\": 109.843,\n", + " \"width\": 415.8\n", + " },\n", + " \"page\": 0,\n", + " \"unit\": \"POINTS\",\n", + "}" + ] }, { - "metadata": {}, - "source": "# ############ global nested classifications ###########\n\nnested_checklist_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\")\n ]),\n )\n ],\n )\n ]),\n)\n\nnested_checklist_annotation_ndjson = {\n \"name\":\n \"nested_checklist_question\",\n \"answer\": [{\n \"name\":\n \"first_checklist_answer\",\n \"classifications\": [{\n \"name\": \"sub_checklist_question\",\n \"answer\": {\n \"name\": \"first_sub_checklist_answer\"\n },\n }],\n }],\n}\n\nnested_radio_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\")),\n )\n ],\n )),\n)\n\nnested_radio_annotation_ndjson = {\n \"name\": \"nested_radio_question\",\n \"answer\": {\n \"name\":\n \"first_radio_answer\",\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\": \"first_sub_radio_answer\"\n },\n }],\n },\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# ############ global nested classifications ###########\n", + "\n", + "nested_checklist_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"nested_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_checklist_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_sub_checklist_answer\")\n", + " ]),\n", + " )\n", + " ],\n", + " )\n", + " ]),\n", + ")\n", + "\n", + 
"nested_checklist_annotation_ndjson = {\n", + " \"name\":\n", + " \"nested_checklist_question\",\n", + " \"answer\": [{\n", + " \"name\":\n", + " \"first_checklist_answer\",\n", + " \"classifications\": [{\n", + " \"name\": \"sub_checklist_question\",\n", + " \"answer\": {\n", + " \"name\": \"first_sub_checklist_answer\"\n", + " },\n", + " }],\n", + " }],\n", + "}\n", + "\n", + "nested_radio_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"nested_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_sub_radio_answer\")),\n", + " )\n", + " ],\n", + " )),\n", + ")\n", + "\n", + "nested_radio_annotation_ndjson = {\n", + " \"name\": \"nested_radio_question\",\n", + " \"answer\": {\n", + " \"name\":\n", + " \"first_radio_answer\",\n", + " \"classifications\": [{\n", + " \"name\": \"sub_radio_question\",\n", + " \"answer\": {\n", + " \"name\": \"first_sub_radio_answer\"\n", + " },\n", + " }],\n", + " },\n", + "}" + ] }, { - "metadata": {}, - "source": "############## Classification Free-form text ##############\n\ntext_annotation = lb_types.ClassificationAnnotation(\n name=\"free_text\", # must match your ontology feature\"s name\n value=lb_types.Text(answer=\"sample text\"),\n)\n\ntext_annotation_ndjson = {\"name\": \"free_text\", \"answer\": \"sample text\"}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "############## Classification Free-form text ##############\n", + "\n", + "text_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"free_text\", # must match your ontology feature\"s name\n", + " value=lb_types.Text(answer=\"sample text\"),\n", + ")\n", + "\n", + "text_annotation_ndjson = {\"name\": \"free_text\", \"answer\": \"sample text\"}" + ] }, { - "metadata": {}, - "source": "######### BBOX with nested classifications #########\n\nbbox_with_radio_subclass_annotation = lb_types.ObjectAnnotation(\n name=\"bbox_with_radio_subclass\",\n value=lb_types.DocumentRectangle(\n start=lb_types.Point(x=317.271, y=226.757), # x = left, y = top\n end=lb_types.Point(x=566.657,\n y=420.986), # x= left + width , y = top + height\n unit=lb_types.RectangleUnit.POINTS,\n page=1,\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"second_sub_radio_question\",\n value=lb_types.Radio(\n answer=lb_types.ClassificationAnswer(\n name=\"second_sub_radio_answer\")),\n )\n ],\n )),\n )\n ],\n)\n\nbbox_with_radio_subclass_annotation_ndjson = {\n \"name\": \"bbox_with_radio_subclass\",\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\":\n \"first_sub_radio_answer\",\n \"classifications\": [{\n \"name\": \"second_sub_radio_question\",\n \"answer\": {\n \"name\": \"second_sub_radio_answer\"\n },\n }],\n },\n }],\n \"bbox\": {\n \"top\": 226.757,\n \"left\": 317.271,\n \"height\": 194.229,\n \"width\": 249.386,\n },\n \"page\": 1,\n \"unit\": \"POINTS\",\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "######### BBOX with nested classifications 
#########\n", + "\n", + "bbox_with_radio_subclass_annotation = lb_types.ObjectAnnotation(\n", + " name=\"bbox_with_radio_subclass\",\n", + " value=lb_types.DocumentRectangle(\n", + " start=lb_types.Point(x=317.271, y=226.757), # x = left, y = top\n", + " end=lb_types.Point(x=566.657,\n", + " y=420.986), # x= left + width , y = top + height\n", + " unit=lb_types.RectangleUnit.POINTS,\n", + " page=1,\n", + " ),\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_sub_radio_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"second_sub_radio_question\",\n", + " value=lb_types.Radio(\n", + " answer=lb_types.ClassificationAnswer(\n", + " name=\"second_sub_radio_answer\")),\n", + " )\n", + " ],\n", + " )),\n", + " )\n", + " ],\n", + ")\n", + "\n", + "bbox_with_radio_subclass_annotation_ndjson = {\n", + " \"name\": \"bbox_with_radio_subclass\",\n", + " \"classifications\": [{\n", + " \"name\": \"sub_radio_question\",\n", + " \"answer\": {\n", + " \"name\":\n", + " \"first_sub_radio_answer\",\n", + " \"classifications\": [{\n", + " \"name\": \"second_sub_radio_question\",\n", + " \"answer\": {\n", + " \"name\": \"second_sub_radio_answer\"\n", + " },\n", + " }],\n", + " },\n", + " }],\n", + " \"bbox\": {\n", + " \"top\": 226.757,\n", + " \"left\": 317.271,\n", + " \"height\": 194.229,\n", + " \"width\": 249.386,\n", + " },\n", + " \"page\": 1,\n", + " \"unit\": \"POINTS\",\n", + "}" + ] }, { - "metadata": {}, - "source": "############ NER with nested classifications ########\n\nner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(\n name=\"ner_with_checklist_subclass\",\n value=lb_types.DocumentEntity(\n name=\"ner_with_checklist_subclass\",\n text_selections=[\n lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n ],\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\")\n ]),\n )\n ],\n)\n\nner_with_checklist_subclass_annotation_ndjson = {\n \"name\":\n \"ner_with_checklist_subclass\",\n \"classifications\": [{\n \"name\": \"sub_checklist_question\",\n \"answer\": [{\n \"name\": \"first_sub_checklist_answer\"\n }],\n }],\n \"textSelections\": [{\n \"tokenIds\": [\"\"],\n \"groupId\": \"\",\n \"page\": 1\n }],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "############ NER with nested classifications ########\n", + "\n", + "ner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(\n", + " name=\"ner_with_checklist_subclass\",\n", + " value=lb_types.DocumentEntity(\n", + " name=\"ner_with_checklist_subclass\",\n", + " text_selections=[\n", + " lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n", + " ],\n", + " ),\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\")\n", + " ]),\n", + " )\n", + " ],\n", + ")\n", + "\n", + "ner_with_checklist_subclass_annotation_ndjson = {\n", + " \"name\":\n", + " \"ner_with_checklist_subclass\",\n", + " \"classifications\": [{\n", + " \"name\": \"sub_checklist_question\",\n", + " \"answer\": [{\n", + " \"name\": 
\"first_sub_checklist_answer\"\n", + " }],\n", + " }],\n", + " \"textSelections\": [{\n", + " \"tokenIds\": [\"\"],\n", + " \"groupId\": \"\",\n", + " \"page\": 1\n", + " }],\n", + "}" + ] }, { - "metadata": {}, - "source": "######### Relationships ##########\nentity_source = lb_types.ObjectAnnotation(\n name=\"named_entity\",\n value=lb_types.DocumentEntity(\n name=\"named_entity\",\n textSelections=[\n lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n ],\n ),\n)\n\nentity_target = lb_types.ObjectAnnotation(\n name=\"named_entity\",\n value=lb_types.DocumentEntity(\n name=\"named_entity\",\n textSelections=[\n lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n ],\n ),\n)\n\nentity_relationship = lb_types.RelationshipAnnotation(\n name=\"relationship\",\n value=lb_types.Relationship(\n source=entity_source,\n target=entity_target,\n type=lb_types.Relationship.Type.UNIDIRECTIONAL,\n ),\n)\n\n## Only supported for MAL imports\nuuid_source = str(uuid.uuid4())\nuuid_target = str(uuid.uuid4())\n\nentity_source_ndjson = {\n \"name\":\n \"named_entity\",\n \"uuid\":\n uuid_source,\n \"textSelections\": [{\n \"tokenIds\": [\"\"],\n \"groupId\": \"\",\n \"page\": 1\n }],\n}\n\nentity_target_ndjson = {\n \"name\":\n \"named_entity\",\n \"uuid\":\n uuid_target,\n \"textSelections\": [{\n \"tokenIds\": [\"\"],\n \"groupId\": \"\",\n \"page\": 1\n }],\n}\nner_relationship_annotation_ndjson = {\n \"name\": \"relationship\",\n \"relationship\": {\n \"source\": uuid_source,\n \"target\": uuid_target,\n \"type\": \"unidirectional\",\n },\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "######### Relationships ##########\n", + "entity_source = lb_types.ObjectAnnotation(\n", + " name=\"named_entity\",\n", + " value=lb_types.DocumentEntity(\n", + " name=\"named_entity\",\n", + " textSelections=[\n", + " lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n", + " ],\n", + " ),\n", + ")\n", + "\n", + "entity_target = lb_types.ObjectAnnotation(\n", + " name=\"named_entity\",\n", + " value=lb_types.DocumentEntity(\n", + " name=\"named_entity\",\n", + " textSelections=[\n", + " lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n", + " ],\n", + " ),\n", + ")\n", + "\n", + "entity_relationship = lb_types.RelationshipAnnotation(\n", + " name=\"relationship\",\n", + " value=lb_types.Relationship(\n", + " source=entity_source,\n", + " target=entity_target,\n", + " type=lb_types.Relationship.Type.UNIDIRECTIONAL,\n", + " ),\n", + ")\n", + "\n", + "## Only supported for MAL imports\n", + "uuid_source = str(uuid.uuid4())\n", + "uuid_target = str(uuid.uuid4())\n", + "\n", + "entity_source_ndjson = {\n", + " \"name\":\n", + " \"named_entity\",\n", + " \"uuid\":\n", + " uuid_source,\n", + " \"textSelections\": [{\n", + " \"tokenIds\": [\"\"],\n", + " \"groupId\": \"\",\n", + " \"page\": 1\n", + " }],\n", + "}\n", + "\n", + "entity_target_ndjson = {\n", + " \"name\":\n", + " \"named_entity\",\n", + " \"uuid\":\n", + " uuid_target,\n", + " \"textSelections\": [{\n", + " \"tokenIds\": [\"\"],\n", + " \"groupId\": \"\",\n", + " \"page\": 1\n", + " }],\n", + "}\n", + "ner_relationship_annotation_ndjson = {\n", + " \"name\": \"relationship\",\n", + " \"relationship\": {\n", + " \"source\": uuid_source,\n", + " \"target\": uuid_target,\n", + " \"type\": \"unidirectional\",\n", + " },\n", + "}" + ] }, { - "metadata": {}, - "source": "######### BBOX with relationships #############\n# 
Python Annotation\nbbox_source = lb_types.ObjectAnnotation(\n name=\"bounding_box\",\n value=lb_types.DocumentRectangle(\n start=lb_types.Point(x=188.257, y=68.875), # x = left, y = top\n end=lb_types.Point(x=270.907,\n y=149.556), # x = left + width , y = top + height\n unit=lb_types.RectangleUnit.POINTS,\n page=1,\n ),\n)\n\nbbox_target = lb_types.ObjectAnnotation(\n name=\"bounding_box\",\n value=lb_types.DocumentRectangle(\n start=lb_types.Point(x=96.424, y=66.251),\n end=lb_types.Point(x=179.074, y=146.932),\n unit=lb_types.RectangleUnit.POINTS,\n page=1,\n ),\n)\n\nbbox_relationship = lb_types.RelationshipAnnotation(\n name=\"relationship\",\n value=lb_types.Relationship(\n source=bbox_source,\n target=bbox_target,\n type=lb_types.Relationship.Type.UNIDIRECTIONAL,\n ),\n)\n\n## Only supported for MAL imports\nuuid_source_2 = str(uuid.uuid4())\nuuid_target_2 = str(uuid.uuid4())\n\nbbox_source_ndjson = {\n \"name\": \"bounding_box\",\n \"uuid\": uuid_source_2,\n \"bbox\": {\n \"top\": 68.875,\n \"left\": 188.257,\n \"height\": 80.681,\n \"width\": 82.65\n },\n \"page\": 1,\n \"unit\": \"POINTS\",\n}\n\nbbox_target_ndjson = {\n \"name\": \"bounding_box\",\n \"uuid\": uuid_target_2,\n \"bbox\": {\n \"top\": 66.251,\n \"left\": 96.424,\n \"height\": 80.681,\n \"width\": 82.65\n },\n \"page\": 1,\n \"unit\": \"POINTS\",\n}\n\nbbox_relationship_annotation_ndjson = {\n \"name\": \"relationship\",\n \"relationship\": {\n \"source\": uuid_source_2,\n \"target\": uuid_target_2,\n \"type\": \"unidirectional\",\n },\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "######### BBOX with relationships #############\n", + "# Python Annotation\n", + "bbox_source = lb_types.ObjectAnnotation(\n", + " name=\"bounding_box\",\n", + " value=lb_types.DocumentRectangle(\n", + " start=lb_types.Point(x=188.257, y=68.875), # x = left, y = top\n", + " end=lb_types.Point(x=270.907,\n", + " y=149.556), # x = left + width , y = top + height\n", + " unit=lb_types.RectangleUnit.POINTS,\n", + " page=1,\n", + " ),\n", + ")\n", + "\n", + "bbox_target = lb_types.ObjectAnnotation(\n", + " name=\"bounding_box\",\n", + " value=lb_types.DocumentRectangle(\n", + " start=lb_types.Point(x=96.424, y=66.251),\n", + " end=lb_types.Point(x=179.074, y=146.932),\n", + " unit=lb_types.RectangleUnit.POINTS,\n", + " page=1,\n", + " ),\n", + ")\n", + "\n", + "bbox_relationship = lb_types.RelationshipAnnotation(\n", + " name=\"relationship\",\n", + " value=lb_types.Relationship(\n", + " source=bbox_source,\n", + " target=bbox_target,\n", + " type=lb_types.Relationship.Type.UNIDIRECTIONAL,\n", + " ),\n", + ")\n", + "\n", + "## Only supported for MAL imports\n", + "uuid_source_2 = str(uuid.uuid4())\n", + "uuid_target_2 = str(uuid.uuid4())\n", + "\n", + "bbox_source_ndjson = {\n", + " \"name\": \"bounding_box\",\n", + " \"uuid\": uuid_source_2,\n", + " \"bbox\": {\n", + " \"top\": 68.875,\n", + " \"left\": 188.257,\n", + " \"height\": 80.681,\n", + " \"width\": 82.65\n", + " },\n", + " \"page\": 1,\n", + " \"unit\": \"POINTS\",\n", + "}\n", + "\n", + "bbox_target_ndjson = {\n", + " \"name\": \"bounding_box\",\n", + " \"uuid\": uuid_target_2,\n", + " \"bbox\": {\n", + " \"top\": 66.251,\n", + " \"left\": 96.424,\n", + " \"height\": 80.681,\n", + " \"width\": 82.65\n", + " },\n", + " \"page\": 1,\n", + " \"unit\": \"POINTS\",\n", + "}\n", + "\n", + "bbox_relationship_annotation_ndjson = {\n", + " \"name\": \"relationship\",\n", + " \"relationship\": {\n", + " 
\"source\": uuid_source_2,\n", + " \"target\": uuid_target_2,\n", + " \"type\": \"unidirectional\",\n", + " },\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Upload Annotations - putting it all together " - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Step 1: Import data rows into Catalog " - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "Passing a `text_layer_url` is not longer required. Labelbox automatically generates a text layer using Google Document AI and its OCR engine to detect tokens. \n", @@ -200,60 +599,206 @@ "For example, in a landscape-oriented PDF, the document is rotated by 90 degrees before processing. As a result, all tokens in the text layer are also rotated by 90 degrees.\n", "\n", "You may still pass a `text_layer_url` if you wish to bypass the automatic text layer generation\n" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "global_key = \"0801.3483_doc.pdf\" + str(uuid.uuid4())\nimg_url = {\n \"row_data\": {\n \"pdf_url\":\n \"https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf\"\n },\n \"global_key\": global_key,\n}\n\ndataset = client.create_dataset(name=\"pdf_demo_dataset\")\ntask = dataset.create_data_rows([img_url])\ntask.wait_till_done()\nprint(f\"Failed data rows: {task.failed_data_rows}\")\nprint(f\"Errors: {task.errors}\")\n\nif task.errors:\n for error in task.errors:\n if (\"Duplicate global key\" in error[\"message\"] and\n dataset.row_count == 0):\n # If the global key already exists in the workspace the dataset will be created empty, so we can delete it.\n print(f\"Deleting empty dataset: {dataset}\")\n dataset.delete()", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "global_key = \"0801.3483_doc.pdf\" + str(uuid.uuid4())\n", + "img_url = {\n", + " \"row_data\": {\n", + " \"pdf_url\":\n", + " \"https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf\"\n", + " },\n", + " \"global_key\": global_key,\n", + "}\n", + "\n", + "dataset = client.create_dataset(name=\"pdf_demo_dataset\")\n", + "task = dataset.create_data_rows([img_url])\n", + "task.wait_till_done()\n", + "print(f\"Failed data rows: {task.failed_data_rows}\")\n", + "print(f\"Errors: {task.errors}\")\n", + "\n", + "if task.errors:\n", + " for error in task.errors:\n", + " if (\"Duplicate global key\" in error[\"message\"] and\n", + " dataset.row_count == 0):\n", + " # If the global key already exists in the workspace the dataset will be created empty, so we can delete it.\n", + " print(f\"Deleting empty dataset: {dataset}\")\n", + " dataset.delete()" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Step 2: Create/select an Ontology for your project\n", "\n" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "## Setup the ontology and link the tools created above.\n\nontology_builder = lb.OntologyBuilder(\n classifications=[ # List of Classification objects\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_question\",\n scope=lb.Classification.Scope.GLOBAL,\n options=[\n lb.Option(value=\"first_radio_answer\"),\n lb.Option(value=\"second_radio_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_question\",\n scope=lb.Classification.Scope.GLOBAL,\n options=[\n 
lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.TEXT,\n name=\"free_text\",\n scope=lb.Classification.Scope.GLOBAL,\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"nested_radio_question\",\n scope=lb.Classification.Scope.GLOBAL,\n options=[\n lb.Option(\n \"first_radio_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[lb.Option(\"first_sub_radio_answer\")],\n )\n ],\n )\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"nested_checklist_question\",\n scope=lb.Classification.Scope.GLOBAL,\n options=[\n lb.Option(\n \"first_checklist_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"sub_checklist_question\",\n options=[lb.Option(\"first_sub_checklist_answer\")],\n )\n ],\n )\n ],\n ),\n ],\n tools=[ # List of Tool objects\n lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bounding_box\"),\n lb.Tool(tool=lb.Tool.Type.NER, name=\"named_entity\"),\n lb.Tool(tool=lb.Tool.Type.RELATIONSHIP, name=\"relationship\"),\n lb.Tool(\n tool=lb.Tool.Type.NER,\n name=\"ner_with_checklist_subclass\",\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"sub_checklist_question\",\n options=[lb.Option(value=\"first_sub_checklist_answer\")],\n )\n ],\n ),\n lb.Tool(\n tool=lb.Tool.Type.BBOX,\n name=\"bbox_with_radio_subclass\",\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[\n lb.Option(\n value=\"first_sub_radio_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"second_sub_radio_question\",\n options=[\n lb.Option(\"second_sub_radio_answer\")\n ],\n )\n ],\n )\n ],\n )\n ],\n ),\n ],\n)\n\nontology = client.create_ontology(\n \"Document Annotation Import Demo\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Document,\n)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "## Setup the ontology and link the tools created above.\n", + "\n", + "ontology_builder = lb.OntologyBuilder(\n", + " classifications=[ # List of Classification objects\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"radio_question\",\n", + " scope=lb.Classification.Scope.GLOBAL,\n", + " options=[\n", + " lb.Option(value=\"first_radio_answer\"),\n", + " lb.Option(value=\"second_radio_answer\"),\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"checklist_question\",\n", + " scope=lb.Classification.Scope.GLOBAL,\n", + " options=[\n", + " lb.Option(value=\"first_checklist_answer\"),\n", + " lb.Option(value=\"second_checklist_answer\"),\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.TEXT,\n", + " name=\"free_text\",\n", + " scope=lb.Classification.Scope.GLOBAL,\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"nested_radio_question\",\n", + " scope=lb.Classification.Scope.GLOBAL,\n", + " options=[\n", + " lb.Option(\n", + " \"first_radio_answer\",\n", + " options=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"sub_radio_question\",\n", + " 
options=[lb.Option(\"first_sub_radio_answer\")],\n", + " )\n", + " ],\n", + " )\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"nested_checklist_question\",\n", + " scope=lb.Classification.Scope.GLOBAL,\n", + " options=[\n", + " lb.Option(\n", + " \"first_checklist_answer\",\n", + " options=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"sub_checklist_question\",\n", + " options=[lb.Option(\"first_sub_checklist_answer\")],\n", + " )\n", + " ],\n", + " )\n", + " ],\n", + " ),\n", + " ],\n", + " tools=[ # List of Tool objects\n", + " lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bounding_box\"),\n", + " lb.Tool(tool=lb.Tool.Type.NER, name=\"named_entity\"),\n", + " lb.Tool(tool=lb.Tool.Type.RELATIONSHIP, name=\"relationship\"),\n", + " lb.Tool(\n", + " tool=lb.Tool.Type.NER,\n", + " name=\"ner_with_checklist_subclass\",\n", + " classifications=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"sub_checklist_question\",\n", + " options=[lb.Option(value=\"first_sub_checklist_answer\")],\n", + " )\n", + " ],\n", + " ),\n", + " lb.Tool(\n", + " tool=lb.Tool.Type.BBOX,\n", + " name=\"bbox_with_radio_subclass\",\n", + " classifications=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"sub_radio_question\",\n", + " options=[\n", + " lb.Option(\n", + " value=\"first_sub_radio_answer\",\n", + " options=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"second_sub_radio_question\",\n", + " options=[\n", + " lb.Option(\"second_sub_radio_answer\")\n", + " ],\n", + " )\n", + " ],\n", + " )\n", + " ],\n", + " )\n", + " ],\n", + " ),\n", + " ],\n", + ")\n", + "\n", + "ontology = client.create_ontology(\n", + " \"Document Annotation Import Demo\",\n", + " ontology_builder.asdict(),\n", + " media_type=lb.MediaType.Document,\n", + ")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Step 3: Creating a labeling project" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Create a Labelbox project\nproject = client.create_project(name=\"PDF_annotation_demo\",\n media_type=lb.MediaType.Document)\nproject.setup_editor(ontology)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Create a Labelbox project\n", + "project = client.create_project(name=\"PDF_annotation_demo\",\n", + " media_type=lb.MediaType.Document)\n", + "project.connect_ontology(ontology)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Step 4: Send a batch of data rows to the project" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "project.create_batch(\n \"PDF_annotation_batch\", # Each batch in a project must have a unique name\n global_keys=[\n global_key\n ], # Paginated collection of data row objects, list of data row ids or global keys\n priority=5, # priority between 1(Highest) - 5(lowest)\n)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "project.create_batch(\n", + " \"PDF_annotation_batch\", # Each batch in a project must have a unique name\n", + " global_keys=[\n", + " global_key\n", + " ], # Paginated collection of data row objects, list of data row ids or global keys\n", + " priority=5, # priority between 1(Highest) - 5(lowest)\n", + ")" + ] }, { + "cell_type": 
"markdown", "metadata": {}, "source": [ "### Step 5. Create the annotation payload\n", @@ -262,124 +807,356 @@ "Labelbox support NDJSON only for this data type.\n", "\n", "The resulting label should have exactly the same content for annotations that are supported by both (with exception of the uuid strings that are generated)" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "##### Step 5.1: First, we need to populate the text selections for Entity annotations\n", "To import ner annotations, you must pass a `text_layer_url`, Labelbox automatically generates a `text_layer_url` after importing a pdf asset that doesn't include a `text_layer_url`\n" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "To extract the generated text layer url we first need to export the data row" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "client.enable_experimental = True\ntask = lb.DataRow.export(client=client, global_keys=[global_key])\ntask.wait_till_done()\nstream = task.get_buffered_stream()\n\ntext_layer = \"\"\nfor output in stream:\n output_json = output.json\n text_layer = output_json[\"media_attributes\"][\"text_layer_url\"]\nprint(text_layer)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "task = lb.DataRow.export(client=client, global_keys=[global_key])\n", + "task.wait_till_done()\n", + "stream = task.get_buffered_stream()\n", + "\n", + "text_layer = \"\"\n", + "for output in stream:\n", + " output_json = output.json\n", + " text_layer = output_json[\"media_attributes\"][\"text_layer_url\"]\n", + "print(text_layer)" + ] }, { - "metadata": {}, - "source": "# Helper method\ndef update_text_selections(annotation, group_id, list_tokens, page):\n return annotation.update({\n \"textSelections\": [{\n \"groupId\": group_id,\n \"tokenIds\": list_tokens,\n \"page\": page\n }]\n })\n\n\n# Fetch the content of the text layer\nres = requests.get(text_layer)\n\n# Phrases that we want to annotation obtained from the text layer url\ncontent_phrases = [\n \"Metal-insulator (MI) transitions have been one of the\",\n \"T. Sasaki, N. Yoneyama, and N. 
Kobayashi\",\n \"Organic charge transfer salts based on the donor\",\n \"the experimental investigations on this issue have not\",\n]\n\n# Parse the text layer\ntext_selections = []\ntext_selections_ner = []\ntext_selections_source = []\ntext_selections_target = []\n\nfor obj in json.loads(res.text):\n for group in obj[\"groups\"]:\n if group[\"content\"] == content_phrases[0]:\n list_tokens = [x[\"id\"] for x in group[\"tokens\"]]\n # build text selections for Python Annotation Types\n document_text_selection = lb_types.DocumentTextSelection(\n groupId=group[\"id\"], tokenIds=list_tokens, page=1)\n text_selections.append(document_text_selection)\n # build text selection for the NDJson annotations\n update_text_selections(\n annotation=entities_annotations_ndjson,\n group_id=group[\"id\"], # id representing group of words\n list_tokens=\n list_tokens, # ids representing individual words from the group\n page=1,\n )\n if group[\"content\"] == content_phrases[1]:\n list_tokens_2 = [x[\"id\"] for x in group[\"tokens\"]]\n # build text selections for Python Annotation Types\n ner_text_selection = lb_types.DocumentTextSelection(\n groupId=group[\"id\"], tokenIds=list_tokens_2, page=1)\n text_selections_ner.append(ner_text_selection)\n # build text selection for the NDJson annotations\n update_text_selections(\n annotation=ner_with_checklist_subclass_annotation_ndjson,\n group_id=group[\"id\"], # id representing group of words\n list_tokens=\n list_tokens_2, # ids representing individual words from the group\n page=1,\n )\n if group[\"content\"] == content_phrases[2]:\n relationship_source = [x[\"id\"] for x in group[\"tokens\"]]\n # build text selections for Python Annotation Types\n text_selection_entity_source = lb_types.DocumentTextSelection(\n groupId=group[\"id\"], tokenIds=relationship_source, page=1)\n text_selections_source.append(text_selection_entity_source)\n # build text selection for the NDJson annotations\n update_text_selections(\n annotation=entity_source_ndjson,\n group_id=group[\"id\"], # id representing group of words\n list_tokens=\n relationship_source, # ids representing individual words from the group\n page=1,\n )\n if group[\"content\"] == content_phrases[3]:\n relationship_target = [x[\"id\"] for x in group[\"tokens\"]]\n # build text selections for Python Annotation Types\n text_selection_entity_target = lb_types.DocumentTextSelection(\n group_id=group[\"id\"], tokenIds=relationship_target, page=1)\n text_selections_target.append(text_selection_entity_target)\n # build text selections forthe NDJson annotations\n update_text_selections(\n annotation=entity_target_ndjson,\n group_id=group[\"id\"], # id representing group of words\n list_tokens=\n relationship_target, # ids representing individual words from the group\n page=1,\n )", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Helper method\n", + "def update_text_selections(annotation, group_id, list_tokens, page):\n", + " return annotation.update({\n", + " \"textSelections\": [{\n", + " \"groupId\": group_id,\n", + " \"tokenIds\": list_tokens,\n", + " \"page\": page\n", + " }]\n", + " })\n", + "\n", + "\n", + "# Fetch the content of the text layer\n", + "res = requests.get(text_layer)\n", + "\n", + "# Phrases that we want to annotation obtained from the text layer url\n", + "content_phrases = [\n", + " \"Metal-insulator (MI) transitions have been one of the\",\n", + " \"T. Sasaki, N. Yoneyama, and N. 
Kobayashi\",\n", + " \"Organic charge transfer salts based on the donor\",\n", + " \"the experimental investigations on this issue have not\",\n", + "]\n", + "\n", + "# Parse the text layer\n", + "text_selections = []\n", + "text_selections_ner = []\n", + "text_selections_source = []\n", + "text_selections_target = []\n", + "\n", + "for obj in json.loads(res.text):\n", + " for group in obj[\"groups\"]:\n", + " if group[\"content\"] == content_phrases[0]:\n", + " list_tokens = [x[\"id\"] for x in group[\"tokens\"]]\n", + " # build text selections for Python Annotation Types\n", + " document_text_selection = lb_types.DocumentTextSelection(\n", + " groupId=group[\"id\"], tokenIds=list_tokens, page=1)\n", + " text_selections.append(document_text_selection)\n", + " # build text selection for the NDJson annotations\n", + " update_text_selections(\n", + " annotation=entities_annotations_ndjson,\n", + " group_id=group[\"id\"], # id representing group of words\n", + " list_tokens=\n", + " list_tokens, # ids representing individual words from the group\n", + " page=1,\n", + " )\n", + " if group[\"content\"] == content_phrases[1]:\n", + " list_tokens_2 = [x[\"id\"] for x in group[\"tokens\"]]\n", + " # build text selections for Python Annotation Types\n", + " ner_text_selection = lb_types.DocumentTextSelection(\n", + " groupId=group[\"id\"], tokenIds=list_tokens_2, page=1)\n", + " text_selections_ner.append(ner_text_selection)\n", + " # build text selection for the NDJson annotations\n", + " update_text_selections(\n", + " annotation=ner_with_checklist_subclass_annotation_ndjson,\n", + " group_id=group[\"id\"], # id representing group of words\n", + " list_tokens=\n", + " list_tokens_2, # ids representing individual words from the group\n", + " page=1,\n", + " )\n", + " if group[\"content\"] == content_phrases[2]:\n", + " relationship_source = [x[\"id\"] for x in group[\"tokens\"]]\n", + " # build text selections for Python Annotation Types\n", + " text_selection_entity_source = lb_types.DocumentTextSelection(\n", + " groupId=group[\"id\"], tokenIds=relationship_source, page=1)\n", + " text_selections_source.append(text_selection_entity_source)\n", + " # build text selection for the NDJson annotations\n", + " update_text_selections(\n", + " annotation=entity_source_ndjson,\n", + " group_id=group[\"id\"], # id representing group of words\n", + " list_tokens=\n", + " relationship_source, # ids representing individual words from the group\n", + " page=1,\n", + " )\n", + " if group[\"content\"] == content_phrases[3]:\n", + " relationship_target = [x[\"id\"] for x in group[\"tokens\"]]\n", + " # build text selections for Python Annotation Types\n", + " text_selection_entity_target = lb_types.DocumentTextSelection(\n", + " group_id=group[\"id\"], tokenIds=relationship_target, page=1)\n", + " text_selections_target.append(text_selection_entity_target)\n", + " # build text selections forthe NDJson annotations\n", + " update_text_selections(\n", + " annotation=entity_target_ndjson,\n", + " group_id=group[\"id\"], # id representing group of words\n", + " list_tokens=\n", + " relationship_target, # ids representing individual words from the group\n", + " page=1,\n", + " )" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "Re-write the python annotations to include text selections (only required for python annotation types)" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# re-write the entity annotation with text selections\nentities_annotation_document_entity = 
lb_types.DocumentEntity(\n    name=\"named_entity\", textSelections=text_selections)\nentities_annotation = lb_types.ObjectAnnotation(\n    name=\"named_entity\", value=entities_annotation_document_entity)\n\n# re-write the entity annotation + subclassification with text selections\nclassifications = [\n    lb_types.ClassificationAnnotation(\n        name=\"sub_checklist_question\",\n        value=lb_types.Checklist(answer=[\n            lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\")\n        ]),\n    )\n]\nner_annotation_with_subclass = lb_types.DocumentEntity(\n    name=\"ner_with_checklist_subclass\", textSelections=text_selections_ner)\nner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(\n    name=\"ner_with_checklist_subclass\",\n    value=ner_annotation_with_subclass,\n    classifications=classifications,\n)\n\n# re-write the entity source and target annotations withe text selectios\nentity_source_doc = lb_types.DocumentEntity(\n    name=\"named_entity\", text_selections=text_selections_source)\nentity_source = lb_types.ObjectAnnotation(name=\"named_entity\",\n                                          value=entity_source_doc)\n\nentity_target_doc = lb_types.DocumentEntity(\n    name=\"named_entity\", text_selections=text_selections_target)\nentity_target = lb_types.ObjectAnnotation(name=\"named_entity\",\n                                          value=entity_target_doc)\n\n# re-write the entity relationship with the re-created entities\nentity_relationship = lb_types.RelationshipAnnotation(\n    name=\"relationship\",\n    value=lb_types.Relationship(\n        source=entity_source,\n        target=entity_target,\n        type=lb_types.Relationship.Type.UNIDIRECTIONAL,\n    ),\n)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# re-write the entity annotation with text selections\n", + "entities_annotation_document_entity = lb_types.DocumentEntity(\n", + "    name=\"named_entity\", textSelections=text_selections)\n", + "entities_annotation = lb_types.ObjectAnnotation(\n", + "    name=\"named_entity\", value=entities_annotation_document_entity)\n", + "\n", + "# re-write the entity annotation + subclassification with text selections\n", + "classifications = [\n", + "    lb_types.ClassificationAnnotation(\n", + "        name=\"sub_checklist_question\",\n", + "        value=lb_types.Checklist(answer=[\n", + "            lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\")\n", + "        ]),\n", + "    )\n", + "]\n", + "ner_annotation_with_subclass = lb_types.DocumentEntity(\n", + "    name=\"ner_with_checklist_subclass\", textSelections=text_selections_ner)\n", + "ner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(\n", + "    name=\"ner_with_checklist_subclass\",\n", + "    value=ner_annotation_with_subclass,\n", + "    classifications=classifications,\n", + ")\n", + "\n", + "# re-write the entity source and target annotations with the text selections\n", + "entity_source_doc = lb_types.DocumentEntity(\n", + "    name=\"named_entity\", textSelections=text_selections_source)\n", + "entity_source = lb_types.ObjectAnnotation(name=\"named_entity\",\n", + "                                          value=entity_source_doc)\n", + "\n", + "entity_target_doc = lb_types.DocumentEntity(\n", + "    name=\"named_entity\", textSelections=text_selections_target)\n", + "entity_target = lb_types.ObjectAnnotation(name=\"named_entity\",\n", + "                                          value=entity_target_doc)\n", + "\n", + "# re-write the entity relationship with the re-created entities\n", + "entity_relationship = lb_types.RelationshipAnnotation(\n", + "    name=\"relationship\",\n", + "    value=lb_types.Relationship(\n", + "        source=entity_source,\n", + "        target=entity_target,\n", + 
" type=lb_types.Relationship.Type.UNIDIRECTIONAL,\n", + " ),\n", + ")" + ] }, { - "metadata": {}, - "source": "# Final NDJSON and python annotations\nprint(f\"entities_annotations_ndjson={entities_annotations_ndjson}\")\nprint(f\"entities_annotation={entities_annotation}\")\nprint(\n f\"nested_entities_annotation_ndjson={ner_with_checklist_subclass_annotation_ndjson}\"\n)\nprint(f\"nested_entities_annotation={ner_with_checklist_subclass_annotation}\")\nprint(f\"entity_source_ndjson={entity_source_ndjson}\")\nprint(f\"entity_target_ndjson={entity_target_ndjson}\")\nprint(f\"entity_source={entity_source}\")\nprint(f\"entity_target={entity_target}\")", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Final NDJSON and python annotations\n", + "print(f\"entities_annotations_ndjson={entities_annotations_ndjson}\")\n", + "print(f\"entities_annotation={entities_annotation}\")\n", + "print(\n", + " f\"nested_entities_annotation_ndjson={ner_with_checklist_subclass_annotation_ndjson}\"\n", + ")\n", + "print(f\"nested_entities_annotation={ner_with_checklist_subclass_annotation}\")\n", + "print(f\"entity_source_ndjson={entity_source_ndjson}\")\n", + "print(f\"entity_target_ndjson={entity_target_ndjson}\")\n", + "print(f\"entity_source={entity_source}\")\n", + "print(f\"entity_target={entity_target}\")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### Python annotation\n", "Here we create the complete labels ndjson payload of annotations only using python annotation format. There is one annotation for each reference to an annotation that we created. Note that only a handful of python annotation types are supported for PDF documents." - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "labels = []\n\nlabels.append(\n lb_types.Label(\n data={\"global_key\": global_key},\n annotations=[\n entities_annotation,\n checklist_annotation,\n nested_checklist_annotation,\n text_annotation,\n radio_annotation,\n nested_radio_annotation,\n bbox_annotation,\n bbox_with_radio_subclass_annotation,\n ner_with_checklist_subclass_annotation,\n entity_source,\n entity_target,\n entity_relationship, # Only supported for MAL imports\n bbox_source,\n bbox_target,\n bbox_relationship, # Only supported for MAL imports\n ],\n ))", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "labels = []\n", + "\n", + "labels.append(\n", + " lb_types.Label(\n", + " data={\"global_key\": global_key},\n", + " annotations=[\n", + " entities_annotation,\n", + " checklist_annotation,\n", + " nested_checklist_annotation,\n", + " text_annotation,\n", + " radio_annotation,\n", + " nested_radio_annotation,\n", + " bbox_annotation,\n", + " bbox_with_radio_subclass_annotation,\n", + " ner_with_checklist_subclass_annotation,\n", + " entity_source,\n", + " entity_target,\n", + " entity_relationship, # Only supported for MAL imports\n", + " bbox_source,\n", + " bbox_target,\n", + " bbox_relationship, # Only supported for MAL imports\n", + " ],\n", + " ))" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### NDJson annotations\n", "Here we create the complete labels ndjson payload of annotations only using NDJSON format. There is one annotation for each reference to an annotation that we created above." 
- ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "label_ndjson = []\nfor annot in [\n    entities_annotations_ndjson,\n    checklist_annotation_ndjson,\n    nested_checklist_annotation_ndjson,\n    text_annotation_ndjson,\n    radio_annotation_ndjson,\n    nested_radio_annotation_ndjson,\n    bbox_annotation_ndjson,\n    bbox_with_radio_subclass_annotation_ndjson,\n    ner_with_checklist_subclass_annotation_ndjson,\n    entity_source_ndjson,\n    entity_target_ndjson,\n    ner_relationship_annotation_ndjson, # Only supported for MAL imports\n    bbox_source_ndjson,\n    bbox_target_ndjson,\n    bbox_relationship_annotation_ndjson, # Only supported for MAL imports\n]:\n    annot.update({\n        \"dataRow\": {\n            \"globalKey\": global_key\n        },\n    })\n    label_ndjson.append(annot)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "label_ndjson = []\n", + "for annot in [\n", + "    entities_annotations_ndjson,\n", + "    checklist_annotation_ndjson,\n", + "    nested_checklist_annotation_ndjson,\n", + "    text_annotation_ndjson,\n", + "    radio_annotation_ndjson,\n", + "    nested_radio_annotation_ndjson,\n", + "    bbox_annotation_ndjson,\n", + "    bbox_with_radio_subclass_annotation_ndjson,\n", + "    ner_with_checklist_subclass_annotation_ndjson,\n", + "    entity_source_ndjson,\n", + "    entity_target_ndjson,\n", + "    ner_relationship_annotation_ndjson, # Only supported for MAL imports\n", + "    bbox_source_ndjson,\n", + "    bbox_target_ndjson,\n", + "    bbox_relationship_annotation_ndjson, # Only supported for MAL imports\n", + "]:\n", + "    annot.update({\n", + "        \"dataRow\": {\n", + "            \"globalKey\": global_key\n", + "        },\n", + "    })\n", + "    label_ndjson.append(annot)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Step 6: Import the annotation payload\n", "For the purpose of this tutorial, only import one of the annotation payloads at a time (NDJSON or Python annotation types)."
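+ "\n", + "For example, you can gate which payload the import cells below receive with a simple flag (a sketch; `USE_NDJSON` is a hypothetical helper variable, not part of the SDK):\n", + "\n", + "```python\n", + "USE_NDJSON = False  # True -> import label_ndjson, False -> import the Python annotation labels\n", + "payload = label_ndjson if USE_NDJSON else labels\n", + "```\n", + "\n", + "You would then pass `payload` as the `predictions` argument in Option A."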
- ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "Option A: Upload to a labeling project as pre-labels (MAL)" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "upload_job = lb.MALPredictionImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"pdf_annotation_upload\" + str(uuid.uuid4()),\n predictions=labels,\n)\n\nupload_job.wait_until_done()\n# Errors will appear for annotation uploads that failed.\nprint(\"Errors:\", upload_job.errors)\nprint(\"Status of uploads: \", upload_job.statuses)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "upload_job = lb.MALPredictionImport.create_from_objects(\n", + " client=client,\n", + " project_id=project.uid,\n", + " name=\"pdf_annotation_upload\" + str(uuid.uuid4()),\n", + " predictions=labels,\n", + ")\n", + "\n", + "upload_job.wait_until_done()\n", + "# Errors will appear for annotation uploads that failed.\n", + "print(\"Errors:\", upload_job.errors)\n", + "print(\"Status of uploads: \", upload_job.statuses)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "Option B: Upload to a labeling project using ground truth" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Uncomment this code when excluding relationships from label import\n## Relationships are not currently supported for label import\n\n# upload_job = lb.LabelImport.create_from_objects(\n# client = client,\n# project_id = project.uid,\n# name=\"label_import_job\"+str(uuid.uuid4()),\n# labels=labels) ## Remove unsupported relationships from the labels list\n\n# print(\"Errors:\", upload_job.errors)\n# print(\"Status of uploads: \", upload_job.statuses)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Uncomment this code when excluding relationships from label import\n", + "## Relationships are not currently supported for label import\n", + "\n", + "# upload_job = lb.LabelImport.create_from_objects(\n", + "# client = client,\n", + "# project_id = project.uid,\n", + "# name=\"label_import_job\"+str(uuid.uuid4()),\n", + "# labels=labels) ## Remove unsupported relationships from the labels list\n", + "\n", + "# print(\"Errors:\", upload_job.errors)\n", + "# print(\"Status of uploads: \", upload_job.statuses)" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" } - ] -} \ No newline at end of file + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/annotation_import/text.ipynb b/annotation_import/text.ipynb index d775976..decb3b8 100644 --- a/annotation_import/text.ipynb +++ b/annotation_import/text.ipynb @@ -202,7 +202,7 @@ }, { "metadata": {}, - "source": "# Project defaults to batch mode with benchmark quality settings if this argument is not provided\n# Queue mode will be deprecated once dataset mode is deprecated\n\nproject = client.create_project(name=\"Text Annotation Import Demo\",\n media_type=lb.MediaType.Text)\n\nproject.setup_editor(ontology)", + "source": "# Project defaults to batch mode with benchmark quality settings if this argument is not provided\n# Queue mode will be deprecated once dataset mode is deprecated\n\nproject = client.create_project(name=\"Text Annotation Import Demo\",\n media_type=lb.MediaType.Text)\n\nproject.connect_ontology(ontology)", "cell_type": "code", "outputs": [], "execution_count": null diff --git a/annotation_import/tiled.ipynb 
b/annotation_import/tiled.ipynb index 68110a6..6c59e5d 100644 --- a/annotation_import/tiled.ipynb +++ b/annotation_import/tiled.ipynb @@ -225,7 +225,7 @@ }, { "metadata": {}, - "source": "# Project defaults to batch mode with benchmark quality settings if this argument is not provided\n# Queue mode will be deprecated once dataset mode is deprecated\n\nproject = client.create_project(name=\"Geospatial Project Demo\",\n media_type=lb.MediaType.Geospatial_Tile)\n\nproject.setup_editor(ontology)", + "source": "# Project defaults to batch mode with benchmark quality settings if this argument is not provided\n# Queue mode will be deprecated once dataset mode is deprecated\n\nproject = client.create_project(name=\"Geospatial Project Demo\",\n media_type=lb.MediaType.Geospatial_Tile)\n\nproject.connect_ontology(ontology)", "cell_type": "code", "outputs": [], "execution_count": null diff --git a/annotation_import/video.ipynb b/annotation_import/video.ipynb index ecf61b7..dad0719 100644 --- a/annotation_import/video.ipynb +++ b/annotation_import/video.ipynb @@ -288,7 +288,7 @@ }, { "metadata": {}, - "source": "project = client.create_project(name=\"Video Annotation Import Demo\",\n media_type=lb.MediaType.Video)\n\n## connect ontology to your project\nproject.setup_editor(ontology)", + "source": "project = client.create_project(name=\"Video Annotation Import Demo\",\n media_type=lb.MediaType.Video)\n\n## connect ontology to your project\nproject.connect_ontology(ontology)", "cell_type": "code", "outputs": [], "execution_count": null diff --git a/basics/batches.ipynb b/basics/batches.ipynb index 055cc25..5a35119 100644 --- a/basics/batches.ipynb +++ b/basics/batches.ipynb @@ -241,7 +241,7 @@ }, { "metadata": {}, - "source": "classification_features = [\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"Quality Issues\",\n options=[\n lb.Option(value=\"blurry\", label=\"Blurry\"),\n lb.Option(value=\"distorted\", label=\"Distorted\"),\n ],\n )\n]\n\nontology_builder = lb.OntologyBuilder(tools=[],\n classifications=classification_features)\n\nontology = client.create_ontology(\n \"Ontology from new features\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Image,\n)\n\nproject.setup_editor(ontology)", + "source": "classification_features = [\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"Quality Issues\",\n options=[\n lb.Option(value=\"blurry\", label=\"Blurry\"),\n lb.Option(value=\"distorted\", label=\"Distorted\"),\n ],\n )\n]\n\nontology_builder = lb.OntologyBuilder(tools=[],\n classifications=classification_features)\n\nontology = client.create_ontology(\n \"Ontology from new features\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Image,\n)\n\nproject.connect_ontology(ontology)", "cell_type": "code", "outputs": [], "execution_count": null diff --git a/basics/data_rows.ipynb b/basics/data_rows.ipynb index 066b19f..4ffa472 100644 --- a/basics/data_rows.ipynb +++ b/basics/data_rows.ipynb @@ -1,18 +1,16 @@ { - "nbformat": 4, - "nbformat_minor": 5, - "metadata": {}, "cells": [ { + "cell_type": "markdown", "metadata": {}, "source": [ - "", - " ", + "\n", + " \n", "\n" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -24,17 +22,17 @@ "\n", "" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "# Data rows" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "* Data rows are the assets that 
are being labeled. We currently support the following asset types:\n", @@ -49,267 +47,520 @@ " * Conversational\n", "* A data row cannot exist without belonging to a dataset.\n", "* Data rows are added to labeling tasks by first attaching them to datasets and then creating batches in projects." - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "%pip install labelbox -q", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "%pip install labelbox -q" + ] }, { - "metadata": {}, - "source": "import labelbox as lb\nimport uuid\nimport json", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "import labelbox as lb\n", + "import uuid\n", + "import json" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "# API Key and Client\n", "Provide a valid API key below in order to properly connect to the Labelbox Client." - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Add your api key\nAPI_KEY = \"\"\nclient = lb.Client(api_key=API_KEY)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Add your api key\n", + "API_KEY = \"\"\n", + "client = lb.Client(api_key=API_KEY)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Get data rows from projects" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Pick a project with batches that have data rows with global keys\nPROJECT_ID = \"\"\nproject = client.get_project(PROJECT_ID)\nbatches = list(project.batches())\nprint(batches)\n# This is the same as\n# -> dataset = client.get_dataset(dataset_id)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Pick a project with batches that have data rows with global keys\n", + "PROJECT_ID = \"\"\n", + "project = client.get_project(PROJECT_ID)\n", + "batches = list(project.batches())\n", + "print(batches)\n", + "# This is the same as\n", + "# -> dataset = client.get_dataset(dataset_id)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Fetch data rows from project's batches\n", "\n", "Batch IDs will need to be passed to your project export as an export parameter. Before you can export from a project, you will need an ontology attached."
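+ "\n", + "If the project does not have an ontology attached yet, connect one first (a sketch, assuming an existing ontology; `ONTOLOGY_ID` is a placeholder):\n", + "\n", + "```python\n", + "ONTOLOGY_ID = \"\"\n", + "ontology = client.get_ontology(ONTOLOGY_ID)\n", + "project.connect_ontology(ontology)\n", + "```"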
- ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "client.enable_experimental = True\n\nbatch_ids = [batch.uid for batch in batches]\n\nexport_params = {\n \"attachments\": True,\n \"metadata_fields\": True,\n \"data_row_details\": True,\n \"project_details\": True,\n \"performance_details\": True,\n \"batch_ids\":\n batch_ids, # Include batch ids if you only want to export specific batches, otherwise,\n # you can export all the data without using this parameter\n}\nfilters = {}\n\n# A task is returned, this provides additional information about the status of your task, such as\n# any errors encountered\nexport_task = project.export(params=export_params, filters=filters)\nexport_task.wait_till_done()", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "batch_ids = [batch.uid for batch in batches]\n", + "\n", + "export_params = {\n", + " \"attachments\": True,\n", + " \"metadata_fields\": True,\n", + " \"data_row_details\": True,\n", + " \"project_details\": True,\n", + " \"performance_details\": True,\n", + " \"batch_ids\":\n", + " batch_ids, # Include batch ids if you only want to export specific batches, otherwise,\n", + " # you can export all the data without using this parameter\n", + "}\n", + "filters = {}\n", + "\n", + "# A task is returned, this provides additional information about the status of your task, such as\n", + "# any errors encountered\n", + "export_task = project.export(params=export_params, filters=filters)\n", + "export_task.wait_till_done()" + ] }, { - "metadata": {}, - "source": "data_rows = []\n\n\ndef json_stream_handler(output: lb.BufferedJsonConverterOutput):\n data_row = output.json\n data_rows.append(data_row)\n\n\nif export_task.has_errors():\n export_task.get_buffered_stream(stream_type=lb.StreamType.ERRORS).start(\n stream_handler=lambda error: print(error))\n\nif export_task.has_result():\n export_json = export_task.get_buffered_stream(\n stream_type=lb.StreamType.RESULT).start(\n stream_handler=json_stream_handler)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "data_rows = []\n", + "\n", + "\n", + "def json_stream_handler(output: lb.BufferedJsonConverterOutput):\n", + " data_row = output.json\n", + " data_rows.append(data_row)\n", + "\n", + "\n", + "if export_task.has_errors():\n", + " export_task.get_buffered_stream(stream_type=lb.StreamType.ERRORS).start(\n", + " stream_handler=lambda error: print(error))\n", + "\n", + "if export_task.has_result():\n", + " export_json = export_task.get_buffered_stream(\n", + " stream_type=lb.StreamType.RESULT).start(\n", + " stream_handler=json_stream_handler)" + ] }, { - "metadata": {}, - "source": "# Get single data row\ndata_row = data_rows[0]\nprint(data_row)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Get single data row\n", + "data_row = data_rows[0]\n", + "print(data_row)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Get labels from the data row" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "print(\"Associated label(s)\", data_row[\"projects\"][project.uid][\"labels\"])\nprint(\"Global key\", data_row[\"data_row\"][\"global_key\"])", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "print(\"Associated label(s)\", 
data_row[\"projects\"][project.uid][\"labels\"])\n", + "print(\"Global key\", data_row[\"data_row\"][\"global_key\"])" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Get data row ids by using global keys" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "global_key = \"\"\ntask = client.get_data_row_ids_for_global_keys([global_key])\nprint(f\"Data row id: {task['results']}\")", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "global_key = \"\"\n", + "task = client.get_data_row_ids_for_global_keys([global_key])\n", + "print(f\"Data row id: {task['results']}\")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Create\n", "We recommend the following methods to create data rows : `dataset.upsert_data_rows()`, and `dataset.create_data_rows()`, " - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Create data rows via `dataset.upsert_data_rows()`" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Create a dataset\ndataset = client.create_dataset(name=\"data_rows_demo_dataset_6\")\n# You can also upload metadata along with your data row\nmdo = client.get_data_row_metadata_ontology()", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Create a dataset\n", + "dataset = client.create_dataset(name=\"data_rows_demo_dataset_6\")\n", + "# You can also upload metadata along with your data row\n", + "mdo = client.get_data_row_metadata_ontology()" + ] }, { - "metadata": {}, - "source": "uploads = []\n# Generate data rows\nfor i in range(1, 8):\n uploads.append({\n \"row_data\":\n f\"https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_000{i}.jpeg\",\n \"global_key\":\n \"TEST-ID-%id\" % uuid.uuid1(),\n ## add metadata (optional)\n \"metadata_fields\": [\n lb.DataRowMetadataField(\n schema_id=mdo.reserved_by_name[\"tag\"].\n uid, # specify the schema id\n value=\"tag_string\", # typed inputs\n ),\n ],\n \"attachments\": [\n {\n \"type\":\n \"IMAGE_OVERLAY\",\n \"value\":\n \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/disease_attachment.jpeg\",\n },\n {\n \"type\": \"RAW_TEXT\",\n \"value\": \"IOWA, Zone 2232, June 2022 [Text string]\",\n },\n {\n \"type\":\n \"TEXT_URL\",\n \"value\":\n \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/text_attachment.txt\",\n },\n {\n \"type\":\n \"IMAGE\",\n \"value\":\n \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/disease_attachment.jpeg\",\n },\n {\n \"type\":\n \"VIDEO\",\n \"value\":\n \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/drone_video.mp4\",\n },\n {\n \"type\":\n \"HTML\",\n \"value\":\n \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/windy.html\",\n },\n {\n \"type\":\n \"PDF_URL\",\n \"value\":\n \"https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf\",\n },\n ],\n })\n\ntask1 = dataset.upsert_data_rows(uploads)\ntask1.wait_till_done()\nprint(\"ERRORS: \", task1.errors)\nprint(\"RESULTS:\", task1.result)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "uploads = []\n", + "# Generate data rows\n", + "for i in range(1, 8):\n", + " uploads.append({\n", + " \"row_data\":\n", + " 
f\"https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_000{i}.jpeg\",\n", + " \"global_key\":\n", + " \"TEST-ID-%id\" % uuid.uuid1(),\n", + " ## add metadata (optional)\n", + " \"metadata_fields\": [\n", + " lb.DataRowMetadataField(\n", + " schema_id=mdo.reserved_by_name[\"tag\"].\n", + " uid, # specify the schema id\n", + " value=\"tag_string\", # typed inputs\n", + " ),\n", + " ],\n", + " \"attachments\": [\n", + " {\n", + " \"type\":\n", + " \"IMAGE_OVERLAY\",\n", + " \"value\":\n", + " \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/disease_attachment.jpeg\",\n", + " },\n", + " {\n", + " \"type\": \"RAW_TEXT\",\n", + " \"value\": \"IOWA, Zone 2232, June 2022 [Text string]\",\n", + " },\n", + " {\n", + " \"type\":\n", + " \"TEXT_URL\",\n", + " \"value\":\n", + " \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/text_attachment.txt\",\n", + " },\n", + " {\n", + " \"type\":\n", + " \"IMAGE\",\n", + " \"value\":\n", + " \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/disease_attachment.jpeg\",\n", + " },\n", + " {\n", + " \"type\":\n", + " \"VIDEO\",\n", + " \"value\":\n", + " \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/drone_video.mp4\",\n", + " },\n", + " {\n", + " \"type\":\n", + " \"HTML\",\n", + " \"value\":\n", + " \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/windy.html\",\n", + " },\n", + " {\n", + " \"type\":\n", + " \"PDF_URL\",\n", + " \"value\":\n", + " \"https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf\",\n", + " },\n", + " ],\n", + " })\n", + "\n", + "task1 = dataset.upsert_data_rows(uploads)\n", + "task1.wait_till_done()\n", + "print(\"ERRORS: \", task1.errors)\n", + "print(\"RESULTS:\", task1.result)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "Create data rows from data in your local path " - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "from PIL import Image\n\n# Create dummy empty jpeg file\nwidth = 400\nheight = 300\ncolor = (255, 255, 255) # White color\nimage = Image.new(\"RGB\", (width, height), color)\n\n# Save the image as a JPEG file\nimage.save(\"dummy.jpg\")\n\nlocal_data_path = \"dummy.jpg\"\n\ndata = {\"row_data\": local_data_path, \"global_key\": str(uuid.uuid4())}\n\ntask3 = dataset.upsert_data_rows([data])\ntask3.wait_till_done()\nprint(\"ERRORS: \", task3.errors)\nprint(\"RESULTS:\", task3.result)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "from PIL import Image\n", + "\n", + "# Create dummy empty jpeg file\n", + "width = 400\n", + "height = 300\n", + "color = (255, 255, 255) # White color\n", + "image = Image.new(\"RGB\", (width, height), color)\n", + "\n", + "# Save the image as a JPEG file\n", + "image.save(\"dummy.jpg\")\n", + "\n", + "local_data_path = \"dummy.jpg\"\n", + "\n", + "data = {\"row_data\": local_data_path, \"global_key\": str(uuid.uuid4())}\n", + "\n", + "task3 = dataset.upsert_data_rows([data])\n", + "task3.wait_till_done()\n", + "print(\"ERRORS: \", task3.errors)\n", + "print(\"RESULTS:\", task3.result)" + ] }, { - "metadata": {}, - "source": "# You can mix local files with urls when creating data rows\ntask4 = dataset.upsert_data_rows([\n {\n \"row_data\":\n \"https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_0009.jpeg\",\n \"global_key\":\n str(uuid.uuid4()),\n },\n {\n \"row_data\": 
local_data_path,\n        \"global_key\": str(uuid.uuid4())\n    },\n])\ntask4.wait_till_done()\nprint(\"ERRORS: \", task4.errors)\nprint(\"RESULTS:\", task4.result)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null - }, - { + "source": [ + "# You can mix local files with URLs when creating data rows\n", + "task4 = dataset.upsert_data_rows([\n", + "    {\n", + "        \"row_data\":\n", + "            \"https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_0009.jpeg\",\n", + "        \"global_key\":\n", + "            str(uuid.uuid4()),\n", + "    },\n", + "    {\n", + "        \"row_data\": local_data_path,\n", + "        \"global_key\": str(uuid.uuid4())\n", + "    },\n", + "])\n", + "task4.wait_till_done()\n", + "print(\"ERRORS: \", task4.errors)\n", + "print(\"RESULTS:\", task4.result)" + ] + }, + { + "cell_type": "markdown", "metadata": {}, "source": [ "### Create data rows via `dataset.create_data_rows()`\n" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "dataset_2 = client.create_dataset(name=\"data_rows_demo_dataset_3\")", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "dataset_2 = client.create_dataset(name=\"data_rows_demo_dataset_3\")" + ] }, { - "metadata": {}, - "source": "uploads = []\n# Generate data rows\nfor i in range(1, 9):\n    uploads.append({\n        \"row_data\":\n            f\"https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_000{i}.jpeg\",\n        \"global_key\":\n            \"TEST-ID-%id\" % uuid.uuid1(),\n        ## add metadata (optional)\n        \"metadata_fields\": [\n            lb.DataRowMetadataField(\n                schema_id=mdo.reserved_by_name[\"tag\"].\n                uid, # specify the schema id\n                value=\"tag_string\", # typed inputs\n            ),\n        ],\n    })\n\ntask1_2 = dataset_2.create_data_rows(uploads)\ntask1_2.wait_till_done()\nprint(\"ERRORS: \", task1_2.errors)\nprint(\"RESULTS:\", task1_2.result)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "uploads = []\n", + "# Generate data rows\n", + "for i in range(1, 9):\n", + "    uploads.append({\n", + "        \"row_data\":\n", + "            f\"https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_000{i}.jpeg\",\n", + "        \"global_key\":\n", + "            \"TEST-ID-%id\" % uuid.uuid1(),\n", + "        ## add metadata (optional)\n", + "        \"metadata_fields\": [\n", + "            lb.DataRowMetadataField(\n", + "                schema_id=mdo.reserved_by_name[\"tag\"].\n", + "                uid, # specify the schema id\n", + "                value=\"tag_string\", # typed inputs\n", + "            ),\n", + "        ],\n", + "    })\n", + "\n", + "task1_2 = dataset_2.create_data_rows(uploads)\n", + "task1_2.wait_till_done()\n", + "print(\"ERRORS: \", task1_2.errors)\n", + "print(\"RESULTS:\", task1_2.result)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Update\n", "`dataset.upsert_data_rows()` can also be used to update data rows.\n", "\n", "To update data rows using this method, you need to pass a `key`, which can reference either a global key or a data row ID. 
Additionally, include any fields that you wish to update along with their new values.\n" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Fetch a data row from the first dataset example\nts = dataset.export()\nts.wait_till_done()\nDATA_ROW_ID = [output.json for output in ts.get_buffered_stream()\n              ][0][\"data_row\"][\"id\"]\nGLOBAL_KEY = [output.json for output in ts.get_buffered_stream()\n             ][0][\"data_row\"][\"global_key\"]\n\nprint(f\"Pick either a data row id : {DATA_ROW_ID} or global key: {GLOBAL_KEY}\")", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Fetch a data row from the first dataset example\n", + "ts = dataset.export()\n", + "ts.wait_till_done()\n", + "DATA_ROW_ID = [output.json for output in ts.get_buffered_stream()\n", + "              ][0][\"data_row\"][\"id\"]\n", + "GLOBAL_KEY = [output.json for output in ts.get_buffered_stream()\n", + "             ][0][\"data_row\"][\"global_key\"]\n", + "\n", + "print(f\"Pick either a data row id: {DATA_ROW_ID} or global key: {GLOBAL_KEY}\")" + ] }, { - "metadata": {}, - "source": "# Update the global key assodicated with the DATAROW_ID or GLOBAL_KEY, and include a additional metadata\ndata = {\n    \"key\":\n        lb.UniqueId(DATA_ROW_ID),\n    \"global_key\":\n        \"NEW-ID-%id\" % uuid.uuid1(),\n    \"metadata_fields\": [\n        # New metadata\n        lb.DataRowMetadataField(\n            schema_id=mdo.reserved_by_name[\"captureDateTime\"].uid,\n            value=\"2000-01-01 00:00:00\",\n        ),\n        # Include original metadata otherwise it will be removed\n        lb.DataRowMetadataField(\n            schema_id=mdo.reserved_by_name[\"tag\"].uid,\n            value=\"tag_string\",\n        ),\n    ],\n}\n\ntask5 = dataset_2.upsert_data_rows([data])\ntask5.wait_till_done()\nprint(\"ERRORS: \", task5.errors)\nprint(\"RESULTS:\", task5.result)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Update the global key associated with the DATA_ROW_ID or GLOBAL_KEY, and include additional metadata\n", + "data = {\n", + "    \"key\":\n", + "        lb.UniqueId(DATA_ROW_ID),\n", + "    \"global_key\":\n", + "        \"NEW-ID-%id\" % uuid.uuid1(),\n", + "    \"metadata_fields\": [\n", + "        # New metadata\n", + "        lb.DataRowMetadataField(\n", + "            schema_id=mdo.reserved_by_name[\"captureDateTime\"].uid,\n", + "            value=\"2000-01-01 00:00:00\",\n", + "        ),\n", + "        # Include original metadata otherwise it will be removed\n", + "        lb.DataRowMetadataField(\n", + "            schema_id=mdo.reserved_by_name[\"tag\"].uid,\n", + "            value=\"tag_string\",\n", + "        ),\n", + "    ],\n", + "}\n", + "\n", + "task5 = dataset_2.upsert_data_rows([data])\n", + "task5.wait_till_done()\n", + "print(\"ERRORS: \", task5.errors)\n", + "print(\"RESULTS:\", task5.result)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Create a single attachment on an existing data row" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# You can only create one attachment at the time.\nDATA_ROW_ID = \"\"\ndata_row = client.get_data_row(DATA_ROW_ID)\nattachment = data_row.create_attachment(\n    attachment_type=\"RAW_TEXT\", attachment_value=\"LABELERS WILL SEE THIS\")", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# You can only create one attachment at a time.\n", + "DATA_ROW_ID = \"\"\n", + "data_row = client.get_data_row(DATA_ROW_ID)\n", + "attachment = data_row.create_attachment(\n", + "    attachment_type=\"RAW_TEXT\", attachment_value=\"LABELERS 
WILL SEE THIS\")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "Update a recently created attachment " - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "attachment.update(type=\"RAW_TEXT\", value=\"NEW RAW TEXT\")", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "attachment.update(type=\"RAW_TEXT\", value=\"NEW RAW TEXT\")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Delete" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "* Delete a single data row" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "DATAROW_ID_TO_DELETE = \"\"\ndata_row = client.get_data_row(DATAROW_ID_TO_DELETE)\ndata_row.delete()", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "DATAROW_ID_TO_DELETE = \"\"\n", + "data_row = client.get_data_row(DATAROW_ID_TO_DELETE)\n", + "data_row.delete()" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "* Bulk delete data row objects" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Bulk delete a list of data_rows ( limit: 4K data rows per call)\nlb.DataRow.bulk_delete(list(dataset.data_rows()))", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Bulk delete a list of data_rows ( limit: 4K data rows per call)\n", + "lb.DataRow.bulk_delete(list(dataset.data_rows()))" + ] } - ] -} \ No newline at end of file + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/basics/projects.ipynb b/basics/projects.ipynb index 1cc59af..d0985f5 100644 --- a/basics/projects.ipynb +++ b/basics/projects.ipynb @@ -206,7 +206,7 @@ }, { "metadata": {}, - "source": "project.setup_editor(ontology)", + "source": "project.connect_ontology(ontology)", "cell_type": "code", "outputs": [], "execution_count": null @@ -313,7 +313,7 @@ }, { "metadata": {}, - "source": "# Create an empty destination project\ndestination_project = client.create_project(\n name=\"destination-test-project\",\n description=\"a description\",\n media_type=lb.MediaType.Image,\n)\n\n# Create ontology and attach to destination project\ndestination_ontology_builder = lb.OntologyBuilder(\n classifications=[ # List of Classification objects\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"destination_radio_question\",\n options=[\n lb.Option(value=\"destination_first_radio_answer\"),\n lb.Option(value=\"destination_second_radio_answer\"),\n ],\n ),\n ])\n\ndestination_ontology = client.create_ontology(\"dest-test-ontology\",\n ontology_builder.asdict())\n\ndestination_project.setup_editor(destination_ontology)", + "source": "# Create an empty destination project\ndestination_project = client.create_project(\n name=\"destination-test-project\",\n description=\"a description\",\n media_type=lb.MediaType.Image,\n)\n\n# Create ontology and attach to destination project\ndestination_ontology_builder = lb.OntologyBuilder(\n classifications=[ # List of Classification objects\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"destination_radio_question\",\n options=[\n lb.Option(value=\"destination_first_radio_answer\"),\n lb.Option(value=\"destination_second_radio_answer\"),\n ],\n ),\n ])\n\ndestination_ontology = 
client.create_ontology(\"dest-test-ontology\",\n ontology_builder.asdict())\n\ndestination_project.connect_ontology(destination_ontology)", "cell_type": "code", "outputs": [], "execution_count": null diff --git a/basics/quick_start.ipynb b/basics/quick_start.ipynb index 9b5a0b9..4249bc2 100644 --- a/basics/quick_start.ipynb +++ b/basics/quick_start.ipynb @@ -134,7 +134,7 @@ }, { "metadata": {}, - "source": "# Create a new project\nproject = client.create_project(\n name=\"Quick Start Example Project\",\n media_type=lb.MediaType.Image,\n)\n\n# Attach created ontology\nproject.setup_editor(ontology)", + "source": "# Create a new project\nproject = client.create_project(\n name=\"Quick Start Example Project\",\n media_type=lb.MediaType.Image,\n)\n\n# Attach created ontology\nproject.connect_ontology(ontology)", "cell_type": "code", "outputs": [], "execution_count": null diff --git a/basics/user_management.ipynb b/basics/user_management.ipynb index 11574f9..4bb3878 100644 --- a/basics/user_management.ipynb +++ b/basics/user_management.ipynb @@ -1,18 +1,16 @@ { - "nbformat": 4, - "nbformat_minor": 5, - "metadata": {}, "cells": [ { + "cell_type": "markdown", "metadata": {}, "source": [ - "", - " ", + "\n", + " \n", "\n" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -24,10 +22,10 @@ "\n", "" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "# User Management\n", @@ -38,209 +36,263 @@ " * assign users to projects\n", " * set / update / revoke project role\n", " * delete users from org" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "%pip install \"labelbox[data]\"", "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { + "execution_count": null, "metadata": {}, - "source": "import labelbox as lb\nimport os", - "cell_type": "code", "outputs": [], - "execution_count": null + "source": [ + "%pip install \"labelbox[data]\"" + ] }, { + "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ - "* You have to specifically enable experimental features to use this functionality. Notice the \n", - "`enable_experimental = True`\n", - " * enables users to send invites and checking the number of seats available via the sdk" - ], - "cell_type": "markdown" + "import labelbox as lb\n", + "import os" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "# API Key and Client\n", "Provide a valid api key below in order to properly connect to the Labelbox Client." - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Add your api key\nAPI_KEY = None\nclient = lb.Client(api_key=API_KEY, enable_experimental=True)\norganization = client.get_organization()", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Add your api key\n", + "API_KEY = None\n", + "client = lb.Client(api_key=API_KEY)\n", + "organization = client.get_organization()" + ] }, { - "metadata": {}, - "source": "# Please provide a dummy email here:\n# Preferrably one you can access. 
If you have a google account you can do email+1@.com\nDUMMY_EMAIL = \"SET THIS\"\n# This should be set to an account that you wan't to change the permissions for.\n# You could invite a new user, accept the invite and use that account if you don't want to effect any active users\nDUMMY_USER_ACCOUNT_ID = \"ckneh4n8c9qvq0706uwwg5i16\"", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Please provide a dummy email here:\n", + "# Preferably one you can access. If you have a Google account you can do email+1@.com\n", + "DUMMY_EMAIL = \"SET THIS\"\n", + "# This should be set to an account that you want to change the permissions for.\n", + "# You could invite a new user, accept the invite and use that account if you don't want to affect any active users\n", + "DUMMY_USER_ACCOUNT_ID = \"ckneh4n8c9qvq0706uwwg5i16\"" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Roles\n", "* When inviting a new user to an organization, there are various roles to select from.\n", "* All available roles to your org can be accessed via `client.get_roles()`" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "roles = client.get_roles()\nfor name, role in roles.items():\n    print(role.name, \":\", role.uid)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "roles = client.get_roles()\n", + "for name, role in roles.items():\n", + "    print(role.name, \":\", role.uid)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "* Above we printed out all of the roles available to the current org.\n", "* Notice the `NONE`. That is for project level roles" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Create\n", "* Users are created by sending an invite\n", "* An email will be sent to them and they will be asked to join your organization" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### Organization Level Permissions\n", "* Invite a new labeler with labeling permissions on all projects" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# First make sure that you have enough seats:\norganization.invite_limit()", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# First make sure that you have enough seats:\n", + "organization.invite_limit()" + ] }, { - "metadata": {}, - "source": "invite = organization.invite_user(DUMMY_EMAIL, roles[\"LABELER\"])", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "invite = organization.invite_user(DUMMY_EMAIL, roles[\"LABELER\"])" + ] }, { - "metadata": {}, - "source": "print(invite.created_at)\nprint(invite.organization_role_name)\nprint(invite.email)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "print(invite.created_at)\n", + "print(invite.organization_role_name)\n", + "print(invite.email)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### Project Level Permissions\n", "* Invite a new labeler with labeling permissions specific to a set of projects\n", "* Here we set organization level permissions to Roles.NONE to indicate that the user only has project level permissions" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - 
"source": "project = client.create_project(name=\"test_user_management\",\n media_type=lb.MediaType.Image)\nproject_role = lb.ProjectRole(project=project, role=roles[\"REVIEWER\"])\ninvite = organization.invite_user(DUMMY_EMAIL,\n roles[\"NONE\"],\n project_roles=[project_role])", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "project = client.create_project(name=\"test_user_management\",\n", + " media_type=lb.MediaType.Image)\n", + "project_role = lb.ProjectRole(project=project, role=roles[\"REVIEWER\"])\n", + "invite = organization.invite_user(DUMMY_EMAIL,\n", + " roles[\"NONE\"],\n", + " project_roles=[project_role])" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Read\n", "* Outstanding invites cannot be queried for at this time. This information can be found in the members tab of the web app.\n", "* You are able to query for members once they have joined." - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "users = list(organization.users())\nprint(users[0])", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "users = list(organization.users())\n", + "print(users[0])" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Update\n", "* There is no update on invites. Instead you must delete and resend them\n", "* You can update User roles" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "user = client._get_single(lb.User, DUMMY_USER_ACCOUNT_ID)\n\n# Give the user organization level permissions\nuser.update_org_role(roles[\"LABELER\"])\nprint(user.org_role())\n# Restore project level permissions\nuser.update_org_role(roles[\"NONE\"])\nprint(user.org_role())\n# Make the user a labeler for the current project\nuser.upsert_project_role(project, roles[\"LABELER\"])\nprint(user.org_role())", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "user = client._get_single(lb.User, DUMMY_USER_ACCOUNT_ID)\n", + "\n", + "# Give the user organization level permissions\n", + "user.update_org_role(roles[\"LABELER\"])\n", + "print(user.org_role())\n", + "# Restore project level permissions\n", + "user.update_org_role(roles[\"NONE\"])\n", + "print(user.org_role())\n", + "# Make the user a labeler for the current project\n", + "user.upsert_project_role(project, roles[\"LABELER\"])\n", + "print(user.org_role())" + ] }, { - "metadata": {}, - "source": "# Remove the user from a project (Same as setting the project role to `roles.NONE`)\nuser.remove_from_project(project)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Remove the user from a project (Same as setting the project role to `roles.NONE`)\n", + "user.remove_from_project(project)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Delete" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "* Invites can only be deleted from the ui at this time. \n", "* Deleting invites can be done in the members tab of the web app." 
- ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "* Delete the User\n", "* Make sure you want to remove the user from the org:\n", "* `>>> organization.remove_user(user)`" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Cleanup\n", "* We created an extra project. Let's delete it" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "project.delete()", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "project.delete()" + ] } - ] -} \ No newline at end of file + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/exports/export_data.ipynb b/exports/export_data.ipynb index a16df5b..3282e89 100644 --- a/exports/export_data.ipynb +++ b/exports/export_data.ipynb @@ -1,18 +1,16 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": {}, "cells": [ { + "cell_type": "markdown", "metadata": {}, "source": [ - "", - " ", + "\n", + " \n", "\n" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -24,49 +22,60 @@ "\n", "" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "# Export data\n", "How to export data for projects, datasets, slices, data rows and models, with examples for each type of v2 export along with details on optional parameters and filters.\n", "\n", "***Beginning with SDK version 3.68, the `export_v2()` method has been enhanced to incorporate streamable functionality.***" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "%pip install -q \"labelbox[data]\"\n%pip install -q urllib3", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "%pip install -q \"labelbox[data]\"\n", + "%pip install -q urllib3" + ] }, { - "metadata": {}, - "source": "import labelbox as lb\nimport urllib.request\nfrom PIL import Image\nimport time", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "import labelbox as lb\n", + "import urllib.request\n", + "from PIL import Image\n", + "import time" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "# API Key and Client\n", "See the developer guide for [creating an API key](https://docs.labelbox.com/reference/create-api-key)." 
- ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "API_KEY = \"\"\nclient = lb.Client(api_key=API_KEY)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "API_KEY = \"\"\n", + "client = lb.Client(api_key=API_KEY)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Export data rows from a project\n", @@ -96,7 +105,7 @@ "You can set the range for `last_activity_at` and `label_created_at` in the following formats: \n", "- `YYYY-MM-DD`\n", "- `YYYY-MM-DD hh:mm:ss`\n", - "- `YYYY-MM-DDThh:mm:ss\u00b1hhmm` (ISO 8601)\n", + "- `YYYY-MM-DDThh:mm:ss±hhmm` (ISO 8601)\n", "\n", "The ISO 8061 format allows you to specify the timezone, while the other two formats assume timezone from the user's workspace settings.\n", "\n", @@ -112,61 +121,164 @@ "- `InReview`\n", "- `InRework`\n", "- `Done`\n" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Insert the project ID of the project from which you wish to export data rows.\nPROJECT_ID = \"\"\nproject = client.get_project(PROJECT_ID)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Insert the project ID of the project from which you wish to export data rows.\n", + "PROJECT_ID = \"\"\n", + "project = client.get_project(PROJECT_ID)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### Export V2 Method\n" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Set the export params to include/exclude certain fields.\nexport_params = {\n \"attachments\": True,\n \"metadata_fields\": True,\n \"data_row_details\": True,\n \"project_details\": True,\n \"label_details\": True,\n \"performance_details\": True,\n \"interpolated_frames\": True,\n \"embeddings\": True,\n}\n\n# Note: Filters follow AND logic, so typically using one filter is sufficient.\nfilters = {\n \"last_activity_at\": [\"2000-01-01 00:00:00\", \"2050-01-01 00:00:00\"],\n \"label_created_at\": [\"2000-01-01 00:00:00\", \"2050-01-01 00:00:00\"],\n # \"global_keys\": [\"\", \"\"],\n # \"data_row_ids\": [\"\", \"\"],\n # \"batch_ids\": [\"\", \"\"],\n # \"workflow_status\": \"\"\n}\n\nexport_task = project.export_v2(params=export_params, filters=filters)\nexport_task.wait_till_done()\n\nif export_task.errors:\n print(export_task.errors)\n\nexport_json = export_task.result\nprint(\"results: \", export_json)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Set the export params to include/exclude certain fields.\n", + "export_params = {\n", + " \"attachments\": True,\n", + " \"metadata_fields\": True,\n", + " \"data_row_details\": True,\n", + " \"project_details\": True,\n", + " \"label_details\": True,\n", + " \"performance_details\": True,\n", + " \"interpolated_frames\": True,\n", + " \"embeddings\": True,\n", + "}\n", + "\n", + "# Note: Filters follow AND logic, so typically using one filter is sufficient.\n", + "filters = {\n", + " \"last_activity_at\": [\"2000-01-01 00:00:00\", \"2050-01-01 00:00:00\"],\n", + " \"label_created_at\": [\"2000-01-01 00:00:00\", \"2050-01-01 00:00:00\"],\n", + " # \"global_keys\": [\"\", \"\"],\n", + " # \"data_row_ids\": [\"\", \"\"],\n", + " # \"batch_ids\": [\"\", \"\"],\n", + " # \"workflow_status\": \"\"\n", + "}\n", + "\n", + "export_task = project.export_v2(params=export_params, filters=filters)\n", + "export_task.wait_till_done()\n", + 
"\n", + "if export_task.errors:\n", + " print(export_task.errors)\n", + "\n", + "export_json = export_task.result\n", + "print(\"results: \", export_json)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### Stream Task Export Method\n", "The return type of this method is an ExportTask, instead of a Task. This is just a wrapper around Task, and most of its features are also present in ExportTask.\n", "This allows streaming of task results and errors." - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Set the export params to include/exclude certain fields.\nexport_params = {\n \"attachments\": True,\n \"metadata_fields\": True,\n \"data_row_details\": True,\n \"project_details\": True,\n \"label_details\": True,\n \"performance_details\": True,\n \"interpolated_frames\": True,\n \"embeddings\": True,\n}\n\n# Note: Filters follow AND logic, so typically using one filter is sufficient.\nfilters = {\n \"last_activity_at\": [\"2000-01-01 00:00:00\", \"2050-01-01 00:00:00\"],\n \"label_created_at\": [\"2000-01-01 00:00:00\", \"2050-01-01 00:00:00\"],\n # \"global_keys\": [\"\", \"\"],\n # \"data_row_ids\": [\"\", \"\"],\n # \"batch_ids\": [\"\", \"\"],\n # \"workflow_status\": \"\"\n}\n\nclient.enable_experimental = True\n\nexport_task = project.export(params=export_params, filters=filters)\nexport_task.wait_till_done()", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Set the export params to include/exclude certain fields.\n", + "export_params = {\n", + " \"attachments\": True,\n", + " \"metadata_fields\": True,\n", + " \"data_row_details\": True,\n", + " \"project_details\": True,\n", + " \"label_details\": True,\n", + " \"performance_details\": True,\n", + " \"interpolated_frames\": True,\n", + " \"embeddings\": True,\n", + "}\n", + "\n", + "# Note: Filters follow AND logic, so typically using one filter is sufficient.\n", + "filters = {\n", + " \"last_activity_at\": [\"2000-01-01 00:00:00\", \"2050-01-01 00:00:00\"],\n", + " \"label_created_at\": [\"2000-01-01 00:00:00\", \"2050-01-01 00:00:00\"],\n", + " # \"global_keys\": [\"\", \"\"],\n", + " # \"data_row_ids\": [\"\", \"\"],\n", + " # \"batch_ids\": [\"\", \"\"],\n", + " # \"workflow_status\": \"\"\n", + "}\n", + "\n", + "export_task = project.export(params=export_params, filters=filters)\n", + "export_task.wait_till_done()" + ] }, { - "metadata": {}, - "source": "# Provide results with JSON converter\n# Returns streamed JSON output strings from export task results/errors, one by one\n\n\n# Callback used for JSON Converter\ndef json_stream_handler(output: lb.BufferedJsonConverterOutput):\n print(output.json)\n\n\nif export_task.has_errors():\n export_task.get_buffered_stream(stream_type=lb.StreamType.ERRORS).start(\n stream_handler=lambda error: print(error))\n\nif export_task.has_result():\n export_json = export_task.get_buffered_stream(\n stream_type=lb.StreamType.RESULT).start(\n stream_handler=json_stream_handler)\n\nprint(\n \"file size: \",\n export_task.get_total_file_size(stream_type=lb.StreamType.RESULT),\n)\nprint(\n \"line count: \",\n export_task.get_total_lines(stream_type=lb.StreamType.RESULT),\n)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Provide results with JSON converter\n", + "# Returns streamed JSON output strings from export task results/errors, one by one\n", + "\n", + "\n", + "# Callback used for JSON 
Converter\n", + "def json_stream_handler(output: lb.BufferedJsonConverterOutput):\n", + " print(output.json)\n", + "\n", + "\n", + "if export_task.has_errors():\n", + " export_task.get_buffered_stream(stream_type=lb.StreamType.ERRORS).start(\n", + " stream_handler=lambda error: print(error))\n", + "\n", + "if export_task.has_result():\n", + " export_json = export_task.get_buffered_stream(\n", + " stream_type=lb.StreamType.RESULT).start(\n", + " stream_handler=json_stream_handler)\n", + "\n", + "print(\n", + " \"file size: \",\n", + " export_task.get_total_file_size(stream_type=lb.StreamType.RESULT),\n", + ")\n", + "print(\n", + " \"line count: \",\n", + " export_task.get_total_lines(stream_type=lb.StreamType.RESULT),\n", + ")" + ] }, { - "metadata": {}, - "source": "# Uncomment to get stream results as a written file\n\n# Provide results with file converter\n\n# if export_task.has_errors():\n# export_task.get_buffered_stream(\n# converter=lb.FileConverter(file_path=\"./errors.txt\"),\n# stream_type=lb.StreamType.ERRORS\n# ).start()\n\n# if export_task.has_result():\n# export_task.get_buffered_stream(\n# converter=lb.FileConverter(file_path=\"./result.txt\"),\n# stream_type=lb.StreamType.RESULT\n# ).start()", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Uncomment to get stream results as a written file\n", + "\n", + "# Provide results with file converter\n", + "\n", + "# if export_task.has_errors():\n", + "# export_task.get_buffered_stream(\n", + "# converter=lb.FileConverter(file_path=\"./errors.txt\"),\n", + "# stream_type=lb.StreamType.ERRORS\n", + "# ).start()\n", + "\n", + "# if export_task.has_result():\n", + "# export_task.get_buffered_stream(\n", + "# converter=lb.FileConverter(file_path=\"./result.txt\"),\n", + "# stream_type=lb.StreamType.RESULT\n", + "# ).start()" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Export data rows from a dataset\n", @@ -195,54 +307,141 @@ "- `global_keys`\n", "\n", "See the _Export data rows from a project_ section above for additional details on each filter. 
" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Insert the dataset ID of the dataset from which you wish to export data rows.\nDATASET_ID = \"\"\ndataset = client.get_dataset(DATASET_ID)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Insert the dataset ID of the dataset from which you wish to export data rows.\n", + "DATASET_ID = \"\"\n", + "dataset = client.get_dataset(DATASET_ID)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### Export V2 Method" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Set the export params to include/exclude certain fields.\nexport_params = {\n \"attachments\": True,\n \"metadata_fields\": True,\n \"data_row_details\": True,\n \"project_details\": True,\n \"label_details\": True,\n \"performance_details\": True,\n \"interpolated_frames\": True,\n \"embeddings\": True,\n # \"project_ids\": [\"\", \"\"],\n # \"model_run_ids\": [\"\", \"\"]\n}\n\n# Note: Filters follow AND logic, so typically using one filter is sufficient.\nfilters = {\n \"last_activity_at\": [\"2000-01-01 00:00:00\", \"2050-01-01 00:00:00\"],\n # \"label_created_at\": [\"2000-01-01 00:00:00\", \"2050-01-01 00:00:00\"],\n # \"global_keys\": [\"\", \"\"],\n # \"data_row_ids\": [\"\", \"\"],\n}\n\nexport_task = dataset.export_v2(params=export_params, filters=filters)\nexport_task.wait_till_done()\n\nif export_task.errors:\n print(export_task.errors)\n\nexport_json = export_task.result\nprint(\"results: \", export_json)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Set the export params to include/exclude certain fields.\n", + "export_params = {\n", + " \"attachments\": True,\n", + " \"metadata_fields\": True,\n", + " \"data_row_details\": True,\n", + " \"project_details\": True,\n", + " \"label_details\": True,\n", + " \"performance_details\": True,\n", + " \"interpolated_frames\": True,\n", + " \"embeddings\": True,\n", + " # \"project_ids\": [\"\", \"\"],\n", + " # \"model_run_ids\": [\"\", \"\"]\n", + "}\n", + "\n", + "# Note: Filters follow AND logic, so typically using one filter is sufficient.\n", + "filters = {\n", + " \"last_activity_at\": [\"2000-01-01 00:00:00\", \"2050-01-01 00:00:00\"],\n", + " # \"label_created_at\": [\"2000-01-01 00:00:00\", \"2050-01-01 00:00:00\"],\n", + " # \"global_keys\": [\"\", \"\"],\n", + " # \"data_row_ids\": [\"\", \"\"],\n", + "}\n", + "\n", + "export_task = dataset.export_v2(params=export_params, filters=filters)\n", + "export_task.wait_till_done()\n", + "\n", + "if export_task.errors:\n", + " print(export_task.errors)\n", + "\n", + "export_json = export_task.result\n", + "print(\"results: \", export_json)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### Stream Task Export Method\n", "The return type of this method is an ExportTask, instead of a Task. This is just a wrapper around Task, and most of its features are also present in ExportTask.\n", "This allows streaming of task results and errors." 
- ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Set the export params to include/exclude certain fields.\nexport_params = {\n \"attachments\": True,\n \"metadata_fields\": True,\n \"data_row_details\": True,\n \"project_details\": True,\n \"label_details\": True,\n \"performance_details\": True,\n \"interpolated_frames\": True,\n \"embeddings\": True,\n # \"project_ids\": [\"\", \"\"],\n # \"model_run_ids\": [\"\", \"\"]\n}\n\n# Note: Filters follow AND logic, so typically using one filter is sufficient.\nfilters = {\n \"last_activity_at\": [\"2000-01-01 00:00:00\", \"2050-01-01 00:00:00\"],\n # \"label_created_at\": [\"2000-01-01 00:00:00\", \"2050-01-01 00:00:00\"],\n # \"global_keys\": [\"\", \"\"],\n # \"data_row_ids\": [\"\", \"\"],\n}\n\nclient.enable_experimental = True\n\nexport_task = dataset.export(params=export_params, filters=filters)\nexport_task.wait_till_done()", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Set the export params to include/exclude certain fields.\n", + "export_params = {\n", + " \"attachments\": True,\n", + " \"metadata_fields\": True,\n", + " \"data_row_details\": True,\n", + " \"project_details\": True,\n", + " \"label_details\": True,\n", + " \"performance_details\": True,\n", + " \"interpolated_frames\": True,\n", + " \"embeddings\": True,\n", + " # \"project_ids\": [\"\", \"\"],\n", + " # \"model_run_ids\": [\"\", \"\"]\n", + "}\n", + "\n", + "# Note: Filters follow AND logic, so typically using one filter is sufficient.\n", + "filters = {\n", + " \"last_activity_at\": [\"2000-01-01 00:00:00\", \"2050-01-01 00:00:00\"],\n", + " # \"label_created_at\": [\"2000-01-01 00:00:00\", \"2050-01-01 00:00:00\"],\n", + " # \"global_keys\": [\"\", \"\"],\n", + " # \"data_row_ids\": [\"\", \"\"],\n", + "}\n", + "\n", + "export_task = dataset.export(params=export_params, filters=filters)\n", + "export_task.wait_till_done()" + ] }, { - "metadata": {}, - "source": "# Provide results with JSON converter\n# Returns streamed JSON output strings from export task results/errors, one by one\n\n\n# Callback used for JSON Converter\ndef json_stream_handler(output: lb.BufferedJsonConverterOutput):\n print(output.json)\n\n\nif export_task.has_errors():\n export_task.get_buffered_stream(stream_type=lb.StreamType.ERRORS).start(\n stream_handler=lambda error: print(error))\n\nif export_task.has_result():\n export_json = export_task.get_buffered_stream(\n stream_type=lb.StreamType.RESULT).start(\n stream_handler=json_stream_handler)\n\nprint(\n \"file size: \",\n export_task.get_total_file_size(stream_type=lb.StreamType.RESULT),\n)\nprint(\n \"line count: \",\n export_task.get_total_lines(stream_type=lb.StreamType.RESULT),\n)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Provide results with JSON converter\n", + "# Returns streamed JSON output strings from export task results/errors, one by one\n", + "\n", + "\n", + "# Callback used for JSON Converter\n", + "def json_stream_handler(output: lb.BufferedJsonConverterOutput):\n", + " print(output.json)\n", + "\n", + "\n", + "if export_task.has_errors():\n", + " export_task.get_buffered_stream(stream_type=lb.StreamType.ERRORS).start(\n", + " stream_handler=lambda error: print(error))\n", + "\n", + "if export_task.has_result():\n", + " export_json = export_task.get_buffered_stream(\n", + " stream_type=lb.StreamType.RESULT).start(\n", + " 
stream_handler=json_stream_handler)\n", + "\n", + "print(\n", + " \"file size: \",\n", + " export_task.get_total_file_size(stream_type=lb.StreamType.RESULT),\n", + ")\n", + "print(\n", + " \"line count: \",\n", + " export_task.get_total_lines(stream_type=lb.StreamType.RESULT),\n", + ")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Export data rows from Catalog\n", @@ -253,47 +452,122 @@ "\n", "### Filters\n", "When exporting from catalog, you can apply the same filters as exporting from a dataset.\n" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "catalog = client.get_catalog()", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "catalog = client.get_catalog()" + ] }, { - "metadata": {}, - "source": "export_params = {\n \"attachments\": True,\n \"metadata_fields\": True,\n \"data_row_details\": True,\n \"project_details\": True,\n \"label_details\": True,\n \"performance_details\": True,\n \"interpolated_frames\": True,\n \"embeddings\": True,\n # \"project_ids\": [\"\",\"\"],\n # \"model_run_ids\": [\"\", \"\"]\n}\n\nfilters = {\n \"last_activity_at\": [\"2000-01-01 00:00:00\", \"2050-01-01 00:00:00\"],\n # \"label_created_at\": [\"2000-01-01 00:00:00\", \"2050-01-01 00:00:00\"],\n # \"global_keys\": [\"\", \"\"],\n # \"data_row_ids\": [\"\", \"\"]\n}\nexport_task = catalog.export_v2(params=export_params, filters=filters)\nexport_task.wait_till_done()\n\nif export_task.errors:\n print(export_task.errors)\n\nexport_json = export_task.result\nprint(\"results: \", export_json)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "export_params = {\n", + " \"attachments\": True,\n", + " \"metadata_fields\": True,\n", + " \"data_row_details\": True,\n", + " \"project_details\": True,\n", + " \"label_details\": True,\n", + " \"performance_details\": True,\n", + " \"interpolated_frames\": True,\n", + " \"embeddings\": True,\n", + " # \"project_ids\": [\"\",\"\"],\n", + " # \"model_run_ids\": [\"\", \"\"]\n", + "}\n", + "\n", + "filters = {\n", + " \"last_activity_at\": [\"2000-01-01 00:00:00\", \"2050-01-01 00:00:00\"],\n", + " # \"label_created_at\": [\"2000-01-01 00:00:00\", \"2050-01-01 00:00:00\"],\n", + " # \"global_keys\": [\"\", \"\"],\n", + " # \"data_row_ids\": [\"\", \"\"]\n", + "}\n", + "export_task = catalog.export_v2(params=export_params, filters=filters)\n", + "export_task.wait_till_done()\n", + "\n", + "if export_task.errors:\n", + " print(export_task.errors)\n", + "\n", + "export_json = export_task.result\n", + "print(\"results: \", export_json)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### Stream Task Export Method\n", "The return type of this method is an ExportTask, instead of a Task. This is just a wrapper around Task, and most of its features are also present in ExportTask.\n", "This allows streaming of task results and errors." 
- ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "export_params = {\n \"attachments\": True,\n \"metadata_fields\": True,\n \"data_row_details\": True,\n \"project_details\": True,\n \"label_details\": True,\n \"performance_details\": True,\n \"interpolated_frames\": True,\n \"embeddings\": True,\n # \"project_ids\": [\"\",\"\"],\n # \"model_run_ids\": [\"\", \"\"]\n}\n\nfilters = {\n \"last_activity_at\": [\"2000-01-01 00:00:00\", \"2050-01-01 00:00:00\"],\n # \"label_created_at\": [\"2000-01-01 00:00:00\", \"2050-01-01 00:00:00\"],\n # \"global_keys\": [\"\", \"\"],\n # \"data_row_ids\": [\"\", \"\"]\n}\n\nclient.enable_experimental = True\nexport_task = catalog.export(params=export_params)\nexport_task.wait_till_done()", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "export_params = {\n", + " \"attachments\": True,\n", + " \"metadata_fields\": True,\n", + " \"data_row_details\": True,\n", + " \"project_details\": True,\n", + " \"label_details\": True,\n", + " \"performance_details\": True,\n", + " \"interpolated_frames\": True,\n", + " \"embeddings\": True,\n", + " # \"project_ids\": [\"\",\"\"],\n", + " # \"model_run_ids\": [\"\", \"\"]\n", + "}\n", + "\n", + "filters = {\n", + " \"last_activity_at\": [\"2000-01-01 00:00:00\", \"2050-01-01 00:00:00\"],\n", + " # \"label_created_at\": [\"2000-01-01 00:00:00\", \"2050-01-01 00:00:00\"],\n", + " # \"global_keys\": [\"\", \"\"],\n", + " # \"data_row_ids\": [\"\", \"\"]\n", + "}\n", + "\n", + "export_task = catalog.export(params=export_params)\n", + "export_task.wait_till_done()" + ] }, { - "metadata": {}, - "source": "def json_stream_handler(output: lb.BufferedJsonConverterOutput):\n print(output.json)\n\n\nif export_task.has_errors():\n export_task.get_buffered_stream(stream_type=lb.StreamType.ERRORS).start(\n stream_handler=lambda error: print(error))\n\nif export_task.has_result():\n export_json = export_task.get_buffered_stream(\n stream_type=lb.StreamType.RESULT).start(\n stream_handler=json_stream_handler)\n\nprint(\n \"file size: \",\n export_task.get_total_file_size(stream_type=lb.StreamType.RESULT),\n)\nprint(\n \"line count: \",\n export_task.get_total_lines(stream_type=lb.StreamType.RESULT),\n)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "def json_stream_handler(output: lb.BufferedJsonConverterOutput):\n", + " print(output.json)\n", + "\n", + "\n", + "if export_task.has_errors():\n", + " export_task.get_buffered_stream(stream_type=lb.StreamType.ERRORS).start(\n", + " stream_handler=lambda error: print(error))\n", + "\n", + "if export_task.has_result():\n", + " export_json = export_task.get_buffered_stream(\n", + " stream_type=lb.StreamType.RESULT).start(\n", + " stream_handler=json_stream_handler)\n", + "\n", + "print(\n", + " \"file size: \",\n", + " export_task.get_total_file_size(stream_type=lb.StreamType.RESULT),\n", + ")\n", + "print(\n", + " \"line count: \",\n", + " export_task.get_total_lines(stream_type=lb.StreamType.RESULT),\n", + ")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Export data rows from a slice\n", @@ -304,54 +578,125 @@ "\n", "### Filters\n", "No filters are applicable to exports from a slice. All the data rows of the slice must be exported." 
- ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Insert the Catalog slice ID of the slice from which you wish to export data rows.\nCATALOG_SLICE_ID = \"\"\ncatalog_slice = client.get_catalog_slice(CATALOG_SLICE_ID)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Insert the Catalog slice ID of the slice from which you wish to export data rows.\n", + "CATALOG_SLICE_ID = \"\"\n", + "catalog_slice = client.get_catalog_slice(CATALOG_SLICE_ID)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### Export V2 Method" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Set the export params to include/exclude certain fields.\nexport_params = {\n \"attachments\": True,\n \"metadata_fields\": True,\n \"data_row_details\": True,\n \"project_details\": True,\n \"label_details\": True,\n \"performance_details\": True,\n \"interpolated_frames\": True,\n \"embeddings\": True,\n # \"project_ids\": [\"\", \"\"],\n # \"model_run_ids\": [\"\", \"\"]\n}\n\nexport_task = catalog_slice.export_v2(params=export_params)\nexport_task.wait_till_done()\n\nif export_task.errors:\n print(export_task.errors)\n\nexport_json = export_task.result\nprint(\"results: \", export_json)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Set the export params to include/exclude certain fields.\n", + "export_params = {\n", + " \"attachments\": True,\n", + " \"metadata_fields\": True,\n", + " \"data_row_details\": True,\n", + " \"project_details\": True,\n", + " \"label_details\": True,\n", + " \"performance_details\": True,\n", + " \"interpolated_frames\": True,\n", + " \"embeddings\": True,\n", + " # \"project_ids\": [\"\", \"\"],\n", + " # \"model_run_ids\": [\"\", \"\"]\n", + "}\n", + "\n", + "export_task = catalog_slice.export_v2(params=export_params)\n", + "export_task.wait_till_done()\n", + "\n", + "if export_task.errors:\n", + " print(export_task.errors)\n", + "\n", + "export_json = export_task.result\n", + "print(\"results: \", export_json)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### Stream Task Export Method\n", "The return type of this method is an ExportTask, instead of a Task. This is just a wrapper around Task, and most of its features are also present in ExportTask.\n", "This allows streaming of task results and errors." 
- ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Set the export params to include/exclude certain fields.\nexport_params = {\n \"attachments\": True,\n \"metadata_fields\": True,\n \"data_row_details\": True,\n \"project_details\": True,\n \"label_details\": True,\n \"performance_details\": True,\n \"interpolated_frames\": True,\n \"embeddings\": True,\n # \"project_ids\": [\"\", \"\"],\n # \"model_run_ids\": [\"\", \"\"]\n}\n\nclient.enable_experimental = True\n\nexport_task = catalog_slice.export(params=export_params)\nexport_task.wait_till_done()", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Set the export params to include/exclude certain fields.\n", + "export_params = {\n", + " \"attachments\": True,\n", + " \"metadata_fields\": True,\n", + " \"data_row_details\": True,\n", + " \"project_details\": True,\n", + " \"label_details\": True,\n", + " \"performance_details\": True,\n", + " \"interpolated_frames\": True,\n", + " \"embeddings\": True,\n", + " # \"project_ids\": [\"\", \"\"],\n", + " # \"model_run_ids\": [\"\", \"\"]\n", + "}\n", + "\n", + "export_task = catalog_slice.export(params=export_params)\n", + "export_task.wait_till_done()" + ] }, { - "metadata": {}, - "source": "# Provide results with JSON converter\n# Returns streamed JSON output strings from export task results/errors, one by one\n\n\n# Callback used for JSON Converter\ndef json_stream_handler(output: lb.BufferedJsonConverterOutput):\n print(output.json)\n\n\nif export_task.has_errors():\n export_task.get_buffered_stream(stream_type=lb.StreamType.ERRORS).start(\n stream_handler=lambda error: print(error))\n\nif export_task.has_result():\n export_json = export_task.get_buffered_stream(\n stream_type=lb.StreamType.RESULT).start(\n stream_handler=json_stream_handler)\n\nprint(\n \"file size: \",\n export_task.get_total_file_size(stream_type=lb.StreamType.RESULT),\n)\nprint(\n \"line count: \",\n export_task.get_total_lines(stream_type=lb.StreamType.RESULT),\n)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Provide results with JSON converter\n", + "# Returns streamed JSON output strings from export task results/errors, one by one\n", + "\n", + "\n", + "# Callback used for JSON Converter\n", + "def json_stream_handler(output: lb.BufferedJsonConverterOutput):\n", + " print(output.json)\n", + "\n", + "\n", + "if export_task.has_errors():\n", + " export_task.get_buffered_stream(stream_type=lb.StreamType.ERRORS).start(\n", + " stream_handler=lambda error: print(error))\n", + "\n", + "if export_task.has_result():\n", + " export_json = export_task.get_buffered_stream(\n", + " stream_type=lb.StreamType.RESULT).start(\n", + " stream_handler=json_stream_handler)\n", + "\n", + "print(\n", + " \"file size: \",\n", + " export_task.get_total_file_size(stream_type=lb.StreamType.RESULT),\n", + ")\n", + "print(\n", + " \"line count: \",\n", + " export_task.get_total_lines(stream_type=lb.StreamType.RESULT),\n", + ")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Export data rows from a model run\n", @@ -368,54 +713,117 @@ "\n", "### Filters\n", "No filters are applicable to exports from a model run. 
All the data rows of the model run must be exported.\n" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Insert the model run ID of the model run from which you wish to export data rows.\nMODEL_RUN_ID = \"\"\nmodel_run = client.get_model_run(MODEL_RUN_ID)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Insert the model run ID of the model run from which you wish to export data rows.\n", + "MODEL_RUN_ID = \"\"\n", + "model_run = client.get_model_run(MODEL_RUN_ID)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### Export V2 Method" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Set the export params to include/exclude certain fields.\nexport_params = {\n \"attachments\": True,\n \"metadata_fields\": True,\n \"data_row_details\": True,\n \"interpolated_frames\": True,\n \"predictions\": True,\n \"embeddings\": True,\n}\n\nexport_task = model_run.export_v2(params=export_params)\nexport_task.wait_till_done()\n\nif export_task.errors:\n print(export_task.errors)\n\nexport_json = export_task.result\nprint(\"results: \", export_json)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Set the export params to include/exclude certain fields.\n", + "export_params = {\n", + " \"attachments\": True,\n", + " \"metadata_fields\": True,\n", + " \"data_row_details\": True,\n", + " \"interpolated_frames\": True,\n", + " \"predictions\": True,\n", + " \"embeddings\": True,\n", + "}\n", + "\n", + "export_task = model_run.export_v2(params=export_params)\n", + "export_task.wait_till_done()\n", + "\n", + "if export_task.errors:\n", + " print(export_task.errors)\n", + "\n", + "export_json = export_task.result\n", + "print(\"results: \", export_json)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### Stream Task Export Method\n", "The return type of this method is an ExportTask, instead of a Task. This is just a wrapper around Task, and most of its features are also present in ExportTask.\n", "This allows streaming of task results and errors." 
- ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Set the export params to include/exclude certain fields.\nexport_params = {\n \"attachments\": True,\n \"metadata_fields\": True,\n \"data_row_details\": True,\n \"interpolated_frames\": True,\n \"predictions\": True,\n \"embeddings\": True,\n}\n\nclient.enable_experimental = True\n\nexport_task = model_run.export(params=export_params)\nexport_task.wait_till_done()", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Set the export params to include/exclude certain fields.\n", + "export_params = {\n", + " \"attachments\": True,\n", + " \"metadata_fields\": True,\n", + " \"data_row_details\": True,\n", + " \"interpolated_frames\": True,\n", + " \"predictions\": True,\n", + " \"embeddings\": True,\n", + "}\n", + "\n", + "export_task = model_run.export(params=export_params)\n", + "export_task.wait_till_done()" + ] }, { - "metadata": {}, - "source": "# Provide results with JSON converter\n# Returns streamed JSON output strings from export task results/errors, one by one\n\n\n# Callback used for JSON Converter\ndef json_stream_handler(output: lb.BufferedJsonConverterOutput):\n print(output.json)\n\n\nif export_task.has_errors():\n export_task.get_buffered_stream(stream_type=lb.StreamType.ERRORS).start(\n stream_handler=lambda error: print(error))\n\nif export_task.has_result():\n export_json = export_task.get_buffered_stream(\n stream_type=lb.StreamType.RESULT).start(\n stream_handler=json_stream_handler)\n\nprint(\n \"file size: \",\n export_task.get_total_file_size(stream_type=lb.StreamType.RESULT),\n)\nprint(\n \"line count: \",\n export_task.get_total_lines(stream_type=lb.StreamType.RESULT),\n)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Provide results with JSON converter\n", + "# Returns streamed JSON output strings from export task results/errors, one by one\n", + "\n", + "\n", + "# Callback used for JSON Converter\n", + "def json_stream_handler(output: lb.BufferedJsonConverterOutput):\n", + " print(output.json)\n", + "\n", + "\n", + "if export_task.has_errors():\n", + " export_task.get_buffered_stream(stream_type=lb.StreamType.ERRORS).start(\n", + " stream_handler=lambda error: print(error))\n", + "\n", + "if export_task.has_result():\n", + " export_json = export_task.get_buffered_stream(\n", + " stream_type=lb.StreamType.RESULT).start(\n", + " stream_handler=json_stream_handler)\n", + "\n", + "print(\n", + " \"file size: \",\n", + " export_task.get_total_file_size(stream_type=lb.StreamType.RESULT),\n", + ")\n", + "print(\n", + " \"line count: \",\n", + " export_task.get_total_lines(stream_type=lb.StreamType.RESULT),\n", + ")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Export Data Row\n", @@ -426,52 +834,130 @@ "\n", "### Filters\n", "No filters are applicable to export data rows. All the data rows specified in the export task are included." 
- ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Insert the global key of the data row you wish to export\nDATA_ROW_GLOBAL_KEY = \"\"", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Insert the global key of the data row you wish to export\n", + "DATA_ROW_GLOBAL_KEY = \"\"" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### Export V2 Method" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Set the export params to include/exclude certain fields.\nexport_params = {\n \"attachments\": True,\n \"metadata_fields\": True,\n \"data_row_details\": True,\n \"project_details\": True,\n \"label_details\": True,\n \"performance_details\": True,\n \"interpolated_frames\": True,\n \"embeddings\": True,\n}\n\n# Provide a list of data row global keys\nexport_task = lb.DataRow.export_v2(client=client,\n global_keys=[DATA_ROW_GLOBAL_KEY],\n params=export_params)\nexport_task.wait_till_done()\n\nif export_task.errors:\n print(export_task.errors)\n\nexport_json = export_task.result\nprint(\"results: \", export_json)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Set the export params to include/exclude certain fields.\n", + "export_params = {\n", + " \"attachments\": True,\n", + " \"metadata_fields\": True,\n", + " \"data_row_details\": True,\n", + " \"project_details\": True,\n", + " \"label_details\": True,\n", + " \"performance_details\": True,\n", + " \"interpolated_frames\": True,\n", + " \"embeddings\": True,\n", + "}\n", + "\n", + "# Provide a list of data row global keys\n", + "export_task = lb.DataRow.export_v2(client=client,\n", + " global_keys=[DATA_ROW_GLOBAL_KEY],\n", + " params=export_params)\n", + "export_task.wait_till_done()\n", + "\n", + "if export_task.errors:\n", + " print(export_task.errors)\n", + "\n", + "export_json = export_task.result\n", + "print(\"results: \", export_json)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### Stream Task Export Method\n", "The return type of this method is an ExportTask, instead of a Task. This is just a wrapper around Task, and most of its features are also present in ExportTask.\n", "This allows streaming of task results and errors." 
- ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Set the export params to include/exclude certain fields.\nexport_params = {\n \"attachments\": True,\n \"metadata_fields\": True,\n \"data_row_details\": True,\n \"project_details\": True,\n \"label_details\": True,\n \"performance_details\": True,\n \"interpolated_frames\": True,\n \"embeddings\": True,\n}\n\nclient.enable_experimental = True\n\n# Provide a list of data row global keys\nexport_task = lb.DataRow.export(client=client,\n global_keys=[DATA_ROW_GLOBAL_KEY],\n params=export_params)\nexport_task.wait_till_done()", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Set the export params to include/exclude certain fields.\n", + "export_params = {\n", + " \"attachments\": True,\n", + " \"metadata_fields\": True,\n", + " \"data_row_details\": True,\n", + " \"project_details\": True,\n", + " \"label_details\": True,\n", + " \"performance_details\": True,\n", + " \"interpolated_frames\": True,\n", + " \"embeddings\": True,\n", + "}\n", + "\n", + "# Provide a list of data row global keys\n", + "export_task = lb.DataRow.export(client=client,\n", + " global_keys=[DATA_ROW_GLOBAL_KEY],\n", + " params=export_params)\n", + "export_task.wait_till_done()" + ] }, { - "metadata": {}, - "source": "# Provide results with JSON converter\n# Returns streamed JSON output strings from export task results/errors, one by one\n\n\n# Callback used for JSON Converter\ndef json_stream_handler(output: lb.BufferedJsonConverterOutput):\n print(output.json)\n\n\nif export_task.has_errors():\n export_task.get_buffered_stream(stream_type=lb.StreamType.ERRORS).start(\n stream_handler=lambda error: print(error))\n\nif export_task.has_result():\n export_json = export_task.get_buffered_stream(\n stream_type=lb.StreamType.RESULT).start(\n stream_handler=json_stream_handler)\n\nprint(\n \"file size: \",\n export_task.get_total_file_size(stream_type=lb.StreamType.RESULT),\n)\nprint(\n \"line count: \",\n export_task.get_total_lines(stream_type=lb.StreamType.RESULT),\n)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Provide results with JSON converter\n", + "# Returns streamed JSON output strings from export task results/errors, one by one\n", + "\n", + "\n", + "# Callback used for JSON Converter\n", + "def json_stream_handler(output: lb.BufferedJsonConverterOutput):\n", + " print(output.json)\n", + "\n", + "\n", + "if export_task.has_errors():\n", + " export_task.get_buffered_stream(stream_type=lb.StreamType.ERRORS).start(\n", + " stream_handler=lambda error: print(error))\n", + "\n", + "if export_task.has_result():\n", + " export_json = export_task.get_buffered_stream(\n", + " stream_type=lb.StreamType.RESULT).start(\n", + " stream_handler=json_stream_handler)\n", + "\n", + "print(\n", + " \"file size: \",\n", + " export_task.get_total_file_size(stream_type=lb.StreamType.RESULT),\n", + ")\n", + "print(\n", + " \"line count: \",\n", + " export_task.get_total_lines(stream_type=lb.StreamType.RESULT),\n", + ")" + ] } - ] -} \ No newline at end of file + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/exports/exporting_to_csv.ipynb b/exports/exporting_to_csv.ipynb index 08a794d..f6e5b9f 100644 --- a/exports/exporting_to_csv.ipynb +++ b/exports/exporting_to_csv.ipynb @@ -101,7 +101,7 @@ }, { "metadata": {}, - "source": "# 
Create dataset with image data row\nglobal_key = str(uuid.uuid4())\n\ntest_img_url = {\n \"row_data\":\n \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/2560px-Kitano_Street_Kobe01s5s4110.jpeg\",\n \"global_key\":\n global_key,\n}\n\ndataset = client.create_dataset(name=\"image-demo-dataset\")\ntask = dataset.create_data_rows([test_img_url])\ntask.wait_till_done()\nprint(\"Errors:\", task.errors)\nprint(\"Failed data rows:\", task.failed_data_rows)\n\n# Create ontology\nontology_builder = lb.OntologyBuilder(\n classifications=[ # List of Classification objects\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_question\",\n options=[\n lb.Option(value=\"first_radio_answer\"),\n lb.Option(value=\"second_radio_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_question\",\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n ),\n lb.Classification(class_type=lb.Classification.Type.TEXT,\n name=\"free_text\"),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"nested_radio_question\",\n options=[\n lb.Option(\n \"first_radio_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[lb.Option(\"first_sub_radio_answer\")],\n )\n ],\n )\n ],\n ),\n ],\n tools=[ # List of Tool objects\n lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bounding_box\"),\n lb.Tool(\n tool=lb.Tool.Type.BBOX,\n name=\"bbox_with_radio_subclass\",\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[lb.Option(value=\"tool_first_sub_radio_answer\")],\n ),\n ],\n ),\n ],\n)\n\nontology = client.create_ontology(\n \"Image CSV Demo Ontology\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Image,\n)\n\n# Set up project and connect ontology\nproject = client.create_project(name=\"Image Annotation Import Demo\",\n media_type=lb.MediaType.Image)\nproject.setup_editor(ontology)\n\n# Send data row towards our project\nbatch = project.create_batch(\n \"image-demo-batch\",\n global_keys=[\n global_key\n ], # paginated collection of data row objects, list of data row ids or global keys\n priority=1,\n)\n\nprint(f\"Batch: {batch}\")\n\n# Create a label and imported it towards our project\nradio_annotation = lb_types.ClassificationAnnotation(\n name=\"radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"second_radio_answer\")),\n)\nchecklist_annotation = lb_types.ClassificationAnnotation(\n name=\"checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n ]),\n)\ntext_annotation = lb_types.ClassificationAnnotation(\n name=\"free_text\",\n value=lb_types.Text(answer=\"sample text\"),\n)\nnested_radio_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\")),\n )\n ],\n )),\n)\nbbox_annotation = lb_types.ObjectAnnotation(\n name=\"bounding_box\",\n value=lb_types.Rectangle(\n start=lb_types.Point(x=1690, y=977),\n end=lb_types.Point(x=1915, y=1307),\n 
),\n)\nbbox_with_radio_subclass_annotation = lb_types.ObjectAnnotation(\n name=\"bbox_with_radio_subclass\",\n value=lb_types.Rectangle(\n start=lb_types.Point(x=541, y=933), # x = left, y = top\n end=lb_types.Point(x=871, y=1124), # x= left + width , y = top + height\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"tool_first_sub_radio_answer\")),\n )\n ],\n)\n\nlabel = []\nannotations = [\n radio_annotation,\n nested_radio_annotation,\n checklist_annotation,\n text_annotation,\n bbox_annotation,\n bbox_with_radio_subclass_annotation,\n]\n\nlabel.append(\n lb_types.Label(data={\"global_key\": global_key}, annotations=annotations))\n\nupload_job = lb.LabelImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"label_import_job\" + str(uuid.uuid4()),\n labels=label,\n)\n\nupload_job.wait_until_done()\nprint(\"Errors:\", upload_job.errors)\nprint(\"Status of uploads: \", upload_job.statuses)", + "source": "# Create dataset with image data row\nglobal_key = str(uuid.uuid4())\n\ntest_img_url = {\n \"row_data\":\n \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/2560px-Kitano_Street_Kobe01s5s4110.jpeg\",\n \"global_key\":\n global_key,\n}\n\ndataset = client.create_dataset(name=\"image-demo-dataset\")\ntask = dataset.create_data_rows([test_img_url])\ntask.wait_till_done()\nprint(\"Errors:\", task.errors)\nprint(\"Failed data rows:\", task.failed_data_rows)\n\n# Create ontology\nontology_builder = lb.OntologyBuilder(\n classifications=[ # List of Classification objects\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_question\",\n options=[\n lb.Option(value=\"first_radio_answer\"),\n lb.Option(value=\"second_radio_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_question\",\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n ),\n lb.Classification(class_type=lb.Classification.Type.TEXT,\n name=\"free_text\"),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"nested_radio_question\",\n options=[\n lb.Option(\n \"first_radio_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[lb.Option(\"first_sub_radio_answer\")],\n )\n ],\n )\n ],\n ),\n ],\n tools=[ # List of Tool objects\n lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bounding_box\"),\n lb.Tool(\n tool=lb.Tool.Type.BBOX,\n name=\"bbox_with_radio_subclass\",\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[lb.Option(value=\"tool_first_sub_radio_answer\")],\n ),\n ],\n ),\n ],\n)\n\nontology = client.create_ontology(\n \"Image CSV Demo Ontology\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Image,\n)\n\n# Set up project and connect ontology\nproject = client.create_project(name=\"Image Annotation Import Demo\",\n media_type=lb.MediaType.Image)\nproject.connect_ontology(ontology)\n\n# Send data row towards our project\nbatch = project.create_batch(\n \"image-demo-batch\",\n global_keys=[\n global_key\n ], # paginated collection of data row objects, list of data row ids or global keys\n priority=1,\n)\n\nprint(f\"Batch: {batch}\")\n\n# Create a label and imported it towards our project\nradio_annotation = lb_types.ClassificationAnnotation(\n 
name=\"radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"second_radio_answer\")),\n)\nchecklist_annotation = lb_types.ClassificationAnnotation(\n name=\"checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n ]),\n)\ntext_annotation = lb_types.ClassificationAnnotation(\n name=\"free_text\",\n value=lb_types.Text(answer=\"sample text\"),\n)\nnested_radio_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\")),\n )\n ],\n )),\n)\nbbox_annotation = lb_types.ObjectAnnotation(\n name=\"bounding_box\",\n value=lb_types.Rectangle(\n start=lb_types.Point(x=1690, y=977),\n end=lb_types.Point(x=1915, y=1307),\n ),\n)\nbbox_with_radio_subclass_annotation = lb_types.ObjectAnnotation(\n name=\"bbox_with_radio_subclass\",\n value=lb_types.Rectangle(\n start=lb_types.Point(x=541, y=933), # x = left, y = top\n end=lb_types.Point(x=871, y=1124), # x= left + width , y = top + height\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"tool_first_sub_radio_answer\")),\n )\n ],\n)\n\nlabel = []\nannotations = [\n radio_annotation,\n nested_radio_annotation,\n checklist_annotation,\n text_annotation,\n bbox_annotation,\n bbox_with_radio_subclass_annotation,\n]\n\nlabel.append(\n lb_types.Label(data={\"global_key\": global_key}, annotations=annotations))\n\nupload_job = lb.LabelImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"label_import_job\" + str(uuid.uuid4()),\n labels=label,\n)\n\nupload_job.wait_until_done()\nprint(\"Errors:\", upload_job.errors)\nprint(\"Status of uploads: \", upload_job.statuses)", "cell_type": "code", "outputs": [], "execution_count": null diff --git a/foundry/object_detection.ipynb b/foundry/object_detection.ipynb index 6184abf..92a57eb 100644 --- a/foundry/object_detection.ipynb +++ b/foundry/object_detection.ipynb @@ -131,7 +131,7 @@ }, { "metadata": {}, - "source": "project = client.create_project(name=\"Foundry Image Demo\",\n media_type=lb.MediaType.Image)\n\nproject.setup_editor(ontology)", + "source": "project = client.create_project(name=\"Foundry Image Demo\",\n media_type=lb.MediaType.Image)\n\nproject.connect_ontology(ontology)", "cell_type": "code", "outputs": [], "execution_count": null diff --git a/integrations/sam/meta_sam.ipynb b/integrations/sam/meta_sam.ipynb index 07854cc..70d1024 100644 --- a/integrations/sam/meta_sam.ipynb +++ b/integrations/sam/meta_sam.ipynb @@ -205,7 +205,7 @@ }, { "metadata": {}, - "source": "# Create a new project if you don't have one\n\n# Project defaults to batch mode with benchmark quality settings if this argument is not provided\n# Queue mode will be deprecated once dataset mode is deprecated\nproject = client.create_project(name=\"auto-mask-classification-project\",\n media_type=lb.MediaType.Image)\n\n# Or get an existing project by ID (uncomment the below)\n\n# project = get_project(\"fill_in_project_id\")\n\n# If the project already has an ontology set up, comment out this line\nproject.setup_editor(ontology)", + "source": "# 
Create a new project if you don't have one\n\n# Project defaults to batch mode with benchmark quality settings if this argument is not provided\n# Queue mode will be deprecated once dataset mode is deprecated\nproject = client.create_project(name=\"auto-mask-classification-project\",\n media_type=lb.MediaType.Image)\n\n# Or get an existing project by ID (uncomment the below)\n\n# project = get_project(\"fill_in_project_id\")\n\n# If the project already has an ontology set up, comment out this line\nproject.connect_ontology(ontology)", "cell_type": "code", "outputs": [], "execution_count": null diff --git a/integrations/sam/meta_sam_video.ipynb b/integrations/sam/meta_sam_video.ipynb index ca42808..5e63088 100644 --- a/integrations/sam/meta_sam_video.ipynb +++ b/integrations/sam/meta_sam_video.ipynb @@ -171,7 +171,7 @@ }, { "metadata": {}, - "source": "# Create a new project if you don't have one\n\n# Project defaults to batch mode with benchmark quality settings if this argument is not provided\n# Queue mode will be deprecated once dataset mode is deprecated\nproject = client.create_project(name=\"yolo-sam-video-masks-project\",\n media_type=lb.MediaType.Video)\n\n# Or get an existing project by ID (uncomment the below)\n\n# project = get_project(\"fill_in_project_id\")\n\n# If the project already has an ontology set up, comment out this line\nproject.setup_editor(ontology)", + "source": "# Create a new project if you don't have one\n\n# Project defaults to batch mode with benchmark quality settings if this argument is not provided\n# Queue mode will be deprecated once dataset mode is deprecated\nproject = client.create_project(name=\"yolo-sam-video-masks-project\",\n media_type=lb.MediaType.Video)\n\n# Or get an existing project by ID (uncomment the below)\n\n# project = get_project(\"fill_in_project_id\")\n\n# If the project already has an ontology set up, comment out this line\nproject.connect_ontology(ontology)", "cell_type": "code", "outputs": [], "execution_count": null diff --git a/integrations/yolo/import_yolov8_annotations.ipynb b/integrations/yolo/import_yolov8_annotations.ipynb index 446ba45..f5b02cb 100644 --- a/integrations/yolo/import_yolov8_annotations.ipynb +++ b/integrations/yolo/import_yolov8_annotations.ipynb @@ -154,7 +154,7 @@ }, { "metadata": {}, - "source": "project = client.create_project(name=\"YOLOv8 Demo Project\",\n media_type=lb.MediaType.Image)\n\nproject.create_batch(name=\"batch 1\", global_keys=[global_key])\n\nproject.setup_editor(ontology)", + "source": "project = client.create_project(name=\"YOLOv8 Demo Project\",\n media_type=lb.MediaType.Image)\n\nproject.create_batch(name=\"batch 1\", global_keys=[global_key])\n\nproject.connect_ontology(ontology)", "cell_type": "code", "outputs": [], "execution_count": null diff --git a/model_experiments/custom_metrics_demo.ipynb b/model_experiments/custom_metrics_demo.ipynb index 2e6a0f3..87c4a02 100644 --- a/model_experiments/custom_metrics_demo.ipynb +++ b/model_experiments/custom_metrics_demo.ipynb @@ -336,7 +336,7 @@ }, { "metadata": {}, - "source": "# Create a Labelbox project\nproject = client.create_project(name=\"image_prediction_many_kinds\",\n media_type=lb.MediaType.Image)\nproject.setup_editor(ontology)", + "source": "# Create a Labelbox project\nproject = client.create_project(name=\"image_prediction_many_kinds\",\n media_type=lb.MediaType.Image)\nproject.connect_ontology(ontology)", "cell_type": "code", "outputs": [], "execution_count": null diff --git 
a/model_experiments/model_predictions_to_project.ipynb b/model_experiments/model_predictions_to_project.ipynb index 78a5dec..7e27d2a 100644 --- a/model_experiments/model_predictions_to_project.ipynb +++ b/model_experiments/model_predictions_to_project.ipynb @@ -193,7 +193,7 @@ }, { "metadata": {}, - "source": "# Setup Ontology\nproject.setup_editor(ontology)", + "source": "# Setup Ontology\nproject.connect_ontology(ontology)", "cell_type": "code", "outputs": [], "execution_count": null diff --git a/prediction_upload/conversational_LLM_predictions.ipynb b/prediction_upload/conversational_LLM_predictions.ipynb index c4fffff..7b75e95 100644 --- a/prediction_upload/conversational_LLM_predictions.ipynb +++ b/prediction_upload/conversational_LLM_predictions.ipynb @@ -293,7 +293,7 @@ }, { "metadata": {}, - "source": "project = client.create_project(\n name=\"Conversational Human Evaluation Demo\",\n media_type=lb.MediaType.Conversational,\n)\nproject.setup_editor(ontology)", + "source": "project = client.create_project(\n name=\"Conversational Human Evaluation Demo\",\n media_type=lb.MediaType.Conversational,\n)\nproject.connect_ontology(ontology)", "cell_type": "code", "outputs": [], "execution_count": null diff --git a/prediction_upload/conversational_predictions.ipynb b/prediction_upload/conversational_predictions.ipynb index 0892ff9..37eeaf8 100644 --- a/prediction_upload/conversational_predictions.ipynb +++ b/prediction_upload/conversational_predictions.ipynb @@ -264,7 +264,7 @@ }, { "metadata": {}, - "source": "project = client.create_project(\n name=\"Conversational Text Prediction Import Demo\",\n media_type=lb.MediaType.Conversational,\n)\nproject.setup_editor(ontology)", + "source": "project = client.create_project(\n name=\"Conversational Text Prediction Import Demo\",\n media_type=lb.MediaType.Conversational,\n)\nproject.connect_ontology(ontology)", "cell_type": "code", "outputs": [], "execution_count": null diff --git a/prediction_upload/geospatial_predictions.ipynb b/prediction_upload/geospatial_predictions.ipynb index 6be8174..9ff96ab 100644 --- a/prediction_upload/geospatial_predictions.ipynb +++ b/prediction_upload/geospatial_predictions.ipynb @@ -286,7 +286,7 @@ }, { "metadata": {}, - "source": "# Create a Labelbox project\nproject = client.create_project(name=\"geospatial_prediction_demo\",\n media_type=lb.MediaType.Geospatial_Tile)\nproject.setup_editor(ontology)", + "source": "# Create a Labelbox project\nproject = client.create_project(name=\"geospatial_prediction_demo\",\n media_type=lb.MediaType.Geospatial_Tile)\nproject.connect_ontology(ontology)", "cell_type": "code", "outputs": [], "execution_count": null diff --git a/prediction_upload/html_predictions.ipynb b/prediction_upload/html_predictions.ipynb index cc18a4b..31a5cd5 100644 --- a/prediction_upload/html_predictions.ipynb +++ b/prediction_upload/html_predictions.ipynb @@ -244,7 +244,7 @@ }, { "metadata": {}, - "source": "# Create a Labelbox project\nproject = client.create_project(name=\"HTML prediction import demo\",\n media_type=lb.MediaType.Html)\nproject.setup_editor(ontology)", + "source": "# Create a Labelbox project\nproject = client.create_project(name=\"HTML prediction import demo\",\n media_type=lb.MediaType.Html)\nproject.connect_ontology(ontology)", "cell_type": "code", "outputs": [], "execution_count": null diff --git a/prediction_upload/image_predictions.ipynb b/prediction_upload/image_predictions.ipynb index 18583b3..9b17dd0 100644 --- a/prediction_upload/image_predictions.ipynb +++ 
b/prediction_upload/image_predictions.ipynb @@ -378,7 +378,7 @@ }, { "metadata": {}, - "source": "# Create a Labelbox project\nproject = client.create_project(name=\"Image Prediction Demo\",\n media_type=lb.MediaType.Image)\nproject.setup_editor(ontology)", + "source": "# Create a Labelbox project\nproject = client.create_project(name=\"Image Prediction Demo\",\n media_type=lb.MediaType.Image)\nproject.connect_ontology(ontology)", "cell_type": "code", "outputs": [], "execution_count": null diff --git a/prediction_upload/pdf_predictions.ipynb b/prediction_upload/pdf_predictions.ipynb index 9bc86d3..950c917 100644 --- a/prediction_upload/pdf_predictions.ipynb +++ b/prediction_upload/pdf_predictions.ipynb @@ -1,18 +1,16 @@ { - "nbformat": 4, - "nbformat_minor": 2, - "metadata": {}, "cells": [ { + "cell_type": "markdown", "metadata": {}, "source": [ - "", - " ", + "\n", + " \n", "\n" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -24,17 +22,17 @@ "\n", "" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "# PDF Prediction Import " - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "*Annotation types*\n", @@ -53,115 +51,419 @@ "- Bounding box \n", "- Entities \n", "- Relationships (only supported for MAL imports)" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Setup" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "%pip install -q \"labelbox[data]\"", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "%pip install -q \"labelbox[data]\"" + ] }, { - "metadata": {}, - "source": "import uuid\nimport json\nimport requests\nimport labelbox as lb\nimport labelbox.types as lb_types", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "import uuid\n", + "import json\n", + "import requests\n", + "import labelbox as lb\n", + "import labelbox.types as lb_types" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Replace with your API key" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "API_KEY = \"\"\nclient = lb.Client(API_KEY)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "API_KEY = \"\"\n", + "client = lb.Client(API_KEY)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Supported Predictions" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "########## Entity ##########\n\n# Annotation Types\nentities_prediction = lb_types.ObjectAnnotation(\n name=\"named_entity\",\n confidence=0.5,\n value=lb_types.DocumentEntity(\n name=\"named_entity\",\n textSelections=[\n lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n ],\n ),\n)\n\n# NDJSON\nentities_prediction_ndjson = {\n \"name\":\n \"named_entity\",\n \"confidence\":\n 0.5,\n \"textSelections\": [{\n \"tokenIds\": [\"\",],\n \"groupId\": \"\",\n \"page\": 1,\n }],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "########## Entity ##########\n", + "\n", + "# Annotation Types\n", + "entities_prediction = lb_types.ObjectAnnotation(\n", + " name=\"named_entity\",\n", + " confidence=0.5,\n", + " value=lb_types.DocumentEntity(\n", + " 
name=\"named_entity\",\n", + " textSelections=[\n", + " lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n", + " ],\n", + " ),\n", + ")\n", + "\n", + "# NDJSON\n", + "entities_prediction_ndjson = {\n", + " \"name\":\n", + " \"named_entity\",\n", + " \"confidence\":\n", + " 0.5,\n", + " \"textSelections\": [{\n", + " \"tokenIds\": [\"\",],\n", + " \"groupId\": \"\",\n", + " \"page\": 1,\n", + " }],\n", + "}" + ] }, { - "metadata": {}, - "source": "########### Radio Classification #########\n\n# Annotation types\nradio_prediction = lb_types.ClassificationAnnotation(\n name=\"radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\", confidence=0.5)),\n)\n# NDJSON\nradio_prediction_ndjson = {\n \"name\": \"radio_question\",\n \"answer\": {\n \"name\": \"first_radio_answer\",\n \"confidence\": 0.5\n },\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "########### Radio Classification #########\n", + "\n", + "# Annotation types\n", + "radio_prediction = lb_types.ClassificationAnnotation(\n", + " name=\"radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\", confidence=0.5)),\n", + ")\n", + "# NDJSON\n", + "radio_prediction_ndjson = {\n", + " \"name\": \"radio_question\",\n", + " \"answer\": {\n", + " \"name\": \"first_radio_answer\",\n", + " \"confidence\": 0.5\n", + " },\n", + "}" + ] }, { - "metadata": {}, - "source": "############ Checklist Classification ###########\n\n# Annotation types\nchecklist_prediction = lb_types.ClassificationAnnotation(\n name=\"checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\",\n confidence=0.5),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\",\n confidence=0.5),\n ]),\n)\n\n# NDJSON\nchecklist_prediction_ndjson = {\n \"name\":\n \"checklist_question\",\n \"answer\": [\n {\n \"name\": \"first_checklist_answer\",\n \"confidence\": 0.5\n },\n {\n \"name\": \"second_checklist_answer\",\n \"confidence\": 0.5\n },\n ],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "############ Checklist Classification ###########\n", + "\n", + "# Annotation types\n", + "checklist_prediction = lb_types.ClassificationAnnotation(\n", + " name=\"checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"first_checklist_answer\",\n", + " confidence=0.5),\n", + " lb_types.ClassificationAnswer(name=\"second_checklist_answer\",\n", + " confidence=0.5),\n", + " ]),\n", + ")\n", + "\n", + "# NDJSON\n", + "checklist_prediction_ndjson = {\n", + " \"name\":\n", + " \"checklist_question\",\n", + " \"answer\": [\n", + " {\n", + " \"name\": \"first_checklist_answer\",\n", + " \"confidence\": 0.5\n", + " },\n", + " {\n", + " \"name\": \"second_checklist_answer\",\n", + " \"confidence\": 0.5\n", + " },\n", + " ],\n", + "}" + ] }, { - "metadata": {}, - "source": "############ Bounding Box ###########\n\nbbox_dim_1 = {\"top\": 135.3, \"left\": 102.771, \"height\": 109.843, \"width\": 415.8}\nbbox_prediction = lb_types.ObjectAnnotation(\n name=\"bounding_box\", # must match your ontology feature\"s name\n value=lb_types.DocumentRectangle(\n start=lb_types.Point(x=bbox_dim_1[\"left\"],\n y=bbox_dim_1[\"top\"]), # x = left, y = top\n end=lb_types.Point(\n x=bbox_dim_1[\"left\"] + 
bbox_dim_1[\"width\"],\n y=bbox_dim_1[\"top\"] + bbox_dim_1[\"height\"],\n ), # x= left + width , y = top + height\n page=0,\n unit=lb_types.RectangleUnit.POINTS,\n ),\n)\n\nbbox_prediction_ndjson = {\n \"name\": \"bounding_box\",\n \"bbox\": bbox_dim_1,\n \"page\": 0,\n \"unit\": \"POINTS\",\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "############ Bounding Box ###########\n", + "\n", + "bbox_dim_1 = {\"top\": 135.3, \"left\": 102.771, \"height\": 109.843, \"width\": 415.8}\n", + "bbox_prediction = lb_types.ObjectAnnotation(\n", + " name=\"bounding_box\", # must match your ontology feature\"s name\n", + " value=lb_types.DocumentRectangle(\n", + " start=lb_types.Point(x=bbox_dim_1[\"left\"],\n", + " y=bbox_dim_1[\"top\"]), # x = left, y = top\n", + " end=lb_types.Point(\n", + " x=bbox_dim_1[\"left\"] + bbox_dim_1[\"width\"],\n", + " y=bbox_dim_1[\"top\"] + bbox_dim_1[\"height\"],\n", + " ), # x= left + width , y = top + height\n", + " page=0,\n", + " unit=lb_types.RectangleUnit.POINTS,\n", + " ),\n", + ")\n", + "\n", + "bbox_prediction_ndjson = {\n", + " \"name\": \"bounding_box\",\n", + " \"bbox\": bbox_dim_1,\n", + " \"page\": 0,\n", + " \"unit\": \"POINTS\",\n", + "}" + ] }, { - "metadata": {}, - "source": "# ############ global nested classifications ###########\n\nnested_checklist_prediction = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n confidence=0.5, # Confidence scores should be added to the answer\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\",\n confidence=\n 0.5, # Confidence scores should be added to the answer\n )\n ]),\n )\n ],\n )\n ]),\n)\n\nnested_checklist_prediction_ndjson = {\n \"name\":\n \"nested_checklist_question\",\n \"answer\": [{\n \"name\":\n \"first_checklist_answer\",\n \"confidence\":\n 0.5, # Confidence scores should be added to the answer\n \"classifications\": [{\n \"name\": \"sub_checklist_question\",\n \"answer\": {\n \"name\": \"first_sub_checklist_answer\",\n \"confidence\":\n 0.5, # Confidence scores should be added to the answer\n },\n }],\n }],\n}\n\nnested_radio_prediction = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n confidence=0.5, # Confidence scores should be added to the answer\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\",\n confidence=\n 0.5, # Confidence scores should be added to the answer\n )),\n )\n ],\n )),\n)\n\nnested_radio_prediction_ndjson = {\n \"name\": \"nested_radio_question\",\n \"answer\": {\n \"name\":\n \"first_radio_answer\",\n \"confidence\":\n 0.5,\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\": \"first_sub_radio_answer\",\n \"confidence\": 0.5\n },\n }],\n },\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# ############ global nested classifications ###########\n", + "\n", + "nested_checklist_prediction = lb_types.ClassificationAnnotation(\n", + " 
name=\"nested_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_checklist_answer\",\n", + " confidence=0.5, # Confidence scores should be added to the answer\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_sub_checklist_answer\",\n", + " confidence=\n", + " 0.5, # Confidence scores should be added to the answer\n", + " )\n", + " ]),\n", + " )\n", + " ],\n", + " )\n", + " ]),\n", + ")\n", + "\n", + "nested_checklist_prediction_ndjson = {\n", + " \"name\":\n", + " \"nested_checklist_question\",\n", + " \"answer\": [{\n", + " \"name\":\n", + " \"first_checklist_answer\",\n", + " \"confidence\":\n", + " 0.5, # Confidence scores should be added to the answer\n", + " \"classifications\": [{\n", + " \"name\": \"sub_checklist_question\",\n", + " \"answer\": {\n", + " \"name\": \"first_sub_checklist_answer\",\n", + " \"confidence\":\n", + " 0.5, # Confidence scores should be added to the answer\n", + " },\n", + " }],\n", + " }],\n", + "}\n", + "\n", + "nested_radio_prediction = lb_types.ClassificationAnnotation(\n", + " name=\"nested_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\",\n", + " confidence=0.5, # Confidence scores should be added to the answer\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_sub_radio_answer\",\n", + " confidence=\n", + " 0.5, # Confidence scores should be added to the answer\n", + " )),\n", + " )\n", + " ],\n", + " )),\n", + ")\n", + "\n", + "nested_radio_prediction_ndjson = {\n", + " \"name\": \"nested_radio_question\",\n", + " \"answer\": {\n", + " \"name\":\n", + " \"first_radio_answer\",\n", + " \"confidence\":\n", + " 0.5,\n", + " \"classifications\": [{\n", + " \"name\": \"sub_radio_question\",\n", + " \"answer\": {\n", + " \"name\": \"first_sub_radio_answer\",\n", + " \"confidence\": 0.5\n", + " },\n", + " }],\n", + " },\n", + "}" + ] }, { - "metadata": {}, - "source": "############## Classification Free-form text ##############\n\ntext_prediction = lb_types.ClassificationAnnotation(\n name=\"free_text\", # must match your ontology feature\"s name\n value=lb_types.Text(answer=\"sample text\", confidence=0.5),\n)\n\ntext_prediction_ndjson = {\n \"name\": \"free_text\",\n \"answer\": \"sample text\",\n \"confidence\": 0.5,\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "############## Classification Free-form text ##############\n", + "\n", + "text_prediction = lb_types.ClassificationAnnotation(\n", + " name=\"free_text\", # must match your ontology feature\"s name\n", + " value=lb_types.Text(answer=\"sample text\", confidence=0.5),\n", + ")\n", + "\n", + "text_prediction_ndjson = {\n", + " \"name\": \"free_text\",\n", + " \"answer\": \"sample text\",\n", + " \"confidence\": 0.5,\n", + "}" + ] }, { - "metadata": {}, - "source": "######### BBOX with nested classifications #########\n\nbbox_dim = {\n \"top\": 226.757,\n \"left\": 317.271,\n \"height\": 194.229,\n \"width\": 249.386,\n}\n\nbbox_with_radio_subclass_prediction = lb_types.ObjectAnnotation(\n name=\"bbox_with_radio_subclass\",\n confidence=0.5,\n 
value=lb_types.DocumentRectangle(\n start=lb_types.Point(x=bbox_dim[\"left\"],\n y=bbox_dim[\"top\"]), # x = left, y = top\n end=lb_types.Point(\n x=bbox_dim[\"left\"] + bbox_dim[\"width\"],\n y=bbox_dim[\"top\"] + bbox_dim[\"height\"],\n ), # x= left + width , y = top + height\n unit=lb_types.RectangleUnit.POINTS,\n page=1,\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\",\n confidence=0.5,\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"second_sub_radio_question\",\n value=lb_types.Radio(\n answer=lb_types.ClassificationAnswer(\n name=\"second_sub_radio_answer\",\n confidence=0.5,\n )),\n )\n ],\n )),\n )\n ],\n)\n\nbbox_with_radio_subclass_prediction_ndjson = {\n \"name\": \"bbox_with_radio_subclass\",\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\":\n \"first_sub_radio_answer\",\n \"confidence\":\n 0.5,\n \"classifications\": [{\n \"name\": \"second_sub_radio_question\",\n \"answer\": {\n \"name\": \"second_sub_radio_answer\",\n \"confidence\": 0.5,\n },\n }],\n },\n }],\n \"bbox\": bbox_dim,\n \"page\": 1,\n \"unit\": \"POINTS\",\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "######### BBOX with nested classifications #########\n", + "\n", + "bbox_dim = {\n", + " \"top\": 226.757,\n", + " \"left\": 317.271,\n", + " \"height\": 194.229,\n", + " \"width\": 249.386,\n", + "}\n", + "\n", + "bbox_with_radio_subclass_prediction = lb_types.ObjectAnnotation(\n", + " name=\"bbox_with_radio_subclass\",\n", + " confidence=0.5,\n", + " value=lb_types.DocumentRectangle(\n", + " start=lb_types.Point(x=bbox_dim[\"left\"],\n", + " y=bbox_dim[\"top\"]), # x = left, y = top\n", + " end=lb_types.Point(\n", + " x=bbox_dim[\"left\"] + bbox_dim[\"width\"],\n", + " y=bbox_dim[\"top\"] + bbox_dim[\"height\"],\n", + " ), # x= left + width , y = top + height\n", + " unit=lb_types.RectangleUnit.POINTS,\n", + " page=1,\n", + " ),\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_sub_radio_answer\",\n", + " confidence=0.5,\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"second_sub_radio_question\",\n", + " value=lb_types.Radio(\n", + " answer=lb_types.ClassificationAnswer(\n", + " name=\"second_sub_radio_answer\",\n", + " confidence=0.5,\n", + " )),\n", + " )\n", + " ],\n", + " )),\n", + " )\n", + " ],\n", + ")\n", + "\n", + "bbox_with_radio_subclass_prediction_ndjson = {\n", + " \"name\": \"bbox_with_radio_subclass\",\n", + " \"classifications\": [{\n", + " \"name\": \"sub_radio_question\",\n", + " \"answer\": {\n", + " \"name\":\n", + " \"first_sub_radio_answer\",\n", + " \"confidence\":\n", + " 0.5,\n", + " \"classifications\": [{\n", + " \"name\": \"second_sub_radio_question\",\n", + " \"answer\": {\n", + " \"name\": \"second_sub_radio_answer\",\n", + " \"confidence\": 0.5,\n", + " },\n", + " }],\n", + " },\n", + " }],\n", + " \"bbox\": bbox_dim,\n", + " \"page\": 1,\n", + " \"unit\": \"POINTS\",\n", + "}" + ] }, { - "metadata": {}, - "source": "############ NER with nested classifications ########\n\nner_with_checklist_subclass_prediction = lb_types.ObjectAnnotation(\n name=\"ner_with_checklist_subclass\",\n confidence=0.5,\n 
value=lb_types.DocumentEntity(\n        name=\"ner_with_checklist_subclass\",\n        text_selections=[\n            lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n        ],\n    ),\n    classifications=[\n        lb_types.ClassificationAnnotation(\n            name=\"sub_checklist_question\",\n            value=lb_types.Checklist(answer=[\n                lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\",\n                                              confidence=0.5)\n            ]),\n        )\n    ],\n)\n\nner_with_checklist_subclass_prediction_ndjson = {\n    \"name\":\n        \"ner_with_checklist_subclass\",\n    \"classifications\": [{\n        \"name\": \"sub_checklist_question\",\n        \"answer\": [{\n            \"name\": \"first_sub_checklist_answer\",\n            \"confidence\": 0.5\n        }],\n    }],\n    \"textSelections\": [{\n        \"tokenIds\": [\"\"],\n        \"groupId\": \"\",\n        \"page\": 1\n    }],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "############ NER with nested classifications ########\n", + "\n", + "ner_with_checklist_subclass_prediction = lb_types.ObjectAnnotation(\n", + "    name=\"ner_with_checklist_subclass\",\n", + "    confidence=0.5,\n", + "    value=lb_types.DocumentEntity(\n", + "        name=\"ner_with_checklist_subclass\",\n", + "        text_selections=[\n", + "            lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n", + "        ],\n", + "    ),\n", + "    classifications=[\n", + "        lb_types.ClassificationAnnotation(\n", + "            name=\"sub_checklist_question\",\n", + "            value=lb_types.Checklist(answer=[\n", + "                lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\",\n", + "                                              confidence=0.5)\n", + "            ]),\n", + "        )\n", + "    ],\n", + ")\n", + "\n", + "ner_with_checklist_subclass_prediction_ndjson = {\n", + "    \"name\":\n", + "        \"ner_with_checklist_subclass\",\n", + "    \"classifications\": [{\n", + "        \"name\": \"sub_checklist_question\",\n", + "        \"answer\": [{\n", + "            \"name\": \"first_sub_checklist_answer\",\n", + "            \"confidence\": 0.5\n", + "        }],\n", + "    }],\n", + "    \"textSelections\": [{\n", + "        \"tokenIds\": [\"\"],\n", + "        \"groupId\": \"\",\n", + "        \"page\": 1\n", + "    }],\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 1: Import data rows into Catalog " - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "Passing a `text_layer_url` is no longer required. Labelbox automatically generates a text layer using Google Document AI and its OCR engine to detect tokens. \n", @@ -175,60 +477,200 @@ "For example, in a landscape-oriented PDF, the document is rotated by 90 degrees before processing. 
As a result, all tokens in the text layer are also rotated by 90 degrees.\n", "\n", "You may still pass a `text_layer_url` if you wish to bypass the automatic text layer generation" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "global_key = \"0801.3483.pdf\" + str(uuid.uuid4())\nimg_url = {\n \"row_data\": {\n \"pdf_url\":\n \"https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf\"\n },\n \"global_key\": global_key,\n}\n\ndataset = client.create_dataset(name=\"pdf_demo_dataset\")\ntask = dataset.create_data_rows([img_url])\ntask.wait_till_done()\nprint(f\"Failed data rows: {task.failed_data_rows}\")\nprint(f\"Errors: {task.errors}\")\n\nif task.errors:\n for error in task.errors:\n if (\"Duplicate global key\" in error[\"message\"] and\n dataset.row_count == 0):\n # If the global key already exists in the workspace the dataset will be created empty, so we can delete it.\n print(f\"Deleting empty dataset: {dataset}\")\n dataset.delete()", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "global_key = \"0801.3483.pdf\" + str(uuid.uuid4())\n", + "img_url = {\n", + " \"row_data\": {\n", + " \"pdf_url\":\n", + " \"https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf\"\n", + " },\n", + " \"global_key\": global_key,\n", + "}\n", + "\n", + "dataset = client.create_dataset(name=\"pdf_demo_dataset\")\n", + "task = dataset.create_data_rows([img_url])\n", + "task.wait_till_done()\n", + "print(f\"Failed data rows: {task.failed_data_rows}\")\n", + "print(f\"Errors: {task.errors}\")\n", + "\n", + "if task.errors:\n", + " for error in task.errors:\n", + " if (\"Duplicate global key\" in error[\"message\"] and\n", + " dataset.row_count == 0):\n", + " # If the global key already exists in the workspace the dataset will be created empty, so we can delete it.\n", + " print(f\"Deleting empty dataset: {dataset}\")\n", + " dataset.delete()" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 2: Create/select an Ontology for your model predictions\n", "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool names and classification instructions should match the name/instructions fields in your annotations to ensure the correct feature schemas are matched." 
- ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "## Setup the ontology and link the tools created above.\n\nontology_builder = lb.OntologyBuilder(\n classifications=[ # List of Classification objects\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_question\",\n scope=lb.Classification.Scope.GLOBAL,\n options=[\n lb.Option(value=\"first_radio_answer\"),\n lb.Option(value=\"second_radio_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_question\",\n scope=lb.Classification.Scope.GLOBAL,\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.TEXT,\n name=\"free_text\",\n scope=lb.Classification.Scope.GLOBAL,\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"nested_radio_question\",\n scope=lb.Classification.Scope.GLOBAL,\n options=[\n lb.Option(\n \"first_radio_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[lb.Option(\"first_sub_radio_answer\")],\n )\n ],\n )\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"nested_checklist_question\",\n scope=lb.Classification.Scope.GLOBAL,\n options=[\n lb.Option(\n \"first_checklist_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"sub_checklist_question\",\n options=[lb.Option(\"first_sub_checklist_answer\")],\n )\n ],\n )\n ],\n ),\n ],\n tools=[ # List of Tool objects\n lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bounding_box\"),\n lb.Tool(tool=lb.Tool.Type.NER, name=\"named_entity\"),\n lb.Tool(\n tool=lb.Tool.Type.NER,\n name=\"ner_with_checklist_subclass\",\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"sub_checklist_question\",\n options=[lb.Option(value=\"first_sub_checklist_answer\")],\n )\n ],\n ),\n lb.Tool(\n tool=lb.Tool.Type.BBOX,\n name=\"bbox_with_radio_subclass\",\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[\n lb.Option(\n value=\"first_sub_radio_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"second_sub_radio_question\",\n options=[\n lb.Option(\"second_sub_radio_answer\")\n ],\n )\n ],\n )\n ],\n )\n ],\n ),\n ],\n)\n\nontology = client.create_ontology(\n \"Document Annotation Import Demo\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Document,\n)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "## Setup the ontology and link the tools created above.\n", + "\n", + "ontology_builder = lb.OntologyBuilder(\n", + " classifications=[ # List of Classification objects\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"radio_question\",\n", + " scope=lb.Classification.Scope.GLOBAL,\n", + " options=[\n", + " lb.Option(value=\"first_radio_answer\"),\n", + " lb.Option(value=\"second_radio_answer\"),\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"checklist_question\",\n", + " scope=lb.Classification.Scope.GLOBAL,\n", + " options=[\n", + " lb.Option(value=\"first_checklist_answer\"),\n", + " lb.Option(value=\"second_checklist_answer\"),\n", + " ],\n", + " ),\n", + " 
lb.Classification(\n", + " class_type=lb.Classification.Type.TEXT,\n", + " name=\"free_text\",\n", + " scope=lb.Classification.Scope.GLOBAL,\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"nested_radio_question\",\n", + " scope=lb.Classification.Scope.GLOBAL,\n", + " options=[\n", + " lb.Option(\n", + " \"first_radio_answer\",\n", + " options=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"sub_radio_question\",\n", + " options=[lb.Option(\"first_sub_radio_answer\")],\n", + " )\n", + " ],\n", + " )\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"nested_checklist_question\",\n", + " scope=lb.Classification.Scope.GLOBAL,\n", + " options=[\n", + " lb.Option(\n", + " \"first_checklist_answer\",\n", + " options=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"sub_checklist_question\",\n", + " options=[lb.Option(\"first_sub_checklist_answer\")],\n", + " )\n", + " ],\n", + " )\n", + " ],\n", + " ),\n", + " ],\n", + " tools=[ # List of Tool objects\n", + " lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bounding_box\"),\n", + " lb.Tool(tool=lb.Tool.Type.NER, name=\"named_entity\"),\n", + " lb.Tool(\n", + " tool=lb.Tool.Type.NER,\n", + " name=\"ner_with_checklist_subclass\",\n", + " classifications=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"sub_checklist_question\",\n", + " options=[lb.Option(value=\"first_sub_checklist_answer\")],\n", + " )\n", + " ],\n", + " ),\n", + " lb.Tool(\n", + " tool=lb.Tool.Type.BBOX,\n", + " name=\"bbox_with_radio_subclass\",\n", + " classifications=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"sub_radio_question\",\n", + " options=[\n", + " lb.Option(\n", + " value=\"first_sub_radio_answer\",\n", + " options=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"second_sub_radio_question\",\n", + " options=[\n", + " lb.Option(\"second_sub_radio_answer\")\n", + " ],\n", + " )\n", + " ],\n", + " )\n", + " ],\n", + " )\n", + " ],\n", + " ),\n", + " ],\n", + ")\n", + "\n", + "ontology = client.create_ontology(\n", + " \"Document Annotation Import Demo\",\n", + " ontology_builder.asdict(),\n", + " media_type=lb.MediaType.Document,\n", + ")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 3: Create a Model and Model Run" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# create Model\nmodel = client.create_model(name=\"PDF_model_run_\" + str(uuid.uuid4()),\n ontology_id=ontology.uid)\n# create Model Run\nmodel_run = model.create_model_run(\"iteration 1\")", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# create Model\n", + "model = client.create_model(name=\"PDF_model_run_\" + str(uuid.uuid4()),\n", + " ontology_id=ontology.uid)\n", + "# create Model Run\n", + "model_run = model.create_model_run(\"iteration 1\")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 4: Send data rows to the Model Run " - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "model_run.upsert_data_rows(global_keys=[global_key])", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + 
"model_run.upsert_data_rows(global_keys=[global_key])" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 5: Create the predictions payload\n", @@ -237,184 +679,507 @@ "Labelbox support two formats for the annotations payload: NDJSON and Python Annotation types. Both are described below to compose your annotations into Labels attached to the data rows.\n", "\n", "The resulting payload should have exactly the same content for annotations that are supported by both" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "To import ner annotations, you must pass a `text_layer_url`, Labelbox automatically generates a `text_layer_url` after importing a pdf asset that doesn't include a `text_layer_url`" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "To extract the generated text layer url we first need to export the data row" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "client.enable_experimental = True\ntask = lb.DataRow.export(client=client, global_keys=[global_key])\ntask.wait_till_done()\nstream = task.get_buffered_stream()\n\ntext_layer = \"\"\nfor output in stream:\n output_json = output.json\n text_layer = output_json[\"media_attributes\"][\"text_layer_url\"]\nprint(text_layer)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "task = lb.DataRow.export(client=client, global_keys=[global_key])\n", + "task.wait_till_done()\n", + "stream = task.get_buffered_stream()\n", + "\n", + "text_layer = \"\"\n", + "for output in stream:\n", + " output_json = output.json\n", + " text_layer = output_json[\"media_attributes\"][\"text_layer_url\"]\n", + "print(text_layer)" + ] }, { - "metadata": {}, - "source": "# Helper method\ndef update_text_selections(annotation, group_id, list_tokens, page):\n return annotation.update({\n \"textSelections\": [{\n \"groupId\": group_id,\n \"tokenIds\": list_tokens,\n \"page\": page\n }]\n })\n\n\n# Fetch the content of the text layer\nres = requests.get(text_layer)\n\n# Phrases that we want to annotation obtained from the text layer url\ncontent_phrases = [\n \"Metal-insulator (MI) transitions have been one of the\",\n \"T. Sasaki, N. Yoneyama, and N. 
Kobayashi\",\n]\n\n# Parse the text layer\ntext_selections = []\ntext_selections_ner = []\n\nfor obj in json.loads(res.text):\n for group in obj[\"groups\"]:\n if group[\"content\"] == content_phrases[0]:\n list_tokens = [x[\"id\"] for x in group[\"tokens\"]]\n # build text selections for Python Annotation Types\n document_text_selection = lb_types.DocumentTextSelection(\n groupId=group[\"id\"], tokenIds=list_tokens, page=1)\n text_selections.append(document_text_selection)\n # build text selection for the NDJson annotations\n update_text_selections(\n annotation=entities_prediction_ndjson,\n group_id=group[\"id\"], # id representing group of words\n list_tokens=\n list_tokens, # ids representing individual words from the group\n page=1,\n )\n if group[\"content\"] == content_phrases[1]:\n list_tokens_2 = [x[\"id\"] for x in group[\"tokens\"]]\n # build text selections for Python Annotation Types\n ner_text_selection = lb_types.DocumentTextSelection(\n groupId=group[\"id\"], tokenIds=list_tokens_2, page=1)\n text_selections_ner.append(ner_text_selection)\n # build text selection for the NDJson annotations\n update_text_selections(\n annotation=ner_with_checklist_subclass_prediction_ndjson,\n group_id=group[\"id\"], # id representing group of words\n list_tokens=\n list_tokens_2, # ids representing individual words from the group\n page=1,\n )\n\n# re-write the entity annotation with text selections\nentities_prediction_document_entity = lb_types.DocumentEntity(\n name=\"named_entity\", confidence=0.5, textSelections=text_selections)\nentities_prediction = lb_types.ObjectAnnotation(\n name=\"named_entity\", value=entities_prediction_document_entity)\n\n# re-write the entity annotation + subclassification with text selections\nclassifications = [\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\",\n confidence=0.5)\n ]),\n )\n]\nner_annotation_with_subclass = lb_types.DocumentEntity(\n name=\"ner_with_checklist_subclass\",\n confidence=0.5,\n textSelections=text_selections_ner,\n)\nner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(\n name=\"ner_with_checklist_subclass\",\n confidence=0.5,\n value=ner_annotation_with_subclass,\n classifications=classifications,\n)\n\n# Final NDJSON and python annotations\nprint(f\"entities_annotations_ndjson={entities_prediction_ndjson}\")\nprint(f\"entities_annotation={entities_prediction}\")\nprint(\n f\"nested_entities_annotation_ndjson={ner_with_checklist_subclass_prediction_ndjson}\"\n)\nprint(f\"nested_entities_annotation={ner_with_checklist_subclass_annotation}\")", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Helper method\n", + "def update_text_selections(annotation, group_id, list_tokens, page):\n", + " return annotation.update({\n", + " \"textSelections\": [{\n", + " \"groupId\": group_id,\n", + " \"tokenIds\": list_tokens,\n", + " \"page\": page\n", + " }]\n", + " })\n", + "\n", + "\n", + "# Fetch the content of the text layer\n", + "res = requests.get(text_layer)\n", + "\n", + "# Phrases that we want to annotation obtained from the text layer url\n", + "content_phrases = [\n", + " \"Metal-insulator (MI) transitions have been one of the\",\n", + " \"T. Sasaki, N. Yoneyama, and N. 
Kobayashi\",\n", + "]\n", + "\n", + "# Parse the text layer\n", + "text_selections = []\n", + "text_selections_ner = []\n", + "\n", + "for obj in json.loads(res.text):\n", + " for group in obj[\"groups\"]:\n", + " if group[\"content\"] == content_phrases[0]:\n", + " list_tokens = [x[\"id\"] for x in group[\"tokens\"]]\n", + " # build text selections for Python Annotation Types\n", + " document_text_selection = lb_types.DocumentTextSelection(\n", + " groupId=group[\"id\"], tokenIds=list_tokens, page=1)\n", + " text_selections.append(document_text_selection)\n", + " # build text selection for the NDJson annotations\n", + " update_text_selections(\n", + " annotation=entities_prediction_ndjson,\n", + " group_id=group[\"id\"], # id representing group of words\n", + " list_tokens=\n", + " list_tokens, # ids representing individual words from the group\n", + " page=1,\n", + " )\n", + " if group[\"content\"] == content_phrases[1]:\n", + " list_tokens_2 = [x[\"id\"] for x in group[\"tokens\"]]\n", + " # build text selections for Python Annotation Types\n", + " ner_text_selection = lb_types.DocumentTextSelection(\n", + " groupId=group[\"id\"], tokenIds=list_tokens_2, page=1)\n", + " text_selections_ner.append(ner_text_selection)\n", + " # build text selection for the NDJson annotations\n", + " update_text_selections(\n", + " annotation=ner_with_checklist_subclass_prediction_ndjson,\n", + " group_id=group[\"id\"], # id representing group of words\n", + " list_tokens=\n", + " list_tokens_2, # ids representing individual words from the group\n", + " page=1,\n", + " )\n", + "\n", + "# re-write the entity annotation with text selections\n", + "entities_prediction_document_entity = lb_types.DocumentEntity(\n", + " name=\"named_entity\", confidence=0.5, textSelections=text_selections)\n", + "entities_prediction = lb_types.ObjectAnnotation(\n", + " name=\"named_entity\", value=entities_prediction_document_entity)\n", + "\n", + "# re-write the entity annotation + subclassification with text selections\n", + "classifications = [\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\",\n", + " confidence=0.5)\n", + " ]),\n", + " )\n", + "]\n", + "ner_annotation_with_subclass = lb_types.DocumentEntity(\n", + " name=\"ner_with_checklist_subclass\",\n", + " confidence=0.5,\n", + " textSelections=text_selections_ner,\n", + ")\n", + "ner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(\n", + " name=\"ner_with_checklist_subclass\",\n", + " confidence=0.5,\n", + " value=ner_annotation_with_subclass,\n", + " classifications=classifications,\n", + ")\n", + "\n", + "# Final NDJSON and python annotations\n", + "print(f\"entities_annotations_ndjson={entities_prediction_ndjson}\")\n", + "print(f\"entities_annotation={entities_prediction}\")\n", + "print(\n", + " f\"nested_entities_annotation_ndjson={ner_with_checklist_subclass_prediction_ndjson}\"\n", + ")\n", + "print(f\"nested_entities_annotation={ner_with_checklist_subclass_annotation}\")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "Python annotation \n" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "label_predictions = []\n\nlabel_predictions.append(\n lb_types.Label(\n data=lb_types.DocumentData(global_key=global_key),\n annotations=[\n entities_prediction,\n checklist_prediction,\n nested_checklist_prediction,\n text_prediction,\n radio_prediction,\n 
nested_radio_prediction,\n bbox_prediction,\n bbox_with_radio_subclass_prediction,\n ner_with_checklist_subclass_prediction,\n ],\n ))", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "label_predictions = []\n", + "\n", + "label_predictions.append(\n", + " lb_types.Label(\n", + " data=lb_types.DocumentData(global_key=global_key),\n", + " annotations=[\n", + " entities_prediction,\n", + " checklist_prediction,\n", + " nested_checklist_prediction,\n", + " text_prediction,\n", + " radio_prediction,\n", + " nested_radio_prediction,\n", + " bbox_prediction,\n", + " bbox_with_radio_subclass_prediction,\n", + " ner_with_checklist_subclass_prediction,\n", + " ],\n", + " ))" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "If using NDJSON: " - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "label_predictions_ndjson = []\nfor annot in [\n entities_prediction_ndjson,\n checklist_prediction_ndjson,\n nested_checklist_prediction_ndjson,\n text_prediction_ndjson,\n radio_prediction_ndjson,\n nested_radio_prediction_ndjson,\n bbox_prediction_ndjson,\n bbox_with_radio_subclass_prediction_ndjson,\n ner_with_checklist_subclass_prediction_ndjson,\n]:\n annot.update({\n \"dataRow\": {\n \"globalKey\": global_key\n },\n })\n label_predictions_ndjson.append(annot)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "label_predictions_ndjson = []\n", + "for annot in [\n", + " entities_prediction_ndjson,\n", + " checklist_prediction_ndjson,\n", + " nested_checklist_prediction_ndjson,\n", + " text_prediction_ndjson,\n", + " radio_prediction_ndjson,\n", + " nested_radio_prediction_ndjson,\n", + " bbox_prediction_ndjson,\n", + " bbox_with_radio_subclass_prediction_ndjson,\n", + " ner_with_checklist_subclass_prediction_ndjson,\n", + "]:\n", + " annot.update({\n", + " \"dataRow\": {\n", + " \"globalKey\": global_key\n", + " },\n", + " })\n", + " label_predictions_ndjson.append(annot)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 6: Upload the predictions payload to the Model Run" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Upload the prediction label to the Model Run\nupload_job_prediction = model_run.add_predictions(\n name=\"prediction_upload_job\" + str(uuid.uuid4()),\n predictions=label_predictions,\n)\n\n# Errors will appear for annotation uploads that failed.\nprint(\"Errors:\", upload_job_prediction.errors)\nprint(\"Status of uploads: \", upload_job_prediction.statuses)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Upload the prediction label to the Model Run\n", + "upload_job_prediction = model_run.add_predictions(\n", + " name=\"prediction_upload_job\" + str(uuid.uuid4()),\n", + " predictions=label_predictions,\n", + ")\n", + "\n", + "# Errors will appear for annotation uploads that failed.\n", + "print(\"Errors:\", upload_job_prediction.errors)\n", + "print(\"Status of uploads: \", upload_job_prediction.statuses)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 7: Send annotations to the Model Run\n", "To send annotations to a Model Run, we must first import them into a project, create a label payload and then send them to the Model Run." 
- ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "7.1 Create a labelbox project \n" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "project = client.create_project(name=\"Document Prediction Import Demo\",\n media_type=lb.MediaType.Document)\nproject.setup_editor(ontology)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "project = client.create_project(name=\"Document Prediction Import Demo\",\n", + " media_type=lb.MediaType.Document)\n", + "project.connect_ontology(ontology)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "7.2 Create a batch to send to the project " - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "project.create_batch(\n \"batch_text_prediction_demo\", # Each batch in a project must have a unique name\n global_keys=[\n global_key\n ], # Paginated collection of data row objects, list of data row ids or global keys\n priority=5, # priority between 1(Highest) - 5(lowest)\n)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "project.create_batch(\n", + " \"batch_text_prediction_demo\", # Each batch in a project must have a unique name\n", + " global_keys=[\n", + " global_key\n", + " ], # Paginated collection of data row objects, list of data row ids or global keys\n", + " priority=5, # priority between 1(Highest) - 5(lowest)\n", + ")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "7.3 Create the annotations payload" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "entities_annotation = lb_types.ObjectAnnotation(\n name=\"named_entity\",\n value=lb_types.DocumentEntity(name=\"named_entity\",\n textSelections=text_selections),\n)\n\nradio_annotation = lb_types.ClassificationAnnotation(\n name=\"radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\")),\n)\n\nchecklist_annotation = lb_types.ClassificationAnnotation(\n name=\"checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n ]),\n)\n\nbbox_dim_1 = {\"top\": 135.3, \"left\": 102.771, \"height\": 109.843, \"width\": 415.8}\nbbox_annotation = lb_types.ObjectAnnotation(\n name=\"bounding_box\", # must match your ontology feature\"s name\n value=lb_types.DocumentRectangle(\n start=lb_types.Point(x=bbox_dim_1[\"left\"],\n y=bbox_dim_1[\"top\"]), # x = left, y = top\n end=lb_types.Point(\n x=bbox_dim_1[\"left\"] + bbox_dim_1[\"width\"],\n y=bbox_dim_1[\"top\"] + bbox_dim_1[\"height\"],\n ), # x= left + width , y = top + height\n page=0,\n unit=lb_types.RectangleUnit.POINTS,\n ),\n)\n\nnested_checklist_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\",)\n ]),\n )\n ],\n )\n ]),\n)\n\nnested_radio_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n classifications=[\n 
lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\",)),\n )\n ],\n )),\n)\n\ntext_annotation = lb_types.ClassificationAnnotation(\n name=\"free_text\", value=lb_types.Text(answer=\"sample text\"))\n\nbbox_dim = {\n \"top\": 226.757,\n \"left\": 317.271,\n \"height\": 194.229,\n \"width\": 249.386,\n}\n\nbbox_with_radio_subclass_annotation = lb_types.ObjectAnnotation(\n name=\"bbox_with_radio_subclass\",\n value=lb_types.DocumentRectangle(\n start=lb_types.Point(x=bbox_dim[\"left\"],\n y=bbox_dim[\"top\"]), # x = left, y = top\n end=lb_types.Point(\n x=bbox_dim[\"left\"] + bbox_dim[\"width\"],\n y=bbox_dim[\"top\"] + bbox_dim[\"height\"],\n ), # x= left + width , y = top + height\n unit=lb_types.RectangleUnit.POINTS,\n page=1,\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"second_sub_radio_question\",\n value=lb_types.Radio(\n answer=lb_types.ClassificationAnswer(\n name=\"second_sub_radio_answer\")),\n )\n ],\n )),\n )\n ],\n)\n\nner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(\n name=\"ner_with_checklist_subclass\",\n value=lb_types.DocumentEntity(name=\"ner_with_checklist_subclass\",\n text_selections=text_selections_ner),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\")\n ]),\n )\n ],\n)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "entities_annotation = lb_types.ObjectAnnotation(\n", + " name=\"named_entity\",\n", + " value=lb_types.DocumentEntity(name=\"named_entity\",\n", + " textSelections=text_selections),\n", + ")\n", + "\n", + "radio_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\")),\n", + ")\n", + "\n", + "checklist_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n", + " lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n", + " ]),\n", + ")\n", + "\n", + "bbox_dim_1 = {\"top\": 135.3, \"left\": 102.771, \"height\": 109.843, \"width\": 415.8}\n", + "bbox_annotation = lb_types.ObjectAnnotation(\n", + " name=\"bounding_box\", # must match your ontology feature\"s name\n", + " value=lb_types.DocumentRectangle(\n", + " start=lb_types.Point(x=bbox_dim_1[\"left\"],\n", + " y=bbox_dim_1[\"top\"]), # x = left, y = top\n", + " end=lb_types.Point(\n", + " x=bbox_dim_1[\"left\"] + bbox_dim_1[\"width\"],\n", + " y=bbox_dim_1[\"top\"] + bbox_dim_1[\"height\"],\n", + " ), # x= left + width , y = top + height\n", + " page=0,\n", + " unit=lb_types.RectangleUnit.POINTS,\n", + " ),\n", + ")\n", + "\n", + "nested_checklist_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"nested_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_checklist_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_checklist_question\",\n", + " 
value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_sub_checklist_answer\",)\n", + " ]),\n", + " )\n", + " ],\n", + " )\n", + " ]),\n", + ")\n", + "\n", + "nested_radio_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"nested_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_sub_radio_answer\",)),\n", + " )\n", + " ],\n", + " )),\n", + ")\n", + "\n", + "text_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"free_text\", value=lb_types.Text(answer=\"sample text\"))\n", + "\n", + "bbox_dim = {\n", + " \"top\": 226.757,\n", + " \"left\": 317.271,\n", + " \"height\": 194.229,\n", + " \"width\": 249.386,\n", + "}\n", + "\n", + "bbox_with_radio_subclass_annotation = lb_types.ObjectAnnotation(\n", + " name=\"bbox_with_radio_subclass\",\n", + " value=lb_types.DocumentRectangle(\n", + " start=lb_types.Point(x=bbox_dim[\"left\"],\n", + " y=bbox_dim[\"top\"]), # x = left, y = top\n", + " end=lb_types.Point(\n", + " x=bbox_dim[\"left\"] + bbox_dim[\"width\"],\n", + " y=bbox_dim[\"top\"] + bbox_dim[\"height\"],\n", + " ), # x= left + width , y = top + height\n", + " unit=lb_types.RectangleUnit.POINTS,\n", + " page=1,\n", + " ),\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_sub_radio_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"second_sub_radio_question\",\n", + " value=lb_types.Radio(\n", + " answer=lb_types.ClassificationAnswer(\n", + " name=\"second_sub_radio_answer\")),\n", + " )\n", + " ],\n", + " )),\n", + " )\n", + " ],\n", + ")\n", + "\n", + "ner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(\n", + " name=\"ner_with_checklist_subclass\",\n", + " value=lb_types.DocumentEntity(name=\"ner_with_checklist_subclass\",\n", + " text_selections=text_selections_ner),\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\")\n", + " ]),\n", + " )\n", + " ],\n", + ")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "7.4 Create the label object " - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "labels = []\n\nlabels.append(\n lb_types.Label(\n data={\"global_key\": global_key},\n annotations=[\n entities_annotation,\n checklist_annotation,\n nested_checklist_annotation,\n text_annotation,\n radio_annotation,\n nested_radio_annotation,\n bbox_annotation,\n bbox_with_radio_subclass_annotation,\n ner_with_checklist_subclass_annotation,\n ],\n ))", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "labels = []\n", + "\n", + "labels.append(\n", + " lb_types.Label(\n", + " data={\"global_key\": global_key},\n", + " annotations=[\n", + " entities_annotation,\n", + " checklist_annotation,\n", + " nested_checklist_annotation,\n", + " text_annotation,\n", + " radio_annotation,\n", + " nested_radio_annotation,\n", + " bbox_annotation,\n", + " bbox_with_radio_subclass_annotation,\n", + " 
ner_with_checklist_subclass_annotation,\n", + "        ],\n", + "    ))" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "7.5 Upload annotations to the project using Label import\n" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "upload_job_annotation = lb.LabelImport.create_from_objects(\n    client=client,\n    project_id=project.uid,\n    name=\"text_label_import_job\" + str(uuid.uuid4()),\n    labels=labels,\n)\n\nupload_job_annotation.wait_until_done()\n# Errors will appear for annotation uploads that failed.\nprint(\"Errors:\", upload_job_annotation.errors)\nprint(\"Status of uploads: \", upload_job_annotation.statuses)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "upload_job_annotation = lb.LabelImport.create_from_objects(\n", + "    client=client,\n", + "    project_id=project.uid,\n", + "    name=\"text_label_import_job\" + str(uuid.uuid4()),\n", + "    labels=labels,\n", + ")\n", + "\n", + "upload_job_annotation.wait_until_done()\n", + "# Errors will appear for annotation uploads that failed.\n", + "print(\"Errors:\", upload_job_annotation.errors)\n", + "print(\"Status of uploads: \", upload_job_annotation.statuses)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "7.6 Send the annotations to the Model Run " - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# get the label ids from the project\nmodel_run.upsert_labels(project_id=project.uid)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# get the label ids from the project\n", + "model_run.upsert_labels(project_id=project.uid)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Optional deletions for cleanup" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# project.delete()\n# dataset.delete()", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# project.delete()\n", + "# dataset.delete()" + ] } - ] -} \ No newline at end of file + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/prediction_upload/text_predictions.ipynb b/prediction_upload/text_predictions.ipynb index 307669f..54a6151 100644 --- a/prediction_upload/text_predictions.ipynb +++ b/prediction_upload/text_predictions.ipynb @@ -253,7 +253,7 @@ }, { "metadata": {}, - "source": "# Create a Labelbox project\nproject = client.create_project(name=\"Text Prediction Import Demo\",\n                                media_type=lb.MediaType.Text)\nproject.setup_editor(ontology)", + "source": "# Create a Labelbox project\nproject = client.create_project(name=\"Text Prediction Import Demo\",\n                                media_type=lb.MediaType.Text)\nproject.connect_ontology(ontology)", "cell_type": "code", "outputs": [], "execution_count": null diff --git a/prediction_upload/video_predictions.ipynb b/prediction_upload/video_predictions.ipynb index 5652e6a..66473a3 100644 --- a/prediction_upload/video_predictions.ipynb +++ b/prediction_upload/video_predictions.ipynb @@ -278,7 +278,7 @@ }, { "metadata": {}, - "source": "# Create a Labelbox project\nproject = client.create_project(name=\"video_prediction_demo\",\n                                media_type=lb.MediaType.Video)\nproject.setup_editor(ontology)", + "source": "# Create a Labelbox project\nproject = client.create_project(name=\"video_prediction_demo\",\n                                media_type=lb.MediaType.Video)\nproject.connect_ontology(ontology)",
"cell_type": "code", "outputs": [], "execution_count": null diff --git a/project_configuration/multimodal_chat_project.ipynb b/project_configuration/multimodal_chat_project.ipynb index 6089f44..ba120e0 100644 --- a/project_configuration/multimodal_chat_project.ipynb +++ b/project_configuration/multimodal_chat_project.ipynb @@ -188,7 +188,7 @@ }, { "metadata": {}, - "source": "project = client.create_model_evaluation_project(\n name=\"Demo LMC Project\",\n media_type=lb.MediaType.Conversational,\n dataset_name=\"Demo LMC dataset\",\n data_row_count=100,\n)\n\n# Setup project with ontology created above\nproject.setup_editor(ontology)", + "source": "project = client.create_model_evaluation_project(\n name=\"Demo LMC Project\",\n media_type=lb.MediaType.Conversational,\n dataset_name=\"Demo LMC dataset\",\n data_row_count=100,\n)\n\n# Setup project with ontology created above\nproject.connect_ontology(ontology)", "cell_type": "code", "outputs": [], "execution_count": null diff --git a/project_configuration/project_setup.ipynb b/project_configuration/project_setup.ipynb index d54f9f0..c6699b1 100644 --- a/project_configuration/project_setup.ipynb +++ b/project_configuration/project_setup.ipynb @@ -118,7 +118,7 @@ }, { "metadata": {}, - "source": "batch_project = client.create_project(\n name=\"Project Setup Demo\",\n quality_mode=QualityMode.\n Consensus, # For benchmarks use quality_mode = QualityMode.Benchmark\n media_type=lb.MediaType.Image,\n)\n\nbatch_project.setup_editor(ontology)", + "source": "batch_project = client.create_project(\n name=\"Project Setup Demo\",\n quality_mode=QualityMode.\n Consensus, # For benchmarks use quality_mode = QualityMode.Benchmark\n media_type=lb.MediaType.Image,\n)\n\nbatch_project.connect_ontology(ontology)", "cell_type": "code", "outputs": [], "execution_count": null diff --git a/project_configuration/queue_management.ipynb b/project_configuration/queue_management.ipynb index 3d20236..9f30a7e 100644 --- a/project_configuration/queue_management.ipynb +++ b/project_configuration/queue_management.ipynb @@ -104,7 +104,7 @@ }, { "metadata": {}, - "source": "classification_features = [\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"Quality Issues\",\n options=[\n lb.Option(value=\"blurry\", label=\"Blurry\"),\n lb.Option(value=\"distorted\", label=\"Distorted\"),\n ],\n )\n]\n\nontology_builder = lb.OntologyBuilder(tools=[],\n classifications=classification_features)\n\nontology = client.create_ontology(\n \"Ontology from new features\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Image,\n)\n\nproject.setup_editor(ontology)", + "source": "classification_features = [\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"Quality Issues\",\n options=[\n lb.Option(value=\"blurry\", label=\"Blurry\"),\n lb.Option(value=\"distorted\", label=\"Distorted\"),\n ],\n )\n]\n\nontology_builder = lb.OntologyBuilder(tools=[],\n classifications=classification_features)\n\nontology = client.create_ontology(\n \"Ontology from new features\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Image,\n)\n\nproject.connect_ontology(ontology)", "cell_type": "code", "outputs": [], "execution_count": null