Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Version 2.1.0 #211

Merged
merged 51 commits into from
Apr 18, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
51 commits
Select commit Hold shift + click to select a range
c8404cd
Add dataset registry
steffencruz Mar 17, 2024
d285e08
Add task-dataset registry, which will enable multi-dataset tasks
steffencruz Mar 17, 2024
21beebc
Use registry for task creation
steffencruz Mar 17, 2024
58bdbd8
Add arbitrary selector for improved control
steffencruz Mar 17, 2024
ed2d07b
Merge branch 'pre-staging' into features/registry
bkb2135 Apr 8, 2024
35878b0
Resolve circular imports
bkb2135 Apr 8, 2024
35a53b1
Resolve circular imports in prompting/conversation.py
bkb2135 Apr 8, 2024
df1885d
Create separate task_registry file
bkb2135 Apr 8, 2024
dfb65a2
Remove llm import from init
bkb2135 Apr 8, 2024
cd9f592
Import TASKS and DATASETS
bkb2135 Apr 8, 2024
dc00d3b
Add Mock Task to Registry
bkb2135 Apr 8, 2024
a1eb07b
Add registry unit tests
bkb2135 Apr 8, 2024
55121b2
Add Chattensor System Prompt
bkb2135 Apr 8, 2024
b807ccc
Instantiate the datasets before task creation
bkb2135 Apr 9, 2024
6bb8dd4
Set Bittensor to 6.10.1
bkb2135 Apr 9, 2024
6f2dda4
Use class.name rather than hardcoded strings
bkb2135 Apr 10, 2024
1fb74fe
Rename date_qa
bkb2135 Apr 10, 2024
7f3d257
Explicitly delete other reference prompts
bkb2135 Apr 10, 2024
8cb53bd
Update Registry
steffencruz Apr 10, 2024
c014ff5
Simplify install.sh
bkb2135 Apr 10, 2024
9aa843e
Update qa name
steffencruz Apr 10, 2024
254bfa7
Update unit tests
bkb2135 Apr 10, 2024
ba7712b
Support Python 3.9-12
bkb2135 Apr 12, 2024
62ca944
Update python-package.yml
bkb2135 Apr 14, 2024
58aa884
Merge pull request #197 from opentensor/Bump-Bittensor-Version
bkb2135 Apr 15, 2024
a41ff1a
Remove print statements
bkb2135 Apr 15, 2024
f06994a
Update test_registry.py
bkb2135 Apr 15, 2024
be61a87
Merge pull request #162 from opentensor/features/registry
bkb2135 Apr 15, 2024
01d2fee
Merge pull request #194 from opentensor/features/chattensor_system_pr…
bkb2135 Apr 15, 2024
aa833b3
Add generic instruction task
bkb2135 Apr 15, 2024
9bd93ff
Add generic instruction dataset
bkb2135 Apr 15, 2024
8241395
Add Generic Instruction
bkb2135 Apr 15, 2024
e04af62
Fix Unit Tests
bkb2135 Apr 15, 2024
a7d2906
Update config
steffencruz Apr 16, 2024
4a3aa9c
Add back reference prompt
steffencruz Apr 16, 2024
f1fb5e1
Make info['source'] a string
steffencruz Apr 16, 2024
05351fb
Rename generic instruction to generic
steffencruz Apr 17, 2024
d5ba6f3
Update Config
steffencruz Apr 17, 2024
50d5e94
Update validator.py
bkb2135 Apr 17, 2024
1b75f13
Remove mock task from config
bkb2135 Apr 17, 2024
350716b
Update config.py
bkb2135 Apr 17, 2024
a59dc38
Merge pull request #202 from opentensor/features/generic-instruction-…
steffencruz Apr 17, 2024
d15784d
Update relevance scoring
bkb2135 Apr 18, 2024
6f57d1e
Update generic_instruction scoring
bkb2135 Apr 18, 2024
c7df6f4
Bump version to 2.1
steffencruz Apr 18, 2024
6168639
Merge pull request #206 from opentensor/hotfix/adjust-generic-scoring
steffencruz Apr 18, 2024
3b5d4b8
Merge pull request #208 from opentensor/version-2.1.0
steffencruz Apr 18, 2024
9e03a06
Remove Mock from registry
steffencruz Apr 18, 2024
773a3d4
Remove source from math
bkb2135 Apr 18, 2024
17ef4f6
Merge pull request #210 from opentensor/hotfix/remove-mock-from-registry
steffencruz Apr 18, 2024
8da8358
Merge pull request #204 from opentensor/pre-staging
steffencruz Apr 18, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Add Generic Instruction
  • Loading branch information
bkb2135 committed Apr 15, 2024
commit 8241395ca47550cf689307685623ddcfd29db40c
6 changes: 4 additions & 2 deletions prompting/task_registry.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from .tasks import Task, MockTask, SummarizationTask, QuestionAnsweringTask, DebuggingTask, MathTask, DateQuestionAnsweringTask
from .tools import MockDataset, WikiDataset, HFCodingDataset, StackOverflowDataset, MathDataset, WikiDateDataset
from .tasks import Task, MockTask, SummarizationTask, QuestionAnsweringTask, DebuggingTask, MathTask, DateQuestionAnsweringTask, GenericInstructionTask
from .tools import MockDataset, WikiDataset, HFCodingDataset, StackOverflowDataset, MathDataset, WikiDateDataset, GenericInstructionDataset

# TODO: Expand this to include extra information beyond just the task and dataset names
mock_task, mock_dataset = MockTask.name, [MockDataset.name]
Expand All @@ -8,6 +8,7 @@
debugging_task, debugging_dataset = DebuggingTask.name, [HFCodingDataset.name]
math_task, math_dataset = MathTask.name, [MathDataset.name]
date_qa_task, date_qa_dataset = DateQuestionAnsweringTask.name, [WikiDateDataset.name]
generic_instruction_task, generic_instruction_dataset = GenericInstructionTask.name, [GenericInstructionDataset.name]

TASK_REGISTRY = {
mock_task: mock_dataset,
Expand All @@ -16,4 +17,5 @@
debugging_task: debugging_dataset,
math_task: math_dataset,
date_qa_task: date_qa_dataset,
generic_instruction_task: generic_instruction_dataset
}
2 changes: 1 addition & 1 deletion prompting/tasks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,6 @@
DateQuestionAnsweringTask.name: DateQuestionAnsweringTask,
SummarizationTask.name: SummarizationTask,
DebuggingTask.name: DebuggingTask,
#GenericInstructionTask.name: GenericInstructionTask,
GenericInstructionTask.name: GenericInstructionTask,
MathTask.name: MathTask,
}
14 changes: 8 additions & 6 deletions prompting/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,18 @@
StackOverflowDataset,
WikiDateDataset,
MathDataset,
GenericInstructionDataset,
)
from .selector import Selector

DATASETS = {
"mock": MockDataset,
"hf_coding": HFCodingDataset,
"wiki": WikiDataset,
#"stack_overflow": StackOverflowDataset,
"wiki_date": WikiDateDataset,
"math": MathDataset,
MockDataset.name: MockDataset,
HFCodingDataset.name: HFCodingDataset,
WikiDataset.name: WikiDataset,
#StackOverflowDataset.name: StackOverflowDataset,
MathDataset.name: MathDataset,
WikiDateDataset.name: WikiDateDataset,
GenericInstructionDataset.name: GenericInstructionDataset,
}


Expand Down
1 change: 1 addition & 0 deletions prompting/tools/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@
from .math import MathDataset
from .mock import MockDataset
from .wiki import WikiDataset, WikiDateDataset
from .generic_instruction import GenericInstructionDataset
46 changes: 44 additions & 2 deletions prompting/tools/datasets/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
# DEALINGS IN THE SOFTWARE.

import time
import random
import functools
from abc import ABC, abstractmethod
from typing import Dict
import bittensor as bt
Expand All @@ -28,7 +30,7 @@

class Dataset(ABC):
"""Base class for datasets."""
name = "dataset"

max_tries: int = 10

@abstractmethod
Expand Down Expand Up @@ -74,11 +76,51 @@ def next(
f"Could not find any samples which meet {self.__class__.__name__} requirements after {tries} tries."
)

info["source"] = (self.__class__.__name__,)
info["stats"] = {
"creator": self.__class__.__name__,
"fetch_time": time.time() - t0,
"num_tries": tries,
"fetch_method": method,
"next_kwargs": kwargs,
}
return Context(**info)


class TemplateDataset(Dataset):
    """Base class for datasets based on a template.

    This class reads two attributes that it does not define itself, so
    subclasses must provide them:

    * ``query_template`` -- a ``str.format`` template string.
    * ``params`` -- a mapping from placeholder name to a sized collection of
      candidate values for that placeholder.
    """

    @property
    def size(self):
        """Return the number of distinct parameter combinations.

        This is the product of the lengths of every candidate collection in
        ``self.params`` (``1`` when ``self.params`` is empty).
        """
        return functools.reduce(
            lambda x, y: x * y, [len(v) for v in self.params.values()], 1
        )

    def __repr__(self):
        return f"{self.__class__.__name__} with template: {self.query_template!r} and {self.size} possible phrases"

    @staticmethod
    def _select(selector, options):
        """Pick one value from ``options`` using ``selector``.

        ``random`` and ``search`` both default ``selector`` to ``None``;
        calling ``None`` would raise ``TypeError``, so fall back to a uniform
        random choice in that case.
        """
        if selector is None:
            # Imported locally under an alias so it cannot be confused with
            # the ``random`` method defined on this class.
            import random as _random

            # NOTE(review): assumes a selector returns a single element of
            # the options it is given -- confirm against Selector.
            return _random.choice(list(options))
        return selector(options)

    def get(self, params: dict):
        """Build a context dictionary from concrete template parameters.

        Args:
            params: Mapping of placeholder name to the chosen value. It is
                expanded into ``self.query_template.format(**params)``.

        Returns:
            A dict with the keys ``title``, ``topic``, ``subtopic``,
            ``content``, ``internal_links``, ``external_links``, ``tags`` and
            ``extra``.

        Raises:
            ValueError: If ``params`` is empty, since there is then no key to
                fall back on for ``title``/``topic``/``subtopic``.
            KeyError: If the template references a placeholder that is
                missing from ``params`` (raised by ``str.format``).
        """
        content = self.query_template.format(**params)
        if not params:
            raise ValueError(
                f"{self.__class__.__name__}.get requires at least one parameter."
            )

        keys = tuple(params)
        values = tuple(params.values())
        last = len(keys) - 1

        return {
            # Use the first key as the title if no field called title is present.
            "title": params.get("title", keys[0]),
            # Same for topic: second key, clamped to the last available key.
            "topic": params.get("topic", keys[min(1, last)]),
            # Same for subtopic: third key, clamped to the last available key.
            # (The previous ``len(keys) - 2`` clamp fell back to the *first*
            # key when exactly two parameters were supplied.)
            "subtopic": params.get("subtopic", keys[min(2, last)]),
            "content": content,
            "internal_links": values,
            "external_links": values,
            "tags": values,
            "extra": {},
        }

    def random(self, selector: Selector = None):
        """Return a context built from one selected value per parameter.

        Args:
            selector: Callable applied to each candidate collection in
                ``self.params``. When ``None``, a uniform random choice is
                used instead.
        """
        selected = {k: self._select(selector, v) for k, v in self.params.items()}
        return self.get(selected)

    def search(self, params: dict, selector: Selector = None):
        """Return a context using ``params`` where given, selection elsewhere.

        Args:
            params: Explicit values keyed by parameter name; keys that are
                not in ``self.params`` are ignored.
            selector: Callable applied to each candidate collection in
                ``self.params``. When ``None``, a uniform random choice is
                used instead.
        """
        # The selection is evaluated for every key, even those overridden by
        # ``params``, so the selector is invoked exactly as often as before.
        selected = {
            k: params.get(k, self._select(selector, v))
            for k, v in self.params.items()
        }
        return self.get(selected)
4 changes: 2 additions & 2 deletions prompting/tools/datasets/generic_instruction.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from .base import TemplateDataset


class GenericQuestionDataset(TemplateDataset):
class GenericInstructionDataset(TemplateDataset):
"Generic question dataset, which creates LLM prompts for asking questions."

name = "generic_instruction"
query_template = (
"Ask a {style} question about a {theme} {subtopic} related to {topic}"
)
Expand Down