Merge pull request #264 from huangshiyu13/main
update test
huangshiyu13 authored Oct 26, 2023
2 parents 23cfd38 + 0707ba6 commit e5305cc
Showing 16 changed files with 230 additions and 338 deletions.
1 change: 0 additions & 1 deletion .gitignore
@@ -153,7 +153,6 @@ run_results/
api_docs
.vscode
*.pkl
api_docs
*.json
opponent_pool
!/examples/selfplay/opponent_templates/tictactoe_opponent/info.json
1 change: 1 addition & 0 deletions examples/nlp/nlp_ppo.yaml
@@ -28,5 +28,6 @@ reward_class:
args: {
"intent_model": "rajkumarrrk/roberta-daily-dialog-intent-classifier",
"ref_model": "rajkumarrrk/gpt2-fine-tuned-on-daily-dialog",
"use_deepspeed": true,
}
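The new use_deepspeed entry reaches the reward components changed later in this commit (intent.py and kl_penalty.py below) through these reward_class args. A minimal sketch of forwarding the values by hand, assuming only the Intent constructor signature shown later in this diff; the actual YAML-to-reward plumbing is not part of this hunk:

# Sketch only: hand-built args mirroring the YAML above; the real config loader is not shown here.
from openrl.envs.nlp.rewards.intent import Intent

args = {
    "intent_model": "rajkumarrrk/roberta-daily-dialog-intent-classifier",
    "use_deepspeed": True,
}
reward_fn = Intent(intent_model=args["intent_model"], use_deepspeed=args["use_deepspeed"])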

3 changes: 0 additions & 3 deletions openrl/envs/__init__.py
@@ -16,12 +16,9 @@

toy_all_envs = [
"BitFlippingEnv",
"FakeImageEnv",
"IdentityEnv",
"IdentityEnvcontinuous",
"IdentityEnvBox",
"IdentityEnvMultiBinary",
"IdentityEnvMultiDiscrete",
"SimpleMultiObsEnv",
"SimpleMultiObsEnv",
]
2 changes: 1 addition & 1 deletion openrl/envs/nlp/daily_dialog_env.py
@@ -43,7 +43,7 @@ def __init__(
self.env_name = "daily_dialog"
tokenizer_name = cfg.env.args["tokenizer_path"]
if tokenizer_name == "builtin_BPE":
-            from tokenizers import AddedToken, Tokenizer, models
+            from tokenizers import Tokenizer, models

self.tokenizer = Tokenizer(models.BPE())
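The builtin_BPE branch constructs an untrained byte-pair-encoding tokenizer in memory, so tests do not need to download a pretrained tokenizer. A standalone sketch of the same construction with the tokenizers library; the training step is illustrative only and is not part of daily_dialog_env.py:

# Sketch: an in-memory BPE tokenizer like the one the builtin_BPE path creates.
from tokenizers import Tokenizer, models, pre_tokenizers, trainers

tok = Tokenizer(models.BPE(unk_token="[UNK]"))
tok.pre_tokenizer = pre_tokenizers.Whitespace()
trainer = trainers.BpeTrainer(special_tokens=["[UNK]", "[PAD]"])
tok.train_from_iterator(["hello world", "how are you"], trainer=trainer)  # illustrative toy corpus
print(tok.encode("hello you").tokens)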

38 changes: 33 additions & 5 deletions openrl/envs/nlp/rewards/intent.py
@@ -25,15 +25,42 @@ def get_eval_ds_config(offload, stage=0):


class Intent:
-    def __init__(self, intent_model: str, intent_coeff: float = 1.0) -> None:
+    def __init__(
+        self, intent_model: str, intent_coeff: float = 1.0, use_deepspeed: bool = True
+    ) -> None:
super().__init__()

self._intent_coeff = intent_coeff
-        self.use_deepspeed = True  # TODO
+        self.use_deepspeed = use_deepspeed
+        if intent_model == "builtin_intent":
+            from transformers import GPT2Config, GPT2LMHeadModel
+
+            class TestTokenizer:
+                def __call__(
+                    self,
+                    input_texts,
+                    return_tensors="pt",
+                    truncation=True,
+                    padding=True,
+                    max_length=None,
+                ):
+                    class EncodedOutput:
+                        def __init__(self, input_ids, attention_mask):
+                            self.input_ids = input_ids
+                            self.attention_mask = attention_mask
+
+                    input_ids = torch.zeros((32), dtype=torch.long)
+                    attention_masks = torch.zeros((32), dtype=torch.long)
+                    return EncodedOutput(input_ids, attention_masks)
+
+            self._tokenizer = TestTokenizer()
+            config = GPT2Config()
+            self._model = GPT2LMHeadModel(config)

-        model_path = data_abs_path(intent_model)
-        self._tokenizer = AutoTokenizer.from_pretrained(intent_model)
-        self._model = AutoModelForSequenceClassification.from_pretrained(model_path)
+        else:
+            model_path = data_abs_path(intent_model)
+            self._tokenizer = AutoTokenizer.from_pretrained(intent_model)
+            self._model = AutoModelForSequenceClassification.from_pretrained(model_path)

if self.use_deepspeed:
import deepspeed
@@ -87,6 +114,7 @@ def get_input_for_classifier(prompt, generated_text):
input_ids=encoded.input_ids.to(self._device),
attention_mask=encoded.attention_mask.to(self._device),
)

pred_labels = torch.argmax(outputs.logits, dim=1).tolist()

score = (np.array(pred_labels) == np.array(target_intents)) * 1.0
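With these changes the Intent reward has two construction paths: the normal path loads a pretrained classifier through AutoModelForSequenceClassification, while the builtin_intent path uses the stub tokenizer plus a randomly initialized GPT-2 so unit tests avoid model downloads. A sketch based only on the constructor shown in this hunk; use_deepspeed=False assumes DeepSpeed is not wanted in the test environment:

# Sketch: the lightweight test configuration of the Intent reward.
from openrl.envs.nlp.rewards.intent import Intent

# builtin_intent selects the stub tokenizer and the untrained GPT-2 defined above;
# use_deepspeed=False skips the DeepSpeed initialization branch.
reward_fn = Intent(intent_model="builtin_intent", intent_coeff=1.0, use_deepspeed=False)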
11 changes: 9 additions & 2 deletions openrl/envs/nlp/rewards/kl_penalty.py
@@ -31,14 +31,21 @@ def __init__(
action_space: gym.Space,
ref_model: str,
apply_model_parallel: bool = True,
+        use_deepspeed: bool = True,
):
super().__init__()
-        self.use_deepspeed = True
+        self.use_deepspeed = use_deepspeed
self.use_fp16 = True

# reference model
self._apply_model_parallel = apply_model_parallel
-        self._ref_net = AutoModelForCausalLM.from_pretrained(ref_model)
+        if ref_model == "builtin_ref":
+            from transformers import GPT2Config, GPT2LMHeadModel
+
+            config = GPT2Config()
+            self._ref_net = GPT2LMHeadModel(config)
+        else:
+            self._ref_net = AutoModelForCausalLM.from_pretrained(ref_model)
self._ref_net = self._ref_net.eval()
if self.use_deepspeed:
import deepspeed
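The same pattern applies here: ref_model="builtin_ref" substitutes a randomly initialized GPT-2 for the pretrained reference model used by the KL penalty. A sketch under stated assumptions: the class in this module is assumed to be named KLPenalty (its name is outside the shown hunk), and a gymnasium Discrete space over the default GPT-2 vocabulary (50257 tokens) is assumed to be an acceptable value for the gym.Space parameter:

# Sketch only: the class name KLPenalty and the Discrete(50257) action space are assumptions.
from gymnasium.spaces import Discrete

from openrl.envs.nlp.rewards.kl_penalty import KLPenalty

kl = KLPenalty(
    action_space=Discrete(50257),
    ref_model="builtin_ref",  # untrained GPT-2 instead of a downloaded reference model
    use_deepspeed=False,      # skip the DeepSpeed branch in a plain test environment
)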
16 changes: 12 additions & 4 deletions openrl/envs/nlp/rewards/meteor.py
@@ -6,13 +6,21 @@
import openrl.envs.nlp as nlp


+class VirtualMetric:
+    def compute(self, predictions: Any, references: Any) -> Dict[str, float]:
+        return {"meteor": 0.0}
+
+
class Meteor:
-    def __init__(self, meteor_coeff: int) -> None:
+    def __init__(self, meteor_coeff: int, test: bool = False) -> None:
super().__init__()
self._meteor_coeff = meteor_coeff
-        self._metric = evaluate.load(
-            str(Path(nlp.__file__).parent / "utils/metrics/meteor.py")
-        )
+        if test:
+            self._metric = VirtualMetric()
+        else:
+            self._metric = evaluate.load(
+                str(Path(nlp.__file__).parent / "utils/metrics/meteor.py")
+            )

def __call__(
self,
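VirtualMetric gives tests a drop-in replacement for the METEOR metric that always reports 0.0, so evaluate and its metric assets are never loaded. A sketch of both modes, using only the constructor shown above (the rest of __call__ is outside this hunk):

# Sketch: test=True avoids loading the real METEOR metric script.
from openrl.envs.nlp.rewards.meteor import Meteor

test_reward = Meteor(meteor_coeff=1, test=True)  # VirtualMetric, always {"meteor": 0.0}
real_reward = Meteor(meteor_coeff=1)             # loads utils/metrics/meteor.py via evaluate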
20 changes: 1 addition & 19 deletions openrl/envs/toy_envs/__init__.py
@@ -18,25 +18,12 @@
from typing import Any

from openrl.envs.toy_envs.bit_flipping_env import BitFlippingEnv
-from openrl.envs.toy_envs.identity_env import (
-    FakeImageEnv,
-    IdentityEnv,
-    IdentityEnvBox,
-    IdentityEnvcontinuous,
-    IdentityEnvMultiBinary,
-    IdentityEnvMultiDiscrete,
-)
-from openrl.envs.toy_envs.multi_input_envs import SimpleMultiObsEnv
+from openrl.envs.toy_envs.identity_env import IdentityEnv, IdentityEnvcontinuous

__all__ = [
"BitFlippingEnv",
"FakeImageEnv",
"IdentityEnv",
"IdentityEnvcontinuous",
"IdentityEnvBox",
"IdentityEnvMultiBinary",
"IdentityEnvMultiDiscrete",
"SimpleMultiObsEnv",
]


@@ -49,13 +36,8 @@

env_dict = {
"BitFlippingEnv": BitFlippingEnv,
"FakeImageEnv": FakeImageEnv,
"IdentityEnv": IdentityEnv,
"IdentityEnvcontinuous": IdentityEnvcontinuous,
"IdentityEnvBox": IdentityEnvBox,
"IdentityEnvMultiBinary": IdentityEnvMultiBinary,
"IdentityEnvMultiDiscrete": IdentityEnvMultiDiscrete,
"SimpleMultiObsEnv": SimpleMultiObsEnv,
}
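After this change only BitFlippingEnv, IdentityEnv, and IdentityEnvcontinuous remain exported and registered; the removed names (FakeImageEnv, IdentityEnvBox, IdentityEnvMultiBinary, IdentityEnvMultiDiscrete, SimpleMultiObsEnv) are no longer importable from this package. A short import-level sketch:

# Sketch: the surviving toy-env exports after this commit.
from openrl.envs.toy_envs import BitFlippingEnv, IdentityEnv, IdentityEnvcontinuous

try:
    from openrl.envs.toy_envs import FakeImageEnv  # removed in this commit
except ImportError as err:
    print(err)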


111 changes: 0 additions & 111 deletions openrl/envs/toy_envs/identity_env.py
@@ -157,114 +157,3 @@ def _get_reward(self, action: T) -> float:

def render(self, mode: str = "human") -> None:
pass


# Not Work Yet
class IdentityEnvBox(IdentityEnv[np.ndarray]):
def __init__(
self,
low: float = -1.0,
high: float = 1.0,
eps: float = 0.05,
ep_length: int = 100,
):
"""
Identity environment for testing purposes
:param low: the lower bound of the box dim
:param high: the upper bound of the box dim
:param eps: the epsilon bound for correct value
:param ep_length: the length of each episode in timesteps
"""
space = spaces.Box(low=low, high=high, shape=(1,), dtype=np.float32)
super().__init__(ep_length=ep_length, space=space)
self.eps = eps

def step(
self, action: np.ndarray
) -> Tuple[np.ndarray, float, bool, Dict[str, Any]]:
reward = self._get_reward(action)
self._choose_next_state()
self.current_step += 1
done = self.current_step >= self.ep_length
return self.state, reward, done, {}

def _get_reward(self, action: np.ndarray) -> float:
return (
1.0 if (self.state - self.eps) <= action <= (self.state + self.eps) else 0.0
)


# Not Work Yet
class IdentityEnvMultiDiscrete(IdentityEnv[np.ndarray]):
def __init__(self, dim: int = 1, ep_length: int = 100) -> None:
"""
Identity environment for testing purposes
:param dim: the size of the dimensions you want to learn
:param ep_length: the length of each episode in timesteps
"""
space = spaces.MultiDiscrete([dim, dim])
super().__init__(ep_length=ep_length, space=space)


# Not Work Yet
class IdentityEnvMultiBinary(IdentityEnv[np.ndarray]):
def __init__(self, dim: int = 1, ep_length: int = 100) -> None:
"""
Identity environment for testing purposes
:param dim: the size of the dimensions you want to learn
:param ep_length: the length of each episode in timesteps
"""
space = spaces.MultiBinary(dim)
super().__init__(ep_length=ep_length, space=space)


# Not Work Yet
class FakeImageEnv(gym.Env):
"""
Fake image environment for testing purposes, it mimics Atari games.
:param action_dim: Number of discrete actions
:param screen_height: Height of the image
:param screen_width: Width of the image
:param n_channels: Number of color channels
:param discrete: Create discrete action space instead of continuous
:param channel_first: Put channels on first axis instead of last
"""

def __init__(
self,
action_dim: int = 6,
screen_height: int = 84,
screen_width: int = 84,
n_channels: int = 1,
discrete: bool = True,
channel_first: bool = False,
) -> None:
self.observation_shape = (screen_height, screen_width, n_channels)
if channel_first:
self.observation_shape = (n_channels, screen_height, screen_width)
self.observation_space = spaces.Box(
low=0, high=255, shape=self.observation_shape, dtype=np.uint8
)
if discrete:
self.action_space = spaces.Discrete(action_dim)
else:
self.action_space = spaces.Box(low=-1, high=1, shape=(5,), dtype=np.float32)
self.ep_length = 10
self.current_step = 0

def reset(self) -> np.ndarray:
self.current_step = 0
return self.observation_space.sample()

def step(self, action: Union[np.ndarray, int]):
reward = 0.0
self.current_step += 1
done = self.current_step >= self.ep_length
return self.observation_space.sample(), reward, done, {}

def render(self, mode: str = "human") -> None:
pass