Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Port BeamSampler to core #1181

Merged
merged 7 commits into from
Jul 31, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 51 additions & 47 deletions keras_nlp/samplers/beam_sampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,13 @@
"""Beam Sampler."""

import tensorflow as tf
from tensorflow.compiler.tf2xla.python.xla import dynamic_update_slice

from keras_nlp.api_export import keras_nlp_export
from keras_nlp.backend import keras
from keras_nlp.backend import ops
from keras_nlp.samplers.sampler import Sampler
from keras_nlp.samplers.sampler import call_args_docstring
from keras_nlp.utils.python_utils import format_docstring
from keras_nlp.utils.tensor_utils import assert_tf_backend


@format_docstring(call_args=call_args_docstring)
Expand Down Expand Up @@ -102,10 +101,6 @@ def __init__(
return_all_beams=False,
**kwargs,
):
# Temporarily turn off beam search in other backends.
# No technical blockers here, just need tf -> ops rewrite.
assert_tf_backend(self.__class__.__name__)

super().__init__(**kwargs)
self.num_beams = num_beams
self.return_all_beams = return_all_beams
Expand All @@ -120,89 +115,96 @@ def __call__(
end_token_id=None,
hidden_states=None,
):
batch_size, max_length = tf.shape(prompt)[0], tf.shape(prompt)[1]
# Make sure max length and start index are the same dtype.
index = tf.cast(index, max_length.dtype)
batch_size, max_length = ops.shape(prompt)[0], ops.shape(prompt)[1]
index = ops.cast(index, "int32")

def create_beams(x):
"""Add initial beam state."""
return tf.repeat(x, self.num_beams, axis=0)
return ops.repeat(x, self.num_beams, axis=0)

def flatten_beams(x):
"""Combine the beam dim and batch dim."""
flat_shape = [batch_size * self.num_beams] + x.shape.as_list()[2:]
return tf.reshape(x, shape=flat_shape)
flat_shape = (batch_size * self.num_beams,) + tuple(x.shape)[2:]
return ops.reshape(x, flat_shape)

def unflatten_beams(x):
"""Separate the beam dim and batch dim."""
unflat_shape = [batch_size, self.num_beams] + x.shape.as_list()[1:]
return tf.reshape(x, shape=unflat_shape)
unflat_shape = (batch_size, self.num_beams) + tuple(x.shape)[1:]
return ops.reshape(x, unflat_shape)

if mask is None:
mask = tf.zeros_like(prompt, dtype="bool")
mask = ops.zeros_like(prompt, dtype="bool")
else:
mask = tf.cast(mask, dtype="bool")
# `tf.while_loop` will not accept `None` as a value for `loop_vars`.
cache = () if cache is None else cache
mask = ops.cast(mask, dtype="bool")
# `ops.while_loop` will not accept `None` as a value for `loop_vars`.
has_cache = cache is not None
cache = cache if has_cache else ()
# Add extra sequences for each beam.
prompt, mask = create_beams(prompt), create_beams(mask)
cache = tf.nest.map_structure(create_beams, cache)
# Setup the initial beam log-likelihoods.
# On the first loop, make sure only the original beam is considered.
log_probs = tf.constant([[0.0] + [-1e9] * (self.num_beams - 1)])
log_probs = flatten_beams(tf.repeat(log_probs, batch_size, axis=0))
log_probs = ops.array(
[[0.0] + [-1e9] * (self.num_beams - 1)], dtype="float32"
)
log_probs = flatten_beams(ops.repeat(log_probs, batch_size, axis=0))

def cond(prompt, cache, index, log_probs):
if end_token_id is None:
return True
# Stop if all sequences have produced a *new* end_token_id.
end_tokens = (prompt == end_token_id) & (~mask)
prompt_done = tf.reduce_any(end_tokens, axis=-1)
return not tf.reduce_all(prompt_done)
prompt_done = ops.any(end_tokens, axis=-1)
return ops.logical_not(ops.all(prompt_done))

def body(prompt, cache, index, log_probs):
# Compute the softmax distribution for the next token.
logits, _, cache = next(prompt, cache, index)
vocab_size = tf.shape(logits)[-1]
vocab_size = ops.shape(logits)[-1]
probs = keras.activations.softmax(logits / self.temperature)

# Compute the running log-likelihood of each new candidate.
next_log_probs = tf.math.log(probs) + log_probs[..., tf.newaxis]
next_log_probs = ops.log(probs) + log_probs[..., None]
# Reshape `preds` to shape `(batch_size, num_beams * vocab_size)`.
next_log_probs = tf.reshape(next_log_probs, shape=[batch_size, -1])
next_log_probs = ops.reshape(next_log_probs, [batch_size, -1])

# Compute the top beam indices and next tokens.
next_log_probs, indices = tf.math.top_k(
next_log_probs, indices = ops.top_k(
next_log_probs, k=self.num_beams, sorted=False
)
beam_indices = indices // vocab_size
next_token = flatten_beams(indices % vocab_size)
# Ensure shape is `[None]`, otherwise it causes issues after
# converting to TFLite.
next_token = tf.ensure_shape(next_token, [None])
# We need `ensure_shape` as `top_k` will change the static shape.
next_log_probs = flatten_beams(next_log_probs)
log_probs = tf.ensure_shape(next_log_probs, log_probs.shape)
# Work around for top_k output shape on tf backend.
if isinstance(log_probs, tf.Tensor):
log_probs = tf.ensure_shape(next_log_probs, log_probs.shape)
else:
log_probs = next_log_probs

def gather_beams(x):
x = unflatten_beams(x)
x = tf.gather(x, beam_indices, axis=1, batch_dims=1)
indices = beam_indices
for axis in range(2, len(x.shape)):
indices = ops.expand_dims(indices, axis=axis)
x = ops.take_along_axis(x, indices, axis=1)
return flatten_beams(x)

prompt = gather_beams(prompt)
cache = tf.nest.map_structure(gather_beams, cache)
if has_cache:
cache = tf.nest.map_structure(gather_beams, cache)

# Update each beam with the next token.
next_token = tf.cast(next_token, prompt.dtype)
next_token = ops.cast(next_token, prompt.dtype)
# Don't overwrite anywhere mask is True.
next_token = tf.where(mask[:, index], prompt[:, index], next_token)
next_token = ops.where(mask[:, index], prompt[:, index], next_token)
# Update the prompt with the next token.
next_token = next_token[:, tf.newaxis]
prompt = dynamic_update_slice(prompt, next_token, [0, index])
next_token = next_token[:, None]
prompt = ops.slice_update(prompt, [0, index], next_token)
# Return the iteration of the loop state.
return (prompt, cache, index + 1, log_probs)

prompt, _, _, log_probs = tf.while_loop(
prompt, _, _, log_probs = self.run_loop(
cond=cond,
body=body,
loop_vars=(prompt, cache, index, log_probs),
Expand All @@ -213,21 +215,23 @@ def gather_beams(x):
all_log_probs = unflatten_beams(log_probs)

if self.return_all_beams:
sorted_indices = tf.argsort(
all_log_probs, axis=-1, direction="DESCENDING"
)
sorted_log_probs = tf.gather(
all_log_probs, sorted_indices, axis=-1, batch_dims=1
sorted_indices = ops.argsort(-all_log_probs, axis=-1)
sorted_log_probs = ops.take_along_axis(
all_log_probs,
sorted_indices,
axis=1,
)
sorted_prompts = tf.gather(
all_prompts, sorted_indices, axis=1, batch_dims=1
sorted_prompts = ops.take_along_axis(
all_prompts,
ops.expand_dims(sorted_indices, -1),
axis=1,
)
return sorted_prompts, sorted_log_probs
else:
# Gather the top beam at each batch index.
top_beams = tf.math.argmax(all_log_probs, axis=-1)[:, tf.newaxis]
prompt = tf.gather(all_prompts, top_beams, axis=1, batch_dims=1)
return tf.squeeze(prompt, axis=1)
top_beams = ops.argmax(all_log_probs, axis=-1)[:, None, None]
prompt = ops.take_along_axis(all_prompts, top_beams, axis=1)
return ops.squeeze(prompt, axis=1)

def get_config(self):
config = super().get_config()
Expand Down
1 change: 0 additions & 1 deletion keras_nlp/samplers/beam_sampler_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
from keras_nlp.tests.test_case import TestCase


@pytest.mark.tf_only
class BeamSamplerTest(TestCase):
def setUp(self):
super().setUp()
Expand Down