Skip to content

Commit

Permalink
Add demo for eager mode
Browse files Browse the repository at this point in the history
  • Loading branch information
Lifann authored and oppenheimli committed Dec 29, 2021
1 parent 58bb534 commit 1de368e
Show file tree
Hide file tree
Showing 4 changed files with 79 additions and 147 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,6 @@ It will produce a model to `export_dir`.

## Inference:
```bash
python main.py --mode=test --export_dir="export" --batch_size=10
python main.py --mode=test --export_dir="export" --batch_size=64
```
It will print the accuracy of predictions on verified purchases of the digital video games.
It will print the accuracy of predictions on verified purchases of the digital video games.
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import tensorflow_datasets as tfds
import sys

ENCODDING_SEGMENT_LENGTH = 1000000
ENCODING_SEGMENT_LENGTH = 1000000
NON_LETTER_OR_NUMBER_PATTERN = r'[^a-zA-Z0-9]'

FAETURES = [
Expand All @@ -12,6 +12,8 @@
]
LABEL = 'verified_purchase'

NUM_FEATURE_SLOTS = 0


class _RawFeature(object):
"""
Expand All @@ -22,13 +24,15 @@ def __init__(self, dtype, category):
if not isinstance(category, int):
raise TypeError('category must be an integer.')
self.category = category
global NUM_FEATURE_SLOTS
NUM_FEATURE_SLOTS = max(NUM_FEATURE_SLOTS, self.category)

  def encode(self, tensor):
    # Abstract hook: subclasses must map a raw feature tensor into this
    # feature's dedicated integer-id segment (see match_category).
    raise NotImplementedError

def match_category(self, tensor):
min_code = self.category * ENCODDING_SEGMENT_LENGTH
max_code = (self.category + 1) * ENCODDING_SEGMENT_LENGTH
min_code = self.category * ENCODING_SEGMENT_LENGTH
max_code = (self.category + 1) * ENCODING_SEGMENT_LENGTH
mask = tf.math.logical_and(tf.greater_equal(tensor, min_code),
tf.less(tensor, max_code))
return mask
Expand All @@ -40,8 +44,8 @@ def __init__(self, dtype, category):
super(_StringFeature, self).__init__(dtype, category)

def encode(self, tensor):
tensor = tf.strings.to_hash_bucket_fast(tensor, ENCODDING_SEGMENT_LENGTH)
tensor += ENCODDING_SEGMENT_LENGTH * self.category
tensor = tf.strings.to_hash_bucket_fast(tensor, ENCODING_SEGMENT_LENGTH)
tensor += ENCODING_SEGMENT_LENGTH * self.category
return tensor


Expand All @@ -53,8 +57,8 @@ def __init__(self, dtype, category):
def encode(self, tensor):
tensor = tf.strings.regex_replace(tensor, NON_LETTER_OR_NUMBER_PATTERN, ' ')
tensor = tf.strings.split(tensor, sep=' ').to_tensor('')
tensor = tf.strings.to_hash_bucket_fast(tensor, ENCODDING_SEGMENT_LENGTH)
tensor += ENCODDING_SEGMENT_LENGTH * self.category
tensor = tf.strings.to_hash_bucket_fast(tensor, ENCODING_SEGMENT_LENGTH)
tensor += ENCODING_SEGMENT_LENGTH * self.category
return tensor


Expand All @@ -65,23 +69,23 @@ def __init__(self, dtype, category):

def encode(self, tensor):
tensor = tf.as_string(tensor)
tensor = tf.strings.to_hash_bucket_fast(tensor, ENCODDING_SEGMENT_LENGTH)
tensor += ENCODDING_SEGMENT_LENGTH * self.category
tensor = tf.strings.to_hash_bucket_fast(tensor, ENCODING_SEGMENT_LENGTH)
tensor += ENCODING_SEGMENT_LENGTH * self.category
return tensor


FEATURE_AND_ENCODER = {
'customer_id': _StringFeature(tf.string, 1),
'helpful_votes': _IntegerFeature(tf.int32, 2),
'product_category': _StringFeature(tf.string, 3),
'product_id': _StringFeature(tf.string, 4),
'product_parent': _StringFeature(tf.string, 5),
'product_title': _TextFeature(tf.string, 6),
#'review_body': _TextFeature(tf.string, 7), # bad feature
'review_headline': _TextFeature(tf.string, 8),
'review_id': _StringFeature(tf.string, 9),
'star_rating': _IntegerFeature(tf.int32, 10),
'total_votes': _IntegerFeature(tf.int32, 11),
'customer_id': _StringFeature(tf.string, 0),
'helpful_votes': _IntegerFeature(tf.int32, 1),
'product_category': _StringFeature(tf.string, 2),
'product_id': _StringFeature(tf.string, 3),
'product_parent': _StringFeature(tf.string, 4),
'product_title': _TextFeature(tf.string, 5),
'review_headline': _TextFeature(tf.string, 6),
'review_id': _StringFeature(tf.string, 7),
'star_rating': _IntegerFeature(tf.int32, 8),
'total_votes': _IntegerFeature(tf.int32, 9),
#'review_body': _TextFeature(tf.string, 10), # bad feature
}


Expand All @@ -99,6 +103,12 @@ def encode_feature(data):
return collected_features


@tf.function
def get_category(tensor):
  """Recover the category (slot) index from encoded feature ids.

  Every feature category owns a contiguous id segment of size
  ENCODING_SEGMENT_LENGTH, so integer division yields the slot index.
  """
  return tf.math.floordiv(tensor, ENCODING_SEGMENT_LENGTH)


def get_labels(data):
  """Return the 'verified_purchase' label field from a parsed example dict."""
  labels = data['verified_purchase']
  return labels

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
flags.DEFINE_integer('embedding_size', 4, 'Embedding size.')
flags.DEFINE_integer('shuffle_size', 3000,
'Shuffle pool size for input examples.')
flags.DEFINE_integer('reserved_features', 30000,
flags.DEFINE_integer('reserved_features', 80000,
'Number of reserved features in embedding.')
flags.DEFINE_string('export_dir', './export_dir', 'Directory to export model.')
flags.DEFINE_string('mode', 'train', 'Select the running mode: train or test.')
Expand All @@ -27,6 +27,12 @@ def train(num_steps):
# Create a model
model = video_game_model.VideoGameDnn(batch_size=FLAGS.batch_size,
embedding_size=FLAGS.embedding_size)
optimizer = de.DynamicEmbeddingOptimizer(tf.keras.optimizers.Adagrad(1E-2))
auc = tf.keras.metrics.AUC(num_thresholds=1000)
accuracy = tf.keras.metrics.BinaryAccuracy(dtype=tf.float32)
model.compile(optimizer=optimizer,
loss='binary_crossentropy',
metrics=[accuracy, auc])

# Get data iterator
iterator = feature.initialize_dataset(batch_size=FLAGS.batch_size,
Expand All @@ -39,7 +45,17 @@ def train(num_steps):
try:
for step in range(num_steps):
features, labels = feature.input_fn(iterator)
loss, auc = model.train(features, labels)

if step % 10 == 0:
verbose = 1
else:
verbose = 0

model.fit(features, labels, steps_per_epoch=1, epochs=1, verbose=verbose)

if verbose > 0:
print('step: {}, size of sparse domain: {}'.format(
step, model.embedding_store.size()))

# To avoid too many features burst the memory, we restrict
# the model embedding layer to `reserved_features` features.
Expand All @@ -48,20 +64,12 @@ def train(num_steps):
model.embedding_store.restrict(FLAGS.reserved_features,
trigger=int(FLAGS.reserved_features * 1.2))

if step % 10 == 0:
print('step: {}, loss: {}, var_size: {}, auc: {}'.format(
step, loss, model.embedding_store.size(), auc))

except tf.errors.OutOfRangeError:
print('Run out the training data.')

# Set TFRA ops become legit.
options = tf.saved_model.SaveOptions(namespace_whitelist=['TFRA'])

# Save the model for inference.
inference_model = video_game_model.VideoGameDnnInference(model)
inference_model(feature.input_fn(iterator)[0])
inference_model.save('export', signatures=None, options=options)
options = tf.saved_model.SaveOptions(namespace_whitelist=['TFRA'])
model.save(FLAGS.export_dir, options=options)


def test(num_steps):
Expand All @@ -71,7 +79,8 @@ def test(num_steps):

# Load model.
options = tf.saved_model.SaveOptions(namespace_whitelist=['TFRA'])
model = tf.saved_model.load('export', tags='serve', options=options)
model = tf.saved_model.load(FLAGS.export_dir, tags='serve', options=options)
ctr = tf.metrics.Accuracy()
sig = model.signatures['serving_default']

# Get data iterator
Expand All @@ -87,8 +96,8 @@ def test(num_steps):
probabilities = tf.reshape(probabilities, (-1))
preds = tf.cast(tf.round(probabilities), dtype=tf.int32)
labels = tf.cast(labels, dtype=tf.int32)
ctr = tf.metrics.Accuracy()(labels, preds)
print("step: {}, ctr: {}".format(step, ctr))
ctr.update_state(labels, preds)
print("step: {}, ctr: {}".format(step, ctr.result()))


def main(argv):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,134 +17,47 @@ def __init__(self, batch_size=1, embedding_size=1):
self.batch_size = batch_size
self.embedding_size = embedding_size

# Create embedding variable by `tfra.dynamic_embedding` API.
self.embedding_store = de.get_variable(
'video_feature_embedding',
key_dtype=tf.int64,
value_dtype=tf.float32,
dim=embedding_size,
devices=['/CPU:0'],
initializer=tf.keras.initializers.RandomNormal(-0.1, 0.1),
trainable=True,
restrict_policy=de.TimestampRestrictPolicy)
# Create embedding as first layer.
self.embedding_layer = de.layers.SlotEmbedding(
self.embedding_size,
feature.NUM_FEATURE_SLOTS + 1,
initializer=tf.keras.initializers.RandomNormal(0.0, 0.2),
restrict_policy=de.TimestampRestrictPolicy,
slot_map_fn=feature.get_category,
bp_v2=True,
combiner='mean',
name='rua')
self.embedding_store = self.embedding_layer.params

# Create dense layers.
self.flat = tf.keras.layers.Flatten()
self.dnn0 = tf.keras.layers.Dense(
64,
activation='relu',
use_bias=True,
bias_initializer='glorot_uniform',
kernel_regularizer=tf.keras.regularizers.L1(0.01),
bias_regularizer=tf.keras.regularizers.L1(0.02),
kernel_regularizer=tf.keras.regularizers.L1(1E-2),
bias_regularizer=tf.keras.regularizers.L1(2E-2),
)
self.dnn1 = tf.keras.layers.Dense(
16,
activation='relu',
use_bias=True,
bias_initializer='glorot_uniform',
kernel_regularizer=tf.keras.regularizers.L1(0.01),
bias_regularizer=tf.keras.regularizers.L1(0.02),
kernel_regularizer=tf.keras.regularizers.L1(1E-2),
bias_regularizer=tf.keras.regularizers.L1(2E-2),
)
self.dnn2 = tf.keras.layers.Dense(1, use_bias=False)
self.embedding_trainables = []

# Create optimizer.
self.optmz = de.DynamicEmbeddingOptimizer(tf.keras.optimizers.Adam(0.01))

# Metric observer.
self._auc = tf.metrics.AUC()

@staticmethod
def lookup_sparse_weights(model, features, name='lookup_sparse_weights'):
if not isinstance(model, VideoGameDnn):
raise TypeError('Only serve VideoGameDnn model.')
embed, tw = de.embedding_lookup_unique(model.embedding_store,
features,
name=name,
return_trainable=True)
if not model.embedding_trainables:
model.embedding_trainables.append(tw)
return embed

  @staticmethod
  def embedding_fn(model, x):
    """Look up and pool embeddings for a batch of encoded feature ids.

    Made static so it can be reused elsewhere (e.g. by an inference model
    that supplies a different `lookup_sparse_weights`).

    Args:
      model: object providing `lookup_sparse_weights` and `embedding_size`.
      x: integer id tensor; assumed shape (batch, num_ids) — TODO confirm.
        Each id's value range identifies its feature category
        (see `match_category`).

    Returns:
      A dense (batch, num_categories * embedding_size) tensor of
      per-category sum-pooled embeddings, concatenated in
      FEATURE_AND_ENCODER iteration order.
    """
    batch_size = tf.shape(x)[0]
    # Flatten so the lookup and the per-category masking work on a 1-D id list.
    x = tf.reshape(x, (-1,))
    embed_w = model.lookup_sparse_weights(model, x)
    embeds = []
    for name, encoder in feature.FEATURE_AND_ENCODER.items():
      # Pick out the ids (and their embedding rows) belonging to this category.
      mask = encoder.match_category(x)
      indices = tf.where(mask)
      categorical_w = tf.gather(embed_w, indices)
      # NOTE(review): assumes every example contributes the same number of
      # ids per category, so the flat selection reshapes evenly — confirm.
      categorical_w = tf.reshape(categorical_w,
                                 (batch_size, -1, model.embedding_size))
      # Sum-pool the ids of each example into a single embedding vector.
      categorical_w = tf.reduce_sum(categorical_w, axis=1)
      embeds.append(
          tf.reshape(categorical_w, (batch_size, model.embedding_size)))
    embeds = tf.concat(embeds, axis=1)
    return embeds

def dnn_net(self, x):
out = x
out = self.dnn0(x)
out = self.dnn1(out)
out = self.dnn2(out)
return out

@tf.function
def call(self, x):
"""
`call` method override whom in `tf.keras.Model`.
`call` method override from `tf.keras.Model`.
"""
embed = self.embedding_fn(self, x)
logits = self.dnn_net(embed)
x = self.embedding_layer(x)
x = self.flat(x)
x = self.dnn0(x)
x = self.dnn1(x)
logits = self.dnn2(x)
preds = tf.nn.sigmoid(logits)
return preds

  def train(self, features, labels):
    """Run one optimization step on a batch of features and labels.

    Uses `GradientTape` explicitly so the gradients can be value-clipped
    to [-1, 1] before applying, to avoid parameter explosion;
    `optimizer.minimize` would also work.

    Returns:
      A (loss, auc) tuple; auc is the running AUC accumulated across steps.
    """
    with tf.GradientTape() as tape:
      preds = self(features)
      preds = tf.reshape(preds, (-1))
      labels = tf.cast(labels, dtype=tf.float32)
      # NOTE(review): mean-squared error on sigmoid outputs of a binary
      # label; binary cross-entropy is the usual choice — confirm intent.
      loss = tf.keras.losses.MeanSquaredError()(preds, labels)
    grads = tape.gradient(loss, self.trainable_variables)
    # Clip by value to keep updates bounded.
    grads = [tf.clip_by_value(g, -1.0, 1.0) for g in grads]
    self.optmz.apply_gradients(zip(grads, self.trainable_variables))
    self._auc.update_state(labels, preds)
    return loss, self._auc.result()


class VideoGameDnnInference(tf.keras.Model):
  """
  Model built for inference. It shares the dense layers and the sparse
  embedding store of a trained model, and overrides
  `lookup_sparse_weights` with a plain table lookup, because the local
  trainable wrappers are not needed at inference time.
  """

  def __init__(self, model):
    # `model` is a trained VideoGameDnn whose layers and embedding store
    # are reused directly (no copies are made).
    super(VideoGameDnnInference, self).__init__()
    self.embedding_size = model.embedding_size
    self.embedding_store = model.embedding_store
    self.embedding_fn = model.embedding_fn
    self.dnn0 = model.dnn0
    self.dnn1 = model.dnn1
    self.dnn2 = model.dnn2
    self.dnn_net = model.dnn_net

  @staticmethod
  def lookup_sparse_weights(model, features):
    # Direct read from the embedding table; no trainable wrappers created.
    return model.embedding_store.lookup(features)

  def call(self, x):
    # Passing `self` makes embedding_fn dispatch to THIS class's
    # lookup_sparse_weights rather than the training model's.
    x = self.embedding_fn(self, x)
    out = self.dnn_net(x)
    return tf.nn.sigmoid(out)

0 comments on commit 1de368e

Please sign in to comment.