Merge pull request #506 from rsepassi/push
v1.4.2
lukaszkaiser authored Jan 13, 2018
2 parents d9cba5c + c7f24da commit a1d7ed7
Showing 92 changed files with 11,664 additions and 1,966 deletions.
5 changes: 3 additions & 2 deletions .travis.yml
@@ -14,10 +14,11 @@ env:
- T2T_DATA_DIR=/tmp/t2t-data
- T2T_TRAIN_DIR=/tmp/t2t-train
script:
- pytest --ignore=tensor2tensor/utils/registry_test.py --ignore=tensor2tensor/problems_test.py --ignore=tensor2tensor/tpu/tpu_trainer_lib_test.py --ignore=tensor2tensor/data_generators/algorithmic_math_test.py
- pytest --ignore=tensor2tensor/utils/registry_test.py --ignore=tensor2tensor/problems_test.py --ignore=tensor2tensor/utils/trainer_lib_test.py --ignore=tensor2tensor/data_generators/algorithmic_math_test.py
- pytest tensor2tensor/utils/registry_test.py
- pytest tensor2tensor/tpu/tpu_trainer_lib_test.py
- pytest tensor2tensor/utils/trainer_lib_test.py
- t2t-datagen 2>&1 | grep translate && echo passed
- t2t-trainer --registry_help --t2t_usr_dir=./tensor2tensor/test_data/example_usr_dir 2>&1 | grep my_very_own_hparams && echo passed
- python -c "from tensor2tensor.models import transformer; print(transformer.Transformer.__name__)"
- t2t-trainer --registry_help
- mkdir $T2T_DATA_DIR
32 changes: 2 additions & 30 deletions README.md
@@ -296,36 +296,8 @@ specifying the `--t2t_usr_dir` flag in `t2t-trainer`.
You can do so for models, hyperparameter sets, modalities, and problems. Please
do submit a pull request if your component might be useful to others.

Here's an example with a new hyperparameter set:

```python
# In ~/usr/t2t_usr/my_registrations.py

from tensor2tensor.models import transformer
from tensor2tensor.utils import registry

@registry.register_hparams
def transformer_my_very_own_hparams_set():
  hparams = transformer.transformer_base()
  hparams.hidden_size = 1024
  ...
```

```python
# In ~/usr/t2t_usr/__init__.py
from . import my_registrations
```

```
t2t-trainer --t2t_usr_dir=~/usr/t2t_usr --registry_help
```

You'll see under the registered HParams your
`transformer_my_very_own_hparams_set`, which you can directly use on the command
line with the `--hparams_set` flag.

`t2t-datagen` also supports the `--t2t_usr_dir` flag for `Problem`
registrations.
See the [`example_usr_dir`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/test_data/example_usr_dir)
for an example user directory.
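
For instance, a sketch of pointing datagen at such a directory (`my_problem` is a stand-in for whatever `Problem` name your directory registers; the flags themselves appear elsewhere in these docs):

```
t2t-datagen \
  --t2t_usr_dir=~/usr/t2t_usr \
  --problem=my_problem \
  --data_dir=~/t2t_data
```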

## Adding a dataset

15 changes: 8 additions & 7 deletions docs/cloud_tpu.md
@@ -5,8 +5,10 @@ for ML training.

Models and hparams that are known to work on TPU:
* `transformer` with `transformer_tpu`
* `xception` with `xception_base`
* `transformer_encoder` with `transformer_tpu`
* `transformer_decoder` with `transformer_tpu`
* `resnet50` with `resnet_base`
* `revnet104` with `revnet_base`

To run on TPUs, you need to be part of the alpha program; if you're not, these
commands won't work for you currently, but access will expand soon, so get
@@ -34,16 +36,15 @@ gcloud compute instances create $USER-vm \
Launch the TPU instance; the Python program will connect to this to train on the
TPU device.
```
gcloud alpha compute tpus list
# Make an IP with structure 10.240.X.2 that’s unique in the list
TPU_IP=10.240.0.2
gcloud alpha compute tpus create \
  $USER-tpu \
  --range=${TPU_IP/%2/0}/29 \
  --version=nightly
```

To see all running TPU instances: `gcloud alpha compute tpus list`. The
`TPU_IP` should be unique among those listed and follow the format `10.240.i.2`.
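
The `--range` argument above is derived from `TPU_IP` with a bash suffix substitution: `${TPU_IP/%2/0}` replaces the trailing `2` with `0`, yielding the `/29` network base. The same derivation as a quick Python sketch:

```python
# Mirror of the bash expansion ${TPU_IP/%2/0}/29 used above.
tpu_ip = "10.240.0.2"
cidr_base = tpu_ip[:-1] + "0"       # trailing "2" -> "0", giving 10.240.0.0
print("--range=%s/29" % cidr_base)  # prints: --range=10.240.0.0/29
```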

SSH in with port forwarding for TensorBoard
```
gcloud compute ssh $USER-vm -- -L 6006:localhost:6006
@@ -52,7 +53,7 @@
Now that you're on the cloud instance, install T2T:
```
pip install tensor2tensor --user
# If your python bin dir isn't already in your path
# Add the python bin dir to your path
export PATH=$HOME/.local/bin:$PATH
```

@@ -67,9 +68,9 @@ t2t-datagen --problem=translate_ende_wmt8k --data_dir=$DATA_DIR
Set up some vars used below. `TPU_IP` and `DATA_DIR` should be the same as what
was used above. Note that the `DATA_DIR` and `OUT_DIR` must be GCS buckets.
```
TPU_IP=<IP of TPU machine>
TPU_IP=10.240.0.2
DATA_DIR=$GCS_BUCKET/t2t/data/
OUT_DIR=$GCS_BUCKET/t2t/training/
OUT_DIR=$GCS_BUCKET/t2t/training/transformer_ende_1
TPU_MASTER=grpc://$TPU_IP:8470
```
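
With these set, training is a normal `t2t-trainer` run pointed at the TPU master. A representative invocation for this era of T2T (a sketch; the flag set here is an assumption, so verify against `t2t-trainer --help`):

```
t2t-trainer \
  --model=transformer \
  --hparams_set=transformer_tpu \
  --problems=translate_ende_wmt8k \
  --data_dir=$DATA_DIR \
  --output_dir=$OUT_DIR \
  --master=$TPU_MASTER \
  --use_tpu=True
```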

19 changes: 13 additions & 6 deletions docs/new_problem.md
@@ -264,16 +264,22 @@ t2t-datagen \
```

Where:
* `PROBLEM` is the name of the class that was registered with `@registry.register_problem()`, but converted from `CamelCase` to `snake_case`.
* `PATH_TO_YOUR_PROBLEM_DIR` is a path to the directory of your python problem file.
* `PROBLEM` is the name of the class that was registered with
  `@registry.register_problem()`, but converted from `CamelCase` to
  `snake_case` (see the sketch after this list).
* `PATH_TO_YOUR_PROBLEM_DIR` is a path to the directory of your python problem
  file.
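
For instance, the walkthrough's `Word2def` class registers under the snake_case name `word2def` (a minimal sketch, assuming the `Text2TextProblem` base and the methods defined earlier in this doc):

```python
# word2def.py, inside PATH_TO_YOUR_PROBLEM_DIR -- registration sketch only.
from tensor2tensor.data_generators import problem
from tensor2tensor.utils import registry


@registry.register_problem()
class Word2def(problem.Text2TextProblem):
  """Predict a dictionary definition from a word."""
  # ... generator, vocab, and split methods as shown above ...

# The registered name is the CamelCase class name converted to snake_case,
# so this problem is selected with --problem=word2def.
```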

If you plan to contribute to the tensor2tensor repository, you can install the local cloned version in developer mode with `pip install -e .` from the tensor2tensor directory. You can also add your new problem file to [`all_problems.py`](https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/data_generators/all_problems.py).
If you plan to contribute to the tensor2tensor repository, you can install the
local cloned version in developer mode with `pip install -e .` from the
tensor2tensor directory. You can also add your new problem file to
[`all_problems.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/all_problems.py).

# Run the problem
Now that we've gotten our problem set up, let's train a model and generate definitions.
Now that we've gotten our problem set up, let's train a model and generate
definitions.

To train, specify the problem name, the model, and hparams:

```bash
PROBLEM=word2def
MODEL=transformer
@@ -282,6 +288,7 @@ HPARAMS=word2def_hparams

The rest of the steps are as given in the [walkthrough](walkthrough.md).
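
Concretely, the training run follows the walkthrough's pattern (a sketch; `$DATA_DIR` and `$TRAIN_DIR` stand for your own data and output paths, and the flag names should be checked against `t2t-trainer --help`):

```bash
t2t-trainer \
  --data_dir=$DATA_DIR \
  --problems=$PROBLEM \
  --model=$MODEL \
  --hparams_set=$HPARAMS \
  --output_dir=$TRAIN_DIR
```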

What if we wanted to train a model to generate words given definitions? In T2T, we can change the problem name to be `PROBLEM=word2def_rev`.
What if we wanted to train a model to generate words given definitions? In T2T,
we can change the problem name to be `PROBLEM=word2def_rev`.

All done. Let us know what definitions your model generated.
6 changes: 3 additions & 3 deletions docs/overview.md
@@ -14,7 +14,7 @@ to training, evaluation, and decoding.

Some key files and their functions:

* [`tpu_trainer.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/tpu/tpu_trainer.py) and [`tpu_trainer_lib.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/tpu/tpu_trainer_lib.py):
* [`t2t_trainer.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/bin/t2t_trainer.py) and [`trainer_lib.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/utils/trainer_lib.py):
Main entrypoint for training and evaluation. Constructs and runs all the
main components of the system (the `Problem`, the `HParams`, the
`Estimator`, the `Experiment`, the `input_fn`s and `model_fn`).
@@ -134,7 +134,7 @@ The default implementations of `bottom`, `top`, and `loss` depend on the

The actual training loop and related services (checkpointing, summaries,
continuous evaluation, etc.) are all handled by `Estimator` and `Experiment`
objects. `tpu_trainer.py` is the main entrypoint and uses `tpu_trainer_lib.py`
objects. `t2t_trainer.py` is the main entrypoint and uses `trainer_lib.py`
to construct the various components.

## Decoding
@@ -144,7 +144,7 @@ to construct the various components.

## System Overview for Train/Eval

See `tpu_trainer.py`.
See `t2t_trainer.py` and `trainer_lib.py`.

* Create HParams
* Create `RunConfig`, including `Parallelism` object (i.e. `data_parallelism`)
32 changes: 2 additions & 30 deletions docs/walkthrough.md
@@ -296,36 +296,8 @@ specifying the `--t2t_usr_dir` flag in `t2t-trainer`.
You can do so for models, hyperparameter sets, modalities, and problems. Please
do submit a pull request if your component might be useful to others.

Here's an example with a new hyperparameter set:

```python
# In ~/usr/t2t_usr/my_registrations.py

from tensor2tensor.models import transformer
from tensor2tensor.utils import registry

@registry.register_hparams
def transformer_my_very_own_hparams_set():
  hparams = transformer.transformer_base()
  hparams.hidden_size = 1024
  ...
```

```python
# In ~/usr/t2t_usr/__init__.py
from . import my_registrations
```

```
t2t-trainer --t2t_usr_dir=~/usr/t2t_usr --registry_help
```

You'll see under the registered HParams your
`transformer_my_very_own_hparams_set`, which you can directly use on the command
line with the `--hparams_set` flag.

`t2t-datagen` also supports the `--t2t_usr_dir` flag for `Problem`
registrations.
See the [`example_usr_dir`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/test_data/example_usr_dir)
for an example user directory.

## Adding a dataset

15 changes: 12 additions & 3 deletions setup.py
@@ -5,7 +5,7 @@

setup(
    name='tensor2tensor',
    version='1.4.1',
    version='1.4.2',
    description='Tensor2Tensor',
    author='Google Inc.',
    author_email='[email protected]',
@@ -23,10 +23,19 @@
        'tensor2tensor/bin/t2t-datagen',
        'tensor2tensor/bin/t2t-decoder',
        'tensor2tensor/bin/t2t-make-tf-configs',
        'tensor2tensor/bin/t2t-exporter',
        'tensor2tensor/bin/t2t-query-server',
        'tensor2tensor/bin/t2t-insights-server',
        'tensor2tensor/bin/t2t-avg-all',
        'tensor2tensor/bin/t2t-bleu',
        'tensor2tensor/bin/t2t-translate-all',
    ],
    install_requires=[
        'bz2file',
        'flask',
        'future',
        'gevent',
        'gunicorn',
        'gym',
        'numpy',
        'requests',
@@ -35,8 +44,8 @@
        'six',
    ],
    extras_require={
        'tensorflow': ['tensorflow>=1.4.0'],
        'tensorflow_gpu': ['tensorflow-gpu>=1.4.0'],
        'tensorflow': ['tensorflow>=1.4.1'],
        'tensorflow_gpu': ['tensorflow-gpu>=1.4.1'],
        'tests': ['pytest', 'h5py', 'mock'],
    },
    classifiers=[
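
These `extras_require` groups use standard pip extras syntax, so a user can pick a TensorFlow build at install time, e.g.:

```
pip install tensor2tensor[tensorflow_gpu]
```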
98 changes: 4 additions & 94 deletions tensor2tensor/bin/t2t-avg-all
@@ -1,105 +1,15 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2017 The Tensor2Tensor Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Script to continously average last N checkpoints in a given directory."""
"""t2t-avg-all."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import logging

# Dependency imports
from tensor2tensor.bin import t2t_avg_all

import numpy as np
import six
from six.moves import zip # pylint: disable=redefined-builtin
from collections import deque
import shutil
import tensorflow as tf
from tensor2tensor.utils import bleu_hook

flags = tf.flags
FLAGS = flags.FLAGS

flags.DEFINE_string("model_dir", "", "Directory to load model checkpoints from.")
flags.DEFINE_string("output_dir", "avg/", "Directory to output the averaged checkpoints to.")
flags.DEFINE_integer("n", 8, "How many checkpoints should be averaged?")
flags.DEFINE_integer("min_steps", 0, "Ignore checkpoints with less steps.")
flags.DEFINE_integer("wait_minutes", 0, "Wait upto N minutes for a new checkpoint.")


def main(_):
  tf.logging._handler.setFormatter(logging.Formatter("%(asctime)s:" + logging.BASIC_FORMAT, None))
  tf.logging.set_verbosity(tf.logging.INFO)

  model_dir = os.path.expanduser(FLAGS.model_dir)
  output_dir = os.path.expanduser(FLAGS.output_dir)
  out_base_file = os.path.join(output_dir, 'model.ckpt')

  # Copy flags.txt with the original time, so t2t-bleu can report correct relative time.
  os.makedirs(output_dir, exist_ok=True)
  if not os.path.exists(os.path.join(output_dir, 'flags.txt')):
    shutil.copy2(os.path.join(model_dir, 'flags.txt'), os.path.join(output_dir, 'flags.txt'))

  models_processed = 0
  queue = deque()
  for model in bleu_hook.stepfiles_iterator(model_dir, FLAGS.wait_minutes, FLAGS.min_steps):
    if models_processed == 0:
      var_list = tf.contrib.framework.list_variables(model.filename)
      avg_values = {}
      for (name, shape) in var_list:
        if not name.startswith("global_step"):
          avg_values[name] = np.zeros(shape)
    models_processed += 1

    tf.logging.info("Loading [%d]: %s" % (models_processed, model.filename))
    reader = tf.contrib.framework.load_checkpoint(model.filename)
    for name in avg_values:
      avg_values[name] += reader.get_tensor(name) / FLAGS.n
    queue.append(model)
    if len(queue) < FLAGS.n:
      continue

    out_file = "%s-%d" % (out_base_file, model.steps)
    tf_vars = []
    tf.logging.info("Averaging %s" % (out_file))
    for (name, value) in six.iteritems(avg_values):
      tf_vars.append(tf.get_variable(name, shape=value.shape))  # TODO , dtype=var_dtypes[name]
    placeholders = [tf.placeholder(v.dtype, shape=v.shape) for v in tf_vars]
    assign_ops = [tf.assign(v, p) for (v, p) in zip(tf_vars, placeholders)]

    global_step = tf.Variable(model.steps, name="global_step", trainable=False, dtype=tf.int64)
    saver = tf.train.Saver(tf.global_variables())

    tf.logging.info("Running session for %s" % (out_file))
    with tf.Session() as sess:
      sess.run(tf.global_variables_initializer())
      for p, assign_op, (name, value) in zip(placeholders, assign_ops, six.iteritems(avg_values)):
        sess.run(assign_op, {p: value})
      tf.logging.info("Storing to %s" % out_file)
      saver.save(sess, out_base_file, global_step=global_step)
    os.utime(out_file + '.index', (model.mtime, model.mtime))

    tf.reset_default_graph()
    first_model = queue.popleft()

    reader = tf.contrib.framework.load_checkpoint(first_model.filename)
    for name in avg_values:
      avg_values[name] -= reader.get_tensor(name) / FLAGS.n
def main(argv):
  t2t_avg_all.main(argv)


if __name__ == "__main__":
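
After this change the script is a thin wrapper around `tensor2tensor.bin.t2t_avg_all`, and the flags defined in the removed code presumably still describe its interface after the move. A typical invocation (paths illustrative):

```
t2t-avg-all --model_dir=$TRAIN_DIR --output_dir=$TRAIN_DIR/avg --n=8
```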