Skip to content

Commit

Permalink
Add example code of binary/text data conversion.
Browse files Browse the repository at this point in the history
  • Loading branch information
Xin Pan committed Sep 6, 2016
1 parent a6d7a7b commit 8583611
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 0 deletions.
2 changes: 2 additions & 0 deletions textsum/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ for example vocabulary format. In <b>How To Run</b> below, users can use toy
data and vocab provided in the data/ directory to run the training by replacing
the data directory flag.

data_convert_example.py contains an example of converting data between the binary and text formats.


<b>Experiment Result</b>

Expand Down
65 changes: 65 additions & 0 deletions textsum/data_convert_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
"""Example of Converting TextSum model data.
Usage:
python data_convert_example.py --command binary_to_text --in_file data/data --out_file data/text_data
python data_convert_example.py --command text_to_binary --in_file data/text_data --out_file data/binary_data
python data_convert_example.py --command binary_to_text --in_file data/binary_data --out_file data/text_data2
diff data/text_data2 data/text_data
"""

import struct
import sys

import tensorflow as tf
from tensorflow.core.example import example_pb2

# Command-line flags. FLAGS.command selects the conversion direction;
# FLAGS.in_file and FLAGS.out_file name the source and destination files.
FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string('command', 'binary_to_text',
                           # Adjacent literals are concatenated verbatim, so
                           # the first one must end with a space.
                           'Either binary_to_text or text_to_binary. '
                           'Specify FLAGS.in_file accordingly.')
tf.app.flags.DEFINE_string('in_file', '', 'path to file')
tf.app.flags.DEFINE_string('out_file', '', 'path to file')

def _binary_to_text():
    """Convert the binary records in FLAGS.in_file to text in FLAGS.out_file.

    Each input record is an 8-byte length (struct format 'q') followed by that
    many bytes holding a serialized tf.Example. For every record one line is
    written, made of tab-separated ``key=value`` pairs, where the value is the
    first entry of the feature's bytes_list.

    Raises:
      struct.error: if the file ends in the middle of a record.
    """
    # Context managers close (and flush) both files on every exit path,
    # including exceptions; the original close() calls were unreachable
    # because the loop returned directly.
    with open(FLAGS.in_file, 'rb') as reader:
        with open(FLAGS.out_file, 'w') as writer:
            while True:
                len_bytes = reader.read(8)
                if not len_bytes:
                    # Clean end of file: no further record header.
                    break
                str_len = struct.unpack('q', len_bytes)[0]
                tf_example_str = struct.unpack(
                    '%ds' % str_len, reader.read(str_len))[0]
                tf_example = example_pb2.Example.FromString(tf_example_str)
                examples = []
                for key in tf_example.features.feature:
                    value = tf_example.features.feature[key].bytes_list.value[0]
                    # On Python 3 the value is bytes, and '%s' would render it
                    # as "b'...'"; decode it so the text file holds the raw
                    # content. On Python 2 bytes is str, so this is skipped.
                    if not isinstance(value, str):
                        value = value.decode('utf-8')
                    examples.append('%s=%s' % (key, value))
                writer.write('%s\n' % '\t'.join(examples))
    sys.stderr.write('Done reading\n')


def _text_to_binary():
    """Convert the text lines in FLAGS.in_file to binary in FLAGS.out_file.

    Each non-blank input line holds tab-separated ``key=value`` pairs. Every
    line becomes one tf.Example whose features store the value in a
    bytes_list. The serialized example is written as an 8-byte length
    (struct format 'q') followed by the serialized bytes.

    Blank lines are skipped. Only the first '=' of a pair separates key from
    value, so values may themselves contain '='.

    Raises:
      ValueError: if a pair contains no '=' at all.
    """
    # Read everything before opening the output so that the input handle is
    # closed promptly and the input is fully consumed before the output file
    # is truncated.
    with open(FLAGS.in_file, 'r') as reader:
        inputs = reader.readlines()

    with open(FLAGS.out_file, 'wb') as writer:
        for inp in inputs:
            line = inp.strip()
            if not line:
                # A blank line has no key=value pair to unpack.
                continue
            tf_example = example_pb2.Example()
            for feature in line.split('\t'):
                # maxsplit=1 keeps any '=' inside the value intact.
                (k, v) = feature.split('=', 1)
                # The bytes_list field needs bytes; on Python 3 the text read
                # from the file is str and must be encoded first. On Python 2
                # str is already bytes, so this is skipped.
                if not isinstance(v, bytes):
                    v = v.encode('utf-8')
                tf_example.features.feature[k].bytes_list.value.extend([v])
            tf_example_str = tf_example.SerializeToString()
            str_len = len(tf_example_str)
            writer.write(struct.pack('q', str_len))
            writer.write(struct.pack('%ds' % str_len, tf_example_str))


def main(unused_argv):
    """Run the conversion selected by FLAGS.command.

    Args:
      unused_argv: positional command-line arguments; ignored.

    Raises:
      ValueError: if any of --command, --in_file, --out_file is empty, or if
        --command is neither binary_to_text nor text_to_binary.
    """
    # Explicit checks instead of assert, which is stripped under `python -O`.
    if not (FLAGS.command and FLAGS.in_file and FLAGS.out_file):
        raise ValueError(
            '--command, --in_file and --out_file must all be specified.')
    if FLAGS.command == 'binary_to_text':
        _binary_to_text()
    elif FLAGS.command == 'text_to_binary':
        _text_to_binary()
    else:
        # Previously an unrecognized command silently did nothing.
        raise ValueError(
            'Unknown --command %r; expected binary_to_text or text_to_binary.'
            % FLAGS.command)


# Script entry point: let tf.app.run parse the flags and then invoke main.
if '__main__' == __name__:
    tf.app.run(main=main)

0 comments on commit 8583611

Please sign in to comment.