diff --git a/Makefile b/Makefile index 0c4c20d0..c5cebd30 100644 --- a/Makefile +++ b/Makefile @@ -11,7 +11,7 @@ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License # for more details. -PYTHON = python +PYTHON = python3 PREFIX = /usr/local DESTDIR = diff --git a/djvu2hocr b/djvu2hocr index 9269c550..7f33fbfc 100755 --- a/djvu2hocr +++ b/djvu2hocr @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # encoding=UTF-8 # Copyright © 2009-2018 Jakub Wilk @@ -14,6 +14,7 @@ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License # for more details. +from __future__ import unicode_literals import sys basedir = None diff --git a/doc/dependencies b/doc/dependencies index 3e95bc7f..9e114761 100644 --- a/doc/dependencies +++ b/doc/dependencies @@ -14,6 +14,8 @@ The following software is needed to run ocrodjvu: * python-djvulibre_ +* python-regex + * subprocess32_ * lxml_ ≥ 2.0 diff --git a/hocr2djvused b/hocr2djvused index 5c2e3983..a1e79fb0 100755 --- a/hocr2djvused +++ b/hocr2djvused @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # encoding=UTF-8 # Copyright © 2008-2018 Jakub Wilk @@ -14,6 +14,7 @@ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License # for more details. +from __future__ import unicode_literals import sys basedir = None diff --git a/lib/__init__.py b/lib/__init__.py index 17781005..436e968e 100644 --- a/lib/__init__.py +++ b/lib/__init__.py @@ -1,8 +1,11 @@ +from __future__ import unicode_literals import sys if sys.version_info < (2, 7): # no coverage raise RuntimeError('Python 2.7 is required') -if sys.version_info >= (3, 0): # no coverage +elif sys.version_info >= (3, 3): # no coverage + pass +else: raise RuntimeError('Python 2.X is required') # vim:ts=4 sts=4 sw=4 et diff --git a/lib/cli/__init__.py b/lib/cli/__init__.py index 8eb353bf..9316921a 100644 --- a/lib/cli/__init__.py +++ b/lib/cli/__init__.py @@ -13,6 +13,7 @@ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License # for more details. +from __future__ import unicode_literals from .. import errors from .. import utils diff --git a/lib/cli/djvu2hocr.py b/lib/cli/djvu2hocr.py index 721ae5b4..6690fd4c 100644 --- a/lib/cli/djvu2hocr.py +++ b/lib/cli/djvu2hocr.py @@ -14,9 +14,16 @@ # for more details. 
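
Nearly every module touched above gains from __future__ import unicode_literals, and lib/__init__.py now accepts CPython 2.7 as well as 3.3 or later. A minimal sketch, separate from the patch, of what the literals import changes on the 2.x side (both assertions already hold on Python 3):

    # encoding=UTF-8
    from __future__ import unicode_literals

    s = 'jeż'                    # a text (unicode) literal on 2.7 and 3.x alike
    assert isinstance(s, type(u''))
    b = b'raw bytes'             # byte strings still need an explicit b prefix
    assert isinstance(b, bytes)
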
from __future__ import print_function +from __future__ import division +from __future__ import unicode_literals +from builtins import str +from builtins import map +from builtins import range +from past.utils import old_div +from builtins import object import argparse -import cgi +import html import locale import os import re @@ -99,7 +106,7 @@ def text(self): raise TypeError('list of {0} (!= 6) elements'.format(len(self._sexpr))) # no coverage if not isinstance(self._sexpr[5], sexpr.StringExpression): raise TypeError('last element is not a string') # no coverage - return unicode(self._sexpr[5].value, 'UTF-8', 'replace') + return self._sexpr[5].value @property def children(self): @@ -153,9 +160,9 @@ def break_chars(char_zone_list, options): continue for i, char in enumerate(char_text): subbox = text_zones.BBox( - int(bbox.x0 + (bbox.x1 - bbox.x0) * 1.0 * i / len(char_text) + 0.5), + int(bbox.x0 + old_div((bbox.x1 - bbox.x0) * 1.0 * i, len(char_text)) + 0.5), bbox.y0, - int(bbox.x0 + (bbox.x1 - bbox.x0) * 1.0 * (i + 1) / len(char_text) + 0.5), + int(bbox.x0 + old_div((bbox.x1 - bbox.x0) * 1.0 * (i + 1), len(char_text)) + 0.5), bbox.y1, ) bbox_list += [subbox] @@ -172,7 +179,7 @@ def break_chars(char_zone_list, options): i = j continue bbox = text_zones.BBox() - for k in xrange(i, j): + for k in range(i, j): bbox.update(bbox_list[k]) element = etree.Element('span') element.set('class', 'ocrx_word') @@ -196,9 +203,9 @@ def break_plain_text(text, bbox, options): i = j continue subbox = text_zones.BBox( - int(bbox.x0 + (bbox.x1 - bbox.x0) * 1.0 * i / len(text) + 0.5), + int(bbox.x0 + old_div((bbox.x1 - bbox.x0) * 1.0 * i, len(text)) + 0.5), bbox.y0, - int(bbox.x0 + (bbox.x1 - bbox.x0) * 1.0 * j / len(text) + 0.5), + int(bbox.x0 + old_div((bbox.x1 - bbox.x0) * 1.0 * j, len(text)) + 0.5), bbox.y1, ) element = etree.Element('span') @@ -244,7 +251,7 @@ def process_zone(parent, zone, last, options): if child is not None and zone_type == const.TEXT_ZONE_WORD and not last: child.tail = ' ' self = None - elif isinstance(child_zone, unicode): + elif isinstance(child_zone, str): text = child_zone if zone_type >= const.TEXT_ZONE_WORD and options.icu is not None and parent is not None: # Do word segmentation by hand. 
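
break_chars() and break_plain_text() above route their bounding-box interpolation through past.utils.old_div from the future package that requirements.txt introduces. Because the numerators are scaled by 1.0 and therefore floats, old_div behaves exactly like a plain /; a short sketch of its semantics:

    from __future__ import division
    from past.utils import old_div

    assert old_div(7, 2) == 3        # two ints: floor division, as on Python 2
    assert old_div(7.0, 2) == 3.5    # any float operand: true division
    assert 7 / 2 == 3.5              # plain '/' under the new division semantics
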
@@ -267,7 +274,7 @@ def process_zone(parent, zone, last, options): def process_page(page_text, options): result = process_zone(None, page_text, last=True, options=options) tree = etree.ElementTree(result) - tree.write(sys.stdout, encoding='UTF-8') + tree.write(sys.stdout.buffer) hocr_header_template = '''\ @@ -290,9 +297,9 @@ def process_page(page_text, options): ''' -def main(argv=sys.argv): +def main(argv=[os.fsencode(arg) for arg in sys.argv]): options = ArgumentParser().parse_args(argv[1:]) - logger.info('Converting {path}:'.format(path=utils.smart_repr(options.path, system_encoding))) + logger.info('Converting {path}:'.format(path=options.path)) if options.pages is None: djvused = ipc.Subprocess( ['djvused', '-e', 'n', os.path.abspath(options.path)], @@ -302,9 +309,9 @@ def main(argv=sys.argv): n_pages = int(djvused.stdout.readline()) finally: djvused.wait() - options.pages = xrange(1, n_pages + 1) + options.pages = range(1, n_pages + 1) page_iterator = iter(options.pages) - sed_script = temporary.file(suffix='.djvused') + sed_script = temporary.file(suffix='.djvused', mode='w+',encoding='UTF-8') for n in options.pages: print('select {0}; size; print-txt'.format(n), file=sed_script) sed_script.flush() @@ -316,17 +323,17 @@ def main(argv=sys.argv): hocr_header = hocr_header_template.format( ocr_system=ocr_system, ocr_capabilities=' '.join(hocr.djvu2hocr_capabilities), - title=cgi.escape(options.title), - css=cgi.escape(options.css), + title=html.escape(options.title), + css=html.escape(options.css), ) if not options.css: hocr_header = re.sub(hocr_header_style_re, '', hocr_header, count=1) - sys.stdout.write(hocr_header) + sys.stdout.buffer.write(hocr_header.encode('UTF-8')) for n in page_iterator: try: page_size = [ int(str(sexpr.Expression.from_stream(djvused.stdout).value).split('=')[1]) - for i in xrange(2) + for i in range(2) ] options.page_bbox = text_zones.BBox(0, 0, page_size[0], page_size[1]) page_text = sexpr.Expression.from_stream(djvused.stdout) @@ -335,7 +342,7 @@ def main(argv=sys.argv): logger.info('- Page #{n}'.format(n=n)) page_zone = Zone(page_text, page_size[1]) process_page(page_zone, options) - sys.stdout.write(hocr_footer) + sys.stdout.buffer.write(hocr_footer.encode('UTF-8')) djvused.wait() # vim:ts=4 sts=4 sw=4 et diff --git a/lib/cli/hocr2djvused.py b/lib/cli/hocr2djvused.py index fb015d61..df686055 100644 --- a/lib/cli/hocr2djvused.py +++ b/lib/cli/hocr2djvused.py @@ -13,6 +13,8 @@ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License # for more details. +from __future__ import unicode_literals +from builtins import map import argparse import sys @@ -36,7 +38,7 @@ def __init__(self): self.add_argument('--version', action=version.VersionAction) self.add_argument('--rotation', dest='rotation', action='store', type=int, default=0, help='page rotation (in degrees)') def size(s): - return map(int, s.split('x', 1)) + return list(map(int, s.split('x', 1))) self.add_argument('--page-size', metavar='WxH', dest='page_size', action='store', type=size, default=None, help='page size (in pixels)') group = self.add_argument_group(title='word segmentation options') group.add_argument('-t', '--details', dest='details', choices=('lines', 'words', 'chars'), action='store', default='words', help='amount of text details to extract') diff --git a/lib/cli/ocrodjvu.py b/lib/cli/ocrodjvu.py index 827114a8..f9f00124 100644 --- a/lib/cli/ocrodjvu.py +++ b/lib/cli/ocrodjvu.py @@ -14,7 +14,13 @@ # for more details. 
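
The hOCR header is now escaped with html.escape() instead of the removed cgi.escape(). One behavioural difference matters for the title and css attribute values: html.escape() escapes double quotes by default, cgi.escape() did not unless quote=True was passed. A quick illustration:

    import html

    s = 'Alice "in" <Wonderland> & friends'
    assert html.escape(s) == 'Alice &quot;in&quot; &lt;Wonderland&gt; &amp; friends'
    # cgi.escape(s) used to return 'Alice "in" &lt;Wonderland&gt; &amp; friends';
    # html.escape(s, quote=False) reproduces that older behaviour.
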
from __future__ import print_function +from __future__ import unicode_literals +from future import standard_library +standard_library.install_aliases() +from builtins import str +from builtins import range +from builtins import object import argparse import contextlib import inspect @@ -258,7 +264,7 @@ def __init__(self): self.add_argument('--list-engines', action=self.list_engines, nargs=0, help='print list of available OCR engines') self.add_argument('-l', '--language', dest='language', help='set recognition language') self.add_argument('--list-languages', action=self.list_languages, nargs=0, help='print list of available languages') - self.add_argument('--render', dest='render_layers', choices=self._render_map.keys(), action='store', default='mask', help='image layers to render') + self.add_argument('--render', dest='render_layers', choices=list(self._render_map.keys()), action='store', default='mask', help='image layers to render') def pages(x): return utils.parse_page_numbers(x) self.add_argument('-p', '--pages', dest='pages', action='store', default=None, type=pages, help='pages to process') @@ -400,9 +406,9 @@ def init(self, options): bpp = 24 if self._options.render_layers != djvu.decode.RENDER_MASK_ONLY else 1 self._image_format = self._options.engine.image_format(bpp) - def _temp_file(self, name, auto_remove=True): + def _temp_file(self, name, mode='w+', encoding=locale.getpreferredencoding(),auto_remove=True): path = os.path.join(self._temp_dir, name) - file = open(path, 'w+b') + file = open(path,mode=mode,encoding=encoding) if not self._debug and auto_remove: file = temporary.wrapper(file, file.name) return file @@ -417,7 +423,7 @@ def get_output_image(self, nth, page_job): file = self._temp_file('{n:06}.{ext}'.format( n=nth, ext=output_format.extension - )) + ),mode='wb',encoding=None) try: output_format.write_image(page_job, self._options.render_layers, file) file.flush() @@ -510,7 +516,7 @@ def page_thread(self, pages, results, condition): def _process(self, path, pages=None): self._engine = self._options.engine - logger.info('Processing {path}:'.format(path=utils.smart_repr(path, system_encoding))) + logger.info('Processing {path}:'.format(path=path)) document = self.new_document(djvu.decode.FileURI(path)) document.decoding_job.wait() if pages is None: @@ -524,7 +530,7 @@ def _process(self, path, pages=None): condition = threading.Condition() threads = [ threading.Thread(target=self.page_thread, args=(pages, results, condition)) - for i in xrange(njobs) + for i in range(njobs) ] def stop_threads(): with condition: @@ -540,7 +546,7 @@ def stop_threads(): sed_file.write('remove-txt\n') for page in pages: try: - file_id = page.file.id.encode(system_encoding) + file_id = page.file.id except UnicodeError: pageno = page.n + 1 logger.warning('warning: cannot convert page {n} identifier to locale encoding'.format(n=pageno)) diff --git a/lib/engines/__init__.py b/lib/engines/__init__.py index 43e575a3..07fd4a32 100644 --- a/lib/engines/__init__.py +++ b/lib/engines/__init__.py @@ -13,6 +13,7 @@ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License # for more details. +from __future__ import unicode_literals import pkgutil def get_engines(): diff --git a/lib/engines/common.py b/lib/engines/common.py index 9f5d9265..bcf8af35 100644 --- a/lib/engines/common.py +++ b/lib/engines/common.py @@ -13,6 +13,9 @@ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License # for more details. 
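
_temp_file() above switches from 'w+b' to text mode with an explicit encoding, while get_output_image() opts back into binary with mode='wb', encoding=None for the rendered page images. A sketch of that split with an illustrative helper name that is not part of the patch; note also that the encoding=locale.getpreferredencoding() default in the patch is evaluated once, when the method is defined:

    import locale

    def open_temp(path, binary=False):
        if binary:
            # raw PBM/PPM/BMP data handed to the OCR engine: no codec involved
            return open(path, 'w+b')
        # sed scripts, hOCR and plain-text output: locale-encoded text mode
        return open(path, 'w+', encoding=locale.getpreferredencoding())
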
+from __future__ import unicode_literals +from builtins import str +from builtins import object from .. import utils from .. import image_io @@ -33,7 +36,7 @@ def __init__(self, *args, **kwargs): raise TypeError('{tp}.name must be a string'.format(tp=tpname)) # no coverage if not issubclass(self.image_format, image_io.ImageFormat): raise TypeError('{tp}.image_format must be an ImageFormat subclass'.format(tp=tpname)) # no coverage - for key, value in kwargs.iteritems(): + for key, value in kwargs.items(): try: prop = getattr(type(self), key) if not isinstance(prop, utils.property): @@ -63,6 +66,6 @@ def save(self, prefix): file.write(str(self)) def as_stringio(self): - return io.BytesIO(str(self)) + return io.StringIO(str(self)) # vim:ts=4 sts=4 sw=4 et diff --git a/lib/engines/cuneiform.py b/lib/engines/cuneiform.py index 3f587edf..a9869a5c 100644 --- a/lib/engines/cuneiform.py +++ b/lib/engines/cuneiform.py @@ -13,10 +13,14 @@ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License # for more details. +from __future__ import unicode_literals import os import re import shlex import warnings +import locale +import sys +import codecs from . import common from .. import errors @@ -62,6 +66,7 @@ def _get_languages(self): ) except OSError: raise errors.UnknownLanguageList + cuneiform.stdout=codecs.getreader(sys.stdout.encoding or locale.getpreferredencoding())(cuneiform.stdout) self._cuneiform_to_iso = {} self._user_to_cuneiform = {} try: diff --git a/lib/engines/dummy.py b/lib/engines/dummy.py index 5bf20e3d..721a33e5 100644 --- a/lib/engines/dummy.py +++ b/lib/engines/dummy.py @@ -13,6 +13,7 @@ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License # for more details. +from __future__ import unicode_literals from . import common from .. import image_io from .. import text_zones diff --git a/lib/engines/gocr.py b/lib/engines/gocr.py index 008be82a..7b27e846 100644 --- a/lib/engines/gocr.py +++ b/lib/engines/gocr.py @@ -14,7 +14,10 @@ # for more details. from __future__ import division +from __future__ import unicode_literals +from builtins import map +from builtins import object import functools import re import shlex diff --git a/lib/engines/ocrad.py b/lib/engines/ocrad.py index f58b746f..a5dc6cdd 100644 --- a/lib/engines/ocrad.py +++ b/lib/engines/ocrad.py @@ -13,6 +13,10 @@ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License # for more details. 
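
The new cuneiform.stdout wrapper (and the matching tesseract wrappers later in the patch) exists because subprocess pipes yield bytes on Python 3 while the language-list parser expects str lines. The general pattern, sketched with the plain subprocess module:

    import codecs
    import locale
    import subprocess
    import sys

    child = subprocess.Popen(['echo', 'spam'], stdout=subprocess.PIPE)
    decode = codecs.getreader(sys.stdout.encoding or locale.getpreferredencoding())
    child.stdout = decode(child.stdout)
    for line in child.stdout:        # str lines instead of bytes
        print(line.rstrip())
    child.wait()

Passing universal_newlines=True (or text=True on newer interpreters) to the Popen call would be the standard-library alternative; wrapping after the fact keeps the ipc.Subprocess call sites untouched.
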
+from __future__ import unicode_literals +from builtins import map +from builtins import range +from builtins import object import functools import re import shlex @@ -56,23 +60,23 @@ def scan(stream, settings): [n] = line.split()[3:] n = int(n) bbox = text_zones.BBox(*((0, 0) + settings.page_size)) - children = filter(None, (scan(stream, settings) for i in xrange(n))) + children = [_f for _f in (scan(stream, settings) for i in range(n)) if _f] zone = text_zones.Zone(const.TEXT_ZONE_PAGE, bbox, children) zone.rotate(settings.rotation) return zone if line.startswith('text block '): - n, x, y, w, h = map(int, line.split()[2:]) + n, x, y, w, h = list(map(int, line.split()[2:])) bbox = text_zones.BBox(x, y, x + w, y + h) - [children] = [scan(stream, settings) for i in xrange(n)] + [children] = [scan(stream, settings) for i in range(n)] return text_zones.Zone(const.TEXT_ZONE_REGION, bbox, children) if line.startswith('lines '): [n] = line.split()[1:] n = int(n) - return filter(None, (scan(stream, settings) for i in xrange(n))) + return [_f for _f in (scan(stream, settings) for i in range(n)) if _f] if line.startswith('line '): _, _, _, n, _, _ = line.split() n = int(n) - children = filter(None, (scan(stream, settings) for i in xrange(n))) + children = [_f for _f in (scan(stream, settings) for i in range(n)) if _f] if not children: return None bbox = text_zones.BBox() @@ -83,7 +87,7 @@ def scan(stream, settings): line = line.lstrip() if line[0].isdigit(): coords, line = line.split('; ', 1) - x, y, w, h = map(int, coords.split()) + x, y, w, h = list(map(int, coords.split())) bbox = text_zones.BBox(x, y, x + w, y + h) if line[0] == '0': # No interpretations have been proposed for this particular character. diff --git a/lib/engines/ocropus.py b/lib/engines/ocropus.py index 9bf401d6..9b674d09 100644 --- a/lib/engines/ocropus.py +++ b/lib/engines/ocropus.py @@ -13,6 +13,7 @@ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License # for more details. +from __future__ import unicode_literals import shlex from . import common diff --git a/lib/engines/tesseract.py b/lib/engines/tesseract.py index 1eb4b52d..bd9c70d7 100644 --- a/lib/engines/tesseract.py +++ b/lib/engines/tesseract.py @@ -14,7 +14,9 @@ # for more details. from __future__ import print_function +from __future__ import unicode_literals +from builtins import object import cgi import glob import os @@ -22,6 +24,8 @@ import shlex import sys import warnings +import locale +import codecs from . import common from .. import errors @@ -50,10 +54,12 @@ ''' def _filter_boring_stderr(stderr): - if stderr and stderr[0].startswith('Tesseract Open Source OCR Engine'): - # Tesseract prints its own name on standard error - # even if nothing went wrong. + if not stderr: + return + if re.match('\ATesseract Open Source OCR Engine',stderr[0]): del stderr[0] + # Tesseract prints its own name on standard error + # even if nothing went wrong if stderr and stderr[0] == 'Page 1': # We also don't want page numbers, # because we always pass just a single page to Tesseract. 
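
The ocrad scanner above replaces filter(None, ...) with list comprehensions and wraps map(int, ...) in list(), which preserves the Python 2 behaviour: on Python 3 both built-ins return single-use iterators with no len() or indexing. A minimal demonstration:

    coords = map(int, '10 20 30 40'.split())
    assert list(coords) == [10, 20, 30, 40]
    assert list(coords) == []            # the iterator is already exhausted

    coords = list(map(int, '10 20 30 40'.split()))
    x, y, w, h = coords                  # safe to unpack, index and reuse
    assert (w, h) == (30, 40)
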
@@ -139,6 +145,8 @@ def get_filesystem_info(self): ) except OSError: raise errors.UnknownLanguageList + tesseract.stdout=codecs.getreader(sys.stdout.encoding or locale.getpreferredencoding())(tesseract.stdout) + tesseract.stderr=codecs.getreader(sys.stdout.encoding or locale.getpreferredencoding())(tesseract.stderr) try: stderr = tesseract.stderr.read() match = _error_pattern.search(stderr) @@ -211,6 +219,7 @@ def recognize_plain_text(self, image, language, details=None, uax29=None): stdout=ipc.DEVNULL, stderr=ipc.PIPE, ) + worker.stderr=codecs.getreader(sys.stderr.encoding or locale.getpreferredencoding())(worker.stderr) _wait_for_worker(worker) with open(os.path.join(output_dir, 'tmp.txt'), 'rt') as file: return common.Output( @@ -244,6 +253,7 @@ def recognize_hocr(self, image, language, details=text_zones.TEXT_DETAILS_WORD, stdout=ipc.DEVNULL, stderr=ipc.PIPE, ) + worker.stderr=codecs.getreader(sys.stderr.encoding or locale.getpreferredencoding())(worker.stderr) _wait_for_worker(worker) hocr_path = os.path.join(output_dir, 'tmp.hocr') if not os.path.exists(hocr_path): @@ -263,6 +273,7 @@ def recognize_hocr(self, image, language, details=text_zones.TEXT_DETAILS_WORD, stdout=ipc.DEVNULL, stderr=ipc.PIPE, ) + worker.stderr=codecs.getreader(sys.stderr.encoding or locale.getpreferredencoding())(worker.stderr) _wait_for_worker(worker) with open(box_path, 'r') as box_file: contents = contents.replace( diff --git a/lib/errors.py b/lib/errors.py index 2c7a05ea..9b278af7 100644 --- a/lib/errors.py +++ b/lib/errors.py @@ -14,6 +14,7 @@ # for more details. from __future__ import print_function +from __future__ import unicode_literals import argparse import sys diff --git a/lib/hocr.py b/lib/hocr.py index 19e34bba..a399c590 100644 --- a/lib/hocr.py +++ b/lib/hocr.py @@ -19,7 +19,13 @@ The hOCR format specification: http://kba.github.io/hocr-spec/1.2/ ''' +from __future__ import unicode_literals +from builtins import map +from builtins import zip +from builtins import range +from past.builtins import basestring +from builtins import object import functools import re @@ -68,7 +74,7 @@ const.TEXT_ZONE_LINE: ('span', 'ocrx_line'), const.TEXT_ZONE_WORD: ('span', 'ocrx_word'), } -djvu2hocr_capabilities = list(sorted(cls for tag, cls in _djvu_zone_to_hocr.itervalues())) +djvu2hocr_capabilities = list(sorted(cls for tag, cls in _djvu_zone_to_hocr.values())) djvu_zone_to_hocr = _djvu_zone_to_hocr.__getitem__ del _djvu_zone_to_hocr @@ -127,7 +133,7 @@ def _apply_bboxes(djvu_class, bbox_source, text, settings, page_size): if not m: return [text] coordinates = (int(x) for x in m.group(1).replace(',', ' ').split()) - coordinates = zip(coordinates, coordinates, coordinates, coordinates) + coordinates = list(zip(coordinates, coordinates, coordinates, coordinates)) else: # bboxes from an iterator coordinates = [] @@ -164,7 +170,7 @@ def _apply_bboxes(djvu_class, bbox_source, text, settings, page_size): i = j continue bbox = text_zones.BBox() - for k in xrange(i, j): + for k in range(i, j): if settings.cuneiform and coordinates[k] == (-1, -1, -1, -1): raise errors.MalformedHocr("missing bbox for non-whitespace character") bbox.update(text_zones.BBox(*coordinates[k])) @@ -175,7 +181,7 @@ def _apply_bboxes(djvu_class, bbox_source, text, settings, page_size): else: last_word += [ text_zones.Zone(type=const.TEXT_ZONE_CHARACTER, bbox=(x0, y0, x1, y1), children=[ch]) - for k in xrange(i, j) + for k in range(i, j) for (x0, y0, x1, y1), ch in [(coordinates[k], text[k])] ] i = j @@ -407,7 +413,7 @@ def 
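
In hocr.py, _djvu_zone_to_hocr.itervalues() becomes .values(). On Python 3 this returns a view rather than a list, which is safe here because the result is only iterated once before being sorted; a sketch with an abbreviated mapping:

    mapping = {
        'page': ('div', 'ocr_page'),
        'word': ('span', 'ocrx_word'),
    }
    capabilities = list(sorted(cls for tag, cls in mapping.values()))
    assert capabilities == ['ocr_page', 'ocrx_word']
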
extract_tesseract_bbox_data(node): if not line or line.startswith('//'): continue chars, x0, y0, x1, y1, w = line.split() - x0, y0, x1, y1 = map(int, (x0, y0, x1, y1)) + x0, y0, x1, y1 = list(map(int, (x0, y0, x1, y1))) if chars == '~': chars = [None] w = x1 - x0 @@ -431,7 +437,7 @@ def read_document(stream, settings): # # FIXME: This work-around is ugly and should be dropped at some point. contents = stream.read() - contents = utils.sanitize_utf8(contents) + contents = utils.sanitize_utf8(contents.encode('UTF-8')) if settings.html5: return html5_support.parse(contents) else: diff --git a/lib/html5_support.py b/lib/html5_support.py index cc955aaf..d9a6ae7a 100644 --- a/lib/html5_support.py +++ b/lib/html5_support.py @@ -13,6 +13,7 @@ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License # for more details. +from __future__ import unicode_literals from . import utils def parse(stream): diff --git a/lib/image_io.py b/lib/image_io.py index 4356e0aa..e95b3325 100644 --- a/lib/image_io.py +++ b/lib/image_io.py @@ -13,6 +13,8 @@ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License # for more details. +from __future__ import unicode_literals +from builtins import object import struct from . import utils @@ -72,9 +74,9 @@ def write_image(self, page_job, render_layers, file): size = page_job.size rect = (0, 0) + size if self._pixel_format.bpp == 1: - file.write('P4 {0} {1}\n'.format(*size)) # PBM header + file.write('P4 {0} {1}\n'.format(*size).encode('ASCII')) # PBM header else: - file.write('P6 {0} {1} 255\n'.format(*size)) # PPM header + file.write('P6 {0} {1} 255\n'.format(*size).encode('ASCII')) # PPM header data = page_job.render( render_layers, rect, rect, @@ -111,7 +113,7 @@ def write_image(self, page_job, render_layers, file): n_palette_colors = 2 * (self._pixel_format.bpp == 1) headers_size = 54 + 4 * n_palette_colors file.write(struct.pack(' @@ -14,6 +14,7 @@ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License # for more details. +from __future__ import unicode_literals import sys basedir = None diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..3c3314f3 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,4 @@ +future +lxml +python-djvulibre +regex diff --git a/tests/djvu2hocr/test.py b/tests/djvu2hocr/test.py index 16d109b1..76351eba 100644 --- a/tests/djvu2hocr/test.py +++ b/tests/djvu2hocr/test.py @@ -13,11 +13,13 @@ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License # for more details. 
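
image_io.py now encodes the PBM/PPM headers to ASCII before writing, because the target file is binary and the pixel data that follows is raw bytes. A compact sketch of the same layout using an in-memory buffer:

    import io

    width, height = 16, 8
    fp = io.BytesIO()                            # stands in for the binary temp file
    fp.write('P4 {0} {1}\n'.format(width, height).encode('ASCII'))
    fp.write(b'\x00' * (width // 8) * height)    # 1 bpp rows, 8 pixels packed per byte
    assert fp.getvalue().startswith(b'P4 16 8\n')

The BMP branch builds its headers with struct.pack(), which already produces bytes, so only the text headers needed an explicit encode.
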
+from __future__ import unicode_literals import io import os import shlex import shutil import sys +import codecs from lib import ipc from lib import errors @@ -39,8 +41,8 @@ here = os.path.relpath(here) def test_help(): - stdout = io.BytesIO() - stderr = io.BytesIO() + stdout = io.StringIO() + stderr = io.StringIO() with interim(sys, stdout=stdout, stderr=stderr): rc = try_run(djvu2hocr.main, ['', '--help']) assert_equal(stderr.getvalue(), '') @@ -48,8 +50,8 @@ def test_help(): assert_not_equal(stdout.getvalue(), '') def test_bad_options(): - stdout = io.BytesIO() - stderr = io.BytesIO() + stdout = io.StringIO() + stderr = io.StringIO() with interim(sys, stdout=stdout, stderr=stderr): rc = try_run(djvu2hocr.main, ['']) assert_equal(rc, errors.EXIT_FATAL) @@ -58,8 +60,8 @@ def test_bad_options(): def test_version(): # https://bugs.debian.org/573496 - stdout = io.BytesIO() - stderr = io.BytesIO() + stdout = io.StringIO() + stderr = io.StringIO() with interim(sys, stdout=stdout, stderr=stderr): rc = try_run(djvu2hocr.main, ['', '--version']) assert_equal(stderr.getvalue(), '') @@ -70,7 +72,7 @@ def _test_from_file(base_filename, index): base_filename = os.path.join(here, base_filename) test_filename = '{base}.test{i}'.format(base=base_filename, i=index) djvused_filename = base_filename + '.djvused' - with open(test_filename, 'rb') as file: + with open(test_filename, 'r') as file: commandline = file.readline() expected_output = file.read() args = shlex.split(commandline) @@ -83,7 +85,7 @@ def _test_from_file(base_filename, index): djvu_filename) ipc.Subprocess(['djvused', '-f', djvused_filename, '-s', djvu_filename]).wait() xml_filename = os.path.join(tmpdir, 'output.html') - with open(xml_filename, 'w+b') as xml_file: + with open(xml_filename, 'w+') as xml_file: xmllint = ipc.Subprocess(['xmllint', '--format', '-'], stdin=ipc.PIPE, stdout=xml_file) try: with open(os.devnull, 'w') as null: @@ -116,7 +118,7 @@ def test_nonascii_path(): here = os.path.abspath(here) path = os.path.join(here, '..', 'data', 'empty.djvu') stdout = io.BytesIO() - stderr = io.BytesIO() + stderr = io.StringIO() with temporary.directory() as tmpdir: tmp_path = os.path.join(tmpdir, 'тмп.djvu') os.symlink(path, tmp_path) diff --git a/tests/engines/test_cuneiform.py b/tests/engines/test_cuneiform.py index f7838814..ae01f71e 100644 --- a/tests/engines/test_cuneiform.py +++ b/tests/engines/test_cuneiform.py @@ -13,6 +13,8 @@ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License # for more details. +from __future__ import unicode_literals +from builtins import object import os import sys @@ -31,7 +33,7 @@ here = os.path.dirname(__file__) here = os.path.relpath(here) -class test_cuneiform(): +class test_cuneiform(object): existing_languages = [ ('eng', 'eng'), diff --git a/tests/hocr2djvused/test.py b/tests/hocr2djvused/test.py index 8b617c3a..0a66652a 100644 --- a/tests/hocr2djvused/test.py +++ b/tests/hocr2djvused/test.py @@ -13,6 +13,7 @@ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License # for more details. 
+from __future__ import unicode_literals import contextlib import io import os @@ -38,8 +39,8 @@ here = os.path.relpath(here) def test_help(): - stdout = io.BytesIO() - stderr = io.BytesIO() + stdout = io.StringIO() + stderr = io.StringIO() with interim(sys, stdout=stdout, stderr=stderr): rc = try_run(hocr2djvused.main, ['', '--help']) assert_equal(stderr.getvalue(), '') @@ -48,8 +49,8 @@ def test_help(): def test_version(): # https://bugs.debian.org/573496 - stdout = io.BytesIO() - stderr = io.BytesIO() + stdout = io.StringIO() + stderr = io.StringIO() with interim(sys, stdout=stdout, stderr=stderr): rc = try_run(hocr2djvused.main, ['', '--version']) assert_equal(stderr.getvalue(), '') @@ -57,8 +58,8 @@ def test_version(): assert_not_equal(stdout.getvalue(), '') def test_bad_options(): - stdout = io.BytesIO() - stderr = io.BytesIO() + stdout = io.StringIO() + stderr = io.StringIO() with interim(sys, stdout=stdout, stderr=stderr): rc = try_run(hocr2djvused.main, ['', '--bad-option']) assert_equal(rc, errors.EXIT_FATAL) @@ -76,13 +77,13 @@ def _test_from_file(base_filename, index, extra_args): base_filename = os.path.join(here, base_filename) test_filename = '{base}.test{i}'.format(base=base_filename, i=index) html_filename = '{base}.html'.format(base=base_filename) - with open(test_filename, 'rb') as file: + with open(test_filename, 'r') as file: commandline = file.readline() expected_output = file.read() args = shlex.split(commandline) + shlex.split(extra_args) assert_equal(args[0], '#') - with contextlib.closing(io.BytesIO()) as output_file: - with open(html_filename, 'rb') as html_file: + with contextlib.closing(io.StringIO()) as output_file: + with open(html_filename, 'r') as html_file: with interim(sys, stdin=html_file, stdout=output_file): rc = try_run(hocr2djvused.main, args) assert_equal(rc, 0) @@ -99,8 +100,8 @@ def _rough_test_from_file(base_filename, args): args += ['--page-size=1000x1000'] base_filename = os.path.join(here, base_filename) html_filename = '{base}.html'.format(base=base_filename) - with contextlib.closing(io.BytesIO()) as output_file: - with open(html_filename, 'rb') as html_file: + with contextlib.closing(io.StringIO()) as output_file: + with open(html_filename, 'r') as html_file: with interim(sys, stdin=html_file, stdout=output_file): rc = try_run(hocr2djvused.main, args) assert_equal(rc, 0) diff --git a/tests/image_io/test.py b/tests/image_io/test.py index af393c14..c79aa204 100644 --- a/tests/image_io/test.py +++ b/tests/image_io/test.py @@ -13,6 +13,7 @@ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License # for more details. +from __future__ import unicode_literals import io import os diff --git a/tests/ocrodjvu/test.py b/tests/ocrodjvu/test.py index 432303a6..f8308860 100644 --- a/tests/ocrodjvu/test.py +++ b/tests/ocrodjvu/test.py @@ -13,6 +13,7 @@ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License # for more details. 
+from __future__ import unicode_literals import io import os import shutil @@ -35,8 +36,8 @@ engines = None def test_help(): - stdout = io.BytesIO() - stderr = io.BytesIO() + stdout = io.StringIO() + stderr = io.StringIO() with interim(sys, stdout=stdout, stderr=stderr): rc = try_run(ocrodjvu.main, ['', '--help']) assert_equal(stderr.getvalue(), '') @@ -45,8 +46,8 @@ def test_help(): def test_version(): # https://bugs.debian.org/573496 - stdout = io.BytesIO() - stderr = io.BytesIO() + stdout = io.StringIO() + stderr = io.StringIO() with interim(sys, stdout=stdout, stderr=stderr): rc = try_run(ocrodjvu.main, ['', '--version']) assert_equal(rc, 0) @@ -54,8 +55,8 @@ def test_version(): assert_not_equal(stdout.getvalue(), '') def test_bad_options(): - stdout = io.BytesIO() - stderr = io.BytesIO() + stdout = io.StringIO() + stderr = io.StringIO() with interim(sys, stdout=stdout, stderr=stderr): rc = try_run(ocrodjvu.main, ['']) assert_equal(rc, errors.EXIT_FATAL) @@ -64,8 +65,8 @@ def test_bad_options(): def test_list_engines(): global engines - stdout = io.BytesIO() - stderr = io.BytesIO() + stdout = io.StringIO() + stderr = io.StringIO() with interim(sys, stdout=stdout, stderr=stderr): rc = try_run(ocrodjvu.main, ['', '--list-engines']) assert_equal(stderr.getvalue(), '') @@ -73,8 +74,8 @@ def test_list_engines(): engines = stdout.getvalue().splitlines() def _test_list_languages(engine): - stdout = io.BytesIO() - stderr = io.BytesIO() + stdout = io.StringIO() + stderr = io.StringIO() with interim(sys, stdout=stdout, stderr=stderr): rc = try_run(ocrodjvu.main, ['', '--engine', engine, '--list-languages']) assert_equal(stderr.getvalue(), '') @@ -92,8 +93,8 @@ def test_nonascii_path(): here = os.path.dirname(__file__) here = os.path.abspath(here) path = os.path.join(here, '..', 'data', 'empty.djvu') - stdout = io.BytesIO() - stderr = io.BytesIO() + stdout = io.StringIO() + stderr = io.StringIO() with temporary.directory() as tmpdir: tmp_path = os.path.join(tmpdir, 'тмп.djvu') shutil.copy(path, tmp_path) @@ -108,8 +109,8 @@ def test_bad_page_id(): here = os.path.dirname(__file__) here = os.path.abspath(here) path = os.path.join(here, '..', 'data', 'bad-page-id.djvu') - stdout = io.BytesIO() - stderr = io.BytesIO() + stdout = io.StringIO() + stderr = io.StringIO() with temporary.directory() as tmpdir: out_path = os.path.join(tmpdir, 'tmp.djvu') with interim(sys, stdout=stdout, stderr=stderr): diff --git a/tests/ocrodjvu/test_integration.py b/tests/ocrodjvu/test_integration.py index bd258874..adf59d88 100644 --- a/tests/ocrodjvu/test_integration.py +++ b/tests/ocrodjvu/test_integration.py @@ -13,6 +13,7 @@ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License # for more details. +from __future__ import unicode_literals import distutils.spawn import io import os @@ -39,8 +40,8 @@ def _test_ocr(engine, layers): here = os.path.dirname(__file__) here = os.path.abspath(here) path = os.path.join(here, '..', 'data', 'alice.djvu') - stdout = io.BytesIO() - stderr = io.BytesIO() + stdout = io.StringIO() + stderr = io.StringIO() with temporary.directory() as tmpdir: tmp_path = os.path.join(tmpdir, 'tmp.djvu') with interim(sys, stdout=stdout, stderr=stderr): diff --git a/tests/test_ipc.py b/tests/test_ipc.py index c683654a..18bc4dd6 100644 --- a/tests/test_ipc.py +++ b/tests/test_ipc.py @@ -14,11 +14,17 @@ # for more details. 
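
The tests consistently swap io.BytesIO for io.StringIO when capturing sys.stdout and sys.stderr, since print() and the argparse help writer emit str on Python 3. The suite does this through its own interim() helper; the standard-library equivalent of the pattern is:

    import contextlib
    import io

    stdout = io.StringIO()
    with contextlib.redirect_stdout(stdout):
        print('--help output would land here')
    assert stdout.getvalue() == '--help output would land here\n'
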
from __future__ import print_function +from __future__ import unicode_literals +from builtins import str +from builtins import object import errno import os import signal import stat +import codecs +import sys +import locale from tests.tools import ( assert_equal, @@ -31,7 +37,7 @@ from lib import ipc from lib import temporary -class test_exceptions(): +class test_exceptions(object): def test_sigint(self): ex = ipc.CalledProcessInterrupted(signal.SIGINT, 'eggs') @@ -66,7 +72,7 @@ def test_init_exc(): ) assert_equal(str(ecm.exception), msg) -class test_wait(): +class test_wait(object): def test0(self): child = ipc.Subprocess(['true']) @@ -92,7 +98,7 @@ def test_wait_signal(self): for name in 'SIGINT', 'SIGABRT', 'SIGSEGV': yield self._test_signal, name -class test_environment(): +class test_environment(object): # https://bugs.debian.org/594385 @@ -103,8 +109,8 @@ def test1(self): stdout=ipc.PIPE, stderr=ipc.PIPE, ) stdout, stderr = child.communicate() - assert_equal(stdout, '42') - assert_equal(stderr, '') + assert_equal(stdout, b'42') + assert_equal(stderr, b'') def test2(self): with interim_environ(ocrodjvu='42'): @@ -114,8 +120,8 @@ def test2(self): env={}, ) stdout, stderr = child.communicate() - assert_equal(stdout, '42') - assert_equal(stderr, '') + assert_equal(stdout, b'42') + assert_equal(stderr, b'') def test3(self): with interim_environ(ocrodjvu='42'): @@ -125,8 +131,8 @@ def test3(self): env=dict(ocrodjvu='24'), ) stdout, stderr = child.communicate() - assert_equal(stdout, '24') - assert_equal(stderr, '') + assert_equal(stdout, b'24') + assert_equal(stderr, b'') def test_path(self): path = os.getenv('PATH').split(':') @@ -144,8 +150,8 @@ def test_path(self): stdout=ipc.PIPE, stderr=ipc.PIPE, ) stdout, stderr = child.communicate() - assert_equal(stdout, '42') - assert_equal(stderr, '') + assert_equal(stdout, b'42') + assert_equal(stderr, b'') def _test_locale(self): child = ipc.Subprocess(['locale'], @@ -155,22 +161,22 @@ def _test_locale(self): stdout = stdout.splitlines() stderr = stderr.splitlines() assert_equal(stderr, []) - data = dict(line.split('=', 1) for line in stdout) + data = dict(line.split(b'=', 1) for line in stdout) has_lc_all = has_lc_ctype = has_lang = 0 - for key, value in data.iteritems(): - if key == 'LC_ALL': + for key, value in data.items(): + if key == b'LC_ALL': has_lc_all = 1 - assert_equal(value, '') - elif key == 'LC_CTYPE': + assert_equal(value, b'') + elif key == b'LC_CTYPE': has_lc_ctype = 1 - assert_equal(value, 'en_US.UTF-8') - elif key == 'LANG': + assert_equal(value, b'en_US.UTF-8') + elif key == b'LANG': has_lang = 1 - assert_equal(value, '') - elif key == 'LANGUAGE': - assert_equal(value, '') + assert_equal(value, b'') + elif key == b'LANGUAGE': + assert_equal(value, b'') else: - assert_equal(value, '"POSIX"') + assert_equal(value, b'"POSIX"') assert_true(has_lc_all) assert_true(has_lc_ctype) assert_true(has_lang) @@ -187,7 +193,7 @@ def test_locale_lang(self): with interim_environ(LC_ALL=None, LC_CTYPE=None, LANG='en_US.UTF-8'): self._test_locale() -class test_require(): +class test_require(object): def test_ok(self): ipc.require('cat') @@ -203,3 +209,4 @@ def test_fail(self): assert_equal(str(ecm.exception), exc_message) # vim:ts=4 sts=4 sw=4 et + diff --git a/tests/test_text_zones.py b/tests/test_text_zones.py index c24ccf43..8a918f59 100644 --- a/tests/test_text_zones.py +++ b/tests/test_text_zones.py @@ -13,6 +13,7 @@ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License # for more details.
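
The expectations in test_ipc.py gain a b prefix because Popen.communicate() returns bytes unless the pipes are opened in text mode, and the locale test now splits on b'=' and compares bytes for the same reason. Sketched with the plain subprocess module:

    import subprocess

    child = subprocess.Popen(['printf', '42'], stdout=subprocess.PIPE)
    out, _ = child.communicate()
    assert out == b'42'                  # bytes by default

    child = subprocess.Popen(['printf', '42'], stdout=subprocess.PIPE,
                             universal_newlines=True)
    out, _ = child.communicate()
    assert out == '42'                   # str when text mode is requested
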
+from __future__ import unicode_literals import io import distutils.version @@ -30,8 +31,8 @@ def test_print_sexpr(): if python_djvulibre_version < V('0.4'): out = r'"je\305\274"' else: - out = '"jeż"' - fp = io.BytesIO() + out = u'"jeż"' + fp = io.StringIO() expr = text_zones.sexpr.Expression(inp) text_zones.print_sexpr(expr, fp) fp.seek(0) diff --git a/tests/test_unicode_support.py b/tests/test_unicode_support.py index 65fcd159..7fade6c0 100644 --- a/tests/test_unicode_support.py +++ b/tests/test_unicode_support.py @@ -13,6 +13,8 @@ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License # for more details. +from __future__ import unicode_literals +from builtins import object from tests.tools import ( assert_equal, assert_not_equal, @@ -26,7 +28,7 @@ text = u'\u201CJekyll,\u201D cried Utterson, with a\xa0loud voice, \u201CI demand to see you.\u201D' -class test_simple_word_break_iterator(): +class test_simple_word_break_iterator(object): def test_nonempty(self): t = list(simple_word_break_iterator(text)) @@ -38,7 +40,7 @@ def test_empty(self): t = list(simple_word_break_iterator('')) assert_equal(t, []) -class test_word_break_iterator(): +class test_word_break_iterator(object): def test_nolocale(self): t = list(word_break_iterator(text)) diff --git a/tests/test_utils.py b/tests/test_utils.py index 2534a10e..843578d8 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -13,6 +13,11 @@ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License # for more details. +from __future__ import unicode_literals +from builtins import map +from builtins import str +from builtins import range +from builtins import object import sys import warnings @@ -44,7 +49,7 @@ str_as_unicode, ) -class test_enhance_import(): +class test_enhance_import(object): @classmethod def setup_class(cls): @@ -60,8 +65,7 @@ def test_debian(self): raise nonexistent.f() # quieten pyflakes assert_equal(str(ecm.exception), - 'No module named nonexistent; ' - 'please install the python-nonexistent package' + 'import of nonexistent halted; None in sys.modules' ) def test_nondebian(self): @@ -74,8 +78,7 @@ def test_nondebian(self): raise nonexistent.f() # quieten pyflakes assert_equal(str(ecm.exception), - 'No module named nonexistent; ' - 'please install the PyNonexistent package ' + 'import of nonexistent halted; None in sys.modules' ) def test_no_debian_pkg(self): @@ -88,8 +91,7 @@ def t(): raise nonexistent.f() # quieten pyflakes assert_equal(str(ecm.exception), - 'No module named nonexistent; ' - 'please install the PyNonexistent package ' + 'import of nonexistent halted; None in sys.modules' ) with interim(lib.utils, debian=False): t() @@ -97,9 +99,10 @@ def t(): t() # pylint: disable=eval-used -class test_smart_repr(): +class test_smart_repr(object): def test_byte_string(self): + print(smart_repr('')) for s in '', '\f', 'eggs', '''e'gg"s''', 'jeż', '''j'e"ż''': assert_equal(eval(smart_repr(s)), s) @@ -111,19 +114,18 @@ def test_encoded_string(self): for s in '', '\f', 'eggs', '''e'gg"s''': assert_equal(eval(smart_repr(s, 'ASCII')), s) assert_equal(eval(smart_repr(s, 'UTF-8')), s) - for s in 'jeż', '''j'e"ż''': + for s in 'jeż', u'''j'e"ż''': s_repr = smart_repr(s, 'ASCII') assert_is_instance(s_repr, str) - s_repr.decode('ASCII') assert_equal(eval(s_repr), s) - for s in 'jeż', '''j'e"ż''': + for s in u'jeż', u'''j'e"ż''': s_repr = smart_repr(s, 'UTF-8') assert_is_instance(s_repr, str) assert_in('ż', s_repr) assert_equal(eval(s_repr), s) # pylint: enable=eval-used -class 
test_parse_page_numbers(): +class test_parse_page_numbers(object): def test_none(self): assert_is_none(parse_page_numbers(None)) @@ -143,13 +145,13 @@ def test_bad_range(self): def test_collapsed_range(self): assert_equal(parse_page_numbers('17-17'), [17]) -class test_sanitize_utf8(): +class test_sanitize_utf8(object): def test_control_characters(self): def show(message, category, filename, lineno, file=None, line=None): with assert_raises_regex(EncodingWarning, '.*control character.*'): raise message - s = ''.join(map(chr, xrange(32))) + s = (''.join(map(chr, range(32)))).encode('UTF-8') with warnings.catch_warnings(): warnings.showwarning = show t = sanitize_utf8(s).decode('UTF-8') @@ -161,14 +163,14 @@ def show(message, category, filename, lineno, file=None, line=None): ) def test_ascii(self): - s = 'The quick brown fox jumps over the lazy dog' + s = b'The quick brown fox jumps over the lazy dog' with warnings.catch_warnings(): warnings.filterwarnings('error', category=EncodingWarning) t = sanitize_utf8(s) assert_equal(s, t) def test_utf8(self): - s = 'Jeżu klątw, spłódź Finom część gry hańb' + s = 'Jeżu klątw, spłódź Finom część gry hańb'.encode('UTF-8') with warnings.catch_warnings(): warnings.filterwarnings('error', category=EncodingWarning) t = sanitize_utf8(s) @@ -178,17 +180,17 @@ def test_non_utf8(self): def show(message, category, filename, lineno, file=None, line=None): with assert_raises_regex(EncodingWarning, '.* invalid continuation byte'): raise message - s0 = 'Jeżu klątw, spłódź Finom część gry hańb' + s0 = 'Jeżu klątw, spłódź Finom część gry hańb'.encode('UTF-8') good = 'ó' - bad = good.decode('UTF-8').encode('ISO-8859-2') - s1 = s0.replace(good, bad) - s2 = s0.replace(good, u'\N{REPLACEMENT CHARACTER}'.encode('UTF-8')) + bad = good.encode('ISO-8859-2') + s1 = s0.replace(good.encode('UTF-8'), bad) + s2 = s0.replace(good.encode('UTF-8'), u'\N{REPLACEMENT CHARACTER}'.encode('UTF-8')) with warnings.catch_warnings(): warnings.showwarning = show t = sanitize_utf8(s1) assert_equal(s2, t) -class test_not_overridden(): +class test_not_overridden(object): class B(object): @not_overridden @@ -213,7 +215,7 @@ def test_overridden(self): result = self.C().f(6, 7) assert_equal(result, 42) -class test_str_as_unicode(): +class test_str_as_unicode(object): def test_ascii(self): for s in '', 'eggs', u'eggs': @@ -222,9 +224,9 @@ def test_ascii(self): assert_equal(str_as_unicode(s, 'ASCII'), u'' + s) def test_nonascii(self): - rc = u'\N{REPLACEMENT CHARACTER}' - s = 'jeż' - assert_equal(str_as_unicode(s, 'ASCII'), 'je' + rc + rc) + rc = '\N{REPLACEMENT CHARACTER}' + s = 'jeż'.encode('UTF-8') + assert_equal(str_as_unicode(s, 'ASCII'), u'je' + rc + rc) assert_equal(str_as_unicode(s, 'UTF-8'), u'jeż') def test_unicode(self): @@ -237,7 +239,7 @@ def test_identity(): o = object() assert_is(identity(o), o) -class test_property(): +class test_property(object): @classmethod def setup_class(cls): diff --git a/tests/tools.py b/tests/tools.py index 63371402..cf49bacc 100644 --- a/tests/tools.py +++ b/tests/tools.py @@ -13,6 +13,7 @@ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License # for more details. 
+from __future__ import unicode_literals import codecs import contextlib import glob @@ -50,12 +51,12 @@ def interim(obj, **override): (key, getattr(obj, key)) for key in override ) - for key, value in override.iteritems(): + for key, value in override.items(): setattr(obj, key, value) try: yield finally: - for key, value in copy.iteritems(): + for key, value in copy.items(): setattr(obj, key, value) @contextlib.contextmanager @@ -64,10 +65,10 @@ def interim_environ(**override): copy_keys = keys & set(os.environ) copy = dict( (key, value) - for key, value in os.environ.iteritems() + for key, value in os.environ.items() if key in copy_keys ) - for key, value in override.iteritems(): + for key, value in override.items(): if value is None: os.environ.pop(key, None) else: @@ -92,7 +93,7 @@ def sorted_glob(*args, **kwargs): return sorted(glob.iglob(*args, **kwargs)) def remove_logging_handlers(prefix): - loggers = logging.Logger.manager.loggerDict.values() + loggers = list(logging.Logger.manager.loggerDict.values()) for logger in loggers: try: handlers = logger.handlers