Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

parallel mode #12

Open
jwilk opened this issue Aug 2, 2018 · 1 comment
Open

parallel mode #12

jwilk opened this issue Aug 2, 2018 · 1 comment

Comments

@jwilk
Copy link
Owner

jwilk commented Aug 2, 2018

I had this patch lying around that adds a -j/--jobs option:

diff --git a/lib/cli.py b/lib/cli.py
--- a/lib/cli.py
+++ b/lib/cli.py
@@ -61,6 +61,17 @@
     def __call__(self):
         return self.x
 
+def get_cpu_count():
+    try:
+        import multiprocessing
+        return multiprocessing.cpu_count()
+    except (ImportError, NotImplementedError):
+        pass
+    try:
+        return os.sysconf('SC_NPROCESSORS_ONLN')
+    except (ValueError, OSError, AttributeError):
+        return 1
+
 class ArgumentParser(argparse.ArgumentParser):
 
     def __init__(self, methods, default_method):
@@ -98,6 +109,7 @@
             if p is p_bundle:
                 p.add_argument('-p', '--pages-per-dict', type=int, metavar='N', help='how many pages to compress in one pass')
             p.add_argument('-m', '--method', choices=methods, default=default_method, help='binarization method')
+            p.add_argument('-j', '--jobs', dest='n_jobs', metavar='N', nargs='?', type=int, default=1, help='number of jobs to run simultaneously')
             p.add_argument('-v', '--verbose', dest='verbosity', action='append_const', const=None, help='more informational messages')
             p.add_argument('-q', '--quiet', dest='verbosity', action='store_const', const=[], help='no informational messages')
             p.add_argument('input', metavar='<input-image>', nargs='+')
@@ -146,6 +158,8 @@
             o.pages_per_dict = 1
         action = getattr(actions, vars(o).pop('_action_'))
         o.method = self.__methods[o.method]
+        if o.n_jobs is None:
+            o.n_jobs = get_cpu_count()
         return action(o)
 
 __all__ = ['ArgumentParser']
diff --git a/lib/didjvu.py b/lib/didjvu.py
--- a/lib/didjvu.py
+++ b/lib/didjvu.py
@@ -14,12 +14,18 @@
 
 from __future__ import with_statement
 
+import functools
 import itertools
 import os
 import re
 import logging
 import sys
 
+try:
+    import multiprocessing.dummy as multiprocessing
+except ImportError:
+    multiprocessing = None
+
 from . import cli
 from . import djvu_extra as djvu
 from . import filetype
@@ -49,9 +55,18 @@
     handler.setFormatter(formatter)
     ipc_logger.addHandler(handler)
 
+def _explode(f, a, b):
+    return f(a, *b)
+
 def parallel_for(o, f, *iterables):
-    for args in zip(*iterables):
-        f(o, *args)
+    iterables = map(list, iterables)
+    assert max(map(len, iterables)) == min(map(len, iterables))
+    if o.n_jobs <= 1 or multiprocessing is None:
+        for args in zip(*iterables):
+            f(o, *args)
+    else:
+        pool = multiprocessing.Pool(o.n_jobs)
+        pool.map(functools.partial(_explode, f, o), zip(*iterables))
 
 def check_tty():
     if sys.stdout.isatty():

It no longer applies cleanly and IIRC it didn't work correctly.

@FriedrichFroebel
Copy link

I recently used didjvu to bundle lots of pages into DjVu files (400–900 pages per file) and observed that it was quite slow while only using one thread. Since each run is single-threaded, I could at least launch several bundling operations in parallel by hand, but real multiprocessing support would be nice to speed up the processing of each individual DjVu file.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

No branches or pull requests

2 participants