diff --git a/doc/command_line_usage.rdoc b/doc/command_line_usage.rdoc index 5661566b4..2a25f30b2 100644 --- a/doc/command_line_usage.rdoc +++ b/doc/command_line_usage.rdoc @@ -37,6 +37,15 @@ Options are: [--help (-H)] Display some help text and exit. +[--jobs _number_ (-j)] + Specifies the maximum number of concurrent tasks. The suggested + value is equal to the number of CPUs. + + Sample values: + (no -j) : unlimited concurrent tasks (standard rake behavior) + -j : 2 concurrent tasks (exact number may change) + -j 16 : 16 concurrent tasks + [--libdir _directory_ (-I)] Add _directory_ to the list of directories searched for require. diff --git a/lib/rake/application.rb b/lib/rake/application.rb index 08a0996b2..6afdf8254 100644 --- a/lib/rake/application.rb +++ b/lib/rake/application.rb @@ -2,6 +2,7 @@ require 'optparse' require 'rake/task_manager' +require 'rake/thread_pool' require 'rake/win32' module Rake @@ -64,6 +65,7 @@ def run init load_rakefile top_level + thread_pool.join end end @@ -106,6 +108,10 @@ def options @options ||= OpenStruct.new end + def thread_pool + @thread_pool ||= ThreadPool.new options.thread_pool_size + end + # private ---------------------------------------------------------------- def invoke_task(task_string) @@ -349,6 +355,10 @@ def standard_rake_options "Execute some Ruby code, then continue with normal task processing.", lambda { |value| eval(value) } ], + ['--jobs', '-j [NUMBER]', + "Specifies the maximum number of tasks to execute in parallel. 
(default:2)", + lambda { |value| options.thread_pool_size = [(value || 2).to_i,2].max } + ], ['--libdir', '-I LIBDIR', "Include LIBDIR in the search path for required modules.", lambda { |value| $:.push(value) } ], diff --git a/lib/rake/multi_task.rb b/lib/rake/multi_task.rb index 21c8de732..224bc7cce 100644 --- a/lib/rake/multi_task.rb +++ b/lib/rake/multi_task.rb @@ -6,10 +6,12 @@ module Rake class MultiTask < Task private def invoke_prerequisites(args, invocation_chain) - threads = @prerequisites.collect { |p| - Thread.new(p) { |r| application[r, @scope].invoke_with_call_chain(args, invocation_chain) } - } - threads.each { |t| t.join } + futures = @prerequisites.collect do |p| + application.thread_pool.future(p) do |r| + application[r, @scope].invoke_with_call_chain(args, invocation_chain) + end + end + futures.each { |f| f.call } end end diff --git a/lib/rake/thread_pool.rb b/lib/rake/thread_pool.rb new file mode 100644 index 000000000..9b6e88005 --- /dev/null +++ b/lib/rake/thread_pool.rb @@ -0,0 +1,133 @@ +require 'thread' +require 'set' + +module Rake + + class ThreadPool + + # Creates a ThreadPool object. + # The parameter is the size of the pool. By default, the pool uses unlimited threads. + def initialize(thread_count=nil) + @max_thread_count = [(thread_count||FIXNUM_MAX), 0].max + @threads = Set.new + @threads_mon = Monitor.new + @queue = Queue.new + @join_cond = @threads_mon.new_cond + end + + # Creates a future to be executed in the ThreadPool. + # The args are passed to the block when executing (similarly to Thread#new) + # The return value is a Proc which may or may not be already executing in + # another thread. 
Calling Proc#call will sleep the current thread until + # the future is finished and will return the result (or raise an Exception + # thrown from the future) + def future(*args,&block) + # capture the local args for the block (like Thread#start) + local_args = args.collect { |a| begin; a.dup; rescue; a; end } + + promise_mutex = Mutex.new + promise_result = promise_error = NOT_SET + + # (promise code builds on Ben Lavender's public-domain 'promise' gem) + promise = lambda do + # return immediately if the future has been executed + unless promise_result.equal?(NOT_SET) && promise_error.equal?(NOT_SET) + return promise_error.equal?(NOT_SET) ? promise_result : raise(promise_error) + end + + # try to get the lock and execute the promise, otherwise, sleep. + if promise_mutex.try_lock + if promise_result.equal?(NOT_SET) && promise_error.equal?(NOT_SET) + #execute the promise + begin + promise_result = block.call(*local_args) + rescue Exception => e + promise_error = e + end + block = local_args = nil # GC can now clean these up + end + promise_mutex.unlock + else + # Even if we didn't get the lock, we need to sleep until the promise has + # finished executing. If, however, the current thread is part of the thread + # pool, we need to free up a new thread in the pool so there will + # always be a thread doing work. + + wait_for_promise = lambda { promise_mutex.synchronize{} } + + unless @threads_mon.synchronize { @threads.include? Thread.current } + wait_for_promise.call + else + @threads_mon.synchronize { @max_thread_count += 1 } + start_thread + wait_for_promise.call + @threads_mon.synchronize { @max_thread_count -= 1 } + end + end + promise_error.equal?(NOT_SET) ? promise_result : raise(promise_error) + end + + @queue.enq promise + start_thread + promise + end + + # Waits until the queue of futures is empty and all threads have exited. + def join + @threads_mon.synchronize do + begin + @join_cond.wait unless @threads.empty? 
+ rescue Exception => e + STDERR.puts e + STDERR.print "Queue contains #{@queue.size} items. Thread pool contains #{@threads.count} threads\n" + STDERR.print "Current Thread #{Thread.current} status = #{Thread.current.status}\n" + STDERR.puts e.backtrace.join("\n") + @threads.each do |t| + STDERR.print "Thread #{t} status = #{t.status}\n" + STDERR.puts t.backtrace.join("\n") if t.respond_to? :backtrace + end + raise e + end + end + end + + private + def start_thread + @threads_mon.synchronize do + next unless @threads.count < @max_thread_count + + @threads << Thread.new do + begin + while @threads.count <= @max_thread_count && !@queue.empty? do + # Even though we just asked if the queue was empty, + # it still could have had an item which by this statement is now gone. + # For this reason we pass true to Queue#deq because we will sleep + # indefinitely if it is empty. + @queue.deq(true).call + end + rescue ThreadError # this means the queue is empty + ensure + @threads_mon.synchronize do + @threads.delete Thread.current + @join_cond.broadcast if @threads.empty? + end + end + end + end + end + + # for testing only + + def __queue__ + @queue + end + + def __threads__ + @threads.dup + end + + NOT_SET = Object.new.freeze + FIXNUM_MAX = (2**(0.size * 8 - 2) - 1) # FIXNUM_MAX + end + +end diff --git a/test/test_rake_application_options.rb b/test/test_rake_application_options.rb index 3d60cb697..a528e3705 100644 --- a/test/test_rake_application_options.rb +++ b/test/test_rake_application_options.rb @@ -41,6 +41,7 @@ def test_default_options assert_nil opts.show_tasks assert_nil opts.silent assert_nil opts.trace + assert_nil opts.thread_pool_size assert_equal ['rakelib'], opts.rakelib assert ! Rake::FileUtilsExt.verbose_flag assert ! 
Rake::FileUtilsExt.nowrite_flag @@ -111,6 +112,18 @@ def test_help assert_equal :exit, @exit end + def test_jobs + flags(['--jobs', '4'], ['-j', '4']) do |opts| + assert_equal 4, opts.thread_pool_size + end + flags(['--jobs', 'asdas'], ['-j', 'asdas']) do |opts| + assert_equal 2, opts.thread_pool_size + end + flags('--jobs', '-j') do |opts| + assert_equal 2, opts.thread_pool_size + end + end + def test_libdir flags(['--libdir', 'xx'], ['-I', 'xx'], ['-Ixx']) do |opts| $:.include?('xx') diff --git a/test/test_rake_thread_pool.rb b/test/test_rake_thread_pool.rb new file mode 100644 index 000000000..24148805c --- /dev/null +++ b/test/test_rake_thread_pool.rb @@ -0,0 +1,146 @@ +require File.expand_path('../helper', __FILE__) +require 'rake/thread_pool' +require 'test/unit/assertions' + +class TestRakeTestThreadPool < Rake::TestCase + include Rake + + def test_pool_executes_in_current_thread_for_zero_threads + pool = ThreadPool.new(0) + f = pool.future{Thread.current} + pool.join + assert_equal Thread.current, f.call + end + + def test_pool_executes_in_other_thread_for_pool_of_size_one + pool = ThreadPool.new(1) + f = pool.future{Thread.current} + pool.join + refute_equal Thread.current, f.call + end + + def test_pool_executes_in_two_other_threads_for_pool_of_size_two + pool = ThreadPool.new(2) + threads = 2.times.collect{ pool.future{ sleep 0.1; Thread.current } }.each{|f|f.call} + + refute_equal threads[0], threads[1] + refute_equal Thread.current, threads[0] + refute_equal Thread.current, threads[1] + end + + def test_pool_creates_the_correct_number_of_threads + pool = ThreadPool.new(2) + threads = Set.new + t_mutex = Mutex.new + 10.times.each do + pool.future do + sleep 0.02 + t_mutex.synchronize{ threads << Thread.current } + end + end + pool.join + assert_equal 2, threads.count + end + + def test_pool_future_captures_arguments + pool = ThreadPool.new(2) + a = 'a' + b = 'b' + c = 5 # 5 throws an exception with 5.dup. 
It should be ignored + pool.future(a,c){ |a_var,ignore| a_var.capitalize!; b.capitalize! } + pool.join + assert_equal 'a', a + assert_equal 'b'.capitalize, b + end + + def test_pool_join_empties_queue + pool = ThreadPool.new(2) + repeat = 25 + repeat.times { + pool.future do + repeat.times { + pool.future do + repeat.times { + pool.future do end + } + end + } + end + } + + pool.join + assert_equal true, pool.__send__(:__queue__).empty? + end + + # test that throwing an exception way down in the blocks propagates + # to the top + def test_exceptions + pool = ThreadPool.new(10) + + deep_exception_block = lambda do |count| + next raise Exception.new if ( count < 1 ) + pool.future(count-1, &deep_exception_block).call + end + + assert_raises(Exception) do + pool.future(2, &deep_exception_block).call + end + + end + + def test_pool_always_has_max_threads_doing_work + # here we need to test that even if some threads are halted, there + # are always at least max_threads that are not sleeping. + pool = ThreadPool.new(2) + initial_sleep_time = 0.2 + future1 = pool.future { sleep initial_sleep_time } + dependent_futures = 5.times.collect { pool.future{ future1.call } } + future2 = pool.future { sleep initial_sleep_time } + future3 = pool.future { sleep 0.01 } + + sleep initial_sleep_time / 2.0 # wait for everything to queue up + + # at this point, we should have 5 threads sleeping depending on future1, and + # two threads doing work on future1 and future 2. + assert_equal pool.__send__(:__threads__).count, 7 + + # future 3 is in the queue because there aren't enough active threads to work on it. 
+ assert_equal pool.__send__(:__queue__).size, 1 + + [future1, dependent_futures, future2, future3].flatten.each { |f| f.call } + pool.join + end + + def test_pool_prevents_deadlock + pool = ThreadPool.new(5) + + common_dependency_a = pool.future { sleep 0.2 } + futures_a = 10.times.collect { pool.future{ common_dependency_a.call; sleep(rand() * 0.01) } } + + common_dependency_b = pool.future { futures_a.each { |f| f.call } } + futures_b = 10.times.collect { pool.future{ common_dependency_b.call; sleep(rand() * 0.01) } } + + futures_b.each{|f|f.call} + pool.join + end + + def test_pool_reports_correct_results + pool = ThreadPool.new(7) + + a = 18 + b = 5 + c = 3 + + result = a.times.collect do + pool.future do + b.times.collect do + pool.future { sleep rand * 0.001; c } + end.inject(0) { |m,f| m+f.call } + end + end.inject(0) { |m,f| m+f.call } + + assert_equal( (a*b*c), result ) + pool.join + end + +end