diff --git a/test_verify.rb b/test_verify.rb
index 222f5dff12..0c35793ec4 100644
--- a/test_verify.rb
+++ b/test_verify.rb
@@ -21,6 +21,11 @@ def assert_fail(c, **kwargs)
   assert_one_at(r, 1)
 end
 
+def assert_error(c, **kwargs)
+  r = run([c], property: P, **kwargs) { yield }
+  assert_one_at(r, 2)
+end
+
 def test(**kwargs)
   normal_case = {
     'description' => 'normal',
@@ -42,12 +47,18 @@ def test(**kwargs)
   puts 'Fails when returning the wrong answer:'
   assert_fail(normal_case, **kwargs) { ANY_OLD_ANSWER + 100 }
 
-  puts 'Fails when erroring unexpectedly:'
-  assert_fail(normal_case, **kwargs) { raise 'no' }
+  puts 'Fails when using TestFailure:'
+  assert_fail(normal_case, **kwargs) { raise TestFailure, 'hello' }
+
+  puts 'Errors when erroring unexpectedly:'
+  assert_error(normal_case, **kwargs) { raise 'no' }
 
   puts 'Fails when not erroring even though it should:'
   assert_fail(error_case, **kwargs) { ANY_OLD_ANSWER }
 
+  puts 'Fails when using TestFailure (error expected):'
+  assert_fail(error_case, **kwargs) { raise TestFailure, 'hello' }
+
   puts 'Passes when erroring when it should:'
   assert_pass(error_case, **kwargs) { raise 'yes' }
 
@@ -60,10 +71,16 @@ def test(**kwargs)
   puts 'Passes when erroring with the right error when it should:'
   assert_pass(error_case, error_class: A, **kwargs) { raise A, 'hello' }
 
-  puts 'Fails when erroring with the wrong error:'
-  assert_fail(error_case, error_class: A, **kwargs) { raise 'hello' }
+  puts 'Fails when using TestFailure (specific error):'
+  assert_fail(error_case, error_class: A, **kwargs) { raise TestFailure, 'hello' }
+
+  puts 'Errors when erroring with the wrong error:'
+  assert_error(error_case, error_class: A, **kwargs) { raise 'hello' }
 end
 
 test
+# NOTE: should_fail does NOT change how run classifies any of these cases (pass/fail/error).
+# It only changes what run prints and how the caller (e.g. multi_verify) interprets the counts.
+test(should_fail: true)
 
 puts 'yes'
diff --git a/verify.rb b/verify.rb
index 3db7beb2af..230e39e51a 100644
--- a/verify.rb
+++ b/verify.rb
@@ -4,6 +4,13 @@ def by_property(cases, properties)
   h
 end
 
+# Raise this class to signal a failed test, as distinct from any other error.
+# In should_fail mode, a TestFailure is counted like any other failure
+# (there must be at least one failure for should_fail to be accepted).
+# In should_fail mode, unexpected errors of any other class are still rejected,
+# to catch bugs in should_fail implementations.
+class TestFailure < StandardError; end
+
 module Colours refine String do
   def colour(c)
     "\e[#{c}m#{self}\e[0m"
@@ -18,18 +25,29 @@ def bold; colour('1') end
 
 def run(
   cases, property:,
+  should_fail: false,
   error: ->(c) { (exp = c['expected']).is_a?(Hash) && exp.has_key?('error') },
   error_class: StandardError,
   accept: ->(c, answer) { c['expected'] == answer }
 )
+  # In should_fail mode:
+  # Passing test: No output (rather than output in green).
+  # Failing test: Output in green (rather than red).
+  #
+  # Expected error: No output (rather than output in green).
+  # Unexpected error: Output in red (we still don't want unexpected errors).
+  #
+  # No error, but should have had error: Output in green (rather than red).
   failed = 0
   passed = 0
+  errored = 0
+  fail_colour = should_fail ? '1;32' : '1;31'
 
   cases.each_with_index { |c, i|
     prefix = "#{i}. #{c['description']}"
     if property && c['property'] != property
       failed += 1
-      puts "#{prefix}: Invalid property #{c['property']} instead of #{property}".red
+      puts "#{prefix}: Invalid property #{c['property']} instead of #{property}".colour(fail_colour)
       next
     end
 
@@ -37,48 +55,65 @@ def run(
 
     begin
       answer = yield c['input'], c
+    rescue TestFailure => e
+      puts "#{prefix}: #{e}".colour(fail_colour)
+      failed += 1
     rescue => e
       if error_expected && e.is_a?(error_class)
-        puts "#{prefix}: Errored as expected: #{e}".green
+        puts "#{prefix}: Errored as expected: #{e}".green unless should_fail
         passed += 1
       else
         wanted = error_expected ? error_class.to_s : 'no error'
         puts "#{prefix}: Unwanted error in #{c['description']}, wanted #{wanted}".red
-        failed += 1
+        errored += 1
         puts e.message
         puts "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
       end
     else
       if error_expected
-        puts "#{prefix}: Unexpectedly no error, instead got #{answer}".red
+        puts "#{prefix}: Unexpectedly no error, instead got #{answer}".colour(fail_colour)
         failed += 1
       elsif !accept[c, answer]
         # TODO: expected isn't necessarily the right thing.
         expected = c['expected']
-        puts "#{prefix}: Got #{answer} instead of #{expected}".red
-        if answer.is_a?(Array) && expected.is_a?(Array)
+        puts "#{prefix}: Got #{answer} instead of #{expected}".colour(fail_colour)
+        if answer.is_a?(Array) && expected.is_a?(Array) && !should_fail
           puts "Extraneous elements: #{answer - expected}"
           puts "Missing elements: #{expected - answer}"
         end
         failed += 1
       else
-        puts prefix.green
+        puts prefix.green unless should_fail
         passed += 1
       end
     end
   }
 
-  [passed, failed]
+  [passed, failed, errored]
 end
 
 def verify(*args, **kwargs, &block)
   puts "== verify #{kwargs[:property]} ==".bold
 
-  passed, failed = run(*args, **kwargs, &block)
+  passed, failed, errored = run(*args, **kwargs, &block)
 
-  puts "#{passed} passed, #{failed} failed"
+  puts "#{passed} passed, #{failed} failed, #{errored} errored"
 
-  if failed > 0
-    at_exit { raise "#{failed} failed in a test of #{property}" }
+  if failed > 0 || errored > 0
+    at_exit { raise "#{failed} failed, #{errored} errored in a test of #{kwargs[:property]}" }
   end
 end
+
+def multi_verify(*args, implementations:, **kwargs)
+  implementations.each { |impl|
+    should_fail = !!impl[:should_fail]
+    puts "== #{'anti-' if should_fail}verify #{impl[:name]} #{kwargs[:property]} ==".bold
+
+    _, failed, errored = run(*args, **kwargs, should_fail: should_fail, &impl[:f])
+
+    actually_failed = failed > 0
+    if errored > 0 || should_fail != actually_failed
+      at_exit { raise "#{impl[:name]}: #{failed} failed, #{errored} errored in a test of #{kwargs[:property]}" }
+    end
+  }
+end