diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 00000000..88ddd57e
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,51 @@
+cmake_minimum_required(VERSION 3.18)
+project(poppunk_refine)
+set(CMAKE_CXX_STANDARD 14)
+
+# Variable definitions
+set(TARGET_NAME poppunk_refine)
+add_compile_definitions(PYTHON_EXT)
+
+# Add -O0 to remove optimizations when using gcc
+IF(CMAKE_COMPILER_IS_GNUCC)
+    set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0")
+    set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O0")
+ENDIF(CMAKE_COMPILER_IS_GNUCC)
+
+if(UNIX AND NOT APPLE)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp -D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS")
+    set(CMAKE_LD_FLAGS "${CMAKE_LDFLAGS} -Wl,--as-needed")
+endif()
+
+# Set paths for non standard lib/ and include/ locations
+if(DEFINED ENV{CONDA_PREFIX})
+    include_directories($ENV{CONDA_PREFIX}/include)
+    link_directories($ENV{CONDA_PREFIX}/lib)
+    link_directories($ENV{CONDA_PREFIX}/lib/intel64)
+endif()
+
+# Add libraries
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src)
+find_package(pybind11 REQUIRED)
+find_package (Eigen3 3.3 REQUIRED NO_MODULE)
+#find_package(OpenMP) # This links system openmp if present - conda sorts out rpath but take care
+
+# Define python library target
+add_library("${TARGET_NAME}" MODULE)
+
+# Compile CPU library
+target_sources("${TARGET_NAME}" PRIVATE src/python_bindings.cpp
+                                        src/boundary.cpp)
+
+set_target_properties("${TARGET_NAME}" PROPERTIES
+    CXX_VISIBILITY_PRESET "hidden"
+    INTERPROCEDURAL_OPTIMIZATION TRUE
+    PREFIX "${PYTHON_MODULE_PREFIX}"
+    SUFFIX "${PYTHON_MODULE_EXTENSION}"
+)
+
+target_link_libraries("${TARGET_NAME}" PRIVATE pybind11::module Eigen3::Eigen
+                                               z gomp openblas gfortran m dl)
+#if(OpenMP_CXX_FOUND)
+#    target_link_libraries("${TARGET_NAME}" PRIVATE OpenMP::OpenMP_CXX)
+#endif()
diff --git a/PopPUNK/__init__.py b/PopPUNK/__init__.py
index 0d38638d..1022c21d 100644
--- a/PopPUNK/__init__.py
+++ b/PopPUNK/__init__.py
@@ -3,9 +3,9 @@
 
 '''PopPUNK (POPulation Partitioning Using Nucleotide Kmers)'''
 
-__version__ = '2.3.0'
+__version__ = '2.4.0'
 
 # Minimum sketchlib version
 SKETCHLIB_MAJOR = 1
 SKETCHLIB_MINOR = 6
-SKETCHLIB_PATCH = 0
+SKETCHLIB_PATCH = 2
diff --git a/PopPUNK/__main__.py b/PopPUNK/__main__.py
index 4c59b145..63924906 100644
--- a/PopPUNK/__main__.py
+++ b/PopPUNK/__main__.py
@@ -10,8 +10,15 @@
 import subprocess
 from collections import defaultdict
 
-# required from v2.1.1 onwards (no mash support)
-import pp_sketchlib
+# Try to import sketchlib
+try:
+    import pp_sketchlib
+except ImportError as e:
+    sys.stderr.write("Sketchlib backend not available\n")
+    sys.exit(1)
+
+import poppunk_refine
+import h5py
 
 # import poppunk package
 from .__init__ import __version__
@@ -114,13 +121,19 @@ def get_options():
             type=float, default = None)
     refinementGroup.add_argument('--manual-start', help='A file containing information for a start point. '
             'See documentation for help.', default=None)
-    refinementGroup.add_argument('--indiv-refine', help='Also run refinement for core and accessory individually',
-            choices=['both', 'core', 'accessory'], default = False)
     refinementGroup.add_argument('--no-local', help='Do not perform the local optimization step (speed up on very large datasets)',
             default=False, action='store_true')
     refinementGroup.add_argument('--model-dir', help='Directory containing model to use for assigning queries '
                                                    'to clusters [default = reference database directory]', type = str)
-    refinementGroup.add_argument('--core-only', help='Save the core distance fit (with ')
+    refinementGroup.add_argument('--score-idx',
+            help='Index of score to use [default = 0]',
+            type=int, default = 0, choices=[0, 1, 2])
+    refineMode = refinementGroup.add_mutually_exclusive_group()
+    refineMode.add_argument('--unconstrained',
+            help='Optimise both boundary gradient and intercept',
+            default=False, action='store_true')
+    refineMode.add_argument('--indiv-refine', help='Also run refinement for core and accessory individually',
+            choices=['both', 'core', 'accessory'], default=False)
 
     # lineage clustering within strains
     lineagesGroup = parser.add_argument_group('Lineage analysis options')
@@ -334,7 +347,8 @@ def main():
                                    model_prefix + "/" + os.path.basename(model_prefix) + '_fit.npz',
                                    output)
             sys.stderr.write("Loaded previous model of type: " + model.type + "\n")
-            if args.fit_model == "refine" and (model.type != 'bgmm' and model.type != 'dbscan'):
+            if args.fit_model == "refine" and args.manual_start == None \
+                and model.type != 'bgmm' and model.type != 'dbscan':
                 sys.stderr.write("Model needs to be from BGMM or DBSCAN to refine\n")
                 sys.exit(1)
 
@@ -368,6 +382,8 @@ def main():
                                             args.pos_shift, args.neg_shift,
                                             args.manual_start,
                                             args.indiv_refine,
+                                            args.unconstrained,
+                                            args.score_idx,
                                             args.no_local,
                                             args.threads)
                 new_model.plot(distMat)
diff --git a/PopPUNK/models.py b/PopPUNK/models.py
index 422b13f1..902cf738 100644
--- a/PopPUNK/models.py
+++ b/PopPUNK/models.py
@@ -20,6 +20,7 @@
 from scipy.sparse import coo_matrix, bmat, find
 
 import pp_sketchlib
+import poppunk_refine
 
 # BGMM
 from .bgmm import fit2dMultiGaussian
@@ -526,9 +527,10 @@ def __init__(self, outPrefix):
         self.within_label = -1
         self.slope = 2
         self.threshold = False
+        self.unconstrained = False
 
     def fit(self, X, sample_names, model, max_move, min_move, startFile = None, indiv_refine = False,
-            no_local = False, threads = 1):
+            unconstrained = False, score_idx = 0, no_local = False, threads = 1):
         '''Extends :func:`~ClusterFit.fit`
 
         Fits the distances by optimising network score, by calling
@@ -557,6 +559,11 @@ def fit(self, X, sample_names, model, max_move, min_move, startFile = None, indi
                 Run refinement for core and accessory distances separately
 
                 (default = False).
+            unconstrained (bool)
+                If True, search in 2D and change the slope of the boundary
+            score_idx (int)
+                Index of score from :func:`~PopPUNK.network.networkSummary` to use
+                [default = 0]
             no_local (bool)
                 Turn off the local optimisation step.
                 Quicker, but may be less well refined.
@@ -572,9 +579,9 @@ def fit(self, X, sample_names, model, max_move, min_move, startFile = None, indi
         self.scale = np.copy(model.scale)
         self.max_move = max_move
         self.min_move = min_move
+        self.unconstrained = unconstrained
 
         # Get starting point
-        assignment = model.assign(X)
         model.no_scale()
         if startFile:
             self.mean0, self.mean1, self.start_s = readManualStart(startFile)
@@ -607,9 +614,11 @@ def fit(self, X, sample_names, model, max_move, min_move, startFile = None, indi
             raise RuntimeError("Unrecognised model type")
 
         # Main refinement in 2D
-        self.start_point, self.optimal_x, self.optimal_y, self.min_move, self.max_move = refineFit(X/self.scale,
-                sample_names, self.start_s, self.mean0, self.mean1, self.max_move, self.min_move,
-                slope = 2, no_local = no_local, num_processes = threads)
+        self.start_point, self.optimal_x, self.optimal_y, self.min_move, self.max_move = \
+          refineFit(X/self.scale,
+                    sample_names, self.start_s, self.mean0, self.mean1, self.max_move, self.min_move,
+                    slope = 2, score_idx = score_idx, unconstrained = unconstrained,
+                    no_local = no_local, num_processes = threads)
         self.fitted = True
 
         # Try and do a 1D refinement for both core and accessory
@@ -619,13 +628,15 @@ def fit(self, X, sample_names, model, max_move, min_move, startFile = None, indi
             try:
                 sys.stderr.write("Refining core and accessory separately\n")
                 # optimise core distance boundary
-                start_point, self.core_boundary, core_acc, self.min_move, self.max_move = refineFit(X/self.scale,
-                sample_names, self.start_s, self.mean0, self.mean1, self.max_move, self.min_move,
-                slope = 0, no_local = no_local,num_processes = threads)
+                start_point, self.core_boundary, core_acc, self.min_move, self.max_move = \
+                  refineFit(X/self.scale,
+                            sample_names, self.start_s, self.mean0, self.mean1, self.max_move, self.min_move,
+                            slope = 0, score_idx = score_idx, no_local = no_local,num_processes = threads)
                 # optimise accessory distance boundary
-                start_point, acc_core, self.accessory_boundary, self.min_move, self.max_move = refineFit(X/self.scale,
-                sample_names, self.start_s,self.mean0, self.mean1, self.max_move, self.min_move, slope = 1,
-                no_local = no_local, num_processes = threads)
+                start_point, acc_core, self.accessory_boundary, self.min_move, self.max_move = \
+                  refineFit(X/self.scale,
+                            sample_names, self.start_s,self.mean0, self.mean1, self.max_move, self.min_move,
+                            slope = 1, score_idx = score_idx, no_local = no_local, num_processes = threads)
                 self.indiv_fitted = True
             except RuntimeError as e:
                 sys.stderr.write("Could not separately refine core and accessory boundaries. "
@@ -670,6 +681,7 @@ def apply_threshold(self, X, threshold):
         self.fitted = True
         self.threshold = True
         self.indiv_fitted = False
+        self.unconstrained = False
 
         y = self.assign(X)
         return y
@@ -740,8 +752,8 @@ def plot(self, X, y=None):
 
         plot_refined_results(plot_X, self.assign(plot_X), self.optimal_x, self.optimal_y, self.core_boundary,
             self.accessory_boundary, self.mean0, self.mean1, self.start_point, self.min_move,
-            self.max_move, self.scale, self.threshold, self.indiv_fitted, "Refined fit boundary",
-            self.outPrefix + "/" + os.path.basename(self.outPrefix) + "_refined_fit")
+            self.max_move, self.scale, self.threshold, self.indiv_fitted, self.unconstrained,
+            "Refined fit boundary", self.outPrefix + "/" + os.path.basename(self.outPrefix) + "_refined_fit")
 
 
     def assign(self, X, slope=None, cpus=1):
@@ -765,11 +777,11 @@ def assign(self, X, slope=None, cpus=1):
             raise RuntimeError("Trying to assign using an unfitted model")
         else:
             if slope == 2 or (slope == None and self.slope == 2):
-                y = pp_sketchlib.assignThreshold(X/self.scale, 2, self.optimal_x, self.optimal_y, cpus)
+                y = poppunk_refine.assignThreshold(X/self.scale, 2, self.optimal_x, self.optimal_y, cpus)
             elif slope == 0 or (slope == None and self.slope == 0):
-                y = pp_sketchlib.assignThreshold(X/self.scale, 0, self.core_boundary, 0, cpus)
+                y = poppunk_refine.assignThreshold(X/self.scale, 0, self.core_boundary, 0, cpus)
             elif slope == 1 or (slope == None and self.slope == 1):
-                y = pp_sketchlib.assignThreshold(X/self.scale, 1, 0, self.accessory_boundary, cpus)
+                y = poppunk_refine.assignThreshold(X/self.scale, 1, 0, self.accessory_boundary, cpus)
 
         return y
 
diff --git a/PopPUNK/network.py b/PopPUNK/network.py
index 3e993600..b5c9cadd 100644
--- a/PopPUNK/network.py
+++ b/PopPUNK/network.py
@@ -127,6 +127,8 @@ def cliquePrune(component, graph, reference_indices, components_list):
     """Wrapper function around :func:`~getCliqueRefs` so it can be
        called by a multiprocessing pool
     """
+    if gt.openmp_enabled():
+        gt.openmp_set_num_threads(1)
     subgraph = gt.GraphView(graph, vfilt=components_list == component)
     refs = reference_indices.copy()
     if subgraph.num_vertices() <= 2:
@@ -364,39 +366,60 @@ def constructNetwork(rlist, qlist, assignments, within_label,
 
     # print some summaries
     if summarise:
-        (components, density, transitivity, score) = networkSummary(G)
-        sys.stderr.write("Network summary:\n" + "\n".join(["\tComponents\t" + str(components),
-                                                       "\tDensity\t" + "{:.4f}".format(density),
-                                                       "\tTransitivity\t" + "{:.4f}".format(transitivity),
-                                                       "\tScore\t" + "{:.4f}".format(score)])
+        (metrics, scores) = networkSummary(G)
+        sys.stderr.write("Network summary:\n" + "\n".join(["\tComponents\t\t\t\t" + str(metrics[0]),
+                                                       "\tDensity\t\t\t\t\t" + "{:.4f}".format(metrics[1]),
+                                                       "\tTransitivity\t\t\t\t" + "{:.4f}".format(metrics[2]),
+                                                       "\tMean betweenness\t\t\t" + "{:.4f}".format(metrics[3]),
+                                                       "\tWeighted-mean betweenness\t\t" + "{:.4f}".format(metrics[4]),
+                                                       "\tScore\t\t\t\t\t" + "{:.4f}".format(scores[0]),
+                                                       "\tScore (w/ betweenness)\t\t\t" + "{:.4f}".format(scores[1]),
+                                                       "\tScore (w/ weighted-betweenness)\t\t" + "{:.4f}".format(scores[2])])
                                                        + "\n")
 
     return G
 
-def networkSummary(G):
+def networkSummary(G, calc_betweenness=True):
     """Provides summary values about the network
 
     Args:
         G (graph)
             The network of strains from :func:`~constructNetwork`
+        calc_betweenness (bool)
+            Whether to calculate betweenness stats
 
     Returns:
-        components (int)
-            The number of connected components (and clusters)
-        density (float)
-            The proportion of possible edges used
-        transitivity (float)
-            Network transitivity (triads/triangles)
-        score (float)
-            A score of network fit, given by :math:`\mathrm{transitivity} * (1-\mathrm{density})`
+        metrics (list)
+            List with # components, density, transitivity, mean betweenness
+            and weighted mean betweenness
+        scores (list)
+            List of scores
     """
     component_assignments, component_frequencies = gt.label_components(G)
     components = len(component_frequencies)
     density = len(list(G.edges()))/(0.5 * len(list(G.vertices())) * (len(list(G.vertices())) - 1))
     transitivity = gt.global_clustering(G)[0]
-    score = transitivity * (1-density)
 
-    return(components, density, transitivity, score)
+    mean_bt = 0
+    weighted_mean_bt = 0
+    if calc_betweenness:
+        betweenness = []
+        sizes = []
+        for component, size in enumerate(component_frequencies):
+            if size > 3:
+                vfilt = component_assignments.a == component
+                subgraph = gt.GraphView(G, vfilt=vfilt)
+                betweenness.append(max(gt.betweenness(subgraph, norm = True)[0].a))
+                sizes.append(size)
+
+        if len(betweenness) > 1:
+            mean_bt = np.mean(betweenness)
+            weighted_mean_bt = np.average(betweenness, weights=sizes)
+
+    metrics = [components, density, transitivity, mean_bt, weighted_mean_bt]
+    base_score = transitivity * (1 - density)
+    scores = [base_score, base_score * (1 - metrics[3]), base_score * (1 - metrics[4])]
+    return(metrics, scores)
 
 def addQueryToNetwork(dbFuncs, rList, qList, G, kmers,
                       assignments, model, queryDB, queryQuery = False,
diff --git a/PopPUNK/plot.py b/PopPUNK/plot.py
index 2f0f23dc..521b41ae 100644
--- a/PopPUNK/plot.py
+++ b/PopPUNK/plot.py
@@ -28,6 +28,7 @@
 from .trees import write_tree, mst_to_phylogeny
 
 from .utils import isolateNameToLabel
+from .utils import decisionBoundary
 
 def plot_scatter(X, out_prefix, title, kde = True):
     """Draws a 2D scatter plot (png) of the core and accessory distances
@@ -225,7 +226,7 @@ def plot_dbscan_results(X, y, n_clusters, out_prefix):
 
 def plot_refined_results(X, Y, x_boundary, y_boundary, core_boundary, accessory_boundary,
         mean0, mean1, start_point, min_move, max_move, scale, threshold, indiv_boundaries,
-        title, out_prefix):
+        unconstrained, title, out_prefix):
     """Draw a scatter plot (png) to show the refined model fit
 
     A scatter plot of core and accessory distances, coloured by component
@@ -285,12 +286,21 @@ def plot_refined_results(X, Y, x_boundary, y_boundary, core_boundary, accessory_
 
         # Draw boundary search range
         if mean0 is not None and mean1 is not None and min_move is not None and max_move is not None and start_point is not None:
-            minimum_xy = transformLine(-min_move, start_point, mean1) * scale
-            maximum_xy = transformLine(max_move, start_point, mean1) * scale
-            plt.plot([minimum_xy[0], maximum_xy[0]], [minimum_xy[1], maximum_xy[1]],
-                    color='k', linewidth=1, linestyle=':', label='Search range')
-            start_point *= scale
-            plt.plot(start_point[0], start_point[1], 'ro', label='Initial boundary')
+            if unconstrained:
+                gradient = (mean1[1] - mean0[1]) / (mean1[0] - mean0[0])
+                opt_start = decisionBoundary(mean0, gradient) * scale
+                opt_end = decisionBoundary(mean1, gradient) * scale
+                plt.fill([opt_start[0], opt_end[0], 0, 0],
+                         [0, 0, opt_end[1], opt_start[1]],
+                         fill=True, facecolor='lightcoral', alpha = 0.2,
+                         label='Search range')
+            else:
+                minimum_xy = transformLine(-min_move, start_point, mean1) * scale
+                maximum_xy = transformLine(max_move, start_point, mean1) * scale
+                plt.plot([minimum_xy[0], maximum_xy[0]], [minimum_xy[1], maximum_xy[1]],
+                        color='k', linewidth=1, linestyle=':', label='Search range')
+                start_point *= scale
+                plt.plot(start_point[0], start_point[1], 'ro', label='Initial boundary')
 
             mean0 *= scale
             mean1 *= scale
diff --git a/PopPUNK/refine.py b/PopPUNK/refine.py
index 7bb5aa9b..4dab0523 100644
--- a/PopPUNK/refine.py
+++ b/PopPUNK/refine.py
@@ -7,10 +7,12 @@
 import os
 import sys
 # additional
+from itertools import chain
 from functools import partial
 import numpy as np
 import scipy.optimize
 import collections
+from tqdm import tqdm
 try:
     from multiprocessing import Pool, shared_memory
     from multiprocessing.managers import SharedMemoryManager
@@ -19,13 +21,18 @@
     sys.stderr.write("This version of PopPUNK requires python v3.8 or higher\n")
     sys.exit(0)
 import pp_sketchlib
+import poppunk_refine
 import graph_tool.all as gt
 
 from .network import constructNetwork
 from .network import networkSummary
 
+from .utils import transformLine
+from .utils import decisionBoundary
+
 def refineFit(distMat, sample_names, start_s, mean0, mean1,
-        max_move, min_move, slope = 2, no_local = False, num_processes = 1):
+              max_move, min_move, slope = 2, score_idx = 0,
+              unconstrained = False, no_local = False, num_processes = 1):
     """Try to refine a fit by maximising a network score based on transitivity and density.
 
     Iteratively move the decision boundary to do this, using starting point from existing model.
@@ -48,6 +55,11 @@ def refineFit(distMat, sample_names, start_s, mean0, mean1,
         slope (int)
             Set to 0 for a vertical line, 1 for a horizontal line, or
             2 to use a slope
+        score_idx (int)
+            Index of score from :func:`~PopPUNK.network.networkSummary` to use
+            [default = 0]
+        unconstrained (bool)
+            If True, search in 2D and change the slope of the boundary
         no_local (bool)
             Turn off the local optimisation step.
             Quicker, but may be less well refined.
@@ -77,53 +89,90 @@ def refineFit(distMat, sample_names, start_s, mean0, mean1,
     # Boundary is left of line normal to this point and first line
     gradient = (mean1[1] - mean0[1]) / (mean1[0] - mean0[0])
 
-    # ALTERNATIVE - use a single network
-    # Move boundary along in steps, and find those samples which have changed
-    # Use remove_edges/add_edges with index k lookup (n total) to find sample IDs
-    # https://stackoverflow.com/questions/27086195/linear-index-upper-triangular-matrix
-    # i = n - 2 - int(sqrt(-8*k + 4*n*(n-1)-7)/2.0 - 0.5)
-    # j = k + i + 1 - n*(n-1)/2 + (n-i)*((n-i)-1)/2
-
     # Optimize boundary - grid search for global minimum
     sys.stderr.write("Trying to optimise score globally\n")
-    global_grid_resolution = 40 # Seems to work
-    s_range = np.linspace(-min_move, max_move, num = global_grid_resolution)
-
-    # Move distMat into shared memory
-    with SharedMemoryManager() as smm:
-        shm_distMat = smm.SharedMemory(size = distMat.nbytes)
-        distances_shared_array = np.ndarray(distMat.shape, dtype = distMat.dtype, buffer = shm_distMat.buf)
-        distances_shared_array[:] = distMat[:]
-        distances_shared = NumpyShared(name = shm_distMat.name, shape = distMat.shape, dtype = distMat.dtype)
-
-        with Pool(processes = num_processes) as pool:
-            global_s = pool.map(partial(newNetwork,
-                                        sample_names = sample_names,
-                                        distMat = distances_shared,
-                                        start_point = start_point,
-                                        mean1 = mean1,
-                                        gradient = gradient,
-                                        slope = slope),
-                                s_range)
+
+    if unconstrained:
+        if slope != 2:
+            raise RuntimeError("Unconstrained optimization and indiv-refine incompatible")
+
+        global_grid_resolution = 20
+        x_max_start, y_max_start = decisionBoundary(mean0, gradient)
+        x_max_end, y_max_end = decisionBoundary(mean1, gradient)
+        x_max = np.linspace(x_max_start, x_max_end, global_grid_resolution, dtype=np.float32)
+        y_max = np.linspace(y_max_start, y_max_end, global_grid_resolution, dtype=np.float32)
+
+        if gt.openmp_enabled():
+            gt.openmp_set_num_threads(1)
+
+        with SharedMemoryManager() as smm:
+            shm_distMat = smm.SharedMemory(size = distMat.nbytes)
+            distances_shared_array = np.ndarray(distMat.shape, dtype = distMat.dtype, buffer = shm_distMat.buf)
+            distances_shared_array[:] = distMat[:]
+            distances_shared = NumpyShared(name = shm_distMat.name, shape = distMat.shape, dtype = distMat.dtype)
+
+            with Pool(processes = num_processes) as pool:
+                global_s = pool.map(partial(newNetwork2D,
+                                            sample_names = sample_names,
+                                            distMat = distances_shared,
+                                            x_range = x_max,
+                                            y_range = y_max,
+                                            score_idx = score_idx),
+                                    range(global_grid_resolution))
+
+        if gt.openmp_enabled():
+            gt.openmp_set_num_threads(num_processes)
+
+        global_s = list(chain.from_iterable(global_s))
+        min_idx = np.argmin(np.array(global_s))
+        optimal_x = x_max[min_idx % global_grid_resolution]
+        optimal_y = y_max[min_idx // global_grid_resolution]
+
+        if not (optimal_x > x_max_start and optimal_x < x_max_end and \
+                optimal_y > y_max_start and optimal_y < y_max_end):
+            no_local = True
+        elif not no_local:
+            # We have a fixed gradient and want to optimised the intercept
+            # This parameterisation is a little awkward to match the 1D case:
+            # Make two points along the right slope
+            gradient = optimal_x / optimal_y # of 1D search
+            delta = x_max[1] - x_max[0]
+            bounds = [-delta, delta]
+            start_point = (optimal_x, 0)
+            mean1 = (optimal_x + delta, delta * gradient)
+
+    else:
+        global_grid_resolution = 40 # Seems to work
+        s_range = np.linspace(-min_move, max_move, num = global_grid_resolution)
+        i_vec, j_vec, idx_vec = \
+            poppunk_refine.thresholdIterate1D(distMat, s_range, slope,
+                                                  start_point[0], start_point[1],
+                                                  mean1[0], mean1[1], num_processes)
+        global_s = growNetwork(sample_names, i_vec, j_vec, idx_vec, s_range, score_idx)
+        min_idx = np.argmin(np.array(global_s))
+        if min_idx > 0 and min_idx < len(s_range) - 1:
+            bounds = [s_range[min_idx-1], s_range[min_idx+1]]
+        else:
+            no_local = True
+            optimised_s = s_range[min_idx]
 
     # Local optimisation around global optimum
-    min_idx = np.argmin(np.array(global_s))
-    if min_idx > 0 and min_idx < len(s_range) - 1 and not no_local:
+    if not no_local:
         sys.stderr.write("Trying to optimise score locally\n")
         local_s = scipy.optimize.minimize_scalar(newNetwork,
-                        bounds=[s_range[min_idx-1], s_range[min_idx+1]],
+                        bounds=bounds,
                         method='Bounded', options={'disp': True},
-                        args = (sample_names, distMat, start_point, mean1, gradient, slope))
+                        args = (sample_names, distMat, start_point, mean1, gradient, slope, score_idx))
         optimised_s = local_s.x
-    else:
-        optimised_s = s_range[min_idx]
 
-    optimised_coor = transformLine(optimised_s, start_point, mean1)
-    if slope == 2:
-        optimal_x, optimal_y = decisionBoundary(optimised_coor, gradient)
-    else:
-        optimal_x = optimised_coor[0]
-        optimal_y = optimised_coor[1]
+    # Convert to x_max, y_max if needed
+    if not unconstrained or not no_local:
+        optimised_coor = transformLine(optimised_s, start_point, mean1)
+        if slope == 2:
+            optimal_x, optimal_y = decisionBoundary(optimised_coor, gradient)
+        else:
+            optimal_x = optimised_coor[0]
+            optimal_y = optimised_coor[1]
 
     if optimal_x < 0 or optimal_y < 0:
         raise RuntimeError("Optimisation failed: produced a boundary outside of allowed range\n")
@@ -131,7 +180,71 @@ def refineFit(distMat, sample_names, start_s, mean0, mean1,
     return start_point, optimal_x, optimal_y, min_move, max_move
 
 
-def newNetwork(s, sample_names, distMat, start_point, mean1, gradient, slope=2, cpus = 1):
+def growNetwork(sample_names, i_vec, j_vec, idx_vec, s_range, score_idx, thread_idx = 0):
+    """Construct a network, then add edges to it iteratively.
+    Input is from ``pp_sketchlib.iterateBoundary1D`` or``pp_sketchlib.iterateBoundary2D``
+
+    Args:
+        sample_names (list)
+            Sample names corresponding to distMat (accessed by iterator)
+        i_vec (list)
+            Ordered ref vertex index to add
+        j_vec (list)
+            Ordered query (==ref) vertex index to add
+        idx_vec (list)
+            For each i, j tuple, the index of the intercept at which these enter
+            the network. These are sorted and increasing
+        s_range (list)
+            Offsets which correspond to idx_vec entries
+        score_idx (int)
+            Index of score from :func:`~PopPUNK.network.networkSummary` to use
+            [default = 0]
+        thread_idx (int)
+            Optional thread idx (if multithreaded) to offset progress bar by
+    Returns:
+        scores (list)
+            -1 * network score for each of x_range.
+            Where network score is from :func:`~PopPUNK.network.networkSummary`
+    """
+    scores = []
+    edge_list = []
+    prev_idx = 0
+    # Grow a network
+    with tqdm(total=(idx_vec[-1] + 1),
+              bar_format="{bar}| {n_fmt}/{total_fmt}",
+              ncols=40,
+              position=thread_idx) as pbar:
+        for i, j, idx in zip(i_vec, j_vec, idx_vec):
+            if idx > prev_idx:
+                # At first offset, make a new network, otherwise just add the new edges
+                if prev_idx == 0:
+                    G = constructNetwork(sample_names, sample_names, edge_list, -1,
+                                         summarise=False, edge_list=True)
+                else:
+                    G.add_edge_list(edge_list)
+                # Add score into vector for any offsets passed (should usually just be one)
+                for s in range(prev_idx, idx):
+                    scores.append(-networkSummary(G, score_idx > 0)[1][score_idx])
+                    pbar.update(1)
+                prev_idx = idx
+                edge_list = []
+            edge_list.append((i, j))
+
+        # Add score for final offset(s) at end of loop
+        if prev_idx == 0:
+            G = constructNetwork(sample_names, sample_names, edge_list, -1,
+                                 summarise=False, edge_list=True)
+        else:
+            G.add_edge_list(edge_list)
+        for s in range(prev_idx, len(s_range)):
+            scores.append(-networkSummary(G, score_idx > 0)[1][score_idx])
+            pbar.update(1)
+
+    return(scores)
+
+
+def newNetwork(s, sample_names, distMat, start_point, mean1, gradient,
+               slope=2, score_idx=0, cpus=1):
     """Wrapper function for :func:`~PopPUNK.network.constructNetwork` which is called
     by optimisation functions moving a triangular decision boundary.
 
@@ -154,6 +267,10 @@ def newNetwork(s, sample_names, distMat, start_point, mean1, gradient, slope=2,
         slope (int)
             Set to 0 for a vertical line, 1 for a horizontal line, or
             2 to use a slope
+            [default = 2]
+        score_idx (int)
+            Index of score from :func:`~PopPUNK.network.networkSummary` to use
+            [default = 0]
         cpus (int)
             Number of CPUs to use for calculating assignment
     Returns:
@@ -176,13 +293,50 @@ def newNetwork(s, sample_names, distMat, start_point, mean1, gradient, slope=2,
         y_max = new_intercept[1]
 
     # Make network
-    boundary_assignments = pp_sketchlib.assignThreshold(distMat, slope, x_max, y_max, cpus)
+    boundary_assignments = poppunk_refine.assignThreshold(distMat, slope, x_max, y_max, cpus)
     G = constructNetwork(sample_names, sample_names, boundary_assignments, -1, summarise = False)
 
     # Return score
-    score = networkSummary(G)[3]
+    score = networkSummary(G, score_idx > 0)[1][score_idx]
     return(-score)
 
+def newNetwork2D(y_idx, sample_names, distMat, x_range, y_range, score_idx=0):
+    """Wrapper function for thresholdIterate2D and :func:`growNetwork`.
+
+    For a given y_max, constructs networks across x_range and returns a list
+    of scores
+
+    Args:
+        y_idx (float)
+            Maximum y-intercept of boundary, as index into y_range
+        sample_names (list)
+            Sample names corresponding to distMat (accessed by iterator)
+        distMat (numpy.array or NumpyShared)
+            Core and accessory distances or NumpyShared describing these in sharedmem
+        x_range (list)
+            Sorted list of x-intercepts to search
+        y_range (list)
+            Sorted list of y-intercepts to search
+        score_idx (int)
+            Index of score from :func:`~PopPUNK.network.networkSummary` to use
+            [default = 0]
+    Returns:
+        scores (list)
+            -1 * network score for each of x_range.
+            Where network score is from :func:`~PopPUNK.network.networkSummary`
+    """
+    if gt.openmp_enabled():
+        gt.openmp_set_num_threads(1)
+    if isinstance(distMat, NumpyShared):
+        distMat_shm = shared_memory.SharedMemory(name = distMat.name)
+        distMat = np.ndarray(distMat.shape, dtype = distMat.dtype, buffer = distMat_shm.buf)
+
+    y_max = y_range[y_idx]
+    i_vec, j_vec, idx_vec = \
+            poppunk_refine.thresholdIterate2D(distMat, x_range, y_max)
+    scores = growNetwork(sample_names, i_vec, j_vec, idx_vec, x_range, score_idx, y_idx)
+    return(scores)
+
 def readManualStart(startFile):
     """Reads a file to define a manual start point, rather than using ``--fit-model``
 
@@ -261,48 +415,3 @@ def likelihoodBoundary(s, model, start, end, within, between):
     responsibilities = model.assign(X, values = True)
     return(responsibilities[0, within] - responsibilities[0, between])
 
-
-def transformLine(s, mean0, mean1):
-    """Return x and y co-ordinates for traversing along a line between mean0 and mean1, parameterised by
-    a single scalar distance s from the start point mean0.
-
-    Args:
-        s (float)
-            Distance along line from mean0
-        mean0 (numpy.array)
-            Start position of line (x0, y0)
-        mean1 (numpy.array)
-            End position of line (x1, y1)
-    Returns:
-        x (float)
-            The Cartesian x-coordinate
-        y (float)
-            The Cartesian y-coordinate
-    """
-    tan_theta = (mean1[1] - mean0[1]) / (mean1[0] - mean0[0])
-    x = mean0[0] + s * (1/np.sqrt(1+tan_theta))
-    y = mean0[1] + s * (tan_theta/np.sqrt(1+tan_theta))
-
-    return np.array([x, y])
-
-
-def decisionBoundary(intercept, gradient):
-    """Returns the co-ordinates where the triangle the decision boundary forms
-    meets the x- and y-axes.
-
-    Args:
-        intercept (numpy.array)
-            Cartesian co-ordinates of point along line (:func:`~transformLine`)
-            which intercepts the boundary
-        gradient (float)
-            Gradient of the line
-    Returns:
-        x (float)
-            The x-axis intercept
-        y (float)
-            The y-axis intercept
-    """
-    x = intercept[0] + intercept[1] * gradient
-    y = intercept[1] + intercept[0] / gradient
-    return(x, y)
-
diff --git a/PopPUNK/sketchlib.py b/PopPUNK/sketchlib.py
index 1ef1bb3e..0f33e196 100644
--- a/PopPUNK/sketchlib.py
+++ b/PopPUNK/sketchlib.py
@@ -20,13 +20,8 @@
 import numpy as np
 from scipy import optimize
 
-# Try to import sketchlib
-try:
-    import pp_sketchlib
-    import h5py
-except ImportError as e:
-    sys.stderr.write("Sketchlib backend not available")
-    sys.exit(1)
+import pp_sketchlib
+import h5py
 
 from .__init__ import SKETCHLIB_MAJOR, SKETCHLIB_MINOR, SKETCHLIB_PATCH
 from .utils import iterDistRows
@@ -610,6 +605,7 @@ def sketchlibAssemblyQC(prefix, klist, qc_dict, strand_preserved, threads):
     hdf_in = h5py.File(db_name, 'r+')
 
     # try/except structure to prevent h5 corruption
+    failed_samples = False
     try:
         # process data structures
         read_grp = hdf_in['sketches']
@@ -643,7 +639,6 @@ def sketchlibAssemblyQC(prefix, klist, qc_dict, strand_preserved, threads):
         # open file to report QC failures
         with open(prefix + '/' + os.path.basename(prefix) + '_qcreport.txt', 'a+') as qc_file:
             # iterate through and filter
-            failed_sample = False
             for dataset in seq_length.keys():
                 # determine if sequence passes filters
                 remove = False
@@ -662,6 +657,7 @@ def sketchlibAssemblyQC(prefix, klist, qc_dict, strand_preserved, threads):
 
                 if remove:
                     sys.stderr.write(dataset + ' failed QC\n')
+                    failed_samples = True
                     failed.append(dataset)
                 else:
                     retained.append(dataset)
@@ -690,7 +686,7 @@ def sketchlibAssemblyQC(prefix, klist, qc_dict, strand_preserved, threads):
         raise
 
     # stop if at least one sample fails QC and option is not continue/prune
-    if failed_sample and qc_dict['qc_filter'] == 'stop':
+    if failed_samples and qc_dict['qc_filter'] == 'stop':
         sys.stderr.write('Sequences failed QC filters - details in ' + \
                          prefix + '/' + os.path.basename(prefix) + \
                          '_qcreport.txt\n')
diff --git a/PopPUNK/utils.py b/PopPUNK/utils.py
index aec9b05e..bd3d2995 100644
--- a/PopPUNK/utils.py
+++ b/PopPUNK/utils.py
@@ -445,3 +445,48 @@ def createOverallLineage(rank_list, lineage_clusters):
         overall_lineages['overall'][isolate] = overall_lineage
 
     return overall_lineages
+
+
+def transformLine(s, mean0, mean1):
+    """Return x and y co-ordinates for traversing along a line between mean0 and mean1, parameterised by
+    a single scalar distance s from the start point mean0.
+
+    Args:
+        s (float)
+            Distance along line from mean0
+        mean0 (numpy.array)
+            Start position of line (x0, y0)
+        mean1 (numpy.array)
+            End position of line (x1, y1)
+    Returns:
+        x (float)
+            The Cartesian x-coordinate
+        y (float)
+            The Cartesian y-coordinate
+    """
+    tan_theta = (mean1[1] - mean0[1]) / (mean1[0] - mean0[0])
+    x = mean0[0] + s * (1/np.sqrt(1+tan_theta))
+    y = mean0[1] + s * (tan_theta/np.sqrt(1+tan_theta))
+
+    return np.array([x, y])
+
+
+def decisionBoundary(intercept, gradient):
+    """Returns the co-ordinates where the triangle the decision boundary forms
+    meets the x- and y-axes.
+
+    Args:
+        intercept (numpy.array)
+            Cartesian co-ordinates of point along line (:func:`~transformLine`)
+            which intercepts the boundary
+        gradient (float)
+            Gradient of the line
+    Returns:
+        x (float)
+            The x-axis intercept
+        y (float)
+            The y-axis intercept
+    """
+    x = intercept[0] + intercept[1] * gradient
+    y = intercept[1] + intercept[0] / gradient
+    return(x, y)
\ No newline at end of file
diff --git a/docs/conf.py b/docs/conf.py
index 61baf7f4..b6376398 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -46,6 +46,7 @@
                         "matplotlib",
                         "dendropy",
                         "pp-sketchlib",
+                        "poppunk_refine",
                         "h5py",
                         "flask",
                         "flask-cors",
diff --git a/docs/images/ecoli_refine_constrained.png b/docs/images/ecoli_refine_constrained.png
new file mode 100644
index 00000000..00bec09a
Binary files /dev/null and b/docs/images/ecoli_refine_constrained.png differ
diff --git a/docs/images/ecoli_refine_unconstrained.png b/docs/images/ecoli_refine_unconstrained.png
new file mode 100644
index 00000000..8a6af138
Binary files /dev/null and b/docs/images/ecoli_refine_unconstrained.png differ
diff --git a/docs/images/unconstrained_refine.png b/docs/images/unconstrained_refine.png
new file mode 100644
index 00000000..f2626545
Binary files /dev/null and b/docs/images/unconstrained_refine.png differ
diff --git a/docs/installation.rst b/docs/installation.rst
index 1cfe6f56..7233a7b6 100644
--- a/docs/installation.rst
+++ b/docs/installation.rst
@@ -67,10 +67,10 @@ conda or pip for this purpose).
 
 Dependencies
 ------------
-We tested PopPUNK with the following packages:
+This documentation refers to a conda installation with the following packages:
 
 * python3 (3.8.2)
-* ``pp-sketchlib`` (1.5.1)
+* ``pp-sketchlib`` (1.6.2)
 * ``DendroPy`` (4.3.0)
 * ``hdbscan`` (0.8.13)
 * ``matplotlib`` (2.1.2)
diff --git a/docs/model_fitting.rst b/docs/model_fitting.rst
index d1239b04..1b0fde9c 100644
--- a/docs/model_fitting.rst
+++ b/docs/model_fitting.rst
@@ -99,10 +99,14 @@ Interpreting the network summary
 All fits will output a network summary which looks similar to this::
 
     Network summary:
-        Components	31
-        Density	0.0897
-        Transitivity	1.0000
-        Score	0.9103
+        Components				59
+        Density					0.0531
+        Transitivity				0.9966
+        Mean betweenness			0.0331
+        Weighted-mean betweenness		0.0454
+        Score					0.9438
+        Score (w/ betweenness)			0.9126
+        Score (w/ weighted-betweenness)		0.9009
 
 - Components are the number of strains (clusters) found using this model.
 - Density is the proportion of distances assigned as 'within-strain'. Generally
@@ -113,6 +117,8 @@ All fits will output a network summary which looks similar to this::
 - Score synthesises the above as :math:`(1 - \mathrm{density}) * \mathrm{transitivity}`,
   which gives a single number between 0 (bad) and 1 (good) which in many cases is
   at a maximum when it accurately describes strains in the data.
+- Two further scores for larger networks. See :ref:`alt-scores` for more information
+  on these.
 
 .. _bgmm:
 
@@ -490,6 +496,101 @@ Which, looking at the `microreact output <https://microreact.org/project/SJxxLMc
    :alt:  The refined fit, in microreact
    :align: center
 
+.. _alt-scores:
+
+Alternative network scores
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+Two additional network scores are now available using node betweenness. We have observed
+that in some refined fits to large datasets, some clusters are merged with a single high-stress
+edge at a relatively large distance. These scores aim to create a more conservative boundary that splits
+these clusters.
+
+For these scores:
+
+- The network is split into :math:`S` connected components (the strains) each of size :math:`w_i`
+- For each component with at least four nodes, the betweenness of the nodes are calculated
+- Each component is summarised by the maximum betweenness of any member node :math:`b^{\mathrm{max}}_i`
+
+.. math::
+
+    \mathrm{score}_1 &= \mathrm{score}_0 \cdot (1 - \frac{1}{S} \sum_{i = 1}^S  b^{\mathrm{max}}_i) \\
+    \mathrm{score}_2 &= \mathrm{score}_0 \cdot (1 - \frac{1}{S \cdot \Sigma w_i} \sum_{i = 1}^S  \left[ b^{\mathrm{max}}_i \cdot w_i \right])
+
+Score 1 is printed as score (w/ betweenness) and score 2 as score (w/ weighted-betweenness). Use ``--score-idx``
+with 0 (default), 1 (betweenness) or 2 (weighted-betweenness) to choose which score to optimise in refine
+mode. The default is the original score 0. Note that scores 1 and 2 may take longer to compute due to
+the betweenness calculation, though this can take advantage of multiple ``--threads``.
+
+Unconstrained (two-dimensional) optimisation
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+In the default mode described above, the boundary gradient is set from the identified
+means in the input model, and the position of the intercept is optimised (one-dimensional optimisation).
+
+In cases where the gradient of the boundary is not well set by the two means in the
+plot, you can optimise both the intercept and the gradient by adding the ``--unconstrained`` option
+(which is incompatible with ``--indiv-refine``). This will perform a global search
+of 20 x 20 (400 total) x- and y-intercept positions, followed by a 1D local search
+to further optimise the intercept (unless ``--no-local`` is added).
+
+As this calculates the boundary at ten times as many positions, it is generally expected to
+take ten times longer. However, you can effectively parallelise this with up to 20 ``--threads``::
+
+    poppunk --fit-model refine --ref-db listeria --model-dir dbscan --unconstrained --threads 4
+    PopPUNK (POPulation Partitioning Using Nucleotide Kmers)
+        (with backend: sketchlib v1.6.2
+        sketchlib: /Users/jlees/Documents/Imperial/pp-sketchlib/build/lib.macosx-10.9-x86_64-3.8/pp_sketchlib.cpython-38-darwin.so)
+
+    Graph-tools OpenMP parallelisation enabled: with 4 threads
+    Mode: Fitting refine model to reference database
+
+    Loading BGMM 2D Gaussian model
+    Loaded previous model of type: bgmm
+    Initial model-based network construction based on Gaussian fit
+    Initial boundary based network construction
+    Decision boundary starts at (0.52,0.43)
+    Trying to optimise score globally
+    Trying to optimise score locally
+
+    Optimization terminated successfully;
+    The returned value satisfies the termination criteria
+    (using xtol =  1e-05 )
+    Network summary:
+        Components				59
+        Density					0.0531
+        Transitivity				0.9966
+        Mean betweenness			0.0331
+        Weighted-mean betweenness		0.0454
+        Score					0.9438
+        Score (w/ betweenness)			0.9126
+        Score (w/ weighted-betweenness)		0.9009
+    Removing 545 sequences
+
+    Done
+
+Which gives a slightly higher network score, though overall similar clusters:
+
+.. image:: images/unconstrained_refine.png
+   :alt:  Refining fit with --unconstrained
+   :align: center
+
+This is because the gradient from the 1D optimisation was well set. Unconstrained optimisation
+can be useful with clusters which aren't parallel to the line that connects them. This is an
+example in *E.*\ |nbsp| \ *coli*:
+
+.. list-table::
+
+    * - .. figure:: images/ecoli_refine_constrained.png
+
+           1D refine fit between DBSCAN cluster centroids
+
+      - .. figure:: images/ecoli_refine_unconstrained.png
+
+           Unconstrained 2D fit over a greater range
+
+The search range will always be defined by a trapezium in light red -- bounded by
+the two axes, and two lines passing through the means which are normal to the line
+which connects the means.
+
 .. _manual-start:
 
 Using fit refinement when mixture model totally fails
diff --git a/environment.yml b/environment.yml
index 46c38610..e9373e33 100644
--- a/environment.yml
+++ b/environment.yml
@@ -17,9 +17,17 @@ dependencies:
   - hdbscan
   - rapidnj
   - h5py
-  - pp-sketchlib >=1.6.0
+  - pp-sketchlib >=1.6.2
   - graph-tool
   - requests
   - flask
   - flask-cors
   - networkx
+  - pybind11
+  - eigen
+  - openblas
+  - cmake >= 3.18
+  - libgfortran-ng
+  - boost-cpp
+  - libgomp
+  - tqdm
diff --git a/setup.py b/setup.py
index 455167fc..a0bea06d 100644
--- a/setup.py
+++ b/setup.py
@@ -4,10 +4,16 @@
 from setuptools import setup, find_packages
 from codecs import open
 from os import path
-import os
+import os, sys
 import re
 import io
+import platform
+import subprocess
 
+from setuptools import setup, find_packages, Extension
+from setuptools.command.build_ext import build_ext
+from setuptools.command.install import install
+from distutils.version import LooseVersion
 
 def read(*names, **kwargs):
     with io.open(
@@ -25,6 +31,53 @@ def find_version(*file_paths):
         return version_match.group(1)
     raise RuntimeError("Unable to find version string.")
 
+class CMakeExtension(Extension):
+    def __init__(self, name, sourcedir=''):
+        Extension.__init__(self, name, sources=[])
+        self.sourcedir = os.path.abspath(sourcedir)
+
+class CMakeBuild(build_ext):
+    def run(self):
+        try:
+            out = subprocess.check_output(['cmake', '--version'])
+        except OSError:
+            raise RuntimeError("CMake must be installed to build the following extensions: " +
+                            ", ".join(e.name for e in self.extensions))
+
+        if platform.system() == "Windows":
+            cmake_version = LooseVersion(re.search(r'version\s*([\d.]+)', out.decode()).group(1))
+            if cmake_version < '3.1.0':
+                raise RuntimeError("CMake >= 3.1.0 is required on Windows")
+
+        for ext in self.extensions:
+            self.build_extension(ext)
+
+    def build_extension(self, ext):
+        extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.name)))
+        cmake_args = ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=' + extdir,
+                      '-DPYTHON_EXECUTABLE=' + sys.executable,
+                      '-DCMAKE_VERBOSE_MAKEFILE:BOOL=ON']
+
+        cfg = 'Debug' if self.debug else 'Release'
+        build_args = ['--config', cfg]
+
+        if platform.system() == "Windows":
+            cmake_args += ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{}={}'.format(cfg.upper(), extdir)]
+            if sys.maxsize > 2**32:
+                cmake_args += ['-A', 'x64']
+            build_args += ['--', '/m']
+        else:
+            cmake_args += ['-DCMAKE_BUILD_TYPE=' + cfg]
+            build_args += ['--', '-j2']
+
+        env = os.environ.copy()
+        env['CXXFLAGS'] = '{} -DVERSION_INFO=\\"{}\\"'.format(env.get('CXXFLAGS', ''),
+                                                              self.distribution.get_version())
+
+        if not os.path.exists(self.build_temp):
+            os.makedirs(self.build_temp)
+        subprocess.check_call(['cmake', ext.sourcedir] + cmake_args, cwd=self.build_temp, env=env)
+        subprocess.check_call(['cmake', '--build', '.'] + build_args, cwd=self.build_temp)
 
 here = path.abspath(path.dirname(__file__))
 
@@ -70,5 +123,8 @@ def find_version(*file_paths):
              'scripts/poppunk_add_weights.py',
              'scripts/poppunk_easy_run.py',
              'scripts/poppunk_pickle_fix.py'],
+    ext_modules=[CMakeExtension('poppunk_refine')],
     test_suite="test",
+    cmdclass=dict(build_ext=CMakeBuild),
+    zip_safe=False
 )
diff --git a/src/boundary.cpp b/src/boundary.cpp
new file mode 100644
index 00000000..8f38ed67
--- /dev/null
+++ b/src/boundary.cpp
@@ -0,0 +1,215 @@
+/*
+ *
+ * boundary.cpp
+ * Functions to move a network boundary
+ *
+ */
+#include <cstddef> // size_t
+#include <cstdint>
+#include <cmath>   // floor/sqrt
+#include <cassert> // assert
+#include <vector>
+#include <numeric>
+#include <algorithm>
+#include <string>
+
+// Parallel sort
+#include <boost/sort/sort.hpp>
+
+#include "boundary.hpp"
+
+const float epsilon = 1E-10;
+
+template <class T>
+inline size_t rows_to_samples(const T &longMat)
+{
+    return 0.5 * (1 + sqrt(1 + 8 * (longMat.rows())));
+}
+
+inline size_t calc_row_idx(const uint64_t k, const size_t n)
+{
+    return n - 2 - std::floor(std::sqrt(static_cast<double>(-8 * k + 4 * n * (n - 1) - 7)) / 2 - 0.5);
+}
+
+inline size_t calc_col_idx(const uint64_t k, const size_t i, const size_t n)
+{
+    return k + i + 1 - n * (n - 1) / 2 + (n - i) * ((n - i) - 1) / 2;
+}
+
+inline size_t square_to_condensed(const size_t i, const size_t j, const size_t n)
+{
+    assert(j > i);
+    return (n * i - ((i * (i + 1)) >> 1) + j - 1 - i);
+}
+
+//https://stackoverflow.com/a/12399290
+template <typename T>
+std::vector<long> sort_indexes(const T &v, const uint32_t n_threads)
+{
+    // initialize original index locations
+    std::vector<long> idx(v.size());
+    std::iota(idx.begin(), idx.end(), 0);
+
+    boost::sort::parallel_stable_sort(
+        idx.begin(), idx.end(),
+        [&v](long i1, long i2) { return v[i1] < v[i2]; },
+        n_threads);
+
+    return idx;
+}
+
+// Unnormalised (signed_ distance between a point (x0, y0) and a line defined
+// by the two points (xmax, 0) and (0, ymax)
+// Divide by 1/sqrt(xmax^2 + ymax^2) to get distance
+inline float line_dist(const float x0,
+                       const float y0,
+                       const float x_max,
+                       const float y_max,
+                       const int slope)
+{
+    float boundary_side = 0;
+    if (slope == 2)
+    {
+        boundary_side = y0 * x_max + x0 * y_max - x_max * y_max;
+    }
+    else if (slope == 0)
+    {
+        boundary_side = x0 - x_max;
+    }
+    else if (slope == 1)
+    {
+        boundary_side = y0 - y_max;
+    }
+
+    return boundary_side;
+}
+
+Eigen::VectorXf assign_threshold(const NumpyMatrix &distMat,
+                                 const int slope,
+                                 const float x_max,
+                                 const float y_max,
+                                 unsigned int num_threads)
+{
+    Eigen::VectorXf boundary_test(distMat.rows());
+
+#pragma omp parallel for schedule(static) num_threads(num_threads)
+    for (long row_idx = 0; row_idx < distMat.rows(); row_idx++)
+    {
+        float in_tri = line_dist(distMat(row_idx, 0), distMat(row_idx, 1),
+                                 x_max, y_max, slope);
+        float boundary_side;
+        if (in_tri == 0)
+        {
+            boundary_side = 0;
+        }
+        else if (in_tri > 0)
+        {
+            boundary_side = 1;
+        }
+        else
+        {
+            boundary_side = -1;
+        }
+        boundary_test[row_idx] = boundary_side;
+    }
+    return (boundary_test);
+}
+
+// Line defined between (x0, y0) and (x1, y1)
+// Offset is distance along this line, starting at (x0, y0)
+network_coo threshold_iterate_1D(const NumpyMatrix &distMat,
+                                 const std::vector<double> &offsets,
+                                 const int slope,
+                                 const float x0,
+                                 const float y0,
+                                 const float x1,
+                                 const float y1,
+                                 const int num_threads)
+{
+    std::vector<long> i_vec;
+    std::vector<long> j_vec;
+    std::vector<long> offset_idx;
+    const float gradient = (y1 - y0) / (x1 - x0); // == tan(theta)
+    const size_t n_samples = rows_to_samples(distMat);
+
+    std::vector<float> boundary_dist(distMat.rows());
+    std::vector<long> boundary_order;
+    long sorted_idx = 0;
+    for (int offset_nr = 0; offset_nr < offsets.size(); ++offset_nr)
+    {
+        float x_intercept = x0 + offsets[offset_nr] * (1 / std::sqrt(1 + gradient));
+        float y_intercept = y0 + offsets[offset_nr] * (gradient / std::sqrt(1 + gradient));
+        float x_max, y_max;
+        if (slope == 2)
+        {
+            x_max = x_intercept + y_intercept * gradient;
+            y_max = y_intercept + x_intercept / gradient;
+        }
+        else if (slope == 0)
+        {
+            x_max = x_intercept;
+            y_max = 0;
+        }
+        else
+        {
+            x_max = 0;
+            y_max = y_intercept;
+        }
+
+        // Calculate the distances and sort them on the first loop entry
+        if (offset_nr == 0)
+        {
+#pragma omp parallel for schedule(static) num_threads(num_threads)
+            for (long row_idx = 0; row_idx < distMat.rows(); row_idx++)
+            {
+                boundary_dist[row_idx] = line_dist(distMat(row_idx, 0), distMat(row_idx, 1), x_max, y_max, slope);
+            }
+            boundary_order = sort_indexes(boundary_dist, num_threads);
+        }
+
+        long row_idx = boundary_order[sorted_idx];
+        while (sorted_idx < boundary_order.size() &&
+               line_dist(distMat(row_idx, 0),
+                         distMat(row_idx, 1),
+                         x_max, y_max, slope) <= 0)
+        {
+            long i = calc_row_idx(row_idx, n_samples);
+            long j = calc_col_idx(row_idx, i, n_samples);
+            i_vec.push_back(i);
+            j_vec.push_back(j);
+            offset_idx.push_back(offset_nr);
+            sorted_idx++;
+            row_idx = boundary_order[sorted_idx];
+        }
+    }
+    return (std::make_tuple(i_vec, j_vec, offset_idx));
+}
+
+network_coo threshold_iterate_2D(const NumpyMatrix &distMat,
+                                 const std::vector<float> &x_max,
+                                 const float y_max)
+{
+    std::vector<long> i_vec;
+    std::vector<long> j_vec;
+    std::vector<long> offset_idx;
+    const size_t n_samples = rows_to_samples(distMat);
+
+    for (int offset_nr = 0; offset_nr < x_max.size(); ++offset_nr)
+    {
+        for (long row_idx = 0; row_idx < distMat.rows(); row_idx++)
+        {
+            if (line_dist(distMat(row_idx, 0), distMat(row_idx, 1), x_max[offset_nr], y_max, 2) <= 0)
+            {
+                if (offset_nr == 0 || (line_dist(distMat(row_idx, 0), distMat(row_idx, 1), x_max[offset_nr - 1], y_max, 2) > 0))
+                {
+                    long i = calc_row_idx(row_idx, n_samples);
+                    long j = calc_col_idx(row_idx, i, n_samples);
+                    i_vec.push_back(i);
+                    j_vec.push_back(j);
+                    offset_idx.push_back(offset_nr);
+                }
+            }
+        }
+    }
+    return (std::make_tuple(i_vec, j_vec, offset_idx));
+}
diff --git a/src/boundary.hpp b/src/boundary.hpp
new file mode 100644
index 00000000..820daff4
--- /dev/null
+++ b/src/boundary.hpp
@@ -0,0 +1,36 @@
+/*
+ *
+ * matrix.hpp
+ * functions in matrix_ops.cpp
+ *
+ */
+#pragma once
+
+#include <vector>
+#include <cstdint>
+#include <cstddef>
+#include <string>
+
+#include <Eigen/Dense>
+
+typedef Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> NumpyMatrix;
+typedef std::tuple<std::vector<long>, std::vector<long>, std::vector<long>> network_coo;
+
+Eigen::VectorXf assign_threshold(const NumpyMatrix &distMat,
+                                 const int slope,
+                                 const float x_max,
+                                 const float y_max,
+                                 unsigned int num_threads);
+
+network_coo threshold_iterate_1D(const NumpyMatrix &distMat,
+                                 const std::vector<double> &offsets,
+                                 const int slope,
+                                 const float x0,
+                                 const float y0,
+                                 const float x1,
+                                 const float y1,
+                                 const int num_threads = 1);
+
+network_coo threshold_iterate_2D(const NumpyMatrix &distMat,
+                                 const std::vector<float> &x_max,
+                                 const float y_max);
diff --git a/src/python_bindings.cpp b/src/python_bindings.cpp
new file mode 100644
index 00000000..0171ad5e
--- /dev/null
+++ b/src/python_bindings.cpp
@@ -0,0 +1,88 @@
+/*
+ * sketchlib_bindings.cpp
+ * Python bindings for PopPUNK C++ functions
+ *
+ */
+
+// pybind11 headers
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+#include <pybind11/eigen.h>
+
+namespace py = pybind11;
+
+#include "boundary.hpp"
+
+// Wrapper which makes a ref to the python/numpy array
+Eigen::VectorXf assignThreshold(const Eigen::Ref<NumpyMatrix> &distMat,
+                                const int slope,
+                                const double x_max,
+                                const double y_max,
+                                const unsigned int num_threads = 1)
+{
+  Eigen::VectorXf assigned = assign_threshold(distMat,
+                                              slope,
+                                              x_max,
+                                              y_max,
+                                              num_threads);
+  return (assigned);
+}
+
+network_coo thresholdIterate1D(const Eigen::Ref<NumpyMatrix> &distMat,
+                 const std::vector<double> &offsets,
+                 const int slope,
+                 const double x0,
+                 const double y0,
+                 const double x1,
+                 const double y1,
+                 const int num_threads)
+{
+  if (!std::is_sorted(offsets.begin(), offsets.end()))
+  {
+    throw std::runtime_error("Offsets to thresholdIterate1D must be sorted");
+  }
+  std::tuple<std::vector<long>, std::vector<long>, std::vector<long>> add_idx =
+      threshold_iterate_1D(distMat, offsets, slope, x0, y0, x1, y1, num_threads);
+  return (add_idx);
+}
+
+network_coo thresholdIterate2D(const Eigen::Ref<NumpyMatrix> &distMat,
+                 const std::vector<float> &x_max,
+                 const float y_max)
+{
+  if (!std::is_sorted(x_max.begin(), x_max.end()))
+  {
+    throw std::runtime_error("x_max range to thresholdIterate2D must be sorted");
+  }
+  std::tuple<std::vector<long>, std::vector<long>, std::vector<long>> add_idx =
+      threshold_iterate_2D(distMat, x_max, y_max);
+  return (add_idx);
+}
+
+PYBIND11_MODULE(poppunk_refine, m)
+{
+  m.doc() = "Network refine helper functions";
+
+  // Exported functions
+  m.def("assignThreshold", &assignThreshold, py::return_value_policy::reference_internal, "Assign samples based on their relation to a 2D boundary",
+        py::arg("distMat").noconvert(),
+        py::arg("slope"),
+        py::arg("x_max"),
+        py::arg("y_max"),
+        py::arg("num_threads") = 1);
+
+  m.def("thresholdIterate1D", &thresholdIterate1D, py::return_value_policy::reference_internal, "Move a 2D boundary to grow a network by adding edges at each offset",
+        py::arg("distMat").noconvert(),
+        py::arg("offsets"),
+        py::arg("slope"),
+        py::arg("x0"),
+        py::arg("y0"),
+        py::arg("x1"),
+        py::arg("y1"),
+        py::arg("num_threads") = 1);
+
+  m.def("thresholdIterate2D", &thresholdIterate2D, py::return_value_policy::reference_internal, "Move a 2D boundary to grow a network by adding edges at each offset",
+        py::arg("distMat").noconvert(),
+        py::arg("x_max"),
+        py::arg("y_max"));
+}
diff --git a/test/run_test.py b/test/run_test.py
index 03ab8942..c3f11051 100755
--- a/test/run_test.py
+++ b/test/run_test.py
@@ -31,6 +31,8 @@
 sys.stderr.write("Running model refinement (--fit-model refine)\n")
 subprocess.run("python ../poppunk-runner.py --fit-model refine --ref-db example_db --output example_refine --neg-shift 0.8 --overwrite", shell=True, check=True)
 subprocess.run("python ../poppunk-runner.py --fit-model refine --ref-db example_db --output example_refine --neg-shift 0.8 --overwrite --indiv-refine both", shell=True, check=True)
+subprocess.run("python ../poppunk-runner.py --fit-model refine --ref-db example_db --output example_refine --neg-shift 0.8 --overwrite --score-idx 1", shell=True, check=True)
+subprocess.run("python ../poppunk-runner.py --fit-model refine --ref-db example_db --output example_refine --neg-shift 0.8 --overwrite --score-idx 2", shell=True, check=True)
 subprocess.run("python ../poppunk-runner.py --fit-model threshold --threshold 0.003 --ref-db example_db --output example_threshold", shell=True, check=True)
 
 # lineage clustering
@@ -42,6 +44,8 @@
 subprocess.run("python ../poppunk-runner.py --use-model --ref-db example_db --model-dir example_db --output example_use --overwrite", shell=True, check=True)
 
 # tests of other command line programs
+sys.stderr.write("Testing C++ extension\n")
+subprocess.run("python test-refine.py", shell=True, check=True)
 
 #assign query
 sys.stderr.write("Running query assignment\n")
diff --git a/test/test-refine.py b/test/test-refine.py
new file mode 100644
index 00000000..0a80447b
--- /dev/null
+++ b/test/test-refine.py
@@ -0,0 +1,106 @@
+import os, sys
+import numpy as np
+from math import sqrt
+
+# testing without install
+#sys.path.insert(0, '../build/lib.macosx-10.9-x86_64-3.8')
+import poppunk_refine
+
+# Original PopPUNK function (with some improvements)
+def withinBoundary(dists, x_max, y_max, slope=2):
+    boundary_test = np.ones((dists.shape[0]))
+    for row in range(boundary_test.size):
+        if slope == 2:
+            in_tri = dists[row, 1] * x_max + dists[row, 0] * y_max - x_max * y_max
+        elif slope == 0:
+            in_tri = dists[row, 0] - x_max
+        elif slope == 1:
+            in_tri = dists[row, 1] - y_max
+        if abs(in_tri) < np.finfo(np.float32).eps:
+          boundary_test[row] = 0
+        elif in_tri < 0:
+            boundary_test[row] = -1
+    return(boundary_test)
+
+def check_res(res, expected):
+    if (not np.all(res == expected)):
+        print(res)
+        print(expected)
+        raise RuntimeError("Results don't match")
+
+# assigning
+x = np.arange(0, 1, 0.1, dtype=np.float32)
+y = np.arange(0, 1, 0.1, dtype=np.float32)
+xv, yv = np.meshgrid(x, y)
+distMat = np.hstack((xv.reshape(-1,1), yv.reshape(-1,1)))
+assign0 = poppunk_refine.assignThreshold(distMat, 0, 0.5, 0.5, 2)
+assign1 = poppunk_refine.assignThreshold(distMat, 1, 0.5, 0.5, 2)
+assign2 = poppunk_refine.assignThreshold(distMat, 2, 0.5, 0.5, 2)
+
+assign0_res = withinBoundary(distMat, 0.5, 0.5, 0)
+assign1_res = withinBoundary(distMat, 0.5, 0.5, 1)
+assign2_res = withinBoundary(distMat, 0.5, 0.5, 2)
+
+check_res(assign0, assign0_res)
+check_res(assign1, assign1_res)
+check_res(assign2, assign2_res)
+
+# move boundary 1D
+# example is symmetrical at points (0.1, 0.1); (0.2, 0.2); (0.3, 0.3)
+samples = 100
+distMat = np.random.rand(int(0.5 * samples * (samples - 1)), 2)
+distMat = np.array(distMat, dtype = np.float32)
+offsets = [x * sqrt(2) for x in [-0.1, 0.0, 0.1]]
+i_vec, j_vec, idx_vec = poppunk_refine.thresholdIterate1D(distMat, offsets, 2, 0.2, 0.2, 0.3, 0.3)
+sketchlib_i = []
+sketchlib_j = []
+for offset_idx, offset in enumerate(offsets):
+  for i, j, idx in zip(i_vec, j_vec, idx_vec):
+    if idx > offset_idx:
+      break
+    elif idx == offset_idx:
+      sketchlib_i.append(i)
+      sketchlib_j.append(j)
+
+  py_i = []
+  py_j = []
+  xmax = 0.4 + (2 * (offset/sqrt(2)))
+  assign = poppunk_refine.assignThreshold(distMat, 2, xmax, xmax, 1)
+  dist_idx = 0
+  for i in range(samples):
+    for j in range(i + 1, samples):
+      if assign[dist_idx] <= 0:
+        py_i.append(i)
+        py_j.append(j)
+      dist_idx += 1
+  if set(zip(py_i, py_j)) != set(zip(sketchlib_i, sketchlib_j)):
+    raise RuntimeError("Threshold 1D iterate mismatch at offset " + str(offset))
+
+# move boundary 2D
+# example is for boundaries (0.1, 0.2); (0.2, 0.2); (0.3, 0.2)
+offsets = [0.1, 0.2, 0.3]
+y_max = 0.2
+i_vec, j_vec, idx_vec = poppunk_refine.thresholdIterate2D(distMat, offsets, y_max)
+sketchlib_i = []
+sketchlib_j = []
+for offset_idx, offset in enumerate(offsets):
+  for i, j, idx in zip(i_vec, j_vec, idx_vec):
+    if idx > offset_idx:
+      break
+    elif idx == offset_idx:
+      sketchlib_i.append(i)
+      sketchlib_j.append(j)
+
+  py_i = []
+  py_j = []
+  assign = poppunk_refine.assignThreshold(distMat, 2, offset, y_max, 1)
+  dist_idx = 0
+  for i in range(samples):
+    for j in range(i + 1, samples):
+      if assign[dist_idx] <= 0:
+        py_i.append(i)
+        py_j.append(j)
+      dist_idx += 1
+  if set(zip(py_i, py_j)) != set(zip(sketchlib_i, sketchlib_j)):
+    raise RuntimeError("Threshold 2D iterate mismatch at offset " + str(offset))
+