diff --git a/PopPUNK/__main__.py b/PopPUNK/__main__.py index c472d74c..436c6c3e 100644 --- a/PopPUNK/__main__.py +++ b/PopPUNK/__main__.py @@ -160,10 +160,10 @@ def get_options(): # model refinement refinementGroup = parser.add_argument_group('Refine model options') - refinementGroup.add_argument('--pos-shift', help='Maximum amount to move the boundary away from origin [default = 0.2]', - type=float, default=0.2) - refinementGroup.add_argument('--neg-shift', help='Maximum amount to move the boundary towards the origin [default = 0.4]', - type=float, default=0.4) + refinementGroup.add_argument('--pos-shift', help='Maximum amount to move the boundary away from origin [default = to between-strain mean]', + type=float, default = None) + refinementGroup.add_argument('--neg-shift', help='Maximum amount to move the boundary towards the origin [default = to within-strain mean]', + type=float, default = None) refinementGroup.add_argument('--manual-start', help='A file containing information for a start point. ' 'See documentation for help.', default=None) refinementGroup.add_argument('--indiv-refine', help='Also run refinement for core and accessory individually', default=False, diff --git a/PopPUNK/models.py b/PopPUNK/models.py index 9f98a8eb..b3d97a64 100644 --- a/PopPUNK/models.py +++ b/PopPUNK/models.py @@ -586,7 +586,7 @@ def fit(self, X, sample_names, model, max_move, min_move, startFile = None, indi raise RuntimeError("Unrecognised model type") # Main refinement in 2D - self.start_point, self.optimal_x, self.optimal_y = refineFit(X/self.scale, + self.start_point, self.optimal_x, self.optimal_y, self.min_move, self.max_move = refineFit(X/self.scale, sample_names, self.start_s, self.mean0, self.mean1, self.max_move, self.min_move, slope = 2, no_local = no_local, num_processes = threads) self.fitted = True @@ -597,13 +597,14 @@ def fit(self, X, sample_names, model, max_move, min_move, startFile = None, indi if indiv_refine: try: sys.stderr.write("Refining core and accessory separately\n") - - start_point, self.core_boundary, core_acc = refineFit(X/self.scale, sample_names, self.start_s, - self.mean0, self.mean1, self.max_move, self.min_move, slope = 0, no_local = no_local, - num_processes = threads) - start_point, acc_core, self.accessory_boundary = refineFit(X/self.scale, sample_names, self.start_s, - self.mean0, self.mean1, self.max_move, self.min_move, slope = 1, no_local = no_local, - num_processes = threads) + # optimise core distance boundary + start_point, self.core_boundary, core_acc, self.min_move, self.max_move = refineFit(X/self.scale, + sample_names, self.start_s, self.mean0, self.mean1, self.max_move, self.min_move, + slope = 0, no_local = no_local,num_processes = threads) + # optimise accessory distance boundary + start_point, acc_core, self.accessory_boundary, self.min_move, self.max_move = refineFit(X/self.scale, + sample_names, self.start_s,self.mean0, self.mean1, self.max_move, self.min_move, slope = 1, + no_local = no_local, num_processes = threads) self.indiv_fitted = True except RuntimeError as e: sys.stderr.write("Could not separately refine core and accessory boundaries. " diff --git a/PopPUNK/refine.py b/PopPUNK/refine.py index 25054e45..7df0fdc4 100644 --- a/PopPUNK/refine.py +++ b/PopPUNK/refine.py @@ -67,6 +67,12 @@ def refineFit(distMat, sample_names, start_s, mean0, mean1, sys.stderr.write("Decision boundary starts at (" + "{:.2f}".format(start_point[0]) + "," + "{:.2f}".format(start_point[1]) + ")\n") + # calculate distance between start point and means if none is supplied + if min_move is None: + min_move = ((mean0[0] - start_point[0])**2 + (mean0[1] - start_point[1])**2)**0.5 + if max_move is None: + max_move = ((mean1[0] - start_point[0])**2 + (mean1[1] - start_point[1])**2)**0.5 + # Boundary is left of line normal to this point and first line gradient = (mean1[1] - mean0[1]) / (mean1[0] - mean0[0]) @@ -121,7 +127,7 @@ def refineFit(distMat, sample_names, start_s, mean0, mean1, if optimal_x < 0 or optimal_y < 0: raise RuntimeError("Optimisation failed: produced a boundary outside of allowed range\n") - return start_point, optimal_x, optimal_y + return start_point, optimal_x, optimal_y, min_move, max_move def newNetwork(s, sample_names, distMat, start_point, mean1, gradient, slope=2, cpus = 1): @@ -299,4 +305,3 @@ def decisionBoundary(intercept, gradient): y = intercept[1] + intercept[0] / gradient return(x, y) -