From 78910ecfe014e68773ce7a8c9372d1d4bc5621c4 Mon Sep 17 00:00:00 2001 From: Richard Christie Date: Wed, 22 Apr 2020 15:06:11 +1200 Subject: [PATCH] Match groups and markers ignoring case and whitespace differences --- src/scaffoldfitter/fitter.py | 82 ++++++++++++++++++--------- src/scaffoldfitter/fitterstepalign.py | 34 +++++------ 2 files changed, 72 insertions(+), 44 deletions(-) diff --git a/src/scaffoldfitter/fitter.py b/src/scaffoldfitter/fitter.py index 290636e..4223331 100644 --- a/src/scaffoldfitter/fitter.py +++ b/src/scaffoldfitter/fitter.py @@ -105,24 +105,50 @@ def _loadModel(self): def _loadData(self): """ Load zinc data file into self._rawDataRegion. + Rename data groups to exactly match model groups where they differ by case and whitespace only. Transfer data points (and converted nodes) into self._region. """ result = self._rawDataRegion.readFile(self._zincDataFileName) assert result == RESULT_OK, "Failed to load data file " + str(self._zincDataFileName) - # if there both nodes and datapoints, offset datapoint identifiers to ensure different fieldmodule = self._rawDataRegion.getFieldmodule() - nodes = fieldmodule.findNodesetByFieldDomainType(Field.DOMAIN_TYPE_NODES) - if nodes.getSize() > 0: - datapoints = fieldmodule.findNodesetByFieldDomainType(Field.DOMAIN_TYPE_DATAPOINTS) - if datapoints.getSize() > 0: - maximumDatapointIdentifier = max(0, getMaximumNodeIdentifier(datapoints)) - maximumNodeIdentifier = max(0, getMaximumNodeIdentifier(nodes)) - # this assumes identifiers are in low ranges and can be improved if there is a problem: - identifierOffset = 100000 - while (maximumDatapointIdentifier > identifierOffset) or (maximumNodeIdentifier > identifierOffset): - assert identifierOffset < 1000000000, "Invalid node and datapoint identifier ranges" - identifierOffset *= 10 - with ChangeManager(fieldmodule): + with ChangeManager(fieldmodule): + # rename data groups to match model + # future: match with annotation terms + modelGroupNames = [ group.getName() for group in getGroupList(self._fieldmodule) ] + writeDiagnostics = self.getDiagnosticLevel() > 0 + for dataGroup in getGroupList(fieldmodule): + dataGroupName = dataGroup.getName() + compareName = dataGroupName.strip().casefold() + for modelGroupName in modelGroupNames: + if modelGroupName == dataGroupName: + if writeDiagnostics: + print("Load data: Data group '" + dataGroupName + "' found in model") + break + elif modelGroupName.strip().casefold() == compareName: + result = dataGroup.setName(modelGroupName) + if result == RESULT_OK: + if writeDiagnostics: + print("Load data: Data group '" + dataGroupName + "' found in model as '" + modelGroupName + "'. Renaming to match.") + else: + print("Error: Load data: Data group '" + dataGroupName + "' found in model as '" + modelGroupName + "'. Renaming to match FAILED.") + if fieldmodule.findFieldByName(modelGroupName).isValid(): + print(" Reason: field of that name already exists.") + break + else: + if writeDiagnostics: + print("Load data: Data group '" + dataGroupName + "' not found in model") + # if there both nodes and datapoints, offset datapoint identifiers to ensure different + nodes = fieldmodule.findNodesetByFieldDomainType(Field.DOMAIN_TYPE_NODES) + if nodes.getSize() > 0: + datapoints = fieldmodule.findNodesetByFieldDomainType(Field.DOMAIN_TYPE_DATAPOINTS) + if datapoints.getSize() > 0: + maximumDatapointIdentifier = max(0, getMaximumNodeIdentifier(datapoints)) + maximumNodeIdentifier = max(0, getMaximumNodeIdentifier(nodes)) + # this assumes identifiers are in low ranges and can be improved if there is a problem: + identifierOffset = 100000 + while (maximumDatapointIdentifier > identifierOffset) or (maximumNodeIdentifier > identifierOffset): + assert identifierOffset < 1000000000, "Invalid node and datapoint identifier ranges" + identifierOffset *= 10 while True: # logic relies on datapoints being in identifier order datapoint = datapoints.createNodeiterator().next() @@ -131,18 +157,18 @@ def _loadData(self): break; result = datapoint.setIdentifier(identifier + identifierOffset) assert result == RESULT_OK, "Failed to offset datapoint identifier" - # transfer nodes as datapoints to self._region - sir = self._rawDataRegion.createStreaminformationRegion() - srm = sir.createStreamresourceMemory() - sir.setResourceDomainTypes(srm, Field.DOMAIN_TYPE_NODES) - self._rawDataRegion.write(sir) - result, buffer = srm.getBuffer() - assert result == RESULT_OK, "Failed to write nodes" - buffer = buffer.replace(bytes("!#nodeset nodes", "utf-8"), bytes("!#nodeset datapoints", "utf-8")) - sir = self._region.createStreaminformationRegion() - srm = sir.createStreamresourceMemoryBuffer(buffer) - result = self._region.read(sir) - assert result == RESULT_OK, "Failed to load nodes as datapoints" + # transfer nodes as datapoints to self._region + sir = self._rawDataRegion.createStreaminformationRegion() + srm = sir.createStreamresourceMemory() + sir.setResourceDomainTypes(srm, Field.DOMAIN_TYPE_NODES) + self._rawDataRegion.write(sir) + result, buffer = srm.getBuffer() + assert result == RESULT_OK, "Failed to write nodes" + buffer = buffer.replace(bytes("!#nodeset nodes", "utf-8"), bytes("!#nodeset datapoints", "utf-8")) + sir = self._region.createStreaminformationRegion() + srm = sir.createStreamresourceMemoryBuffer(buffer) + result = self._region.read(sir) + assert result == RESULT_OK, "Failed to load nodes as datapoints" # transfer datapoints to self._region sir = self._rawDataRegion.createStreaminformationRegion() srm = sir.createStreamresourceMemory() @@ -319,8 +345,8 @@ def _calculateMarkerDataLocations(self): fieldcache.setNode(datapoint) name = self._markerDataNameField.evaluateString(fieldcache) # if this is the only datapoint with name: - if name and findNodeWithName(self._markerDataGroup, self._markerDataNameField, name): - node = findNodeWithName(self._markerNodeGroup, self._markerNameField, name) + if name and findNodeWithName(self._markerDataGroup, self._markerDataNameField, name, ignore_case=True, strip_whitespace=True): + node = findNodeWithName(self._markerNodeGroup, self._markerNameField, name, ignore_case=True, strip_whitespace=True) if node: fieldcache.setNode(node) element, xi = self._markerLocationField.evaluateMeshLocation(fieldcache, meshDimension) @@ -539,7 +565,7 @@ def calculateDataProjections(self): unprojectedDatapoints.removeNodesConditional(self._dataProjectionNodeGroupFields[d]) unprojectedCount = unprojectedDatapoints.getSize() if unprojectedCount > 0: - print("Warning: " + str(unprojected) + " data points with data coordinates have not been projected") + print("Warning: " + str(unprojectedCount) + " data points with data coordinates have not been projected") del unprojectedDatapoints # remove temporary objects before ChangeManager exits diff --git a/src/scaffoldfitter/fitterstepalign.py b/src/scaffoldfitter/fitterstepalign.py index b33078d..24aa419 100644 --- a/src/scaffoldfitter/fitterstepalign.py +++ b/src/scaffoldfitter/fitterstepalign.py @@ -135,23 +135,25 @@ def _doAlignMarkers(self): assert markerDataGroup and markerDataCoordinates and markerDataName, "Align: No marker data group, coordinates or name fields" dataMarkers = getNodeNameCentres(markerDataGroup, markerDataCoordinates, markerDataName) - # match model and data markers, warn of missing markers + # match model and data markers, warn of unmatched markers markerMap = {} - for name, modelx in modelMarkers.items(): - datax = dataMarkers.get(name) - if datax: - markerMap[name] = ( modelx, datax ) - if self.getDiagnosticLevel() > 0: - for name in modelMarkers: - datax = dataMarkers.get(name) - if datax: - print("Align: Found marker " + name + " in model and data") - for name in modelMarkers: - if not markerMap.get(name): - print("Align: Model marker " + name + " not found in data") - for name in dataMarkers: - if not markerMap.get(name): - print("Align: Data marker " + name + " not found in model") + writeDiagnostics = self.getDiagnosticLevel() > 0 + for modelName in modelMarkers: + # name match allows case and whitespace differences + matchName = modelName.strip().casefold() + for dataName in dataMarkers: + if dataName.strip().casefold() == matchName: + markerMap[modelName] = ( modelMarkers[modelName], dataMarkers[dataName] ) + if writeDiagnostics: + print("Align: Model marker '" + modelName + "' found in data" + (" as '" + dataName +"'" if (dataName != modelName) else "")) + dataMarkers.pop(dataName) + break + else: + if writeDiagnostics: + print("Align: Model marker '" + modelName + "' not found in data") + if writeDiagnostics: + for dataName in dataMarkers: + print("Align: Data marker '" + dataName + "' not found in model") self._optimiseAlignment(markerMap)