diff --git a/pds3label/pds3_label_visitor.py b/pds3label/pds3_label_visitor.py
index 58f685e..5b4ef32 100644
--- a/pds3label/pds3_label_visitor.py
+++ b/pds3label/pds3_label_visitor.py
@@ -1,4 +1,5 @@
 from __future__ import print_function
+import re
 
 try:
     from collections import OrderedDict
@@ -40,8 +41,26 @@ def visitScalarIdentifier(self, ctx):
 
     def visitScalarString(self, ctx):
         ODLv21Visitor.visitScalarString(self, ctx)
-        return ctx.STRING().getText()
+        return Pds3LabelVisitor._clean_string(ctx.STRING().getText())
 
     def visitScalarSymbol(self, ctx):
         ODLv21Visitor.visitScalarSymbol(self, ctx)
-        return ctx.SYMBOL_STRING().getText()
+        return Pds3LabelVisitor._clean_symbol(ctx.SYMBOL_STRING().getText())
+
+    @classmethod
+    def _clean_symbol(cls, instring):
+        """Strips the single quotes off of the symbol"""
+        instring = re.sub(r"'", '', instring)
+        return instring
+
+    @classmethod
+    def _clean_string(cls, instring):
+        """Cleans up the provided string, including the following things:
+        * Strips off double quotes
+        * Re-joins words split by a hyphen at the end of a line
+        * Replaces all whitespace (including newlines) with a single space.
+        """
+        instring = re.sub(r'"', '', instring)  # strip "
+        instring = re.sub(r"-\s*\n\s*", '', instring)  # remove end-of-line hyphen, the newline and any indentation after it
+        instring = re.sub(r"\s+", ' ', instring)  # replace whitespace with single space
+        return instring
diff --git a/test/test_pds3label.py b/test/test_pds3label.py
index 3c1e340..500f49b 100755
--- a/test/test_pds3label.py
+++ b/test/test_pds3label.py
@@ -32,3 +32,15 @@ def test_string2(self):
         assert label.label_dict['INTEGER1'] == 1
         assert label.label_dict['FLOAT'] == 2.3
         assert label.label_dict['COMMENT1'] == "THING TEST"
+        assert label.label_dict['COMMENT2'] == "Alive."
+        assert label.label_dict['COMMENT_1'] == "THING TEST"
+        assert label.label_dict['COMMENT_2'] == "Alive."
+        assert label.label_dict['COMMENT_2_A'] == "Alive Again."
+        assert label.label_dict['SYMBOL_STR'] == "JBD-123"
+
+    def test_string3(self):
+        label = Pds3Label('test/data/string3.lbl')
+        assert label.infile == 'test/data/string3.lbl'
+        assert label.label_dict['MULTILINE'] == 'This is a test of the emergency broadcasting system.'
+        assert label.label_dict['HYPHENATED'] == 'The planet Jupiter is very big'
+        assert label.label_dict['EMPTY_STRING'] == ''