diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/Copybook.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/Copybook.scala index 15c7926ab..482d38dde 100644 --- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/Copybook.scala +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/Copybook.scala @@ -251,19 +251,9 @@ class Copybook(val ast: CopybookAST) extends Serializable { fieldStrings.mkString("\n") } - val strings = for (grp <- ast.children) yield { - val start = grp.binaryProperties.offset + 1 - val length = grp.binaryProperties.actualSize - val end = start + length - 1 - val groupStr = generateGroupLayoutPositions(grp.asInstanceOf[Group]) - val namePart = alignLeft(s"${grp.name}", 55) - val fieldStartPart = alignRight(s"$start", 7) - val fieldEndPart = alignRight(s"$end", 7) - val fieldLengthPart = alignRight(s"$length", 7) - s"$namePart$fieldStartPart$fieldEndPart$fieldLengthPart\n$groupStr" - } + val layout = generateGroupLayoutPositions(ast) val header = "-------- FIELD LEVEL/NAME --------- --ATTRIBS-- FLD START END LENGTH\n\n" - header + strings.mkString("\n") + header + layout } def dropRoot(): Copybook = { @@ -313,10 +303,14 @@ class Copybook(val ast: CopybookAST) extends Serializable { object Copybook { - def merge(copybooks: Iterable[Copybook]): Copybook = { + def merge(copybooks: Seq[Copybook]): Copybook = { if (copybooks.isEmpty) throw new RuntimeException("Cannot merge an empty iterable of copybooks.") + if (copybooks.size == 1) { + return copybooks.head + } + // make sure all segments are the same level val rootLevels: Set[Int] = copybooks.flatMap(cb => cb.ast.children.map({ case x: Group => x.level @@ -354,6 +348,7 @@ object Copybook { case x: Group => x.copy(redefines = None, isRedefined = true)(Some(newRoot)) case x: Primitive => x.copy(redefines = None, isRedefined = true)(Some(newRoot)) }) + newRoot.children ++= copybooks.head.ast.children.tail.map({ case x: Group => x.copy(redefines = Option(targetName), isRedefined = false)(Some(newRoot)) case x: Primitive => x.copy(redefines = Option(targetName), isRedefined = false)(Some(newRoot)) diff --git a/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/copybooks/CopybooksOperationsSpec.scala b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/copybooks/CopybooksOperationsSpec.scala index bedf9e21a..912824d20 100644 --- a/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/copybooks/CopybooksOperationsSpec.scala +++ b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/copybooks/CopybooksOperationsSpec.scala @@ -42,7 +42,17 @@ class CopybooksOperationsSpec extends FunSuite { assert(copybook1.generateRecordLayoutPositions == """-------- FIELD LEVEL/NAME --------- --ATTRIBS-- FLD START END LENGTH | - |RECORD_COPYBOOK_1 1 30 30 + | 1 RECORD_COPYBOOK_1 7 1 30 30 + | 5 GROUP_1 7 1 30 30 + | 6 FIELD_1 3 1 10 10 + | 6 FILLER 4 11 15 5 + | 6 GROUP_2 7 16 30 15 + | 10 NESTED_FIELD_1 6 16 25 10 + | 10 FILLER 7 26 30 5""" + .stripMargin.replace("\r\n", "\n")) + assert(copybookDR1.generateRecordLayoutPositions == + """-------- FIELD LEVEL/NAME --------- --ATTRIBS-- FLD START END LENGTH + | | 5 GROUP_1 6 1 30 30 | 6 FIELD_1 2 1 10 10 | 6 FILLER 3 11 15 5 @@ -50,16 +60,6 @@ class CopybooksOperationsSpec extends FunSuite { | 10 NESTED_FIELD_1 5 16 25 10 | 10 FILLER 6 26 30 5""" .stripMargin.replace("\r\n", "\n")) - assert(copybookDR1.generateRecordLayoutPositions == - """-------- FIELD LEVEL/NAME --------- --ATTRIBS-- FLD START END LENGTH - | - |GROUP_1 1 30 30 - | 6 FIELD_1 1 1 10 10 - | 6 FILLER 2 11 15 5 - | 6 GROUP_2 5 16 30 15 - | 10 NESTED_FIELD_1 4 16 25 10 - | 10 FILLER 5 26 30 5""" - .stripMargin.replace("\r\n", "\n")) val exception = intercept[RuntimeException] { copybookDR1.dropRoot() @@ -126,22 +126,22 @@ class CopybooksOperationsSpec extends FunSuite { assert(copybookR1.generateRecordLayoutPositions == """-------- FIELD LEVEL/NAME --------- --ATTRIBS-- FLD START END LENGTH | - |GROUP_1A 1 30 30 - | 6 FIELD_1 1 1 10 10 - | 6 FILLER 2 11 15 5 - | 6 GROUP_2A 5 16 30 15 - | 10 NESTED_FIELD_1 4 16 25 10 - | 10 FILLER 5 26 30 5""" + | 5 GROUP_1A 6 1 30 30 + | 6 FIELD_1 2 1 10 10 + | 6 FILLER 3 11 15 5 + | 6 GROUP_2A 6 16 30 15 + | 10 NESTED_FIELD_1 5 16 25 10 + | 10 FILLER 6 26 30 5""" .stripMargin.replace("\r\n", "\n")) assert(copybookR2.generateRecordLayoutPositions == """-------- FIELD LEVEL/NAME --------- --ATTRIBS-- FLD START END LENGTH | - |GROUP_1B 1 60 60 - | 6 FIELD_1 1 1 20 20 - | 6 FILLER 2 21 30 10 - | 6 GROUP_2B 5 31 60 30 - | 10 NESTED_FIELD_1 4 31 50 20 - | 10 FILLER 5 51 60 10""" + | 5 GROUP_1B 6 1 60 60 + | 6 FIELD_1 2 1 20 20 + | 6 FILLER 3 21 30 10 + | 6 GROUP_2B 6 31 60 30 + | 10 NESTED_FIELD_1 5 31 50 20 + | 10 FILLER 6 51 60 10""" .stripMargin.replace("\r\n", "\n")) val exception1 = intercept[RuntimeException] { diff --git a/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/copybooks/MergeCopybooksSpec.scala b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/copybooks/MergeCopybooksSpec.scala index 8d4b6c402..b0d5ae5bf 100644 --- a/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/copybooks/MergeCopybooksSpec.scala +++ b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/copybooks/MergeCopybooksSpec.scala @@ -76,34 +76,34 @@ class MergeCopybooksSpec extends FunSuite { assert(copybook123.generateRecordLayoutPositions == """-------- FIELD LEVEL/NAME --------- --ATTRIBS-- FLD START END LENGTH | - |RECORD_COPYBOOK_1 1 90 90 - | 5 GROUP_1 6 1 30 30 - | 6 FIELD_1 2 1 10 10 - | 6 FILLER 3 11 15 5 - | 6 GROUP_2 6 16 30 15 - | 10 NESTED_FIELD_1 5 16 25 10 - | 10 FILLER 6 26 30 5 - |RECORD_COPYBOOK_2A 1 90 90 - | 5 GROUP_1 12 1 60 60 - | 6 FIELD_1 8 1 20 20 - | 6 FILLER 9 21 30 10 - | 6 GROUP_2 12 31 60 30 - | 10 NESTED_FIELD_1 11 31 50 20 - | 10 FILLER 12 51 60 10 - |RECORD_COPYBOOK_2B 1 90 90 - | 5 GROUP_1 18 1 60 60 - | 6 FIELD_1 14 1 20 20 - | 6 FILLER 15 21 30 10 - | 6 GROUP_2 18 31 60 30 - | 10 NESTED_FIELD_1 17 31 50 20 - | 10 FILLER 18 51 60 10 - |RECORD_COPYBOOK_3 1 90 90 - | 5 GROUP_1 24 1 90 90 - | 6 FIELD_1 20 1 30 30 - | 6 FILLER 21 31 45 15 - | 6 GROUP_2 24 46 90 45 - | 10 NESTED_FIELD_1 23 46 75 30 - | 10 FILLER 24 76 90 15""" + | 1 RECORD_COPYBOOK_1 r 7 1 90 90 + | 5 GROUP_1 7 1 30 30 + | 6 FIELD_1 3 1 10 10 + | 6 FILLER 4 11 15 5 + | 6 GROUP_2 7 16 30 15 + | 10 NESTED_FIELD_1 6 16 25 10 + | 10 FILLER 7 26 30 5 + | 1 RECORD_COPYBOOK_2A rR 14 1 90 90 + | 5 GROUP_1 14 1 60 60 + | 6 FIELD_1 10 1 20 20 + | 6 FILLER 11 21 30 10 + | 6 GROUP_2 14 31 60 30 + | 10 NESTED_FIELD_1 13 31 50 20 + | 10 FILLER 14 51 60 10 + | 1 RECORD_COPYBOOK_2B rR 21 1 90 90 + | 5 GROUP_1 21 1 60 60 + | 6 FIELD_1 17 1 20 20 + | 6 FILLER 18 21 30 10 + | 6 GROUP_2 21 31 60 30 + | 10 NESTED_FIELD_1 20 31 50 20 + | 10 FILLER 21 51 60 10 + | 1 RECORD_COPYBOOK_3 R 28 1 90 90 + | 5 GROUP_1 28 1 90 90 + | 6 FIELD_1 24 1 30 30 + | 6 FILLER 25 31 45 15 + | 6 GROUP_2 28 46 90 45 + | 10 NESTED_FIELD_1 27 46 75 30 + | 10 FILLER 28 76 90 15""" .stripMargin.replace("\r\n", "\n")) } @@ -126,16 +126,17 @@ class MergeCopybooksSpec extends FunSuite { assert(copybook1M.getRecordSize == 30) assert(copybook1M.generateRecordLayoutPositions == copybook1.generateRecordLayoutPositions) + assert(copybook1M.generateRecordLayoutPositions == """-------- FIELD LEVEL/NAME --------- --ATTRIBS-- FLD START END LENGTH | - |RECORD_COPYBOOK_1 1 30 30 - | 5 GROUP_1 6 1 30 30 - | 6 FIELD_1 2 1 10 10 - | 6 FILLER 3 11 15 5 - | 6 GROUP_2 6 16 30 15 - | 10 NESTED_FIELD_1 5 16 25 10 - | 10 FILLER 6 26 30 5""" + | 1 RECORD_COPYBOOK_1 7 1 30 30 + | 5 GROUP_1 7 1 30 30 + | 6 FIELD_1 3 1 10 10 + | 6 FILLER 4 11 15 5 + | 6 GROUP_2 7 16 30 15 + | 10 NESTED_FIELD_1 6 16 25 10 + | 10 FILLER 7 26 30 5""" .stripMargin.replace("\r\n", "\n")) } diff --git a/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/copybooks/ParseCommentsSpec.scala b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/copybooks/ParseCommentsSpec.scala index 314ec773c..4ecb2d49c 100644 --- a/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/copybooks/ParseCommentsSpec.scala +++ b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/copybooks/ParseCommentsSpec.scala @@ -27,9 +27,9 @@ class ParseCommentsSpec extends FunSuite { private val expectedLayout = """-------- FIELD LEVEL/NAME --------- --ATTRIBS-- FLD START END LENGTH | - |GRP_01 1 11 11 - | 3 FIELD1 1 1 1 1 - | 3 FIELD2 2 2 11 10""" + | 1 GRP_01 3 1 11 11 + | 3 FIELD1 2 1 1 1 + | 3 FIELD2 3 2 11 10""" .stripMargin.replace("\r\n", "\n") test("Test copybook parser handles comment lines") { diff --git a/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/copybooks/ParseDebugRedefinedSpec.scala b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/copybooks/ParseDebugRedefinedSpec.scala index 9c146937e..b5e3722fc 100644 --- a/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/copybooks/ParseDebugRedefinedSpec.scala +++ b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/copybooks/ParseDebugRedefinedSpec.scala @@ -64,22 +64,21 @@ class ParseDebugRedefinedSpec extends FunSuite { val expectedLayout = """-------- FIELD LEVEL/NAME --------- --ATTRIBS-- FLD START END LENGTH | - |TRANSDATA 1 7 7 - | 10 CURRENCY r 1 1 3 3 - | 10 CURRENCY_debug R 2 1 3 3 - | 10 FIELD1 r 3 4 7 4 - | 10 FIELD1_debug rR 4 4 7 4 - | 10 FIELD2 rR 5 4 7 4 - | 10 FIELD2_debug rR 6 4 7 4 - | 10 FIELD3 rR 7 4 7 4 - | 10 FIELD3_debug rR 8 4 7 4 - | 10 FIELD4 rR 9 4 7 4 - | 10 FIELD4_debug R 10 4 7 4""" + | 1 TRANSDATA 11 1 7 7 + | 10 CURRENCY r 2 1 3 3 + | 10 CURRENCY_debug R 3 1 3 3 + | 10 FIELD1 r 4 4 7 4 + | 10 FIELD1_debug rR 5 4 7 4 + | 10 FIELD2 rR 6 4 7 4 + | 10 FIELD2_debug rR 7 4 7 4 + | 10 FIELD3 rR 8 4 7 4 + | 10 FIELD3_debug rR 9 4 7 4 + | 10 FIELD4 rR 10 4 7 4 + | 10 FIELD4_debug R 11 4 7 4""" .stripMargin.replace("\r\n", "\n") val copybook = CopybookParser.parseTree(copybookWithRedefined, debugFieldsPolicy = HexValue) - val actualLayout = copybook.generateRecordLayoutPositions() assert(actualLayout == expectedLayout) diff --git a/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/copybooks/ParseFieldNamesSpec.scala b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/copybooks/ParseFieldNamesSpec.scala index 917715f1d..495c18f7a 100644 --- a/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/copybooks/ParseFieldNamesSpec.scala +++ b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/copybooks/ParseFieldNamesSpec.scala @@ -44,18 +44,18 @@ class ParseFieldNamesSpec extends FunSuite { val expectedLayout = """-------- FIELD LEVEL/NAME --------- --ATTRIBS-- FLD START END LENGTH | - |GRP_01_02 1 11 11 - | 3 FIELD_1 1 1 1 1 - | 3 FIELD_2 2 2 11 10 - |GRP_01_02 12 22 11 - | 3 FIELD_1 3 12 12 1 - | 3 FIELD_2 4 13 22 10 - |GRP0102 23 33 11 - | 3 FIELD1 5 23 23 1 - | 3 FIELD2 6 24 33 10 - |SOMETHING_SOMETHING_DATE_NUM 34 44 11 - | 3 FIELD1 7 34 34 1 - | 3 FIELD2 8 35 44 10""" + | 1 GRP_01_02 3 1 11 11 + | 3 FIELD_1 2 1 1 1 + | 3 FIELD_2 3 2 11 10 + | 1 GRP_01_02 6 12 22 11 + | 3 FIELD_1 5 12 12 1 + | 3 FIELD_2 6 13 22 10 + | 1 GRP0102 9 23 33 11 + | 3 FIELD1 8 23 23 1 + | 3 FIELD2 9 24 33 10 + | 1 SOMETHING_SOMETHING_DATE_NUM 12 34 44 11 + | 3 FIELD1 11 34 34 1 + | 3 FIELD2 12 35 44 10""" .stripMargin.replace("\r\n", "\n") val copybook = CopybookParser.parseTree(copybookWithCommentLines) diff --git a/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/copybooks/ParseFieldsNestingSpec.scala b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/copybooks/ParseFieldsNestingSpec.scala index 1748e104a..0254fa326 100644 --- a/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/copybooks/ParseFieldsNestingSpec.scala +++ b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/copybooks/ParseFieldsNestingSpec.scala @@ -40,13 +40,13 @@ class ParseFieldsNestingSpec extends FunSuite { val expectedLayout = """-------- FIELD LEVEL/NAME --------- --ATTRIBS-- FLD START END LENGTH | - |ROOT_GROUP 1 20 20 - | 3 NESTED_PRIMITIVE_01 1 1 4 4 - | 3 NESTED_GRP_01 3 5 11 7 - | 5 NESTED_NESTED_02 3 5 11 7 - | 3 FILL 4 12 18 7 - | 2 FILLER_1 6 19 20 2 - | 3 NUMERIC_FIELD_01 6 19 20 2""" + | 1 ROOT_GROUP 7 1 20 20 + | 3 NESTED_PRIMITIVE_01 2 1 4 4 + | 3 NESTED_GRP_01 4 5 11 7 + | 5 NESTED_NESTED_02 4 5 11 7 + | 3 FILL 5 12 18 7 + | 2 FILLER_1 7 19 20 2 + | 3 NUMERIC_FIELD_01 7 19 20 2""" .stripMargin.replace("\r\n", "\n") val copybook = CopybookParser.parseTree(copybookWithCommentLines) diff --git a/spark-cobol/src/test/scala/za/co/absa/cobrix/spark/cobol/source/regression/Test14AsciiMergedCopybooks.scala b/spark-cobol/src/test/scala/za/co/absa/cobrix/spark/cobol/source/regression/Test14AsciiMergedCopybooks.scala index 16d4776de..0e087f521 100644 --- a/spark-cobol/src/test/scala/za/co/absa/cobrix/spark/cobol/source/regression/Test14AsciiMergedCopybooks.scala +++ b/spark-cobol/src/test/scala/za/co/absa/cobrix/spark/cobol/source/regression/Test14AsciiMergedCopybooks.scala @@ -18,6 +18,7 @@ package za.co.absa.cobrix.spark.cobol.source.regression import org.scalatest.WordSpec import org.slf4j.{Logger, LoggerFactory} +import za.co.absa.cobrix.cobol.parser.CopybookParser import za.co.absa.cobrix.spark.cobol.source.base.{SimpleComparisonBase, SparkTestBase} import za.co.absa.cobrix.spark.cobol.source.fixtures.BinaryFileFixture @@ -38,10 +39,27 @@ class Test14AsciiMergedCopybooks extends WordSpec with SparkTestBase with Binary 05 B PIC X. """ + private val copybookMerged = + """ 01 ROOT. + 05 ENTITY1. + 10 A PIC X. + 05 ENTITY2 REDEFINES ENTITY1. + 10 B PIC X. + """ + val textFileContents: String = "1\n2\n3\n4" - "Test ASCII CRLF text file with merged copybooks" should { - "correctly identify empty lines when read as a text file" in { + "Test ASCII CRLF text file " should { + "with merged copybooks" in { + val expectedLayout = + """-------- FIELD LEVEL/NAME --------- --ATTRIBS-- FLD START END LENGTH + | + |ENTITY1 1 1 1 + | 5 A 1 1 1 1 + |ENTITY2 1 1 1 + | 5 B 2 1 1 1 + |""".stripMargin + withTempDirectory("merged_copybook") { tempDir => val copybook1Path = Paths.get(tempDir, "copybook1.cpy") val copybook2Path = Paths.get(tempDir, "copybook2.cpy") @@ -73,5 +91,37 @@ class Test14AsciiMergedCopybooks extends WordSpec with SparkTestBase with Binary } } } + + "with a single copybook" in { + val expectedLayout = + """-------- FIELD LEVEL/NAME --------- --ATTRIBS-- FLD START END LENGTH + | + |ROOT 1 1 1 + | 5 ENTITY1 r 2 1 1 1 + | 10 A 2 1 1 1 + | 5 ENTITY2 R 4 1 1 1 + | 10 B 4 1 1 1 + |""".stripMargin + withTempTextFile("merged_crlf", ".dat", StandardCharsets.UTF_8, textFileContents) { tmpFileName => + val df = spark + .read + .format("cobol") + .option("copybook_contents", copybookMerged) + .option("pedantic", "true") + .option("is_text", "true") + .option("encoding", "ascii") + .option("schema_retention_policy", "collapse_root") + .load(tmpFileName) + + val expected = """[{"ENTITY1":{"A":"1"},"ENTITY2":{"B":"1"}},{"ENTITY1":{"A":"2"},"ENTITY2":{"B":"2"}},{"ENTITY1":{"A":"3"},"ENTITY2":{"B":"3"}},{"ENTITY1":{"A":"4"},"ENTITY2":{"B":"4"}}]""" + + val count = df.count() + val actual = df.toJSON.collect().mkString("[", ",", "]") + + assert(count == 4) + assertEqualsMultiline(actual, expected) + } + } + } }