Skip to content

Commit

Permalink
#510 Fix FILLERs being dropped for a FILLER with OCCURS GROUP.
Browse files Browse the repository at this point in the history
  • Loading branch information
yruslan committed Aug 9, 2022
1 parent c8db9d8 commit 3baed03
Show file tree
Hide file tree
Showing 3 changed files with 170 additions and 48 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -867,7 +867,7 @@ object CopybookParser extends Logging {
if (newGrp.children.nonEmpty) {
newChildren += newGrp
}
if (!grp.isFiller) hasNonFillers = true
if (!newGrp.isFiller) hasNonFillers = true
case st: Primitive =>
val newSt = processPrimitive(st)
newChildren += newSt
Expand All @@ -878,7 +878,7 @@ object CopybookParser extends Logging {

val (newSchema, hasNonFillers) = renameFillers(ast)
if (!hasNonFillers) {
throw new IllegalStateException("The copybook is empty of consists only of FILLER fields.")
throw new IllegalStateException("The copybook is empty since it consists only of FILLER fields.")
}
newSchema
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package za.co.absa.cobrix.cobol.parser.copybooks
import org.scalatest.FunSuite
import org.slf4j.{Logger, LoggerFactory}
import za.co.absa.cobrix.cobol.parser.CopybookParser
import za.co.absa.cobrix.cobol.parser.ast.Group
import za.co.absa.cobrix.cobol.testutils.SimpleComparisonBase

class ParseCopybookFeaturesSpec extends FunSuite with SimpleComparisonBase {
Expand All @@ -28,6 +29,10 @@ class ParseCopybookFeaturesSpec extends FunSuite with SimpleComparisonBase {
""" 01 RECORD.
| 05 FILLER PIC X(1).
| 05 COMPANY_PREFIX PIC X(3).
| 05 FIELD1.
| 07 FILLER OCCURS 12 TIMES.
| 10 CHILD1 PIC S9(7) COMP-3.
| 10 CHILD2 PIC S99V99999 COMP-3.
| 05 FILLER PIC X(1).
| 05 FILLER PIC X(1).
| 05 COMPANY_NAME PIC X(9).
Expand Down Expand Up @@ -122,54 +127,65 @@ class ParseCopybookFeaturesSpec extends FunSuite with SimpleComparisonBase {
val expectedLayout =
"""-------- FIELD LEVEL/NAME --------- --ATTRIBS-- FLD START END LENGTH
|
|1 RECORD 1 1 60 60
|1 RECORD 1 1 156 156
| 5 FILLER_P1 2 1 1 1
| 5 COMPANY_PREFIX 3 2 4 3
| 5 FILLER_P2 4 5 5 1
| 5 FILLER_P3 5 6 6 1
| 5 COMPANY_NAME r 6 7 15 9
| 5 FILLER_1 R 7 7 15 9
| 10 STR1 8 7 11 5
| 10 STR2 9 12 13 2
| 10 FILLER_P4 10 14 14 1
| 5 ADDRESS r 11 16 45 30
| 5 FILLER_2 R 12 16 45 30
| 10 STR4 13 16 25 10
| 10 FILLER_P5 14 26 45 20
| 5 FILL_FIELD r 15 46 52 7
| 10 FILLER_P6 16 46 50 5
| 10 FILLER_P7 17 51 52 2
| 5 CONTACT_PERSON R 18 46 52 7
| 10 FIRST_NAME 19 46 51 6
| 5 AMOUNT 20 53 60 8
| 5 FIELD1 4 5 100 96
| 7 FILLER_1 [] 5 5 100 96
| 10 CHILD1 6 5 8 4
| 10 CHILD2 7 9 12 4
| 5 FILLER_P2 8 101 101 1
| 5 FILLER_P3 9 102 102 1
| 5 COMPANY_NAME r 10 103 111 9
| 5 FILLER_2 R 11 103 111 9
| 10 STR1 12 103 107 5
| 10 STR2 13 108 109 2
| 10 FILLER_P4 14 110 110 1
| 5 ADDRESS r 15 112 141 30
| 5 FILLER_3 R 16 112 141 30
| 10 STR4 17 112 121 10
| 10 FILLER_P5 18 122 141 20
| 5 FILL_FIELD r 19 142 148 7
| 10 FILLER_P6 20 142 146 5
| 10 FILLER_P7 21 147 148 2
| 5 CONTACT_PERSON R 22 142 148 7
| 10 FIRST_NAME 23 142 147 6
| 5 AMOUNT 24 149 156 8
|"""
.stripMargin.replace("\r\n", "\n")

assertEqualsMultiline(layout, expectedLayout)
assert(!copybook.ast.children(0).asInstanceOf[Group].children(2).isFiller)
}

test("Test parseSimple() drop value fillers") {
val copybook = CopybookParser.parseSimple(copybookFillers, dropGroupFillers = false, dropValueFillers = true, dropFillersFromAst = true)
val layout = copybook.generateRecordLayoutPositions()

val expectedLayout =
"""-------- FIELD LEVEL/NAME --------- --ATTRIBS-- FLD START END LENGTH
|
|1 RECORD 1 1 60 60
|1 RECORD 1 1 156 156
| 5 COMPANY_PREFIX 2 2 4 3
| 5 COMPANY_NAME r 3 7 15 9
| 5 FILLER_1 R 4 7 15 9
| 10 STR1 5 7 11 5
| 10 STR2 6 12 13 2
| 5 ADDRESS r 7 16 45 30
| 5 FILLER_2 R 8 16 45 30
| 10 STR4 9 16 25 10
| 5 CONTACT_PERSON R 10 46 52 7
| 10 FIRST_NAME 11 46 51 6
| 5 AMOUNT 12 53 60 8
| 5 FIELD1 3 5 100 96
| 7 FILLER_1 [] 4 5 100 96
| 10 CHILD1 5 5 8 4
| 10 CHILD2 6 9 12 4
| 5 COMPANY_NAME r 7 103 111 9
| 5 FILLER_2 R 8 103 111 9
| 10 STR1 9 103 107 5
| 10 STR2 10 108 109 2
| 5 ADDRESS r 11 112 141 30
| 5 FILLER_3 R 12 112 141 30
| 10 STR4 13 112 121 10
| 5 CONTACT_PERSON R 14 142 148 7
| 10 FIRST_NAME 15 142 147 6
| 5 AMOUNT 16 149 156 8
|"""
.stripMargin.replace("\r\n", "\n")

assertEqualsMultiline(layout, expectedLayout)
assert(!copybook.ast.children(0).asInstanceOf[Group].children(2).isFiller)
}

test("Test parseSimple() drop group fillers") {
Expand All @@ -179,19 +195,19 @@ class ParseCopybookFeaturesSpec extends FunSuite with SimpleComparisonBase {
val expectedLayout =
"""-------- FIELD LEVEL/NAME --------- --ATTRIBS-- FLD START END LENGTH
|
|1 RECORD 1 1 60 60
|1 RECORD 1 1 156 156
| 5 FILLER_P1 2 1 1 1
| 5 COMPANY_PREFIX 3 2 4 3
| 5 FILLER_P2 4 5 5 1
| 5 FILLER_P3 5 6 6 1
| 5 COMPANY_NAME r 6 7 15 9
| 5 ADDRESS r 7 16 45 30
| 5 FILL_FIELD r 8 46 52 7
| 10 FILLER_P6 9 46 50 5
| 10 FILLER_P7 10 51 52 2
| 5 CONTACT_PERSON R 11 46 52 7
| 10 FIRST_NAME 12 46 51 6
| 5 AMOUNT 13 53 60 8
| 5 FILLER_P2 4 101 101 1
| 5 FILLER_P3 5 102 102 1
| 5 COMPANY_NAME r 6 103 111 9
| 5 ADDRESS r 7 112 141 30
| 5 FILL_FIELD r 8 142 148 7
| 10 FILLER_P6 9 142 146 5
| 10 FILLER_P7 10 147 148 2
| 5 CONTACT_PERSON R 11 142 148 7
| 10 FIRST_NAME 12 142 147 6
| 5 AMOUNT 13 149 156 8
|"""
.stripMargin.replace("\r\n", "\n")

Expand All @@ -205,13 +221,13 @@ class ParseCopybookFeaturesSpec extends FunSuite with SimpleComparisonBase {
val expectedLayout =
"""-------- FIELD LEVEL/NAME --------- --ATTRIBS-- FLD START END LENGTH
|
|1 RECORD 1 1 60 60
|1 RECORD 1 1 156 156
| 5 COMPANY_PREFIX 2 2 4 3
| 5 COMPANY_NAME r 3 7 15 9
| 5 ADDRESS r 4 16 45 30
| 5 CONTACT_PERSON R 5 46 52 7
| 10 FIRST_NAME 6 46 51 6
| 5 AMOUNT 7 53 60 8
| 5 COMPANY_NAME r 3 103 111 9
| 5 ADDRESS r 4 112 141 30
| 5 CONTACT_PERSON R 5 142 148 7
| 10 FIRST_NAME 6 142 147 6
| 5 AMOUNT 7 149 156 8
|"""
.stripMargin.replace("\r\n", "\n")

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
/*
* Copyright 2018 ABSA Group Limited
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package za.co.absa.cobrix.spark.cobol.source.regression

import org.scalatest.WordSpec
import org.slf4j.{Logger, LoggerFactory}
import za.co.absa.cobrix.spark.cobol.source.base.{SimpleComparisonBase, SparkTestBase}
import za.co.absa.cobrix.spark.cobol.source.fixtures.BinaryFileFixture

import java.nio.charset.StandardCharsets

/**
 * Regression test for FILLER handling when a FILLER group carries an OCCURS clause.
 *
 * The copybook used here nests an unnamed (FILLER) repeating group inside a named group.
 * Two scenarios are checked against the same two-record text input:
 *  - fillers retained: the FILLER group must show up as an array field (`FILLER_1`);
 *  - fillers dropped: only the non-filler field `B` must remain in the schema.
 */
class Test20DropFillers extends WordSpec with SparkTestBase with BinaryFileFixture with SimpleComparisonBase {

// NOTE(review): presumably consumed implicitly by assertEqualsMultiline - confirm against SimpleComparisonBase.
private implicit val logger: Logger = LoggerFactory.getLogger(this.getClass)

// Record layout: group F holds a FILLER group repeated 3 times (A1 + A2, one character each),
// followed by the one-character field B, i.e. 3 * 2 + 1 = 7 characters per record.
// The literal is passed as-is (no stripMargin), so its whitespace is part of the copybook text.
private val copybook =
""" 01 R.
05 F.
07 FILLER OCCURS 3 TIMES.
10 A1 PIC X.
10 A2 PIC X.
05 B PIC X(1).
"""

"ASCII files" should {
// Two input lines of 7 characters each, matching the 7-character record layout above.
val data =
"""1234567
|8901234
|""".stripMargin

// Scenario 1: both filler-dropping options are switched off.
"correctly retain all fillers" in {
// NOTE(review): withTempTextFile presumably writes `data` to a temporary file and hands its
// path to the closure - confirm against BinaryFileFixture.
withTempTextFile("drop_fillers", ".dat", StandardCharsets.US_ASCII, data) { tmpFileName =>
// NOTE(review): record_format "D" is assumed to mean line-delimited text records - confirm.
val df = spark
.read
.format("cobol")
.option("copybook_contents", copybook)
.option("record_format", "D")
.option("drop_group_fillers", "false")
.option("drop_value_fillers", "false")
.option("pedantic", "true")
.load(tmpFileName)

// The FILLER group is expected to surface as FILLER_1, an array of structs with A1 and A2.
val expectedSchema = """root
| |-- F: struct (nullable = true)
| | |-- FILLER_1: array (nullable = true)
| | | |-- element: struct (containsNull = true)
| | | | |-- A1: string (nullable = true)
| | | | |-- A2: string (nullable = true)
| |-- B: string (nullable = true)
|""".stripMargin

// Each record: characters 1-6 split pairwise into the three array elements, character 7 is B.
val expectedData = """[{"F":{"FILLER_1":[{"A1":"1","A2":"2"},{"A1":"3","A2":"4"},{"A1":"5","A2":"6"}]},"B":"7"},{"F":{"FILLER_1":[{"A1":"8","A2":"9"},{"A1":"0","A2":"1"},{"A1":"2","A2":"3"}]},"B":"4"}]"""

val actualSchema = df.schema.treeString

assertEqualsMultiline(actualSchema, expectedSchema)

// Render all rows as a single JSON array string so it can be compared with expectedData.
val actualData = df.toJSON.collect().mkString("[", ",", "]")

assertEqualsMultiline(actualData, expectedData)
}
}

// Scenario 2: both filler-dropping options are switched on.
"correctly drop all fillers" in {
withTempTextFile("drop_fillers", ".dat", StandardCharsets.US_ASCII, data) { tmpFileName =>
val df = spark
.read
.format("cobol")
.option("copybook_contents", copybook)
.option("record_format", "D")
.option("drop_group_fillers", "true")
.option("drop_value_fillers", "true")
.option("pedantic", "true")
.load(tmpFileName)

// Group F is expected to disappear entirely (it contains only the FILLER group); only B remains.
val expectedSchema = """root
| |-- B: string (nullable = true)
|""".stripMargin

// B is still the 7th character of each record.
val expectedData = """[{"B":"7"},{"B":"4"}]"""

val actualSchema = df.schema.treeString

assertEqualsMultiline(actualSchema, expectedSchema)

// Render all rows as a single JSON array string so it can be compared with expectedData.
val actualData = df.toJSON.collect().mkString("[", ",", "]")

assertEqualsMultiline(actualData, expectedData)
}
}
}
}

0 comments on commit 3baed03

Please sign in to comment.