Skip to content

Commit

Permalink
Add CP1160 code page (Thai) which is same as CP838 with FE is replace…
Browse files Browse the repository at this point in the history
…d with the "€" (euro) character.
  • Loading branch information
yruslan committed Jan 21, 2025
1 parent 4814eb1 commit 41cad6f
Show file tree
Hide file tree
Showing 6 changed files with 101 additions and 6 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -1648,6 +1648,7 @@ The output looks like this:
| .option("ebcdic_code_page", "cp1145") | EBCDIC 1145 | Same as code page 284 with € at the position of the international currency symbol ¤. |
| .option("ebcdic_code_page", "cp1147") | EBCDIC 1147 | Same as code page 297 with € at the position of the international currency symbol ¤. |
| .option("ebcdic_code_page", "cp1148") | EBCDIC 1148 | Same as code page 500 with € at the position of the international currency symbol ¤. |
| .option("ebcdic_code_page", "cp1160") | EBCDIC 1160 | Same as code page 838 with € at the position 0xFE. |
| .option("ebcdic_code_page", "cp1364") | EBCDIC 1364 | Double-byte code page CCSID-1364, Korean. |
| .option("ebcdic_code_page", "cp1388") | EBCDIC 1388 | Double-byte code page CCSID-1388, Simplified Chinese. |

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ object CodePage extends Logging {
case "cp1146" => new CodePage1146
case "cp1147" => new CodePage1147
case "cp1148" => new CodePage1148
case "cp1160" => new CodePage1160
case "cp1364" => new CodePage1364
case "cp1388" => new CodePage1388
case codePage => throw new IllegalArgumentException(s"The code page '$codePage' is not one of the builtin EBCDIC code pages.")
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
/*
* Copyright 2018 ABSA Group Limited
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package za.co.absa.cobrix.cobol.parser.encoding.codepage

/**
  * Single-byte EBCDIC code page 1160 (Thai script, as used on IBM mainframes).
  *
  * It is the same as code page 838 except that position 0xFE holds the euro sign (€).
  * The decoding table itself lives in the companion object.
  */
class CodePage1160 extends SingleByteCodePage(CodePage1160.ebcdicToAsciiMapping) {

  /** Short name of this code page: "cp1160". */
  override def codePageShortName: String = {
    "cp1160"
  }
}

/**
  * Holds the decoding table for EBCDIC code page 1160 (Thai).
  *
  * The table has one entry per EBCDIC byte value (0 - 255, 16 entries per row).
  * It follows the code page 838 layout, with the euro sign (€) at position 0xFE.
  */
object CodePage1160 {
  val ebcdicToAsciiMapping: Array[Char] = {
    // Brings spc, ccr, clf, qts, qtd and bsh into scope (shared non-printable/special character constants).
    import EbcdicNonPrintable._

    /* This is the EBCDIC Code Page 1160 to ASCII conversion table. It is based on the Code Page 838 table
       from https://en.wikibooks.org/wiki/Character_Encodings/Code_Tables/EBCDIC/EBCDIC_838
       with the euro sign placed at position 0xFE (254). */
    val ebcdic2ascii: Array[Char] = {
      // Thai characters given as Unicode escapes rather than literals
      // (presumably because they are combining marks that render poorly inside quotes).
      val c01 = '\u0E48'
      val c02 = '\u0E4E'
      val c03 = '\u0E31'
      val c04 = '\u0E34'
      val c05 = '\u0E49'
      val c06 = '\u0E35'
      val c07 = '\u0E36'
      val c08 = '\u0E37'
      val c09 = '\u0E38'
      val c10 = '\u0E39'
      val c11 = '\u0E3A'
      val c12 = '\u0E47'
      val c13 = '\u0E48'
      val c14 = '\u0E49'
      val c15 = '\u0E4A'
      val c16 = '\u0E4B'
      // c17 is used at position 0xFB (251) below; it continues the U+0E47..U+0E4D sequence of c12..c18.
      val c17 = '\u0E4C'
      val c18 = '\u0E4D'

      Array[Char](
        spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, ccr, spc, spc, // 0 - 15
        spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, // 16 - 31
        spc, spc, spc, spc, spc, clf, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, // 32 - 47
        spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, // 48 - 63
        spc, spc, 'ก', 'ข', 'ฃ', 'ค', 'ฅ', 'ฆ', 'ง', '[', '¢', '.', '<', '(', '+', '|', // 64 - 79
        '&', c01, 'จ', 'ฉ', 'ช', 'ซ', 'ฌ', 'ญ', 'ฎ', ']', '!', '$', '*', ')', ';', '¬', // 80 - 95
        '-', '/', 'ฏ', 'ฐ', 'ฑ', 'ฒ', 'ณ', 'ด', 'ต', '^', '¦', ',', '%', '_', '>', '?', // 96 - 111
        '฿', c02, 'ถ', 'ท', 'ธ', 'น', 'บ', 'ป', 'ผ', '`', ':', '#', '@', qts, '=', qtd, // 112 - 127
        '๏', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'ฝ', 'พ', 'ฟ', 'ภ', 'ม', 'ย', // 128 - 143
        '๚', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 'ร', 'ฤ', 'ล', 'ฦ', 'ว', 'ศ', // 144 - 159
        '๛', '~', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'ษ', 'ส', 'ห', 'ฬ', 'อ', 'ฮ', // 160 - 175
        '๐', '๑', '๒', '๓', '๔', '๕', '๖', '๗', '๘', '๙', 'ฯ', 'ะ', c03, 'า', 'ำ', c04, // 176 - 191
        '{', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', c05, c06, c07, c08, c09, c10, // 192 - 207
        '}', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', c11, 'เ', 'แ', 'โ', 'ใ', 'ไ', // 208 - 223
        bsh, c15, 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'ๅ', 'ๆ', c12, c13, c14, c15, // 224 - 239
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', c16, c17, c18, c16, '€', spc) // 240 - 255
    }
    ebcdic2ascii
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,11 @@ class CodePage838 extends SingleByteCodePage(CodePage838.ebcdicToAsciiMapping) {

object CodePage838 {
val ebcdicToAsciiMapping: Array[Char] = {
import EbcdicNonPrintable._

/* This is the EBCDIC Code Page 838 to ASCII conversion table with non-printable characters mapping
from https://en.everybodywiki.com/EBCDIC_838 */
val ebcdic2ascii: Array[Char] = {
val clf = '\r'
val ccr = '\n'
val spc = ' '
val qts = '\''
val qtd = '\"'
val bsh = '\\'
val c01 = '\u0E48'
val c02 = '\u0E4E'
val c03 = '\u0E31'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,16 @@ class StringDecodersSpec extends AnyWordSpec {
assert(actual == expected)
}

"decode a CP838 string special characters" in {
  // EBCDIC input bytes; each one decodes to the character at the same position in `expected`.
  val ebcdicBytes = Array(
    0x40, 0x4A, 0x5B, 0xA1, 0xB0, 0xB1, 0xBA, 0xBC, 0x43, 0x7B, 0x4F,
    0xDC, 0xFC, 0x5F, 0xB5, 0x6A, 0x5A, 0x59, 0xB0, 0x4A, 0xEC, 0xC0,
    0xD0, 0x9C, 0xCC, 0xA1, 0x47, 0x7C, 0x9E, 0x80, 0x67, 0x63, 0x40
  ).map(_.toByte)
  val expected = " ¢$~๐๑ฯัข#|แํ¬๕¦!]๐¢็{}ลึ~ฆ@ว๏ดฐ "

  // KeepAll: the leading and trailing spaces are part of the expected result.
  val decoded = decodeEbcdicString(ebcdicBytes, KeepAll, new CodePage838, improvedNullDetection = false)

  assert(decoded == expected)
}

"decode a CP1140 string special characters" in {
val expected = "âäàáãåçñ¢.<(+|&éêëèíîïìß!$*);¬-/ÂÄÀÁÃÅÇѦ,%_>?øÉÊËÈÍÎÏÌ`:#@'=\"Øabcdefghi«»ðýþ±°jklmnopqrªºæ¸Æ€µ~stuvwxyz¡¿ÐÝÞ®^£¥·©§¶¼½¾[]¯¨´×{ABCDEFGHI\u00ADôöòóõ}JKLMNOPQR¹ûüùúÿ\\÷STUVWXYZ²ÔÖÒÓÕ0123456789³ÛÜÙÚ"
val bytes = Array(
Expand Down Expand Up @@ -405,6 +415,16 @@ class StringDecodersSpec extends AnyWordSpec {

assert(actual == expected)
}

"decode a CP1160 string special characters" in {
  // Same byte sequence as the CP838 special-characters test, plus 0xFE (expected to decode to '€')
  // just before the trailing space.
  val ebcdicBytes = Array(
    0x40, 0x4A, 0x5B, 0xA1, 0xB0, 0xB1, 0xBA, 0xBC, 0x43, 0x7B, 0x4F,
    0xDC, 0xFC, 0x5F, 0xB5, 0x6A, 0x5A, 0x59, 0xB0, 0x4A, 0xEC, 0xC0,
    0xD0, 0x9C, 0xCC, 0xA1, 0x47, 0x7C, 0x9E, 0x80, 0x67, 0x63, 0xFE, 0x40
  ).map(_.toByte)
  val expected = " ¢$~๐๑ฯัข#|แํ¬๕¦!]๐¢็{}ลึ~ฆ@ว๏ดฐ€ "

  // KeepAll: the leading and trailing spaces are part of the expected result.
  val decoded = decodeEbcdicString(ebcdicBytes, KeepAll, new CodePage1160, improvedNullDetection = false)

  assert(decoded == expected)
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,11 @@ class CodePageSingleByteSpec extends AnyFunSuite {
assert(codePage.codePageShortName == "cp1148")
}

test("Ensure codepage 'cp1160' gives the associated CodePage") {
  // Looking the code page up by name must yield an instance reporting the same short name.
  val resolved = CodePage.getCodePageByName("cp1160")
  assert(resolved.codePageShortName == "cp1160")
}

test("Ensure codepage 'cp1364' gives the associated CodePage") {
val codePage = CodePage.getCodePageByName("cp1364")
assert(codePage.codePageShortName == "cp1364")
Expand Down

0 comments on commit 41cad6f

Please sign in to comment.