From eedec84135b1257aab71bd0af39635c17c619359 Mon Sep 17 00:00:00 2001 From: Laurent Prud'hon Date: Fri, 14 Aug 2015 10:24:59 +0200 Subject: [PATCH] =?UTF-8?q?As=20discussed=20in=20the=20comments=20of=20the?= =?UTF-8?q?=20issue,=20I=20propose=20a=20new=20fix=20for=20issue=20#59=20:?= =?UTF-8?q?=20=E2=80=A2restore=20support=20for=20single=20\r=20and=20\n=20?= =?UTF-8?q?characters=20as=20line=20endings=20in=20TextDocument=20(revert?= =?UTF-8?q?=20to=20the=20previous=20version=20of=20the=20file)=20=E2=80=A2?= =?UTF-8?q?update=20CobolFile=20class=20:=20when=20reading=20a=20fixed=20l?= =?UTF-8?q?ength=20line,=20if=20we=20encounter=20a=20line=20ending=20chara?= =?UTF-8?q?cter=20after=20Unicode=20conversion=20of=20an=20original=20EBCD?= =?UTF-8?q?IC=20character,=20replace=20it=20on=20the=20fly=20with=20a=20qu?= =?UTF-8?q?estion=20mark=20'=3F'=20char?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Document clearly two restrictions of our compiler : •because of the internal conversion of the program text to Unicode characters in .Net or Java, we do not support alphanumeric literals containing non printable EBCDIC characters •because of the feature allowing free text format and variable line length, we do not support alphanumeric literals containing line ending characters NB : when we say we do not support these two cases, it will only have an impact if we generate Cobol from a TypeCobol program and then compile it with the IBM compiler. For Cobol code analysis in memory, it has no impact. 
In the two cases above, the solution is to modify the original EBCDIC program text before using our tool : •initialize numeric tables directly with numbers instead of their corresponding chars •set line ending chars individually inside alphanumeric literals, for example with reference modification --- TypeCobol/Compiler/File/CobolFile.cs | 18 ++++++++++++++++- TypeCobol/Compiler/Text/TextDocument.cs | 26 +++++++++++-------------- 2 files changed, 28 insertions(+), 16 deletions(-) diff --git a/TypeCobol/Compiler/File/CobolFile.cs b/TypeCobol/Compiler/File/CobolFile.cs index aa30eae2e..1e2ef6d58 100644 --- a/TypeCobol/Compiler/File/CobolFile.cs +++ b/TypeCobol/Compiler/File/CobolFile.cs @@ -94,7 +94,23 @@ public IEnumerable ReadChars() { for (int i = 0; i < charsCount; i++) { - yield return lineBuffer[i]; + char convertedChar = lineBuffer[i]; + + // Because the mainframe input files use fixed length lines + // its alphanumeric literals could contain EBCDIC chars which + // translate to end of line Unicode characters. + // These characters would upset the later phases of the compiler : + // TextDocument would detect a new line in the middle of an + // alphnaumeric literal. + // To avoid this, we replace them with a question mark char. + // The alphanumeric char will off course be altered, but all + // the non printable characters will also be altered by the + // Unicode conversion anyway ... so it is not worse than doing + // nothing here. + if (convertedChar == '\r' || convertedChar == '\n') + convertedChar = '?'; + + yield return convertedChar; } if (charsCount == lineLength) { diff --git a/TypeCobol/Compiler/Text/TextDocument.cs b/TypeCobol/Compiler/Text/TextDocument.cs index 65558ccc1..a61dc46e1 100644 --- a/TypeCobol/Compiler/Text/TextDocument.cs +++ b/TypeCobol/Compiler/Text/TextDocument.cs @@ -36,10 +36,7 @@ public TextDocument(string fileName, Encoding encodingForHexadecimalAlphanumeric } /// - /// Reloads the text document with new chars. 
- /// The textSource must return unicode chars, with mandatory line endings of the form : '\r\n'. - /// Warning : Unix/Linux style line endings with only '\n' will not work. - /// This limitation was introduced to support individual '\r' or '\n' chars in alphanumric literals. + /// Reloads the text document with new chars /// public void LoadChars(IEnumerable textSource) { @@ -55,13 +52,20 @@ public void LoadChars(IEnumerable textSource) { if (chr == '\r') { + // If an end of line char is encountered, create a new line + TextLine line = new TextLine(lineIndex, charsCount, currentLineText.ToString()); + lines.Add(line); + lineIndex++; + charsCount += line.Length; + + // Reset StringBuffer contents for next line + currentLineText = new StringBuilder(); + previousCharWasCr = true; } else if (chr == '\n') { - // Mandatory line endings : '\r' '\n'. - // NB : BOTH are necessary, in this exact order. - if (previousCharWasCr) + if (!previousCharWasCr) { // If an end of line char is encountered, create a new line TextLine line = new TextLine(lineIndex, charsCount, currentLineText.ToString()); @@ -77,14 +81,6 @@ public void LoadChars(IEnumerable textSource) } else { - // Previous char was '\r' : - // It was not appended to the line until we could check if it was followed by \n. - // No we know it was'nt, so we can safely add a '\r' char to the line - if (previousCharWasCr) - { - currentLineText.Append('\r'); - } - // Append the current char to the text of the current line currentLineText.Append(chr);