rokucommunity · TwitchBronBron · Mar 18, 2021 · Mar 13, 2021 · Mar 13, 2021 · Mar 13, 2021
diff --git a/package-lock.json b/package-lock.json
diff --git a/package.json b/package.json
@@ -112,6 +112,7 @@
         "@xml-tools/parser": "^1.0.7",
         "array-flat-polyfill": "^1.0.1",
         "chalk": "^2.4.2",
+        "chevrotain": "^7.0.1",
         "chokidar": "^3.0.2",
         "clear": "^0.1.0",
         "cross-platform-clear-console": "^2.3.0",

diff --git a/src/CommentFlagProcessor.spec.ts b/src/CommentFlagProcessor.spec.ts
@@ -0,0 +1,156 @@
+import { expect } from 'chai';
+import { Range } from 'vscode-languageserver';
+import { CommentFlagProcessor } from './CommentFlagProcessor';
+import { Lexer } from './lexer/Lexer';
+
+describe('CommentFlagProcessor', () => {
+    let processor: CommentFlagProcessor;
+
+    describe('tokenizeByWhitespace', () => {
+        beforeEach(() => {
+            processor = new CommentFlagProcessor(null);
+        });
+
+        it('works with single chars', () => {
+            expect(processor['tokenizeByWhitespace']('a b c')).to.deep.equal([{
+                startIndex: 0,
+                text: 'a'
+            }, {
+                startIndex: 2,
+                text: 'b'
+            },
+            {
+                startIndex: 4,
+                text: 'c'
+            }]);
+        });
+
+        it('works with tabs', () => {
+            expect(processor['tokenizeByWhitespace']('a\tb\t c')).to.deep.equal([{
+                startIndex: 0,
+                text: 'a'
+            }, {
+                startIndex: 2,
+                text: 'b'
+            },
+            {
+                startIndex: 5,
+                text: 'c'
+            }]);
+        });
+
+        it('works with leading whitespace', () => {
+            //two spaces and a tab come first, so `a` starts at index 3 (indexes are zero-based)
+            expect(processor['tokenizeByWhitespace']('  \ta\tb\t c')).to.deep.equal([{
+                startIndex: 3,
+                text: 'a'
+            }, {
+                startIndex: 5,
+                text: 'b'
+            }, {
+                startIndex: 8,
+                text: 'c'
+            }]);
+        });
+
+        it('works with multiple characters in a word', () => {
+            expect(processor['tokenizeByWhitespace']('abc 123')).to.deep.equal([{
+                startIndex: 0,
+                text: 'abc'
+            }, {
+                startIndex: 4,
+                text: '123'
+            }]);
+        });
+    });
+
+    describe('tokenize', () => {
+        beforeEach(() => {
+            processor = new CommentFlagProcessor(null, [`'`]);
+        });
+
+        it('skips non disable comments', () => {
+            expect(
+                processor['tokenize'](`'not disable comment`, null)
+            ).not.to.exist;
+        });
+
+        it('tokenizes bs:disable-line comment', () => {
+            expect(
+                processor['tokenize'](`'bs:disable-line`, null)
+            ).to.eql({
+                commentTokenText: `'`,
+                disableType: 'line',
+                codes: []
+            });
+        });
+
+        it('works for special case', () => {
+            const token = Lexer.scan(`print "hi" 'bs:disable-line: 123456 999999   aaaab`).tokens[2];
+            expect(
+                processor['tokenize'](token.text, token.range)
+            ).to.eql({
+                commentTokenText: `'`,
+                disableType: 'line',
+                codes: [{
+                    code: '123456',
+                    range: Range.create(0, 29, 0, 35)
+                }, {
+                    code: '999999',
+                    range: Range.create(0, 36, 0, 42)
+                }, {
+                    code: 'aaaab',
+                    range: Range.create(0, 45, 0, 50)
+                }]
+            });
+        });
+
+        it('tokenizes bs:disable-line comment with codes', () => {
+            const token = Lexer.scan(`'bs:disable-line:1 2 3`).tokens[0];
+            expect(
+                processor['tokenize'](token.text, token.range)
+            ).to.eql({
+                commentTokenText: `'`,
+                disableType: 'line',
+                codes: [{
+                    code: '1',
+                    range: Range.create(0, 17, 0, 18)
+                }, {
+                    code: '2',
+                    range: Range.create(0, 19, 0, 20)
+                }, {
+                    code: '3',
+                    range: Range.create(0, 21, 0, 22)
+                }]
+            });
+        });
+
+        it('tokenizes bs:disable-line comment with leading space', () => {
+            const token = Lexer.scan(`' bs:disable-line:1`).tokens[0];
+            expect(
+                processor['tokenize'](token.text, token.range)
+            ).to.eql({
+                commentTokenText: `'`,
+                disableType: 'line',
+                codes: [{
+                    code: '1',
+                    range: Range.create(0, 18, 0, 19)
+                }]
+            });
+        });
+
+        it('tokenizes bs:disable-line comment with leading tab', () => {
+            const token = Lexer.scan(`'\tbs:disable-line:1`).tokens[0];
+            expect(
+                processor['tokenize'](token.text, token.range)
+            ).to.eql({
+                commentTokenText: `'`,
+                disableType: 'line',
+                codes: [{
+                    code: '1',
+                    range: Range.create(0, 18, 0, 19)
+                }]
+            });
+        });
+    });
+});
diff --git a/src/CommentFlagProcessor.ts b/src/CommentFlagProcessor.ts
@@ -0,0 +1,205 @@
+import type { Range } from 'vscode-languageserver';
+import { DiagnosticMessages } from './DiagnosticMessages';
+import type { BscFile, BsDiagnostic, CommentFlag, DiagnosticCode } from './interfaces';
+import { util } from './util';
+
+export class CommentFlagProcessor {
+    public constructor(
+        /**
+         * The file this processor applies to
+         */
+        public file: BscFile,
+        /**
+         * An array of strings containing the types of text that a comment starts with. (i.e. `REM`, `'`, `<!--`).
+         * Starters are matched against the comment text case-insensitively
+         */
+        public commentStarters = [] as string[],
+        /**
+         * Valid diagnostic codes. Codes NOT in this list will be flagged
+         */
+        public diagnosticCodes = [] as DiagnosticCode[],
+        /**
+         * Diagnostic codes to never filter (these codes will always be flagged)
+         */
+        public ignoreDiagnosticCodes = [] as DiagnosticCode[]
+    ) {
+        this.allCodesExceptIgnores = this.diagnosticCodes.filter(x => !this.ignoreDiagnosticCodes.includes(x));
+    }
+
+    /**
+     * List of comment flags generated during processing
+     */
+    public commentFlags = [] as CommentFlag[];
+
+    /**
+     * List of diagnostics generated during processing
+     */
+    public diagnostics = [] as BsDiagnostic[];
+
+    /**
+     * A list of all codes EXCEPT the ones in `ignoreDiagnosticCodes`
+     */
+    public allCodesExceptIgnores: DiagnosticCode[];
+
+    /**
+     * Process one comment. When it is a `bs:disable-line` or `bs:disable-next-line` comment, the resulting
+     * flags are appended to `commentFlags`, and unknown numeric codes are reported in `diagnostics`
+     * @param text the full text of the comment, including its comment starter
+     * @param range the range of the comment within the file
+     */
+    public tryAdd(text: string, range: Range) {
+        const tokenized = this.tokenize(text, range);
+        if (!tokenized) {
+            return;
+        }
+
+        let affectedRange: Range;
+        if (tokenized.disableType === 'line') {
+            //everything on the comment's own line that comes before the comment
+            affectedRange = util.createRange(range.start.line, 0, range.start.line, range.start.character);
+        } else if (tokenized.disableType === 'next-line') {
+            //the whole line after the comment
+            affectedRange = util.createRange(range.start.line + 1, 0, range.start.line + 1, Number.MAX_SAFE_INTEGER);
+        }
+
+        //null means all codes (a comment that lists no codes disables EVERYTHING)
+        let codes: number[] | null = null;
+        if (tokenized.codes.length > 0) {
+            codes = [];
+            for (const codeToken of tokenized.codes) {
+                //explicit radix so the code is always read as a decimal number
+                const codeInt = parseInt(codeToken.code, 10);
+                if (isNaN(codeInt)) {
+                    //don't validate non-numeric codes
+                    continue;
+                } else if (this.diagnosticCodes.includes(codeInt)) {
+                    codes.push(codeInt);
+                } else {
+                    //add a warning for unknown codes
+                    this.diagnostics.push({
+                        ...DiagnosticMessages.unknownDiagnosticCode(codeInt),
+                        file: this.file,
+                        range: codeToken.range
+                    });
+                }
+            }
+            //none of the listed codes could be used, so there is nothing to disable
+            if (codes.length === 0) {
+                return;
+            }
+        }
+
+        this.commentFlags.push({
+            file: this.file,
+            codes: codes,
+            range: range,
+            affectedRange: affectedRange
+        });
+
+        //within the comment itself, ignore all codes but `ignoreDiagnosticCodes` (e.g. Unknown_diagnostic_code_1014)
+        this.commentFlags.push({
+            affectedRange: range,
+            range: range,
+            codes: this.allCodesExceptIgnores,
+            file: this.file
+        });
+    }
+
+    /**
+     * Small tokenizer for bs:disable comments
+     * @param text the full text of the comment, including its comment starter
+     * @param range the range of the comment; only read when the comment lists codes
+     * @returns the matched comment starter, the disable type and every listed code with its range,
+     * or `null` when the comment is not a `bs:disable-line`/`bs:disable-next-line` comment
+     */
+    private tokenize(text: string, range: Range) {
+        let lowerText = text.toLowerCase();
+        let offset = 0;
+        let commentTokenText: string;
+
+        for (const starter of this.commentStarters) {
+            //the rest of the comment is matched case-insensitively, so the starter (`REM` vs `rem`) must be too
+            const candidate = text.substring(0, starter.length);
+            if (candidate.toLowerCase() === starter.toLowerCase()) {
+                commentTokenText = starter;
+                offset = starter.length;
+                lowerText = lowerText.substring(starter.length);
+                break;
+            }
+        }
+
+        let disableType: 'line' | 'next-line';
+        //trim leading whitespace, and move the offset past it so the code ranges stay accurate
+        let len = lowerText.length;
+        lowerText = lowerText.trimLeft();
+        offset += len - lowerText.length;
+        if (lowerText.startsWith('bs:disable-line')) {
+            lowerText = lowerText.substring('bs:disable-line'.length);
+            offset += 'bs:disable-line'.length;
+            disableType = 'line';
+        } else if (lowerText.startsWith('bs:disable-next-line')) {
+            lowerText = lowerText.substring('bs:disable-next-line'.length);
+            offset += 'bs:disable-next-line'.length;
+            disableType = 'next-line';
+        } else {
+            return null;
+        }
+
+        //discard the colon
+        if (lowerText.startsWith(':')) {
+            lowerText = lowerText.substring(1);
+            offset += 1;
+        }
+
+        let items = this.tokenizeByWhitespace(lowerText);
+        let codes = [] as Array<{ code: string; range: Range }>;
+        for (let item of items) {
+            codes.push({
+                code: item.text,
+                range: util.createRange(
+                    range.start.line,
+                    range.start.character + offset + item.startIndex,
+                    range.start.line,
+                    range.start.character + offset + item.startIndex + item.text.length
+                )
+            });
+        }
+
+        return {
+            commentTokenText: commentTokenText,
+            disableType: disableType,
+            codes: codes
+        };
+    }
+
+    /**
+     * Given a string, extract each item split by whitespace (spaces and tabs)
+     * @param text the text to split
+     * @returns every item, in order, along with the zero-based index in `text` where it starts
+     */
+    private tokenizeByWhitespace(text: string) {
+        let tokens = [] as Array<{ startIndex: number; text: string }>;
+        let currentToken: { startIndex: number; text: string } | null = null;
+        for (let i = 0; i < text.length; i++) {
+            let char = text[i];
+            if (char === ' ' || char === '\t') {
+                //whitespace ends the token being built (if any)
+                if (currentToken) {
+                    tokens.push(currentToken);
+                    currentToken = null;
+                }
+            } else {
+                //non-whitespace starts a new token or extends the current one
+                if (!currentToken) {
+                    currentToken = { startIndex: i, text: '' };
+                }
+                currentToken.text += char;
+            }
+        }
+        //the text did not end with whitespace, so the last token is still pending
+        if (currentToken) {
+            tokens.push(currentToken);
+        }
+        return tokens;
+    }
+}