Skip to content

Commit

Permalink
JS embedding (#83)
Browse files Browse the repository at this point in the history
* add embedded language to `package.json`

* add embedded language to TMLanguage

* improve grammar loader

The grammar loader should be able to load the embedded grammar scope, at least at a minimal level.

* disable verbose mode

In fact, it is sufficient to show only the names of the tests; no further details are needed for the moment.

* basic tests for JS injection rules
  • Loading branch information
scripthunter7 authored May 2, 2023
1 parent 4d2703e commit 14ce25f
Show file tree
Hide file tree
Showing 5 changed files with 97 additions and 20 deletions.
2 changes: 1 addition & 1 deletion jest.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import type { Config } from '@jest/types';
const config: Config.InitialOptions = {
preset: 'ts-jest',
testEnvironment: 'node',
verbose: true,
verbose: false,
testTimeout: 30000,
testMatch: ['**/test/**/*.test.ts'],
};
Expand Down
5 changes: 4 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,10 @@
{
"language": "adblock",
"scopeName": "text.adblock",
"path": "./syntaxes/out/adblock.plist"
"path": "./syntaxes/out/adblock.plist",
"embeddedLanguages": {
"source.js": "javascript"
}
}
]
},
Expand Down
29 changes: 22 additions & 7 deletions syntaxes/adblock.yaml-tmlanguage
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#
# GitHub Linguist can use this file directly, but during the build, we convert it to a .plist file,
# so the VSCode extension can use it. Development is done in this .yaml-tmlanguage file.
#
#
# If you aren't familiar with TextMate Language Grammars, you can read the following documentation:
# - https://macromates.com/manual/en/language_grammars
# - https://www.sublimetext.com/docs/3/syntax.html
Expand Down Expand Up @@ -239,7 +239,27 @@ repository:
"6":
name: punctuation.section.adblock
jsRules:
# In this case, we embed the JS grammar in the adblock grammar
# by using the include: "source.js" scope.
# This is an external grammar, so it should be installed separately,
# but both VSCode and GitHub Linguist will recognize it.
# See https://github.com/github-linguist/linguist/discussions/6020#discussioncomment-3397226
patterns:
# Not to be confused with #%#//scriptlet: we use a negative lookahead
# to make sure the separator is not followed by //scriptlet
- begin: "^(.*?)(#@?%#(?!\\/\\/scriptlet))"
beginCaptures:
"1":
patterns:
- include: "#domainListCommaSeparated"
"2":
name: keyword.control.adblock
end: "$"
contentName: source.js
patterns:
- include: source.js
# Mark any other JS injection as invalid (typically unclosed / invalid
# scriptlet calls)
- match: "^(.*?)(#@?%#)(.+)$"
captures:
"1":
Expand All @@ -248,8 +268,7 @@ repository:
"2":
name: keyword.control.adblock
"3":
patterns:
- include: "#jsFunction"
name: invalid.illegal
basicRulesNoUrl:
patterns:
- match: "^(\\$)(.+)$"
Expand Down Expand Up @@ -517,10 +536,6 @@ repository:
name: keyword.operator.adblock
- name: invalid.illegal.adblock
match: ".*"
jsFunction:
patterns:
- name: constant.character.jscode.adblock
match: ".+"
cssStyle:
patterns:
- match: "(@media[\\s]+[^\\{]*)(\\{)([\\s]*[^\\{]*)(\\{)([\\s]*[^\\}]*)(\\})[\\s]*(\\})"
Expand Down
42 changes: 31 additions & 11 deletions test/grammar/common/adblock-grammar-loader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,24 @@ import { loadWASM, OnigScanner, OnigString } from 'vscode-oniguruma';
import { convertYamlToPlist } from '../../../tools/grammar-converter';

/** Source file path for the grammar */
const GRAMMAR_PATH = join(__dirname, '../../../', 'syntaxes/adblock.yaml-tmlanguage');
const ADBLOCK_GRAMMAR_PATH = join(__dirname, '../../../', 'syntaxes/adblock.yaml-tmlanguage');

/** Scope name for the grammar */
const GRAMMAR_SCOPE = 'text.adblock';
/** Scope name for the adblock grammar */
const ADBLOCK_GRAMMAR_SCOPE = 'text.adblock';

/** Scope name for the JavaScript grammar */
const JS_GRAMMAR_SCOPE = 'source.js';

/** Dummy grammar for JavaScript (raw) */
const DUMMY_JS_GRAMMAR = `{
"name": "JavaScript",
"scopeName": "source.js",
"patterns": [],
"repository": {}
}`;

/** Fake file name for the dummy JavaScript grammar */
const DUMMY_JS_GRAMMAR_FILE_NAME = 'dummy-js-grammar.json';

/**
* Loads a grammar from YAML source, converts it to PList, and loads it into a registry.
Expand All @@ -23,7 +37,7 @@ const GRAMMAR_SCOPE = 'text.adblock';
*/
export async function loadAdblockGrammar(): Promise<IGrammar | null> {
// Read the raw contents of the grammar file
const rawYaml = await readFile(GRAMMAR_PATH, 'utf8');
const rawYaml = await readFile(ADBLOCK_GRAMMAR_PATH, 'utf8');

// Convert the raw yaml into a plist
const plist = convertYamlToPlist(rawYaml);
Expand All @@ -44,17 +58,23 @@ export async function loadAdblockGrammar(): Promise<IGrammar | null> {

// Load the grammar from the plist
loadGrammar: async (scopeName) => {
if (scopeName === GRAMMAR_SCOPE) {
return parseRawGrammar(plist);
}
switch (scopeName) {
case ADBLOCK_GRAMMAR_SCOPE:
return parseRawGrammar(plist);

// eslint-disable-next-line no-console
console.log(`Unknown scope name: ${scopeName}`);
case JS_GRAMMAR_SCOPE:
// A fake JSON file name must be specified to trigger the JSON
// parser in the textmate library
return parseRawGrammar(DUMMY_JS_GRAMMAR, DUMMY_JS_GRAMMAR_FILE_NAME);

return null;
default:
throw new Error(`Unknown scope name: ${scopeName}`);
}
},
});

// Load the adblock grammar from the registry by its scope name
return registry.loadGrammar(GRAMMAR_SCOPE);
return registry.loadGrammarWithEmbeddedLanguages(ADBLOCK_GRAMMAR_SCOPE, 1, {
[JS_GRAMMAR_SCOPE]: 2,
});
}
39 changes: 39 additions & 0 deletions test/grammar/cosmetic/js-inject.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/**
* @file Tests for JS injection rules
*/

import { AdblockTokenizer, getAdblockTokenizer } from '../common/get-adblock-tokenizer';
import { expectTokens } from '../common/token-expectation';

let tokenize: AdblockTokenizer;

// Before running any tests, we should load the grammar and get the tokenizer
beforeAll(async () => {
tokenize = await getAdblockTokenizer();
});

describe('JS injection rules', () => {
// Arbitrary JS after the #%# separator: the separator is expected to be scoped as
// keyword.control.adblock and the remainder of the line as embedded source.js
test('should tokenize valid JS injections', () => {
expectTokens(
tokenize,
'#%#window.hello = 1',
[
{ fragment: '#%#', scopes: ['text.adblock', 'keyword.control.adblock'] },
{ fragment: 'window.hello = 1', scopes: ['text.adblock', 'source.js'] },
],
);
});

test('should detect invalid cases', () => {
// Unclosed scriptlet call. Since it isn't closed, it isn't matched as a scriptlet call;
// still, the text after #%# must not be tokenized as a JS comment, so it is expected
// to be marked as invalid
expectTokens(
tokenize,
'#%#//scriptlet(\'a\',',
[
{ fragment: '#%#', scopes: ['text.adblock', 'keyword.control.adblock'] },
{ fragment: '//scriptlet(\'a\',', scopes: ['text.adblock', 'invalid.illegal'] },
],
);
});
});

0 comments on commit 14ce25f

Please sign in to comment.