Skip to content

Commit

Permalink
[spell-check] Allow the user to whitelist sections of a buffer…
Browse files Browse the repository at this point in the history
…for spellchecking on a per-language basis.

This opens up things that aren't really possible with the `excludedScopes` setting. For instance: you can specify `source.python comment, source.python string, source.js comment`; this will enable spellchecking in Python files for both comments and strings, while enabling spellchecking in JavaScript files for _only_ comments.

Closes #1135.
  • Loading branch information
savetheclocktower committed Dec 1, 2024
1 parent c2fdb9e commit c4ce2a5
Show file tree
Hide file tree
Showing 5 changed files with 396 additions and 30 deletions.
20 changes: 3 additions & 17 deletions packages/spell-check/lib/main.js
Original file line number Diff line number Diff line change
Expand Up @@ -42,23 +42,9 @@ module.exports = {
// Hook up changes to the configuration settings.
this.excludedScopeRegexLists = [];
this.subs.add(
atom.config.observe(
'spell-check.excludedScopes',
(excludedScopes) => {
this.excludedScopeRegexLists = excludedScopes.map(
(excludedScope) =>
excludedScope
.split(/\s+/)[0]
.split('.')
.filter((className) => className)
.map(
(className) =>
new RegExp(`\\b${className}\\b`)
)
);
return this.updateViews();
}
)
atom.config.onDidChange('spell-check.excludedScopes', () => {
return this.updateViews();
})
);

this.subs.add(
Expand Down
155 changes: 155 additions & 0 deletions packages/spell-check/lib/scope-helper.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
function normalizeSegment(segment) {
if (!segment.startsWith('.')) return segment;
return segment.substring(1);
}

function segmentsMatch(
descriptorSegment,
selectorSegment,
{ enforceSegmentOrder = false } = {}
) {
let descriptorParts = normalizeSegment(descriptorSegment).split('.');
let selectorParts = normalizeSegment(selectorSegment).split('.');

if (selectorParts.length > descriptorParts.length) {
return false;
}

// Remove all parts from the descriptor scope name that aren't present in the
// selector scope name.
for (let i = descriptorParts.length - 1; i >= 0; i--) {
let part = descriptorParts[i];
if (!selectorParts.includes(part)) {
descriptorParts.splice(i, 1);
}
}
// Does order matter? It would if this were a TextMate scope, but Atom has
// broadly treated `.function.entity` as equivalent to `.entity.function`,
// even though it causes headaches in some places.
//
// We'll assume that order doesn't matter, but the user can opt into strict
// ordering if they want.
if (!enforceSegmentOrder) {
descriptorParts.sort();
selectorParts.sort();
}
return descriptorParts.join('.') === selectorParts.join('.');
}

class ScopeSelector {
static create(stringOrScopeSelector) {
if (typeof stringOrScopeSelector === 'string') {
return new ScopeSelector(stringOrScopeSelector);
} else if (stringOrScopeSelector instanceof ScopeSelector) {
return stringOrScopeSelector;
} else {
throw new TypeError(`Invalid argument`);
}
}

constructor(selectorString) {
this.selectorString = selectorString;
this.variations = selectorString.split(/,\s*/);
}

matches(scopeDescriptorOrArray, rawOptions = {}) {
let options = {
// Whether to treat (e.g.) `.function.entity` as distinct from
// `.entity.function`. Defaults to `false` to match prevailing Atom
// behavior.
enforceSegmentOrder: false,
...rawOptions,
};
console.log(this, 'matches', scopeDescriptorOrArray);
let scopeList;
if (Array.isArray(scopeDescriptorOrArray)) {
scopeList = scopeDescriptorOrArray;
} else {
scopeList = scopeDescriptorOrArray.getScopesArray();
}

return this.variations.some((variation) =>
this.matchesVariation(scopeList, variation, options)
);
}

matchesVariation(scopeList, selectorString, options) {
let parts = selectorString.split(/\s+/);
if (parts.length > scopeList.length) return false;

let lastIndex = -1;

outer: for (let selectorPart of parts) {
// Find something in the descriptor that matches this selector part.
for (let [i, descriptorPart] of scopeList.entries()) {
// Ignore everything before our index cursor; this is what enforces the
// ordering of the scope selector.
if (i <= lastIndex) continue;
let doesMatch = segmentsMatch(
descriptorPart,
selectorPart,
options
);
if (doesMatch) {
lastIndex = i;
continue outer;
}
}
// If we get this far, we searched the entire descriptor list for a
// selector part and failed to find it; hence this variation doesn't
// match.
return false;
}
// If we get this far, we made it through the entire gauntlet without
// hitting the early return. This variation matches!
return true;
}
}

// Private: A candidate for possible addition to the {ScopeDescriptor} API.
//
// Tests whether the given scope descriptor matches the given scope selector.
//
// A subset of the full TextMate scope selector syntax is supported:
//
// * Descendant scopes (e.g., `source.python string`); this function will
// enforce the ordering of segments.
// * Variations (e.g., `comment.block, string.quoted`); this function will
// return true if any of the variations match.
//
// Not supported:
//
// * Subtraction syntax (e.g., `comment - block`).
// * Left/right edge syntax (e.g., `L:comment.block`).
//
// For example: given the scope descriptor…
//
// [
// 'source.js',
// 'meta.block.function.js',
// 'string.quoted.single.js'
// ]
//
// …here are outcomes of various tests:
//
// scopeDescriptorMatchesSelector(descriptor, `source`) // -> true
// scopeDescriptorMatchesSelector(descriptor, `text`) // -> false
// scopeDescriptorMatchesSelector(descriptor, `source.js`) // -> true
// scopeDescriptorMatchesSelector(descriptor, `source.python`) // -> false
// scopeDescriptorMatchesSelector(descriptor, `source.js meta.block.function.js`) // -> true
// scopeDescriptorMatchesSelector(descriptor, `source meta`) // -> true
// scopeDescriptorMatchesSelector(descriptor, `source meta.block.class`) // -> false
// scopeDescriptorMatchesSelector(descriptor, `source meta string`) // -> true
// scopeDescriptorMatchesSelector(descriptor, `source string`) // -> true
// scopeDescriptorMatchesSelector(descriptor, `source string meta`) // -> false
//
// - `scopeDescriptor` A {ScopeDescriptor} or a scope descriptor {Array}.
// - `selector` A {String} representing a scope selector.
function scopeDescriptorMatchesSelector(scopeDescriptor, selector) {
let scopeSelector = ScopeSelector.create(selector);
return scopeSelector.matches(scopeDescriptor);
}

module.exports = {
scopeDescriptorMatchesSelector,
};
121 changes: 114 additions & 7 deletions packages/spell-check/lib/spell-check-view.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,18 @@ let SpellCheckView;
const _ = require('underscore-plus');
const { CompositeDisposable } = require('atom');
const SpellCheckTask = require('./spell-check-task');
const { scopeDescriptorMatchesSelector } = require('./scope-helper');

// Tests whether a grammar's root scope matches a scope specified in the
// `grammars` setting. Allows for a more generic name in the setting (e.g.,
// `source` to match all `source.[x]` grammars).
function topLevelScopeMatches(grammar, scope) {
if (scope === grammar) return true;
if (grammar.startsWith(`${scope}.`)) {
return true;
}
return false;
}

let CorrectionsView = null;

Expand Down Expand Up @@ -73,6 +85,15 @@ module.exports = SpellCheckView = class SpellCheckView {
})
);

this.disposables.add(
atom.config.observe(
'spell-check.excludedScopes',
(excludedScopes) => {
this.excludedScopes = excludedScopes;
}
)
);

this.subscribeToBuffer();

this.disposables.add(this.editor.onDidDestroy(this.destroy.bind(this)));
Expand Down Expand Up @@ -118,6 +139,7 @@ module.exports = SpellCheckView = class SpellCheckView {
this.unsubscribeFromBuffer();

if (this.spellCheckCurrentGrammar()) {
this.scopesToSpellCheck = this.getSpellCheckScopesForCurrentGrammar();
this.buffer = this.editor.getBuffer();
this.bufferDisposable = new CompositeDisposable(
this.buffer.onDidStopChanging(
Expand All @@ -131,7 +153,69 @@ module.exports = SpellCheckView = class SpellCheckView {

spellCheckCurrentGrammar() {
const grammar = this.editor.getGrammar().scopeName;
return _.contains(atom.config.get('spell-check.grammars'), grammar);
let grammars = atom.config.get('spell-check.grammars');
let topLevelScopes = grammars.map((rawScope) => {
if (!rawScope.includes(' ')) return rawScope;
return rawScope.substring(0, rawScope.indexOf(' '));
});
return topLevelScopes.some((scope) => {
return topLevelScopeMatches(grammar, scope);
});
// return topLevelScopes.includes(grammar);
}

// Returns:
//
// * `true` if the entire buffer should be checked;
// * `false` if none of the buffer should be checked; or, if only certain
// parts of the buffer should be checked,
// * an {Array} of scope names matching regions of the buffer that should
// be checked.
getSpellCheckScopesForCurrentGrammar() {
const grammar = this.editor.getGrammar().scopeName;
let grammars = atom.config.get('spell-check.grammars');
let scopeList = [];
// Despite the name of this setting, spell-checking is no longer all or
// nothing on a per-grammar basis; we now allow users to opt into
// checking subsections of a buffer by adding descendant scopes. Each
// segment must begin with all or part of a root scope name (e.g.,
// `source.js`, `text.html`, but otherwise any valid scope selector is
// accepted here.)
//
// Examples:
//
// * `source.js comment.block`
// * `source comment, source string.quoted`
// * `text`
//
// The first example targets just JS block comments; the second targets
// all comments and quoted strings in _all_ source files; and the third
// targets any text format, whether HTML or Markdown or plaintext.
//
// This allows for more granular spell-checking than was possible
// before, even if the `excludeScopes` setting was utilized.
for (let rawScope of grammars) {
if (!rawScope.includes(' ')) {
// Any value that's just the bare root scope of the language
// (like `source.python`) means that we're spell-checking the
// entire buffer. This applies even if there's a later match
// for this grammar that's more restrictive.
if (topLevelScopeMatches(grammar, rawScope)) {
return true;
}
} else {
// If the value also includes a descendant scope, it means we're
// spell-checking some subset of the buffer.
let index = rawScope.indexOf(' ');
let rootScope = rawScope.substring(0, index);
if (topLevelScopeMatches(grammar, rootScope)) {
// There could be multiple of these — e.g., `source.python string,
// source.python comment` — so we won't return early.
scopeList.push(rawScope);
}
}
}
return scopeList.length > 0 ? scopeList : false;
}

destroyMarkers() {
Expand All @@ -147,6 +231,9 @@ module.exports = SpellCheckView = class SpellCheckView {
const scope = this.editor.scopeDescriptorForBufferPosition(
misspelling[0]
);
// Under the hood, we spell-check the entire document; but we
// might end up ignoring some of the misspellings based on
// their scope descriptor.
if (!this.scopeIsExcluded(scope)) {
result.push(
this.markerLayer.markBufferRange(misspelling, {
Expand Down Expand Up @@ -308,11 +395,31 @@ module.exports = SpellCheckView = class SpellCheckView {
return (this.spellCheckModule.contextMenuEntries = []);
}

scopeIsExcluded(scopeDescriptor, excludedScopes) {
return this.spellCheckModule.excludedScopeRegexLists.some((regexList) =>
scopeDescriptor.scopes.some((scopeName) =>
regexList.every((regex) => regex.test(scopeName))
)
);
scopeIsExcluded(scopeDescriptor) {
// Practically speaking, `this.scopesToSpellCheck` will either be `true`
// or an array of scope selectors. If it's the latter, then we should
// apply whitelisting and exclude anything that doesn't match.
if (Array.isArray(this.scopesToSpellCheck)) {
// If we know none of the subscopes match this region, we can
// exclude it even before we get to the `excludedScopes` setting.
let someMatch = this.scopesToSpellCheck.some(
(scopeToSpellCheck) => {
return scopeDescriptorMatchesSelector(
scopeDescriptor,
scopeToSpellCheck
);
}
);
if (!someMatch) return true;
}
// Whether or not we applied whitelisting above, excluded scopes take
// precedence; anything that doesn't make it through this gauntlet
// gets excluded.
return this.excludedScopes.some((excludedScope) => {
return scopeDescriptorMatchesSelector(
scopeDescriptor,
excludedScope
);
});
}
};
5 changes: 3 additions & 2 deletions packages/spell-check/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@
"repository": "https://github.com/pulsar-edit/pulsar",
"license": "MIT",
"engines": {
"atom": "*"
"atom": "*",
"node": ">=14"
},
"scripts": {
"format": "prettier --write \"spec/*.js\" \"lib/**/*.js\" \"script/*.js\" --loglevel warn"
Expand All @@ -37,7 +38,7 @@
"source.rst",
"text.restructuredtext"
],
"description": "List of scopes for languages which will be checked for misspellings. See [the README](https://github.com/pulsar-edit/pulsar/blob/master/packages/spell-check/README.md) for more information on finding the correct scope for a specific language.",
"description": "List of scopes for languages which will be checked for misspellings. See [the README](https://github.com/pulsar-edit/pulsar/blob/master/packages/spell-check/README.md) for more information on finding the correct scope for a specific language. If you want to spell-check only parts of some languages, you may specify a more detailed scope selector (like `source.js comment.block` or `source string`); but the first part of the selector must match the language scope.",
"order": 1
},
"excludedScopes": {
Expand Down
Loading

0 comments on commit c4ce2a5

Please sign in to comment.