Skip to content

Commit

Permalink
Improve performance and memory by caching XML Schema / DTD
Browse files Browse the repository at this point in the history
Fixes #534

Signed-off-by: azerr <[email protected]>
  • Loading branch information
angelozerr committed Oct 15, 2019
1 parent 802ce50 commit db69af4
Show file tree
Hide file tree
Showing 15 changed files with 645 additions and 81 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -153,4 +153,9 @@ public void stop(XMLExtensionsRegistry registry) {
/**
 * Returns the content model settings stored in the {@code cmSettings} field.
 *
 * @return the current content model settings
 */
public ContentModelSettings getContentModelSettings() {
    return this.cmSettings;
}

/**
 * Returns the content model manager stored in the {@code contentModelManager}
 * field.
 *
 * @return the content model manager
 */
public ContentModelManager getContentModelManager() {
    return this.contentModelManager;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,10 @@
import java.util.List;
import java.util.Map;

import org.apache.xerces.xni.grammars.XMLGrammarPool;
import org.eclipse.lsp4xml.dom.DOMDocument;
import org.eclipse.lsp4xml.dom.DOMElement;
import org.eclipse.lsp4xml.extensions.contentmodel.participants.diagnostics.LSPXMLGrammarPool;
import org.eclipse.lsp4xml.extensions.contentmodel.settings.XMLFileAssociation;
import org.eclipse.lsp4xml.extensions.contentmodel.uriresolver.XMLCacheResolverExtension;
import org.eclipse.lsp4xml.extensions.contentmodel.uriresolver.XMLCatalogResolverExtension;
Expand All @@ -43,6 +45,9 @@ public class ContentModelManager {
private final XMLCatalogResolverExtension catalogResolverExtension;
private final XMLFileAssociationResolverExtension fileAssociationResolver;

// Grammar pool shared by validations so that loaded XML Schema / DTD grammars can be reused
private final XMLGrammarPool grammarPool;

public ContentModelManager(URIResolverExtensionManager resolverManager) {
this.resolverManager = resolverManager;
modelProviders = new ArrayList<>();
Expand All @@ -53,6 +58,7 @@ public ContentModelManager(URIResolverExtensionManager resolverManager) {
resolverManager.registerResolver(catalogResolverExtension);
cacheResolverExtension = new XMLCacheResolverExtension();
resolverManager.registerResolver(cacheResolverExtension);
grammarPool = new LSPXMLGrammarPool();
// Use cache by default
setUseCache(true);
}
Expand Down Expand Up @@ -180,7 +186,7 @@ private void cache(String key, CMDocument cmDocument) {
cmDocumentCache.put(key, cmDocument);
}
}

/**
 * Finds the content model element declaration for the given DOM element,
 * using the element's own namespace URI for the lookup.
 *
 * @param element the DOM element to look up
 * @return the result of the namespace-aware overload of this method
 * @throws Exception if the delegated lookup fails
 */
public CMElementDeclaration findInternalCMElement(DOMElement element) throws Exception {
    String namespaceURI = element.getNamespaceURI();
    return findInternalCMElement(element, namespaceURI);
}
Expand Down Expand Up @@ -273,6 +279,9 @@ public void setRootURI(String rootUri) {

/**
 * Enables or disables caching.
 *
 * <p>
 * The flag is forwarded to the cache resolver extension. When caching is turned
 * off, the grammar pool is cleared as well.
 * </p>
 *
 * @param useCache {@code true} to enable caching, {@code false} to disable it
 */
public void setUseCache(boolean useCache) {
    cacheResolverExtension.setUseCache(useCache);
    if (useCache) {
        // Caching stays on: keep the grammars that are already pooled.
        return;
    }
    grammarPool.clear();
}

public void registerModelProvider(ContentModelProvider modelProvider) {
Expand All @@ -283,4 +292,8 @@ public void unregisterModelProvider(ContentModelProvider modelProvider) {
modelProviders.remove(modelProvider);
}

/**
 * Returns the grammar pool to use for validation.
 *
 * @return the grammar pool when the cache resolver extension reports that
 *         caching is enabled, {@code null} otherwise
 */
public XMLGrammarPool getGrammarPool() {
    if (!cacheResolverExtension.isUseCache()) {
        return null;
    }
    return grammarPool;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,12 @@ private static class FileChangedTracker {

public FileChangedTracker(Path file) {
this.file = file;
try {
lastModified = Files.getLastModifiedTime(file);
} catch (IOException e) {
LOGGER.log(Level.SEVERE, "Get last modified time failed", e);
if (Files.exists(file)) {
try {
lastModified = Files.getLastModifiedTime(file);
} catch (IOException e) {
LOGGER.log(Level.SEVERE, "Get last modified time failed", e);
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ public void doDiagnostics(DOMDocument xmlDocument, List<Diagnostic> diagnostics,
XMLEntityResolver entityResolver = xmlDocument.getResolverExtensionManager();
// Process validation
XMLValidator.doDiagnostics(xmlDocument, entityResolver, diagnostics,
contentModelPlugin.getContentModelSettings(), monitor);
contentModelPlugin.getContentModelSettings(),
contentModelPlugin.getContentModelManager().getGrammarPool(), monitor);
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,246 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.eclipse.lsp4xml.extensions.contentmodel.participants.diagnostics;

import org.apache.xerces.impl.dtd.DTDGrammar;
import org.apache.xerces.impl.xs.SchemaGrammar;
import org.apache.xerces.xni.grammars.Grammar;
import org.apache.xerces.xni.grammars.XMLGrammarDescription;
import org.apache.xerces.xni.grammars.XMLGrammarPool;
import org.eclipse.lsp4xml.extensions.contentmodel.model.FilesChangedTracker;
import org.eclipse.lsp4xml.extensions.dtd.utils.DTDUtils;
import org.eclipse.lsp4xml.extensions.xsd.utils.XSDUtils;

import com.google.common.base.Objects;

/**
 * LSP XML grammar pool.
 *
 * <p>
 * This class is a copy/paste of
 * {@link org.apache.xerces.util.XMLGrammarPoolImpl} from Xerces, adapted to use
 * the .lsp4xml cache.
 * </p>
 *
 * <p>
 * Differences visible in this implementation: grammars are matched on the
 * expanded system id of their description, each pooled grammar carries a
 * {@link FilesChangedTracker}, and an entry whose tracker reports it as dirty
 * is evicted on lookup. All accesses to the table are guarded by a single lock
 * (the table itself).
 * </p>
 *
 * @author Jeffrey Rodriguez, IBM
 * @author Andy Clark, IBM
 * @author Neil Graham, IBM
 * @author Pavani Mukthipudi, Sun Microsystems
 * @author Neeraj Bajaj, SUN Microsystems
 * @author Angelo ZERR
 *
 */
public class LSPXMLGrammarPool implements XMLGrammarPool {

    private static final int TABLE_SIZE = 11;

    /**
     * Hash table buckets; each bucket is a singly linked list of entries. The
     * array also serves as the monitor guarding every read and write of the table.
     */
    private final Entry[] fGrammars;

    /** Creates a pool with the default number of buckets. */
    public LSPXMLGrammarPool() {
        this(TABLE_SIZE);
    }

    /**
     * Creates a pool with the given number of buckets.
     *
     * @param initialCapacity the number of buckets; it is used as a modulus when
     *                        computing bucket indexes, so it must be greater than
     *                        zero
     */
    public LSPXMLGrammarPool(int initialCapacity) {
        fGrammars = new Entry[initialCapacity];
    }

    @Override
    public Grammar[] retrieveInitialGrammarSet(String grammarType) {
        // To avoid having trouble with xsi:noNamespaceSchemaLocation, we return nothing
        // because in the case of xsi:noNamespaceSchemaLocation
        // it's the first XML Schema which was registered as
        // xs:noNamespaceSchemaLocation which is used.
        return null;
    }

    @Override
    public void cacheGrammars(String grammarType, Grammar[] grammars) {
        if (grammars == null) {
            return;
        }
        for (Grammar grammar : grammars) {
            putGrammar(grammar);
        }
    }

    @Override
    public Grammar retrieveGrammar(XMLGrammarDescription desc) {
        return getGrammar(desc);
    }

    /**
     * Stores the given grammar in the pool. If an entry already exists for an
     * equal description (see {@link #equals(XMLGrammarDescription, XMLGrammarDescription)}),
     * its grammar is replaced; otherwise a new entry is added at the head of the
     * bucket.
     *
     * @param grammar the grammar to store; {@code null} is ignored
     */
    private void putGrammar(Grammar grammar) {
        if (grammar == null) {
            return;
        }
        synchronized (fGrammars) {
            XMLGrammarDescription desc = grammar.getGrammarDescription();
            int hash = hashCode(desc);
            int index = (hash & 0x7FFFFFFF) % fGrammars.length;
            for (Entry entry = fGrammars[index]; entry != null; entry = entry.next) {
                if (entry.hash == hash && equals(entry.desc, desc)) {
                    entry.grammar = grammar;
                    return;
                }
            }
            // create a new entry
            fGrammars[index] = new Entry(hash, desc, grammar, fGrammars[index]);
        }
    }

    /**
     * Returns the grammar associated to the specified grammar description. The
     * expanded system id of the description is used as the key. An entry that
     * reports itself as dirty is removed from the pool and {@code null} is
     * returned instead of its grammar.
     *
     * @param desc The Grammar Description.
     * @return the pooled grammar, or {@code null} if there is no usable entry
     */
    private Grammar getGrammar(XMLGrammarDescription desc) {
        synchronized (fGrammars) {
            int hash = hashCode(desc);
            int index = (hash & 0x7FFFFFFF) % fGrammars.length;
            for (Entry entry = fGrammars[index]; entry != null; entry = entry.next) {
                if ((entry.hash == hash) && equals(entry.desc, desc)) {
                    if (entry.isDirty()) {
                        // Evict the outdated entry; the caller gets no grammar.
                        removeGrammar(entry.desc);
                        return null;
                    }
                    return entry.grammar;
                }
            }
            return null;
        }
    }

    /**
     * Removes the grammar associated to the specified grammar description from the
     * grammar pool and returns the removed grammar. The expanded system id of the
     * description is used as the key.
     *
     * @param desc The Grammar Description.
     * @return The removed grammar, or {@code null} if no entry matched.
     */
    private Grammar removeGrammar(XMLGrammarDescription desc) {
        synchronized (fGrammars) {
            int hash = hashCode(desc);
            int index = (hash & 0x7FFFFFFF) % fGrammars.length;
            for (Entry entry = fGrammars[index], prev = null; entry != null; prev = entry, entry = entry.next) {
                if ((entry.hash == hash) && equals(entry.desc, desc)) {
                    // Unlink the entry from its bucket.
                    if (prev != null) {
                        prev.next = entry.next;
                    } else {
                        fGrammars[index] = entry.next;
                    }
                    Grammar tempGrammar = entry.grammar;
                    entry.grammar = null;
                    return tempGrammar;
                }
            }
            return null;
        }
    }

    @Override
    public void lockPool() {
        // Do nothing
    }

    @Override
    public void unlockPool() {
        // Do nothing
    }

    /**
     * Removes every entry from the pool.
     *
     * <p>
     * The table lock is held while clearing: entries get their description set to
     * {@code null}, which a concurrent lookup walking the same bucket would
     * otherwise dereference.
     * </p>
     */
    @Override
    public void clear() {
        synchronized (fGrammars) {
            for (int i = 0; i < fGrammars.length; i++) {
                if (fGrammars[i] != null) {
                    fGrammars[i].clear();
                    fGrammars[i] = null;
                }
            }
        }
    }

    /**
     * Checks whether two grammar descriptions designate the same pooled grammar.
     * Two descriptions match only when both have a non-null expanded system id and
     * those ids are equal; a description without expanded system id never matches.
     *
     * @param desc1 The grammar description
     * @param desc2 The grammar description of the grammar to be compared to
     * @return True if the descriptions match, otherwise false
     */
    public boolean equals(XMLGrammarDescription desc1, XMLGrammarDescription desc2) {
        String systemId1 = desc1.getExpandedSystemId();
        String systemId2 = desc2.getExpandedSystemId();
        if (systemId1 != null && systemId2 != null) {
            return Objects.equal(systemId1, systemId2);
        }
        return false; // desc1.equals(desc2);
    }

    /**
     * Returns the hash code value for the given grammar description.
     *
     * <p>
     * The hash is derived from the expanded system id when there is one, so that
     * descriptions considered equal by
     * {@link #equals(XMLGrammarDescription, XMLGrammarDescription)} always fall in
     * the same bucket. Descriptions without expanded system id fall back to their
     * own hash code.
     * </p>
     *
     * @param desc The grammar description
     * @return The hash code value
     */
    public int hashCode(XMLGrammarDescription desc) {
        String systemId = desc.getExpandedSystemId();
        return systemId != null ? systemId.hashCode() : desc.hashCode();
    }

    /**
     * This class is a grammar pool entry. Each entry acts as a node in a linked
     * list.
     */
    protected static final class Entry {
        public int hash;
        public XMLGrammarDescription desc;
        public Grammar grammar;
        public Entry next;
        /** Tracker created from the grammar given at construction; may be null. */
        private final FilesChangedTracker tracker;

        protected Entry(int hash, XMLGrammarDescription desc, Grammar grammar, Entry next) {
            this.hash = hash;
            this.desc = desc;
            this.grammar = grammar;
            this.next = next;
            this.tracker = create(grammar);
        }

        /**
         * Creates the files-changed tracker for the given grammar.
         *
         * @param grammar the grammar to track
         * @return a tracker for XML Schema and DTD grammars, {@code null} for any
         *         other kind of grammar
         */
        private static FilesChangedTracker create(Grammar grammar) {
            if (grammar instanceof SchemaGrammar) {
                return XSDUtils.createFilesChangedTracker((SchemaGrammar) grammar);
            }
            if (grammar instanceof DTDGrammar) {
                return DTDUtils.createFilesChangedTracker((DTDGrammar) grammar);
            }
            return null;
        }

        /**
         * Returns whether this entry must be discarded.
         *
         * @return {@code true} when there is no tracker or when the tracker reports
         *         itself as dirty
         */
        public boolean isDirty() {
            return tracker == null || tracker.isDirty();
        }

        // clear this entry and every entry chained after it; useful to promote
        // garbage collection since it reduces the reference count of objects to be
        // destroyed
        protected void clear() {
            Entry current = this;
            while (current != null) {
                Entry following = current.next;
                current.desc = null;
                current.grammar = null;
                current.next = null;
                current = following;
            }
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import org.apache.xerces.impl.dtd.XMLDTDValidator;
import org.apache.xerces.parsers.XIncludeAwareParserConfiguration;
import org.apache.xerces.xni.XNIException;
import org.apache.xerces.xni.grammars.XMLGrammarPool;
import org.apache.xerces.xni.parser.XMLComponentManager;
import org.apache.xerces.xni.parser.XMLConfigurationException;
import org.eclipse.lsp4xml.extensions.contentmodel.settings.XMLValidationSettings;
Expand All @@ -21,14 +22,20 @@
*
* <ul>
* <li>disable only DTD validation if required</li>
* <li>disable doctype declaration according to the validation settings</li>
* <li>disable external entities according to the validation settings</li>
* <li>manage a custom grammar pool to retrieve compiled XML Schema/DTD from a
* given XML file path</li>
* </ul>
*
*/
class LSPXMLParserConfiguration extends XIncludeAwareParserConfiguration {

private final boolean disableDTDValidation;

public LSPXMLParserConfiguration(boolean disableDTDValidation, XMLValidationSettings validationSettings) {
public LSPXMLParserConfiguration(XMLGrammarPool grammarPool, boolean disableDTDValidation,
XMLValidationSettings validationSettings) {
super(null, grammarPool);
this.disableDTDValidation = disableDTDValidation;
// Disable DOCTYPE declaration if settings is set to true.
boolean disallowDocTypeDecl = validationSettings != null ? validationSettings.isDisallowDocTypeDecl() : false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.apache.xerces.impl.XMLEntityManager;
import org.apache.xerces.parsers.SAXParser;
import org.apache.xerces.xni.XNIException;
import org.apache.xerces.xni.grammars.XMLGrammarPool;
import org.apache.xerces.xni.parser.XMLEntityResolver;
import org.apache.xerces.xni.parser.XMLInputSource;
import org.apache.xerces.xni.parser.XMLParserConfiguration;
Expand Down Expand Up @@ -58,12 +59,13 @@ public class XMLValidator {
private static final String DTD_NOT_FOUND = "Cannot find DTD ''{0}''.\nCreate the DTD file or configure an XML catalog for this DTD.";

public static void doDiagnostics(DOMDocument document, XMLEntityResolver entityResolver,
List<Diagnostic> diagnostics, ContentModelSettings contentModelSettings, CancelChecker monitor) {
List<Diagnostic> diagnostics, ContentModelSettings contentModelSettings, XMLGrammarPool grammarPool,
CancelChecker monitor) {
try {
XMLValidationSettings validationSettings = contentModelSettings != null
? contentModelSettings.getValidation()
: null;
LSPXMLParserConfiguration configuration = new LSPXMLParserConfiguration(
LSPXMLParserConfiguration configuration = new LSPXMLParserConfiguration(grammarPool,
isDisableOnlyDTDValidation(document), validationSettings);

if (entityResolver != null) {
Expand Down
Loading

0 comments on commit db69af4

Please sign in to comment.