Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Field Reference: handle special characters #14044

Merged
merged 12 commits into from
May 24, 2022
Merged
14 changes: 14 additions & 0 deletions logstash-core/spec/logstash/event_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,15 @@
expect { e.set('[', 'value') }.to raise_exception(::RuntimeError)
end
end

context 'with map value whose keys have FieldReference-special characters' do
let(:event) { LogStash::Event.new }
let(:value) { {"this[field]" => "okay"} }
it 'sets the value correctly' do
event.set('[some][field]', value.dup)
expect(event.get('[some][field]')).to eq(value)
end
end
end

context "timestamp" do
Expand Down Expand Up @@ -351,6 +360,11 @@
expect(e.timestamp.to_iso8601).to eq("2016-05-28T23:02:05.350Z")
end

it 'accepts maps whose keys contain FieldReference-special characters' do
e = LogStash::Event.new({"nested" => {"i][egal" => "okay"}, "n[0]" => "bene"})
expect(e.get('nested')).to eq({"i][egal" => "okay"})
expect(e.to_hash).to include("n[0]" => "bene")
end
end

context "method missing exception messages" do
Expand Down
49 changes: 43 additions & 6 deletions logstash-core/src/main/java/org/logstash/ConvertedMap.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import java.util.HashMap;
import java.util.IdentityHashMap;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import org.jruby.RubyHash;
import org.jruby.RubyString;
import org.jruby.runtime.ThreadContext;
Expand All @@ -35,15 +36,51 @@
* as key.</p>
* <p>The {@code put} method will work with any {@link String} key but is only intended for use in
* situations where {@link ConvertedMap#putInterned(String, Object)} would require manually
* interning the {@link String} key. This is due to the fact that we use our internal
* {@link FieldReference} cache to get an interned version of the given key instead of JDKs
* {@link String#intern()}, which is faster since it works from a much smaller and hotter cache
* in {@link FieldReference#CACHE} than using String interning directly.</p>
* interning the {@link String} key.
* This is due to the fact that it is based on {@link IdentityHashMap}, and we rely on the String
* intern pool to ensure identity match of equivalent strings.
* For performance, we keep a global cache of strings that have been interned for use with {@link ConvertedMap},
* and encourage interning through {@link ConvertedMap#internStringForUseAsKey(String)} to avoid
* the performance pentalty of the global string intern pool.
*/
public final class ConvertedMap extends IdentityHashMap<String, Object> {

private static final long serialVersionUID = 1L;

private static final ConcurrentHashMap<String,String> KEY_CACHE = new ConcurrentHashMap<>(100, 0.2F, 16);

/**
* Returns an equivalent interned string, possibly avoiding the
* global intern pool.
*
* @param candidate the candidate {@link String}
* @return an interned string from the global String intern pool
*/
static String internStringForUseAsKey(final String candidate) {
// TODO: replace with LRU cache and/or isolated intern pool
final String cached = KEY_CACHE.get(candidate);
if (cached != null) { return cached; }

final String interned = candidate.intern();
if (KEY_CACHE.size() <= 10_000 ) {
KEY_CACHE.put(interned, interned);
}
return interned;
}

/**
* Ensures that the provided {@code String[]} contains only
* instances that have been {@link ConvertedMap::internStringForUseAsKey},
* possibly replacing entries with equivalent interned strings.
*
* @param candidates an array of non-null strings
*/
static void internStringsForUseAsKeys(final String[] candidates) {
for (int i = 0; i < candidates.length; i++) {
candidates[i] = internStringForUseAsKey(candidates[i]);
}
}

private static final RubyHash.VisitorWithState<ConvertedMap> RUBY_HASH_VISITOR =
new RubyHash.VisitorWithState<ConvertedMap>() {
@Override
Expand Down Expand Up @@ -91,7 +128,7 @@ public static ConvertedMap newFromRubyHash(final ThreadContext context, final Ru

@Override
public Object put(final String key, final Object value) {
return super.put(FieldReference.from(key).getKey(), value);
return super.put(internStringForUseAsKey(key), value);
}

/**
Expand All @@ -118,6 +155,6 @@ public Object unconvert() {
* @return Interned String
*/
private static String convertKey(final RubyString key) {
return FieldReference.from(key).getKey();
return internStringForUseAsKey(key.asJavaString());
}
}
26 changes: 15 additions & 11 deletions logstash-core/src/main/java/org/logstash/FieldReference.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import java.util.Map;

import java.util.concurrent.ConcurrentHashMap;

import org.jruby.RubyString;

/**
Expand Down Expand Up @@ -75,11 +76,7 @@ public static class IllegalSyntaxException extends RuntimeException {
/**
* Unique {@link FieldReference} pointing at the timestamp field in a {@link Event}.
*/
public static final FieldReference TIMESTAMP_REFERENCE =
deduplicate(new FieldReference(EMPTY_STRING_ARRAY, Event.TIMESTAMP, DATA_CHILD));

private static final FieldReference METADATA_PARENT_REFERENCE =
new FieldReference(EMPTY_STRING_ARRAY, Event.METADATA, META_PARENT);
public static final FieldReference TIMESTAMP_REFERENCE = FieldReference.from(Event.TIMESTAMP);

/**
* Cache of all existing {@link FieldReference} by their {@link RubyString} source.
Expand All @@ -106,9 +103,10 @@ public static class IllegalSyntaxException extends RuntimeException {
private final int type;

private FieldReference(final String[] path, final String key, final int type) {
this.key = key;
this.type = type;
this.key = ConvertedMap.internStringForUseAsKey(key);
ConvertedMap.internStringsForUseAsKeys(path);
this.path = path;
this.type = type;
hash = calculateHash(this.key, this.path, this.type);
}

Expand Down Expand Up @@ -221,10 +219,10 @@ private static FieldReference parseToCache(final String reference) {
private static FieldReference parse(final CharSequence reference) {
final List<String> path = TOKENIZER.tokenize(reference);

final String key = path.remove(path.size() - 1).intern();
final String key = path.remove(path.size() - 1);
final boolean empty = path.isEmpty();
if (empty && key.equals(Event.METADATA)) {
return METADATA_PARENT_REFERENCE;
return new FieldReference(EMPTY_STRING_ARRAY, key, META_PARENT);
} else if (!empty && path.get(0).equals(Event.METADATA)) {
return new FieldReference(
path.subList(1, path.size()).toArray(EMPTY_STRING_ARRAY), key, META_CHILD
Expand All @@ -240,7 +238,11 @@ private static FieldReference parse(final CharSequence reference) {
**/
private static class StrictTokenizer {

public List<String> tokenize(CharSequence reference) {
/**
* @param reference a sequence of characters representing a reference to a field
* @return a list of string path fragments.
*/
public List<String> tokenize(final CharSequence reference) {
ArrayList<String> path = new ArrayList<>();
final int length = reference.length();

Expand Down Expand Up @@ -286,7 +288,7 @@ public List<String> tokenize(CharSequence reference) {

if (splitPoint < i) {
// if we have something to add, add it.
path.add(reference.subSequence(splitPoint, i).toString().intern());
path.add(reference.subSequence(splitPoint, i).toString());
}

depth--;
Expand All @@ -309,7 +311,9 @@ public List<String> tokenize(CharSequence reference) {
// if we saw no brackets, this is a top-level reference that can be emitted as-is without
// further processing
path.add(reference.toString());
path.trimToSize();
return path;

} else if (depth > 0) {
// when we hit the end-of-input while still in an open bracket, we have an invalid field reference
potentiallyAmbiguousSyntaxDetected = true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,12 @@
import java.util.Map;
import org.assertj.core.api.Assertions;
import org.hamcrest.CoreMatchers;
import org.jruby.RubyBoolean;
import org.jruby.RubyHash;
import org.jruby.RubyString;
import org.jruby.exceptions.RuntimeError;
import org.jruby.javasupport.JavaUtil;
import org.jruby.runtime.Block;
import org.jruby.runtime.ThreadContext;
import org.jruby.runtime.builtin.IRubyObject;
import org.junit.Assert;
Expand Down Expand Up @@ -98,19 +100,21 @@ public void correctlyRaiseRubyRuntimeErrorWhenGivenInvalidFieldReferences() {
}

@Test
public void correctlyRaiseRubyRuntimeErrorWhenGivenInvalidFieldReferencesInMap() {
public void correctlySetsValueWhenGivenMapWithKeysThatHaveFieldReferenceSpecialCharacters() {
final ThreadContext context = RubyUtil.RUBY.getCurrentContext();
final JrubyEventExtLibrary.RubyEvent event =
JrubyEventExtLibrary.RubyEvent.newRubyEvent(context.runtime);
final RubyString key = rubyString("foo");
final RubyHash value = RubyHash.newHash(context.runtime, Collections.singletonMap(rubyString("il[[]]]legal"), rubyString("okay")), context.nil);
try {
event.ruby_set_field(context, key, value);
} catch (RuntimeError rubyRuntimeError) {
Assert.assertThat(rubyRuntimeError.getLocalizedMessage(), CoreMatchers.containsString("Invalid FieldReference"));
return;
}
Assert.fail("expected ruby RuntimeError was not thrown.");

event.ruby_set_field(context, key, value);
IRubyObject retrievedValue = event.ruby_get_field(context, key);
Assert.assertThat(retrievedValue, CoreMatchers.equalTo(value));

RubyHash eventHash = (RubyHash) event.ruby_to_hash_with_metadata(context);
IRubyObject nestedValue = eventHash.dig(context, rubyString("foo"), rubyString("il[[]]]legal"));
Assert.assertFalse(nestedValue.isNil());
Assert.assertEquals(rubyString("okay"), nestedValue);
}

private static RubyString rubyString(final String java) {
Expand Down