Skip to content

Commit

Permalink
[8.17] Replace encoder with url encoder (#116699) (#119080)
Browse files Browse the repository at this point in the history
Document IDs are frequently used in HTTP requests, such as `GET /index/_doc/{id}`, where they must be URL-safe to avoid issues with invalid characters. This change ensures that IDs generated by `TimeBasedKOrderedUUIDGenerator` are properly Base64 URL-encoded, free of characters that could break URLs. We also test that no IDs include invalid characters like +, /, or = to guarantee they are fully compliant with URL-safe requirements.

Moreover, `TimeBasedKOrderedUUIDGenerator` and `TimeBasedUUIDGenerator` are refactored to allow dependency injection, which lets us increase test coverage by adding tests for high-throughput scenarios, sequence ID overflow, and unreliable clocks.
  • Loading branch information
salvatore-campagna authored Dec 19, 2024
1 parent 81f725d commit d86b6f4
Show file tree
Hide file tree
Showing 6 changed files with 364 additions and 41 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,14 @@ public void process(IndexRequest indexRequest) {
}
}

/**
 * Tells whether the time-based id scheme applies to an index.
 *
 * @param indexMode       the mode of the index
 * @param creationVersion the version the index was created with
 * @return {@code true} only when the index mode is {@link IndexMode#LOGSDB} and the creation
 *         version passes {@link #isNewIndexVersion(IndexVersion)}
 */
private static boolean shouldUseTimeBasedId(final IndexMode indexMode, final IndexVersion creationVersion) {
    // Any mode other than LOGSDB is rejected up front; the version is not consulted in that case.
    if (indexMode != IndexMode.LOGSDB) {
        return false;
    }
    return isNewIndexVersion(creationVersion);
}

/**
 * Checks the index creation version against the backport threshold for time-based k-ordered doc ids.
 *
 * @param creationVersion the version the index was created with
 * @return {@code true} if {@code creationVersion} is on or after
 *         {@code IndexVersions.TIME_BASED_K_ORDERED_DOC_ID_BACKPORT}
 */
private static boolean isNewIndexVersion(final IndexVersion creationVersion) {
    final IndexVersion threshold = IndexVersions.TIME_BASED_K_ORDERED_DOC_ID_BACKPORT;
    return creationVersion.onOrAfter(threshold);
}

@Override
public int indexShard(
String id,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

import java.nio.ByteBuffer;
import java.util.Base64;
import java.util.function.Supplier;

/**
* Generates a base64-encoded, k-ordered UUID string optimized for compression and efficient indexing.
Expand All @@ -28,18 +29,27 @@
* The result is a compact, URL-safe base64-encoded string (without padding), optimized for efficient compression of the _id field in an inverted index.
*/
public class TimeBasedKOrderedUUIDGenerator extends TimeBasedUUIDGenerator {
private static final Base64.Encoder BASE_64_NO_PADDING = Base64.getEncoder().withoutPadding();

private static final Base64.Encoder BASE_64_NO_PADDING_URL_ENCODER = Base64.getUrlEncoder().withoutPadding();

/**
 * Creates a k-ordered UUID generator backed by the given suppliers. All three suppliers are
 * forwarded unchanged to the superclass constructor.
 *
 * @param timestampSupplier  source of the current time; queried by {@link #getBase64UUID()}
 * @param sequenceIdSupplier supplier handed to the superclass for the sequence number
 * @param macAddressSupplier supplier handed to the superclass for the MAC address bytes
 */
public TimeBasedKOrderedUUIDGenerator(
final Supplier<Long> timestampSupplier,
final Supplier<Integer> sequenceIdSupplier,
final Supplier<byte[]> macAddressSupplier
) {
super(timestampSupplier, sequenceIdSupplier, macAddressSupplier);
}

@Override
public String getBase64UUID() {
final int sequenceId = this.sequenceNumber.incrementAndGet() & 0x00FF_FFFF;
final int sequenceId = sequenceNumber.incrementAndGet() & 0x00FF_FFFF;

// Calculate timestamp to ensure ordering and avoid backward movement in case of time shifts.
// Uses AtomicLong to guarantee that timestamp increases even if the system clock moves backward.
// If the sequenceId overflows (reaches 0 within the same millisecond), the timestamp is incremented
// to ensure strict ordering.
long timestamp = this.lastTimestamp.accumulateAndGet(
currentTimeMillis(),
timestampSupplier.get(),
sequenceId == 0 ? (lastTimestamp, currentTimeMillis) -> Math.max(lastTimestamp, currentTimeMillis) + 1 : Math::max
);

Expand Down Expand Up @@ -68,6 +78,6 @@ public String getBase64UUID() {

assert buffer.position() == uuidBytes.length;

return BASE_64_NO_PADDING.encodeToString(uuidBytes);
return BASE_64_NO_PADDING_URL_ENCODER.encodeToString(uuidBytes);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import java.util.Base64;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.Supplier;

/**
* These are essentially flake ids but we use 6 (not 8) bytes for timestamp, and use 3 (not 2) bytes for sequence number. We also reorder
Expand All @@ -20,36 +21,43 @@
* For more information about flake ids, check out
* https://archive.fo/2015.07.08-082503/http://www.boundary.com/blog/2012/01/flake-a-decentralized-k-ordered-unique-id-generator-in-erlang/
*/

class TimeBasedUUIDGenerator implements UUIDGenerator {

// We only use bottom 3 bytes for the sequence number. Paranoia: init with random int so that if JVM/OS/machine goes down, clock slips
// backwards, and JVM comes back up, we are less likely to be on the same sequenceNumber at the same time:
protected final AtomicInteger sequenceNumber = new AtomicInteger(SecureRandomHolder.INSTANCE.nextInt());
protected final AtomicInteger sequenceNumber;
protected final AtomicLong lastTimestamp;

// Used to ensure clock moves forward:
protected final AtomicLong lastTimestamp = new AtomicLong(0);
protected final Supplier<Long> timestampSupplier;

private static final byte[] SECURE_MUNGED_ADDRESS = MacAddressProvider.getSecureMungedAddress();

static {
assert SECURE_MUNGED_ADDRESS.length == 6;
}

private static final Base64.Encoder BASE_64_NO_PADDING = Base64.getUrlEncoder().withoutPadding();

// protected for testing
protected long currentTimeMillis() {
return System.currentTimeMillis();
static final int SIZE_IN_BYTES = 15;
private final byte[] macAddress;

TimeBasedUUIDGenerator(
final Supplier<Long> timestampSupplier,
final Supplier<Integer> sequenceIdSupplier,
final Supplier<byte[]> macAddressSupplier
) {
this.timestampSupplier = timestampSupplier;
// NOTE: getting the mac address every time using the supplier is expensive, hence we cache it.
this.macAddress = macAddressSupplier.get();
this.sequenceNumber = new AtomicInteger(sequenceIdSupplier.get());
// Used to ensure clock moves forward:
this.lastTimestamp = new AtomicLong(0);
}

// protected for testing
protected byte[] macAddress() {
return SECURE_MUNGED_ADDRESS;
return macAddress;
}

static final int SIZE_IN_BYTES = 15;

@Override
public String getBase64UUID() {
final int sequenceId = sequenceNumber.incrementAndGet() & 0xffffff;
Expand All @@ -58,7 +66,7 @@ public String getBase64UUID() {
// still vulnerable if we are shut down, clock goes backwards, and we restart... for this we
// randomize the sequenceNumber on init to decrease chance of collision:
long timestamp = this.lastTimestamp.accumulateAndGet(
currentTimeMillis(),
timestampSupplier.get(),
// Always force the clock to increment whenever sequence number is 0, in case we have a long
// time-slip backwards:
sequenceId == 0 ? (lastTimestamp, currentTimeMillis) -> Math.max(lastTimestamp, currentTimeMillis) + 1 : Math::max
Expand Down
24 changes: 20 additions & 4 deletions server/src/main/java/org/elasticsearch/common/UUIDs.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,29 @@
import org.elasticsearch.common.settings.SecureString;

import java.util.Random;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Supplier;

/**
* Utility class for generating various types of UUIDs.
*/
public class UUIDs {
private static final AtomicInteger sequenceNumber = new AtomicInteger(SecureRandomHolder.INSTANCE.nextInt());
public static final Supplier<Long> DEFAULT_TIMESTAMP_SUPPLIER = System::currentTimeMillis;
public static final Supplier<Integer> DEFAULT_SEQUENCE_ID_SUPPLIER = sequenceNumber::incrementAndGet;
public static final Supplier<byte[]> DEFAULT_MAC_ADDRESS_SUPPLIER = MacAddressProvider::getSecureMungedAddress;
private static final UUIDGenerator RANDOM_UUID_GENERATOR = new RandomBasedUUIDGenerator();
private static final UUIDGenerator TIME_BASED_K_ORDERED_GENERATOR = new TimeBasedKOrderedUUIDGenerator(
DEFAULT_TIMESTAMP_SUPPLIER,
DEFAULT_SEQUENCE_ID_SUPPLIER,
DEFAULT_MAC_ADDRESS_SUPPLIER
);

private static final RandomBasedUUIDGenerator RANDOM_UUID_GENERATOR = new RandomBasedUUIDGenerator();

private static final UUIDGenerator TIME_BASED_K_ORDERED_GENERATOR = new TimeBasedKOrderedUUIDGenerator();
private static final UUIDGenerator TIME_UUID_GENERATOR = new TimeBasedUUIDGenerator();
private static final UUIDGenerator TIME_UUID_GENERATOR = new TimeBasedUUIDGenerator(
DEFAULT_TIMESTAMP_SUPPLIER,
DEFAULT_SEQUENCE_ID_SUPPLIER,
DEFAULT_MAC_ADDRESS_SUPPLIER
);

/**
* The length of a UUID string generated by {@link #base64UUID}.
Expand Down
Loading

0 comments on commit d86b6f4

Please sign in to comment.